revlog: make the index always return the same tuple...
marmoute -
r47913:4292bed8 default
@@ -1,3038 +1,3031 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
26 26 #ifdef IS_PY3K
27 27 /* The mapping of Python types is meant to be temporary to get Python
28 28 * 3 to compile. We should remove this once Python 3 is fully
29 29 * supported and proper types are used in the extensions themselves. */
30 30 #define PyInt_Check PyLong_Check
31 31 #define PyInt_FromLong PyLong_FromLong
32 32 #define PyInt_FromSsize_t PyLong_FromSsize_t
33 33 #define PyInt_AsLong PyLong_AsLong
34 34 #endif
35 35
36 36 typedef struct indexObjectStruct indexObject;
37 37
38 38 typedef struct {
39 39 int children[16];
40 40 } nodetreenode;
41 41
42 42 typedef struct {
43 43 int abi_version;
44 44 Py_ssize_t (*index_length)(const indexObject *);
45 45 const char *(*index_node)(indexObject *, Py_ssize_t);
46 46 int (*index_parents)(PyObject *, int, int *);
47 47 } Revlog_CAPI;
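/*
 * Illustrative sketch (added for exposition; not part of the upstream file):
 * how another C consumer that obtained a Revlog_CAPI pointer might use the
 * function-pointer table instead of calling this file's static helpers
 * directly. The helper name is hypothetical.
 */
static int capi_tip_first_parent_example(Revlog_CAPI *capi, PyObject *index)
{
	int ps[2];
	Py_ssize_t len = capi->index_length((const indexObject *)index);
	if (len == 0)
		return -1; /* empty index: the tip is nullrev */
	if (capi->index_parents(index, (int)len - 1, ps) == -1)
		return -1; /* exception already set by the callee */
	return ps[0]; /* first parent of the tip revision */
}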
48 48
49 49 /*
50 50 * A base-16 trie for fast node->rev mapping.
51 51 *
52 52 * Positive value is index of the next node in the trie
53 53 * Negative value is a leaf: -(rev + 2)
54 54 * Zero is empty
55 55 */
56 56 typedef struct {
57 57 indexObject *index;
58 58 nodetreenode *nodes;
59 59 Py_ssize_t nodelen;
60 60 size_t length; /* # nodes in use */
61 61 size_t capacity; /* # nodes allocated */
62 62 int depth; /* maximum depth of tree */
63 63 int splits; /* # splits performed */
64 64 } nodetree;
65 65
66 66 typedef struct {
67 67 PyObject_HEAD /* ; */
68 68 nodetree nt;
69 69 } nodetreeObject;
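/*
 * Illustrative sketch (added for exposition; not part of the upstream file):
 * how a leaf value in nodetreenode.children encodes a revision. Rev 0 is
 * stored as -(0 + 2) == -2 and rev 5 as -7; a stored value v < 0 decodes
 * back via -(v + 2), and 0 always means "empty slot". nt_insert() and
 * nt_find() later in this file use exactly this mapping.
 */
static inline int nt_encode_leaf_example(int rev)
{
	return -rev - 2; /* leaf marker for `rev` */
}

static inline int nt_decode_leaf_example(int v)
{
	return -(v + 2); /* recover the revision from a negative child slot */
}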
70 70
71 71 /*
72 72 * This class has two behaviors.
73 73 *
74 74 * When used in a list-like way (with integer keys), we decode an
75 75 * entry in a RevlogNG index file on demand. We have limited support for
76 76 * integer-keyed insert and delete, only at elements right before the
77 77 * end.
78 78 *
79 79 * With string keys, we lazily perform a reverse mapping from node to
80 80 * rev, using a base-16 trie.
81 81 */
82 82 struct indexObjectStruct {
83 83 PyObject_HEAD
84 84 /* Type-specific fields go here. */
85 85 PyObject *data; /* raw bytes of index */
86 86 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
87 87 PyObject *nullentry; /* fast path for references to null */
88 88 Py_buffer buf; /* buffer of data */
89 89 const char **offsets; /* populated on demand */
90 90 Py_ssize_t length; /* current on-disk number of elements */
91 91 unsigned new_length; /* number of added elements */
92 92 unsigned added_length; /* space reserved for added elements */
93 93 char *added; /* populated on demand */
94 94 PyObject *headrevs; /* cache, invalidated on changes */
95 95 PyObject *filteredrevs; /* filtered revs set */
96 96 nodetree nt; /* base-16 trie */
97 97 int ntinitialized; /* 0 or 1 */
98 98 int ntrev; /* last rev scanned */
99 99 int ntlookups; /* # lookups */
100 100 int ntmisses; /* # lookups that miss the cache */
101 101 int inlined;
102 102 long entry_size; /* size of index headers. Differs in v1 vs. v2 format
103 103 */
104 104 char format_version; /* format version of the revlog index, v1 or
105 105 v2 */
106 106 };
107 107
108 108 static Py_ssize_t index_length(const indexObject *self)
109 109 {
110 110 return self->length + self->new_length;
111 111 }
112 112
113 113 static const char nullid[32] = {0};
114 114 static const Py_ssize_t nullrev = -1;
115 115
116 116 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
117 117
118 118 static int index_find_node(indexObject *self, const char *node);
119 119
120 120 #if LONG_MAX == 0x7fffffffL
121 121 static const char *const v1_tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
122 122 static const char *const v2_tuple_format = PY23("Kiiiiiis#Ki", "Kiiiiiiy#Ki");
123 123 #else
124 124 static const char *const v1_tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
125 125 static const char *const v2_tuple_format = PY23("kiiiiiis#ki", "kiiiiiiy#ki");
126 126 #endif
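/*
 * Format-string legend (added for exposition; not part of the upstream
 * file): "K"/"k" pack the 64-bit offset_flags as unsigned long long /
 * unsigned long, which is why the choice depends on LONG_MAX above; the six
 * "i" fields are comp_len, uncomp_len, base_rev, link_rev, parent_1 and
 * parent_2; "s#" ("y#" under Python 3) is the node id pointer plus its
 * length. The v2 variants append "Ki"/"ki" for sidedata_offset and
 * sidedata_comp_len.
 */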
127 127
128 128 /* A RevlogNG v1 index entry is 64 bytes long. */
129 129 static const long v1_entry_size = 64;
130 130
131 131 /* A Revlogv2 index entry is 96 bytes long. */
132 132 static const long v2_entry_size = 96;
133 133
134 134 static const long format_v1 = 1; /* Internal only, could be any number */
135 135 static const long format_v2 = 2; /* Internal only, could be any number */
136 136
137 137 static void raise_revlog_error(void)
138 138 {
139 139 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
140 140
141 141 mod = PyImport_ImportModule("mercurial.error");
142 142 if (mod == NULL) {
143 143 goto cleanup;
144 144 }
145 145
146 146 dict = PyModule_GetDict(mod);
147 147 if (dict == NULL) {
148 148 goto cleanup;
149 149 }
150 150 Py_INCREF(dict);
151 151
152 152 errclass = PyDict_GetItemString(dict, "RevlogError");
153 153 if (errclass == NULL) {
154 154 PyErr_SetString(PyExc_SystemError,
155 155 "could not find RevlogError");
156 156 goto cleanup;
157 157 }
158 158
159 159 /* value of exception is ignored by callers */
160 160 PyErr_SetString(errclass, "RevlogError");
161 161
162 162 cleanup:
163 163 Py_XDECREF(dict);
164 164 Py_XDECREF(mod);
165 165 }
166 166
167 167 /*
168 168 * Return a pointer to the beginning of a RevlogNG record.
169 169 */
170 170 static const char *index_deref(indexObject *self, Py_ssize_t pos)
171 171 {
172 172 if (pos >= self->length)
173 173 return self->added + (pos - self->length) * self->entry_size;
174 174
175 175 if (self->inlined && pos > 0) {
176 176 if (self->offsets == NULL) {
177 177 Py_ssize_t ret;
178 178 self->offsets =
179 179 PyMem_Malloc(self->length * sizeof(*self->offsets));
180 180 if (self->offsets == NULL)
181 181 return (const char *)PyErr_NoMemory();
182 182 ret = inline_scan(self, self->offsets);
183 183 if (ret == -1) {
184 184 return NULL;
185 185 };
186 186 }
187 187 return self->offsets[pos];
188 188 }
189 189
190 190 return (const char *)(self->buf.buf) + pos * self->entry_size;
191 191 }
192 192
193 193 /*
194 194 * Get parents of the given rev.
195 195 *
196 196 * The specified rev must be valid and must not be nullrev. A returned
197 197 * parent revision may be nullrev, but is guaranteed to be in valid range.
198 198 */
199 199 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
200 200 int maxrev)
201 201 {
202 202 const char *data = index_deref(self, rev);
203 203
204 204 ps[0] = getbe32(data + 24);
205 205 ps[1] = getbe32(data + 28);
206 206
207 207 /* If the index file is corrupted, ps[] may point to invalid revisions,
208 208 * so trusting them unconditionally risks a buffer overflow. */
209 209 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
210 210 PyErr_SetString(PyExc_ValueError, "parent out of range");
211 211 return -1;
212 212 }
213 213 return 0;
214 214 }
215 215
216 216 /*
217 217 * Get parents of the given rev.
218 218 *
219 219 * If the specified rev is out of range, IndexError will be raised. If the
220 220 * revlog entry is corrupted, ValueError may be raised.
221 221 *
222 222 * Returns 0 on success or -1 on failure.
223 223 */
224 224 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
225 225 {
226 226 int tiprev;
227 227 if (!op || !HgRevlogIndex_Check(op) || !ps) {
228 228 PyErr_BadInternalCall();
229 229 return -1;
230 230 }
231 231 tiprev = (int)index_length((indexObject *)op) - 1;
232 232 if (rev < -1 || rev > tiprev) {
233 233 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
234 234 return -1;
235 235 } else if (rev == -1) {
236 236 ps[0] = ps[1] = -1;
237 237 return 0;
238 238 } else {
239 239 return index_get_parents((indexObject *)op, rev, ps, tiprev);
240 240 }
241 241 }
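/*
 * Hypothetical usage sketch (added for exposition; not part of the upstream
 * file): how a C caller is expected to drive HgRevlogIndex_GetParents().
 */
static int get_parents_example(PyObject *index, int rev)
{
	int ps[2];
	if (HgRevlogIndex_GetParents(index, rev, ps) == -1)
		return -1; /* IndexError/ValueError already set */
	/* ps[0] and ps[1] now hold parent revisions; -1 means nullrev */
	return 0;
}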
242 242
243 243 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
244 244 {
245 245 const char *data;
246 246 uint64_t offset;
247 247
248 248 if (rev == nullrev)
249 249 return 0;
250 250
251 251 data = index_deref(self, rev);
252 252 offset = getbe32(data + 4);
253 253 if (rev == 0) {
254 254 /* mask out version number for the first entry */
255 255 offset &= 0xFFFF;
256 256 } else {
257 257 uint32_t offset_high = getbe32(data);
258 258 offset |= ((uint64_t)offset_high) << 32;
259 259 }
260 260 return (int64_t)(offset >> 16);
261 261 }
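/*
 * Illustrative sketch (added for exposition; not part of the upstream
 * file): the first 8 bytes of an entry form a 64-bit "offset_flags" value
 * whose upper 48 bits are the data-file offset and whose lower 16 bits are
 * the flags. For the first entry those low 16 bits hold the revlog version
 * instead, which is why rev 0 is masked above.
 */
static inline uint64_t pack_offset_flags_example(uint64_t offset,
                                                 unsigned flags)
{
	return (offset << 16) | (flags & 0xFFFF); /* inverse of offset >> 16 */
}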
262 262
263 263 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
264 264 {
265 265 const char *data;
266 266 int tmp;
267 267
268 268 if (rev == nullrev)
269 269 return 0;
270 270
271 271 data = index_deref(self, rev);
272 272
273 273 tmp = (int)getbe32(data + 8);
274 274 if (tmp < 0) {
275 275 PyErr_Format(PyExc_OverflowError,
276 276 "revlog entry size out of bound (%d)", tmp);
277 277 return -1;
278 278 }
279 279 return tmp;
280 280 }
281 281
282 282 /*
283 283 * RevlogNG format (all in big endian, data may be inlined):
284 284 * 6 bytes: offset
285 285 * 2 bytes: flags
286 286 * 4 bytes: compressed length
287 287 * 4 bytes: uncompressed length
288 288 * 4 bytes: base revision
289 289 * 4 bytes: link revision
290 290 * 4 bytes: parent 1 revision
291 291 * 4 bytes: parent 2 revision
292 292 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
293 293 */
294 294 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
295 295 {
296 296 uint64_t offset_flags, sidedata_offset;
297 297 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
298 298 sidedata_comp_len;
299 299 const char *c_node_id;
300 300 const char *data;
301 301 Py_ssize_t length = index_length(self);
302 302
303 303 if (pos == nullrev) {
304 304 Py_INCREF(self->nullentry);
305 305 return self->nullentry;
306 306 }
307 307
308 308 if (pos < 0 || pos >= length) {
309 309 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
310 310 return NULL;
311 311 }
312 312
313 313 data = index_deref(self, pos);
314 314 if (data == NULL)
315 315 return NULL;
316 316
317 317 offset_flags = getbe32(data + 4);
318 318 /*
319 319 * The first entry on-disk needs the version number masked out,
320 320 * but this doesn't apply if entries are added to an empty index.
321 321 */
322 322 if (self->length && pos == 0)
323 323 offset_flags &= 0xFFFF;
324 324 else {
325 325 uint32_t offset_high = getbe32(data);
326 326 offset_flags |= ((uint64_t)offset_high) << 32;
327 327 }
328 328
329 329 comp_len = getbe32(data + 8);
330 330 uncomp_len = getbe32(data + 12);
331 331 base_rev = getbe32(data + 16);
332 332 link_rev = getbe32(data + 20);
333 333 parent_1 = getbe32(data + 24);
334 334 parent_2 = getbe32(data + 28);
335 335 c_node_id = data + 32;
336 336
337 337 if (self->entry_size == v1_entry_size) {
338 return Py_BuildValue(v1_tuple_format, offset_flags, comp_len,
339 uncomp_len, base_rev, link_rev, parent_1,
340 parent_2, c_node_id, self->nodelen);
338 sidedata_offset = 0;
339 sidedata_comp_len = 0;
341 340 } else {
342 341 sidedata_offset = getbe64(data + 64);
343 342 sidedata_comp_len = getbe32(data + 72);
344
345 return Py_BuildValue(v2_tuple_format, offset_flags, comp_len,
346 uncomp_len, base_rev, link_rev, parent_1,
347 parent_2, c_node_id, self->nodelen,
348 sidedata_offset, sidedata_comp_len);
349 343 }
344
345 return Py_BuildValue(v2_tuple_format, offset_flags, comp_len,
346 uncomp_len, base_rev, link_rev, parent_1, parent_2,
347 c_node_id, self->nodelen, sidedata_offset,
348 sidedata_comp_len);
350 349 }
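/*
 * Note (added for exposition): with the code above, index_get() builds the
 * same 10-item tuple for both formats; for v1 entries the trailing
 * sidedata_offset and sidedata_comp_len fields are simply zero.
 */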
351 350 /*
352 351 * Pack header information in binary
353 352 */
354 353 static PyObject *index_pack_header(indexObject *self, PyObject *args)
355 354 {
356 355 int header;
357 356 char out[4];
358 357 if (!PyArg_ParseTuple(args, "I", &header)) {
359 358 return NULL;
360 359 }
361 360 putbe32(header, out);
362 361 return PyBytes_FromStringAndSize(out, 4);
363 362 }
364 363 /*
365 364 * Return the raw binary string representing a revision
366 365 */
367 366 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
368 367 {
369 368 long rev;
370 369 const char *data;
371 370 Py_ssize_t length = index_length(self);
372 371
373 372 if (!pylong_to_long(value, &rev)) {
374 373 return NULL;
375 374 }
376 375 if (rev < 0 || rev >= length) {
377 376 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
378 377 rev);
379 378 return NULL;
380 379 };
381 380
382 381 data = index_deref(self, rev);
383 382 if (data == NULL)
384 383 return NULL;
385 384 if (rev == 0) {
386 385 /* the header overlaps the start of the first entry */
387 386 return PyBytes_FromStringAndSize(data + 4,
388 387 self->entry_size - 4);
389 388 }
390 389 return PyBytes_FromStringAndSize(data, self->entry_size);
391 390 }
392 391
393 392 /*
394 393 * Return the hash of node corresponding to the given rev.
395 394 */
396 395 static const char *index_node(indexObject *self, Py_ssize_t pos)
397 396 {
398 397 Py_ssize_t length = index_length(self);
399 398 const char *data;
400 399
401 400 if (pos == nullrev)
402 401 return nullid;
403 402
404 403 if (pos >= length)
405 404 return NULL;
406 405
407 406 data = index_deref(self, pos);
408 407 return data ? data + 32 : NULL;
409 408 }
410 409
411 410 /*
412 411 * Return the hash of the node corresponding to the given rev. The
413 412 * rev is assumed to exist. If not, an exception is set.
414 413 */
415 414 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
416 415 {
417 416 const char *node = index_node(self, pos);
418 417 if (node == NULL) {
419 418 PyErr_Format(PyExc_IndexError, "could not access rev %d",
420 419 (int)pos);
421 420 }
422 421 return node;
423 422 }
424 423
425 424 static int nt_insert(nodetree *self, const char *node, int rev);
426 425
427 426 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
428 427 {
429 428 Py_ssize_t thisnodelen;
430 429 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
431 430 return -1;
432 431 if (nodelen == thisnodelen)
433 432 return 0;
434 433 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
435 434 thisnodelen, nodelen);
436 435 return -1;
437 436 }
438 437
439 438 static PyObject *index_append(indexObject *self, PyObject *obj)
440 439 {
441 440 uint64_t offset_flags, sidedata_offset;
442 441 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
443 442 Py_ssize_t c_node_id_len, sidedata_comp_len;
444 443 const char *c_node_id;
445 444 char *data;
446 445
447 446 if (self->entry_size == v1_entry_size) {
448 447 if (!PyArg_ParseTuple(obj, v1_tuple_format, &offset_flags,
449 448 &comp_len, &uncomp_len, &base_rev,
450 449 &link_rev, &parent_1, &parent_2,
451 450 &c_node_id, &c_node_id_len)) {
452 451 PyErr_SetString(PyExc_TypeError, "8-tuple required");
453 452 return NULL;
454 453 }
455 454 } else {
456 455 if (!PyArg_ParseTuple(obj, v2_tuple_format, &offset_flags,
457 456 &comp_len, &uncomp_len, &base_rev,
458 457 &link_rev, &parent_1, &parent_2,
459 458 &c_node_id, &c_node_id_len,
460 459 &sidedata_offset, &sidedata_comp_len)) {
461 460 PyErr_SetString(PyExc_TypeError, "10-tuple required");
462 461 return NULL;
463 462 }
464 463 }
465 464
466 465 if (c_node_id_len != self->nodelen) {
467 466 PyErr_SetString(PyExc_TypeError, "invalid node");
468 467 return NULL;
469 468 }
470 469
471 470 if (self->new_length == self->added_length) {
472 471 size_t new_added_length =
473 472 self->added_length ? self->added_length * 2 : 4096;
474 473 void *new_added = PyMem_Realloc(
475 474 self->added, new_added_length * self->entry_size);
476 475 if (!new_added)
477 476 return PyErr_NoMemory();
478 477 self->added = new_added;
479 478 self->added_length = new_added_length;
480 479 }
481 480 rev = self->length + self->new_length;
482 481 data = self->added + self->entry_size * self->new_length++;
483 482 putbe32(offset_flags >> 32, data);
484 483 putbe32(offset_flags & 0xffffffffU, data + 4);
485 484 putbe32(comp_len, data + 8);
486 485 putbe32(uncomp_len, data + 12);
487 486 putbe32(base_rev, data + 16);
488 487 putbe32(link_rev, data + 20);
489 488 putbe32(parent_1, data + 24);
490 489 putbe32(parent_2, data + 28);
491 490 memcpy(data + 32, c_node_id, c_node_id_len);
492 491 /* Padding since SHA-1 is only 20 bytes for now */
493 492 memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
494 493 if (self->entry_size != v1_entry_size) {
495 494 putbe64(sidedata_offset, data + 64);
496 495 putbe32(sidedata_comp_len, data + 72);
497 496 /* Padding for 96-byte alignment */
498 497 memset(data + 76, 0, self->entry_size - 76);
499 498 }
500 499
501 500 if (self->ntinitialized)
502 501 nt_insert(&self->nt, c_node_id, rev);
503 502
504 503 Py_CLEAR(self->headrevs);
505 504 Py_RETURN_NONE;
506 505 }
507 506
508 507 /* Replace an existing index entry's sidedata offset and length with new ones.
509 508 This cannot be used outside of the context of sidedata rewriting,
510 509 inside the transaction that creates the given revision. */
511 510 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
512 511 {
513 512 uint64_t offset_flags, sidedata_offset;
514 513 int rev;
515 514 Py_ssize_t sidedata_comp_len;
516 515 char *data;
517 516 #if LONG_MAX == 0x7fffffffL
518 517 const char *const sidedata_format = PY23("nKiK", "nKiK");
519 518 #else
520 519 const char *const sidedata_format = PY23("nkik", "nkik");
521 520 #endif
522 521
523 522 if (self->entry_size == v1_entry_size || self->inlined) {
524 523 /*
525 524 There is a bug in the transaction handling when going from an
526 525 inline revlog to a separate index and data file. Turn it off until
527 526 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
528 527 See issue6485.
529 528 */
530 529 raise_revlog_error();
531 530 return NULL;
532 531 }
533 532
534 533 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
535 534 &sidedata_comp_len, &offset_flags))
536 535 return NULL;
537 536
538 537 if (rev < 0 || rev >= index_length(self)) {
539 538 PyErr_SetString(PyExc_IndexError, "revision outside index");
540 539 return NULL;
541 540 }
542 541 if (rev < self->length) {
543 542 PyErr_SetString(
544 543 PyExc_IndexError,
545 544 "cannot rewrite entries outside of this transaction");
546 545 return NULL;
547 546 }
548 547
549 548 /* Find the newly added node, offset from the "already on-disk" length
550 549 */
551 550 data = self->added + self->entry_size * (rev - self->length);
552 551 putbe64(offset_flags, data);
553 552 putbe64(sidedata_offset, data + 64);
554 553 putbe32(sidedata_comp_len, data + 72);
555 554
556 555 Py_RETURN_NONE;
557 556 }
558 557
559 558 static PyObject *index_stats(indexObject *self)
560 559 {
561 560 PyObject *obj = PyDict_New();
562 561 PyObject *s = NULL;
563 562 PyObject *t = NULL;
564 563
565 564 if (obj == NULL)
566 565 return NULL;
567 566
568 567 #define istat(__n, __d) \
569 568 do { \
570 569 s = PyBytes_FromString(__d); \
571 570 t = PyInt_FromSsize_t(self->__n); \
572 571 if (!s || !t) \
573 572 goto bail; \
574 573 if (PyDict_SetItem(obj, s, t) == -1) \
575 574 goto bail; \
576 575 Py_CLEAR(s); \
577 576 Py_CLEAR(t); \
578 577 } while (0)
579 578
580 579 if (self->added_length)
581 580 istat(new_length, "index entries added");
582 581 istat(length, "revs in memory");
583 582 istat(ntlookups, "node trie lookups");
584 583 istat(ntmisses, "node trie misses");
585 584 istat(ntrev, "node trie last rev scanned");
586 585 if (self->ntinitialized) {
587 586 istat(nt.capacity, "node trie capacity");
588 587 istat(nt.depth, "node trie depth");
589 588 istat(nt.length, "node trie count");
590 589 istat(nt.splits, "node trie splits");
591 590 }
592 591
593 592 #undef istat
594 593
595 594 return obj;
596 595
597 596 bail:
598 597 Py_XDECREF(obj);
599 598 Py_XDECREF(s);
600 599 Py_XDECREF(t);
601 600 return NULL;
602 601 }
603 602
604 603 /*
605 604 * When we cache a list, we want to be sure the caller can't mutate
606 605 * the cached copy.
607 606 */
608 607 static PyObject *list_copy(PyObject *list)
609 608 {
610 609 Py_ssize_t len = PyList_GET_SIZE(list);
611 610 PyObject *newlist = PyList_New(len);
612 611 Py_ssize_t i;
613 612
614 613 if (newlist == NULL)
615 614 return NULL;
616 615
617 616 for (i = 0; i < len; i++) {
618 617 PyObject *obj = PyList_GET_ITEM(list, i);
619 618 Py_INCREF(obj);
620 619 PyList_SET_ITEM(newlist, i, obj);
621 620 }
622 621
623 622 return newlist;
624 623 }
625 624
626 625 static int check_filter(PyObject *filter, Py_ssize_t arg)
627 626 {
628 627 if (filter) {
629 628 PyObject *arglist, *result;
630 629 int isfiltered;
631 630
632 631 arglist = Py_BuildValue("(n)", arg);
633 632 if (!arglist) {
634 633 return -1;
635 634 }
636 635
637 636 result = PyObject_Call(filter, arglist, NULL);
638 637 Py_DECREF(arglist);
639 638 if (!result) {
640 639 return -1;
641 640 }
642 641
643 642 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
644 643 * same as this function, so we can just return it directly. */
645 644 isfiltered = PyObject_IsTrue(result);
646 645 Py_DECREF(result);
647 646 return isfiltered;
648 647 } else {
649 648 return 0;
650 649 }
651 650 }
652 651
653 652 static inline void set_phase_from_parents(char *phases, int parent_1,
654 653 int parent_2, Py_ssize_t i)
655 654 {
656 655 if (parent_1 >= 0 && phases[parent_1] > phases[i])
657 656 phases[i] = phases[parent_1];
658 657 if (parent_2 >= 0 && phases[parent_2] > phases[i])
659 658 phases[i] = phases[parent_2];
660 659 }
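/*
 * Worked example (added for exposition): with public=0, draft=1 and
 * secret=2, a revision whose parents are draft (1) and secret (2) ends up
 * secret (2): each parent can only raise the child's phase, never lower it.
 */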
661 660
662 661 static PyObject *reachableroots2(indexObject *self, PyObject *args)
663 662 {
664 663
665 664 /* Input */
666 665 long minroot;
667 666 PyObject *includepatharg = NULL;
668 667 int includepath = 0;
669 668 /* heads and roots are lists */
670 669 PyObject *heads = NULL;
671 670 PyObject *roots = NULL;
672 671 PyObject *reachable = NULL;
673 672
674 673 PyObject *val;
675 674 Py_ssize_t len = index_length(self);
676 675 long revnum;
677 676 Py_ssize_t k;
678 677 Py_ssize_t i;
679 678 Py_ssize_t l;
680 679 int r;
681 680 int parents[2];
682 681
683 682 /* Internal data structure:
684 683 * tovisit: array of length len+1 (all revs + nullrev), filled up to
685 684 * lentovisit
686 685 *
687 686 * revstates: array of length len+1 (all revs + nullrev) */
688 687 int *tovisit = NULL;
689 688 long lentovisit = 0;
690 689 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
691 690 char *revstates = NULL;
692 691
693 692 /* Get arguments */
694 693 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
695 694 &PyList_Type, &roots, &PyBool_Type,
696 695 &includepatharg))
697 696 goto bail;
698 697
699 698 if (includepatharg == Py_True)
700 699 includepath = 1;
701 700
702 701 /* Initialize return set */
703 702 reachable = PyList_New(0);
704 703 if (reachable == NULL)
705 704 goto bail;
706 705
707 706 /* Initialize internal datastructures */
708 707 tovisit = (int *)malloc((len + 1) * sizeof(int));
709 708 if (tovisit == NULL) {
710 709 PyErr_NoMemory();
711 710 goto bail;
712 711 }
713 712
714 713 revstates = (char *)calloc(len + 1, 1);
715 714 if (revstates == NULL) {
716 715 PyErr_NoMemory();
717 716 goto bail;
718 717 }
719 718
720 719 l = PyList_GET_SIZE(roots);
721 720 for (i = 0; i < l; i++) {
722 721 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
723 722 if (revnum == -1 && PyErr_Occurred())
724 723 goto bail;
725 724 /* If root is out of range, e.g. wdir(), it must be unreachable
726 725 * from heads. So we can just ignore it. */
727 726 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
728 727 continue;
729 728 revstates[revnum + 1] |= RS_ROOT;
730 729 }
731 730
732 731 /* Populate tovisit with all the heads */
733 732 l = PyList_GET_SIZE(heads);
734 733 for (i = 0; i < l; i++) {
735 734 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
736 735 if (revnum == -1 && PyErr_Occurred())
737 736 goto bail;
738 737 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
739 738 PyErr_SetString(PyExc_IndexError, "head out of range");
740 739 goto bail;
741 740 }
742 741 if (!(revstates[revnum + 1] & RS_SEEN)) {
743 742 tovisit[lentovisit++] = (int)revnum;
744 743 revstates[revnum + 1] |= RS_SEEN;
745 744 }
746 745 }
747 746
748 747 /* Visit the tovisit list and find the reachable roots */
749 748 k = 0;
750 749 while (k < lentovisit) {
752 751 /* Add the node to reachable if it is a root */
752 751 revnum = tovisit[k++];
753 752 if (revstates[revnum + 1] & RS_ROOT) {
754 753 revstates[revnum + 1] |= RS_REACHABLE;
755 754 val = PyInt_FromLong(revnum);
756 755 if (val == NULL)
757 756 goto bail;
758 757 r = PyList_Append(reachable, val);
759 758 Py_DECREF(val);
760 759 if (r < 0)
761 760 goto bail;
762 761 if (includepath == 0)
763 762 continue;
764 763 }
765 764
766 765 /* Add its parents to the list of nodes to visit */
767 766 if (revnum == nullrev)
768 767 continue;
769 768 r = index_get_parents(self, revnum, parents, (int)len - 1);
770 769 if (r < 0)
771 770 goto bail;
772 771 for (i = 0; i < 2; i++) {
773 772 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
774 773 parents[i] >= minroot) {
775 774 tovisit[lentovisit++] = parents[i];
776 775 revstates[parents[i] + 1] |= RS_SEEN;
777 776 }
778 777 }
779 778 }
780 779
781 780 /* Find all the nodes in between the roots we found and the heads
782 781 * and add them to the reachable set */
783 782 if (includepath == 1) {
784 783 long minidx = minroot;
785 784 if (minidx < 0)
786 785 minidx = 0;
787 786 for (i = minidx; i < len; i++) {
788 787 if (!(revstates[i + 1] & RS_SEEN))
789 788 continue;
790 789 r = index_get_parents(self, i, parents, (int)len - 1);
791 790 /* Corrupted index file, error is set from
792 791 * index_get_parents */
793 792 if (r < 0)
794 793 goto bail;
795 794 if (((revstates[parents[0] + 1] |
796 795 revstates[parents[1] + 1]) &
797 796 RS_REACHABLE) &&
798 797 !(revstates[i + 1] & RS_REACHABLE)) {
799 798 revstates[i + 1] |= RS_REACHABLE;
800 799 val = PyInt_FromSsize_t(i);
801 800 if (val == NULL)
802 801 goto bail;
803 802 r = PyList_Append(reachable, val);
804 803 Py_DECREF(val);
805 804 if (r < 0)
806 805 goto bail;
807 806 }
808 807 }
809 808 }
810 809
811 810 free(revstates);
812 811 free(tovisit);
813 812 return reachable;
814 813 bail:
815 814 Py_XDECREF(reachable);
816 815 free(revstates);
817 816 free(tovisit);
818 817 return NULL;
819 818 }
820 819
821 820 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
822 821 char phase)
823 822 {
824 823 Py_ssize_t len = index_length(self);
825 824 PyObject *item;
826 825 PyObject *iterator;
827 826 int rev, minrev = -1;
828 827 char *node;
829 828
830 829 if (!PySet_Check(roots)) {
831 830 PyErr_SetString(PyExc_TypeError,
832 831 "roots must be a set of nodes");
833 832 return -2;
834 833 }
835 834 iterator = PyObject_GetIter(roots);
836 835 if (iterator == NULL)
837 836 return -2;
838 837 while ((item = PyIter_Next(iterator))) {
839 838 if (node_check(self->nodelen, item, &node) == -1)
840 839 goto failed;
841 840 rev = index_find_node(self, node);
842 841 /* null is implicitly public, so negative is invalid */
843 842 if (rev < 0 || rev >= len)
844 843 goto failed;
845 844 phases[rev] = phase;
846 845 if (minrev == -1 || minrev > rev)
847 846 minrev = rev;
848 847 Py_DECREF(item);
849 848 }
850 849 Py_DECREF(iterator);
851 850 return minrev;
852 851 failed:
853 852 Py_DECREF(iterator);
854 853 Py_DECREF(item);
855 854 return -2;
856 855 }
857 856
858 857 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
859 858 {
860 859 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
861 860 96: internal */
862 861 static const char trackedphases[] = {1, 2, 32, 96};
863 862 PyObject *roots = Py_None;
864 863 PyObject *phasesetsdict = NULL;
865 864 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
866 865 Py_ssize_t len = index_length(self);
867 866 char *phases = NULL;
868 867 int minphaserev = -1, rev, i;
869 868 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
870 869
871 870 if (!PyArg_ParseTuple(args, "O", &roots))
872 871 return NULL;
873 872 if (roots == NULL || !PyDict_Check(roots)) {
874 873 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
875 874 return NULL;
876 875 }
877 876
878 877 phases = calloc(len, 1);
879 878 if (phases == NULL) {
880 879 PyErr_NoMemory();
881 880 return NULL;
882 881 }
883 882
884 883 for (i = 0; i < numphases; ++i) {
885 884 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
886 885 PyObject *phaseroots = NULL;
887 886 if (pyphase == NULL)
888 887 goto release;
889 888 phaseroots = PyDict_GetItem(roots, pyphase);
890 889 Py_DECREF(pyphase);
891 890 if (phaseroots == NULL)
892 891 continue;
893 892 rev = add_roots_get_min(self, phaseroots, phases,
894 893 trackedphases[i]);
895 894 if (rev == -2)
896 895 goto release;
897 896 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
898 897 minphaserev = rev;
899 898 }
900 899
901 900 for (i = 0; i < numphases; ++i) {
902 901 phasesets[i] = PySet_New(NULL);
903 902 if (phasesets[i] == NULL)
904 903 goto release;
905 904 }
906 905
907 906 if (minphaserev == -1)
908 907 minphaserev = len;
909 908 for (rev = minphaserev; rev < len; ++rev) {
910 909 PyObject *pyphase = NULL;
911 910 PyObject *pyrev = NULL;
912 911 int parents[2];
913 912 /*
914 913 * The parent lookup could be skipped for phaseroots, but
915 914 * phase --force would historically not recompute them
916 915 * correctly, leaving descendants with a lower phase around.
917 916 * As such, unconditionally recompute the phase.
918 917 */
919 918 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
920 919 goto release;
921 920 set_phase_from_parents(phases, parents[0], parents[1], rev);
922 921 switch (phases[rev]) {
923 922 case 0:
924 923 continue;
925 924 case 1:
926 925 pyphase = phasesets[0];
927 926 break;
928 927 case 2:
929 928 pyphase = phasesets[1];
930 929 break;
931 930 case 32:
932 931 pyphase = phasesets[2];
933 932 break;
934 933 case 96:
935 934 pyphase = phasesets[3];
936 935 break;
937 936 default:
938 937 /* this should never happen since the phase number is
939 938 * specified by this function. */
940 939 PyErr_SetString(PyExc_SystemError,
941 940 "bad phase number in internal list");
942 941 goto release;
943 942 }
944 943 pyrev = PyInt_FromLong(rev);
945 944 if (pyrev == NULL)
946 945 goto release;
947 946 if (PySet_Add(pyphase, pyrev) == -1) {
948 947 Py_DECREF(pyrev);
949 948 goto release;
950 949 }
951 950 Py_DECREF(pyrev);
952 951 }
953 952
954 953 phasesetsdict = _dict_new_presized(numphases);
955 954 if (phasesetsdict == NULL)
956 955 goto release;
957 956 for (i = 0; i < numphases; ++i) {
958 957 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
959 958 if (pyphase == NULL)
960 959 goto release;
961 960 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
962 961 -1) {
963 962 Py_DECREF(pyphase);
964 963 goto release;
965 964 }
966 965 Py_DECREF(phasesets[i]);
967 966 phasesets[i] = NULL;
968 967 }
969 968
970 969 return Py_BuildValue("nN", len, phasesetsdict);
971 970
972 971 release:
973 972 for (i = 0; i < numphases; ++i)
974 973 Py_XDECREF(phasesets[i]);
975 974 Py_XDECREF(phasesetsdict);
976 975
977 976 free(phases);
978 977 return NULL;
979 978 }
980 979
981 980 static PyObject *index_headrevs(indexObject *self, PyObject *args)
982 981 {
983 982 Py_ssize_t i, j, len;
984 983 char *nothead = NULL;
985 984 PyObject *heads = NULL;
986 985 PyObject *filter = NULL;
987 986 PyObject *filteredrevs = Py_None;
988 987
989 988 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
990 989 return NULL;
991 990 }
992 991
993 992 if (self->headrevs && filteredrevs == self->filteredrevs)
994 993 return list_copy(self->headrevs);
995 994
996 995 Py_DECREF(self->filteredrevs);
997 996 self->filteredrevs = filteredrevs;
998 997 Py_INCREF(filteredrevs);
999 998
1000 999 if (filteredrevs != Py_None) {
1001 1000 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
1002 1001 if (!filter) {
1003 1002 PyErr_SetString(
1004 1003 PyExc_TypeError,
1005 1004 "filteredrevs has no attribute __contains__");
1006 1005 goto bail;
1007 1006 }
1008 1007 }
1009 1008
1010 1009 len = index_length(self);
1011 1010 heads = PyList_New(0);
1012 1011 if (heads == NULL)
1013 1012 goto bail;
1014 1013 if (len == 0) {
1015 1014 PyObject *nullid = PyInt_FromLong(-1);
1016 1015 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1017 1016 Py_XDECREF(nullid);
1018 1017 goto bail;
1019 1018 }
1020 1019 goto done;
1021 1020 }
1022 1021
1023 1022 nothead = calloc(len, 1);
1024 1023 if (nothead == NULL) {
1025 1024 PyErr_NoMemory();
1026 1025 goto bail;
1027 1026 }
1028 1027
1029 1028 for (i = len - 1; i >= 0; i--) {
1030 1029 int isfiltered;
1031 1030 int parents[2];
1032 1031
1033 1032 /* If nothead[i] == 1, it means we've seen an unfiltered child
1034 1033 * of this node already, and therefore this node is not
1035 1034 * filtered. So we can skip the expensive check_filter step.
1036 1035 */
1037 1036 if (nothead[i] != 1) {
1038 1037 isfiltered = check_filter(filter, i);
1039 1038 if (isfiltered == -1) {
1040 1039 PyErr_SetString(PyExc_TypeError,
1041 1040 "unable to check filter");
1042 1041 goto bail;
1043 1042 }
1044 1043
1045 1044 if (isfiltered) {
1046 1045 nothead[i] = 1;
1047 1046 continue;
1048 1047 }
1049 1048 }
1050 1049
1051 1050 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1052 1051 goto bail;
1053 1052 for (j = 0; j < 2; j++) {
1054 1053 if (parents[j] >= 0)
1055 1054 nothead[parents[j]] = 1;
1056 1055 }
1057 1056 }
1058 1057
1059 1058 for (i = 0; i < len; i++) {
1060 1059 PyObject *head;
1061 1060
1062 1061 if (nothead[i])
1063 1062 continue;
1064 1063 head = PyInt_FromSsize_t(i);
1065 1064 if (head == NULL || PyList_Append(heads, head) == -1) {
1066 1065 Py_XDECREF(head);
1067 1066 goto bail;
1068 1067 }
1069 1068 }
1070 1069
1071 1070 done:
1072 1071 self->headrevs = heads;
1073 1072 Py_XDECREF(filter);
1074 1073 free(nothead);
1075 1074 return list_copy(self->headrevs);
1076 1075 bail:
1077 1076 Py_XDECREF(filter);
1078 1077 Py_XDECREF(heads);
1079 1078 free(nothead);
1080 1079 return NULL;
1081 1080 }
1082 1081
1083 1082 /**
1084 1083 * Obtain the base revision of an index entry.
1085 1084 *
1086 1085 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1087 1086 */
1088 1087 static inline int index_baserev(indexObject *self, int rev)
1089 1088 {
1090 1089 const char *data;
1091 1090 int result;
1092 1091
1093 1092 data = index_deref(self, rev);
1094 1093 if (data == NULL)
1095 1094 return -2;
1096 1095 result = getbe32(data + 16);
1097 1096
1098 1097 if (result > rev) {
1099 1098 PyErr_Format(
1100 1099 PyExc_ValueError,
1101 1100 "corrupted revlog, revision base above revision: %d, %d",
1102 1101 rev, result);
1103 1102 return -2;
1104 1103 }
1105 1104 if (result < -1) {
1106 1105 PyErr_Format(
1107 1106 PyExc_ValueError,
1108 1107 "corrupted revlog, revision base out of range: %d, %d", rev,
1109 1108 result);
1110 1109 return -2;
1111 1110 }
1112 1111 return result;
1113 1112 }
1114 1113
1115 1114 /**
1116 1115 * Determine whether a revision is a snapshot
1117 1116 *
1118 1117 * Only relevant for sparse-revlog case.
1119 1118 * Callers must ensure that rev is in a valid range.
1120 1119 */
1121 1120 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1122 1121 {
1123 1122 int ps[2];
1124 1123 Py_ssize_t base;
1125 1124 while (rev >= 0) {
1126 1125 base = (Py_ssize_t)index_baserev(self, rev);
1127 1126 if (base == rev) {
1128 1127 base = -1;
1129 1128 }
1130 1129 if (base == -2) {
1131 1130 assert(PyErr_Occurred());
1132 1131 return -1;
1133 1132 }
1134 1133 if (base == -1) {
1135 1134 return 1;
1136 1135 }
1137 1136 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1138 1137 assert(PyErr_Occurred());
1139 1138 return -1;
1140 1139 };
1141 1140 if (base == ps[0] || base == ps[1]) {
1142 1141 return 0;
1143 1142 }
1144 1143 rev = base;
1145 1144 }
1146 1145 return rev == -1;
1147 1146 }
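/*
 * Summary (added for exposition): a revision is reported as a snapshot when
 * neither it nor any revision along its delta-base chain is stored as a
 * delta against one of its own parents, all the way down to a full snapshot
 * (base == -1, or base == rev which is normalized to -1 above).
 */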
1148 1147
1149 1148 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1150 1149 {
1151 1150 long rev;
1152 1151 int issnap;
1153 1152 Py_ssize_t length = index_length(self);
1154 1153
1155 1154 if (!pylong_to_long(value, &rev)) {
1156 1155 return NULL;
1157 1156 }
1158 1157 if (rev < -1 || rev >= length) {
1159 1158 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1160 1159 rev);
1161 1160 return NULL;
1162 1161 };
1163 1162 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1164 1163 if (issnap < 0) {
1165 1164 return NULL;
1166 1165 };
1167 1166 return PyBool_FromLong((long)issnap);
1168 1167 }
1169 1168
1170 1169 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1171 1170 {
1172 1171 Py_ssize_t start_rev;
1173 1172 PyObject *cache;
1174 1173 Py_ssize_t base;
1175 1174 Py_ssize_t rev;
1176 1175 PyObject *key = NULL;
1177 1176 PyObject *value = NULL;
1178 1177 const Py_ssize_t length = index_length(self);
1179 1178 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1180 1179 return NULL;
1181 1180 }
1182 1181 for (rev = start_rev; rev < length; rev++) {
1183 1182 int issnap;
1184 1183 PyObject *allvalues = NULL;
1185 1184 issnap = index_issnapshotrev(self, rev);
1186 1185 if (issnap < 0) {
1187 1186 goto bail;
1188 1187 }
1189 1188 if (issnap == 0) {
1190 1189 continue;
1191 1190 }
1192 1191 base = (Py_ssize_t)index_baserev(self, rev);
1193 1192 if (base == rev) {
1194 1193 base = -1;
1195 1194 }
1196 1195 if (base == -2) {
1197 1196 assert(PyErr_Occurred());
1198 1197 goto bail;
1199 1198 }
1200 1199 key = PyInt_FromSsize_t(base);
1201 1200 allvalues = PyDict_GetItem(cache, key);
1202 1201 if (allvalues == NULL && PyErr_Occurred()) {
1203 1202 goto bail;
1204 1203 }
1205 1204 if (allvalues == NULL) {
1206 1205 int r;
1207 1206 allvalues = PyList_New(0);
1208 1207 if (!allvalues) {
1209 1208 goto bail;
1210 1209 }
1211 1210 r = PyDict_SetItem(cache, key, allvalues);
1212 1211 Py_DECREF(allvalues);
1213 1212 if (r < 0) {
1214 1213 goto bail;
1215 1214 }
1216 1215 }
1217 1216 value = PyInt_FromSsize_t(rev);
1218 1217 if (PyList_Append(allvalues, value)) {
1219 1218 goto bail;
1220 1219 }
1221 1220 Py_CLEAR(key);
1222 1221 Py_CLEAR(value);
1223 1222 }
1224 1223 Py_RETURN_NONE;
1225 1224 bail:
1226 1225 Py_XDECREF(key);
1227 1226 Py_XDECREF(value);
1228 1227 return NULL;
1229 1228 }
1230 1229
1231 1230 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1232 1231 {
1233 1232 int rev, generaldelta;
1234 1233 PyObject *stoparg;
1235 1234 int stoprev, iterrev, baserev = -1;
1236 1235 int stopped;
1237 1236 PyObject *chain = NULL, *result = NULL;
1238 1237 const Py_ssize_t length = index_length(self);
1239 1238
1240 1239 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1241 1240 return NULL;
1242 1241 }
1243 1242
1244 1243 if (PyInt_Check(stoparg)) {
1245 1244 stoprev = (int)PyInt_AsLong(stoparg);
1246 1245 if (stoprev == -1 && PyErr_Occurred()) {
1247 1246 return NULL;
1248 1247 }
1249 1248 } else if (stoparg == Py_None) {
1250 1249 stoprev = -2;
1251 1250 } else {
1252 1251 PyErr_SetString(PyExc_ValueError,
1253 1252 "stoprev must be integer or None");
1254 1253 return NULL;
1255 1254 }
1256 1255
1257 1256 if (rev < 0 || rev >= length) {
1258 1257 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1259 1258 return NULL;
1260 1259 }
1261 1260
1262 1261 chain = PyList_New(0);
1263 1262 if (chain == NULL) {
1264 1263 return NULL;
1265 1264 }
1266 1265
1267 1266 baserev = index_baserev(self, rev);
1268 1267
1269 1268 /* This should never happen. */
1270 1269 if (baserev <= -2) {
1271 1270 /* Error should be set by index_deref() */
1272 1271 assert(PyErr_Occurred());
1273 1272 goto bail;
1274 1273 }
1275 1274
1276 1275 iterrev = rev;
1277 1276
1278 1277 while (iterrev != baserev && iterrev != stoprev) {
1279 1278 PyObject *value = PyInt_FromLong(iterrev);
1280 1279 if (value == NULL) {
1281 1280 goto bail;
1282 1281 }
1283 1282 if (PyList_Append(chain, value)) {
1284 1283 Py_DECREF(value);
1285 1284 goto bail;
1286 1285 }
1287 1286 Py_DECREF(value);
1288 1287
1289 1288 if (generaldelta) {
1290 1289 iterrev = baserev;
1291 1290 } else {
1292 1291 iterrev--;
1293 1292 }
1294 1293
1295 1294 if (iterrev < 0) {
1296 1295 break;
1297 1296 }
1298 1297
1299 1298 if (iterrev >= length) {
1300 1299 PyErr_SetString(PyExc_IndexError,
1301 1300 "revision outside index");
1302 1301 return NULL;
1303 1302 }
1304 1303
1305 1304 baserev = index_baserev(self, iterrev);
1306 1305
1307 1306 /* This should never happen. */
1308 1307 if (baserev <= -2) {
1309 1308 /* Error should be set by index_deref() */
1310 1309 assert(PyErr_Occurred());
1311 1310 goto bail;
1312 1311 }
1313 1312 }
1314 1313
1315 1314 if (iterrev == stoprev) {
1316 1315 stopped = 1;
1317 1316 } else {
1318 1317 PyObject *value = PyInt_FromLong(iterrev);
1319 1318 if (value == NULL) {
1320 1319 goto bail;
1321 1320 }
1322 1321 if (PyList_Append(chain, value)) {
1323 1322 Py_DECREF(value);
1324 1323 goto bail;
1325 1324 }
1326 1325 Py_DECREF(value);
1327 1326
1328 1327 stopped = 0;
1329 1328 }
1330 1329
1331 1330 if (PyList_Reverse(chain)) {
1332 1331 goto bail;
1333 1332 }
1334 1333
1335 1334 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1336 1335 Py_DECREF(chain);
1337 1336 return result;
1338 1337
1339 1338 bail:
1340 1339 Py_DECREF(chain);
1341 1340 return NULL;
1342 1341 }
1343 1342
1344 1343 static inline int64_t
1345 1344 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1346 1345 {
1347 1346 int64_t start_offset;
1348 1347 int64_t end_offset;
1349 1348 int end_size;
1350 1349 start_offset = index_get_start(self, start_rev);
1351 1350 if (start_offset < 0) {
1352 1351 return -1;
1353 1352 }
1354 1353 end_offset = index_get_start(self, end_rev);
1355 1354 if (end_offset < 0) {
1356 1355 return -1;
1357 1356 }
1358 1357 end_size = index_get_length(self, end_rev);
1359 1358 if (end_size < 0) {
1360 1359 return -1;
1361 1360 }
1362 1361 if (end_offset < start_offset) {
1363 1362 PyErr_Format(PyExc_ValueError,
1364 1363 "corrupted revlog index: inconsistent offset "
1365 1364 "between revisions (%zd) and (%zd)",
1366 1365 start_rev, end_rev);
1367 1366 return -1;
1368 1367 }
1369 1368 return (end_offset - start_offset) + (int64_t)end_size;
1370 1369 }
1371 1370
1372 1371 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1373 1372 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1374 1373 Py_ssize_t startidx, Py_ssize_t endidx)
1375 1374 {
1376 1375 int length;
1377 1376 while (endidx > 1 && endidx > startidx) {
1378 1377 length = index_get_length(self, revs[endidx - 1]);
1379 1378 if (length < 0) {
1380 1379 return -1;
1381 1380 }
1382 1381 if (length != 0) {
1383 1382 break;
1384 1383 }
1385 1384 endidx -= 1;
1386 1385 }
1387 1386 return endidx;
1388 1387 }
1389 1388
1390 1389 struct Gap {
1391 1390 int64_t size;
1392 1391 Py_ssize_t idx;
1393 1392 };
1394 1393
1395 1394 static int gap_compare(const void *left, const void *right)
1396 1395 {
1397 1396 const struct Gap *l_left = ((const struct Gap *)left);
1398 1397 const struct Gap *l_right = ((const struct Gap *)right);
1399 1398 if (l_left->size < l_right->size) {
1400 1399 return -1;
1401 1400 } else if (l_left->size > l_right->size) {
1402 1401 return 1;
1403 1402 }
1404 1403 return 0;
1405 1404 }
1406 1405 static int Py_ssize_t_compare(const void *left, const void *right)
1407 1406 {
1408 1407 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1409 1408 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1410 1409 if (l_left < l_right) {
1411 1410 return -1;
1412 1411 } else if (l_left > l_right) {
1413 1412 return 1;
1414 1413 }
1415 1414 return 0;
1416 1415 }
1417 1416
1418 1417 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1419 1418 {
1420 1419 /* method arguments */
1421 1420 PyObject *list_revs = NULL; /* revisions in the chain */
1422 1421 double targetdensity = 0; /* min density to achieve */
1423 1422 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1424 1423
1425 1424 /* other core variables */
1426 1425 Py_ssize_t idxlen = index_length(self);
1427 1426 Py_ssize_t i; /* used for various iteration */
1428 1427 PyObject *result = NULL; /* the final return of the function */
1429 1428
1430 1429 /* generic information about the delta chain being sliced */
1431 1430 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1432 1431 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1433 1432 int64_t chainpayload = 0; /* sum of all delta in the chain */
1434 1433 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1435 1434
1436 1435 /* variable used for slicing the delta chain */
1437 1436 int64_t readdata = 0; /* amount of data currently planned to be read */
1438 1437 double density = 0; /* ratio of payload data to data actually read */
1439 1438 int64_t previous_end;
1440 1439 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1441 1440 Py_ssize_t num_gaps =
1442 1441 0; /* total number of notable gaps recorded so far */
1443 1442 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1444 1443 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1445 1444 PyObject *chunk = NULL; /* individual slice */
1446 1445 PyObject *allchunks = NULL; /* all slices */
1447 1446 Py_ssize_t previdx;
1448 1447
1449 1448 /* parsing argument */
1450 1449 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1451 1450 &targetdensity, &mingapsize)) {
1452 1451 goto bail;
1453 1452 }
1454 1453
1455 1454 /* If the delta chain contains a single element, we do not need slicing
1456 1455 */
1457 1456 num_revs = PyList_GET_SIZE(list_revs);
1458 1457 if (num_revs <= 1) {
1459 1458 result = PyTuple_Pack(1, list_revs);
1460 1459 goto done;
1461 1460 }
1462 1461
1463 1462 /* Turn the python list into a native integer array (for efficiency) */
1464 1463 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1465 1464 if (revs == NULL) {
1466 1465 PyErr_NoMemory();
1467 1466 goto bail;
1468 1467 }
1469 1468 for (i = 0; i < num_revs; i++) {
1470 1469 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1471 1470 if (revnum == -1 && PyErr_Occurred()) {
1472 1471 goto bail;
1473 1472 }
1474 1473 if (revnum < nullrev || revnum >= idxlen) {
1475 1474 PyErr_Format(PyExc_IndexError,
1476 1475 "index out of range: %zd", revnum);
1477 1476 goto bail;
1478 1477 }
1479 1478 revs[i] = revnum;
1480 1479 }
1481 1480
1482 1481 /* Compute and check various properties of the unsliced delta chain */
1483 1482 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1484 1483 if (deltachainspan < 0) {
1485 1484 goto bail;
1486 1485 }
1487 1486
1488 1487 if (deltachainspan <= mingapsize) {
1489 1488 result = PyTuple_Pack(1, list_revs);
1490 1489 goto done;
1491 1490 }
1492 1491 chainpayload = 0;
1493 1492 for (i = 0; i < num_revs; i++) {
1494 1493 int tmp = index_get_length(self, revs[i]);
1495 1494 if (tmp < 0) {
1496 1495 goto bail;
1497 1496 }
1498 1497 chainpayload += tmp;
1499 1498 }
1500 1499
1501 1500 readdata = deltachainspan;
1502 1501 density = 1.0;
1503 1502
1504 1503 if (0 < deltachainspan) {
1505 1504 density = (double)chainpayload / (double)deltachainspan;
1506 1505 }
1507 1506
1508 1507 if (density >= targetdensity) {
1509 1508 result = PyTuple_Pack(1, list_revs);
1510 1509 goto done;
1511 1510 }
1512 1511
1513 1512 /* if chain is too sparse, look for relevant gaps */
1514 1513 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1515 1514 if (gaps == NULL) {
1516 1515 PyErr_NoMemory();
1517 1516 goto bail;
1518 1517 }
1519 1518
1520 1519 previous_end = -1;
1521 1520 for (i = 0; i < num_revs; i++) {
1522 1521 int64_t revstart;
1523 1522 int revsize;
1524 1523 revstart = index_get_start(self, revs[i]);
1525 1524 if (revstart < 0) {
1526 1525 goto bail;
1527 1526 };
1528 1527 revsize = index_get_length(self, revs[i]);
1529 1528 if (revsize < 0) {
1530 1529 goto bail;
1531 1530 };
1532 1531 if (revsize == 0) {
1533 1532 continue;
1534 1533 }
1535 1534 if (previous_end >= 0) {
1536 1535 int64_t gapsize = revstart - previous_end;
1537 1536 if (gapsize > mingapsize) {
1538 1537 gaps[num_gaps].size = gapsize;
1539 1538 gaps[num_gaps].idx = i;
1540 1539 num_gaps += 1;
1541 1540 }
1542 1541 }
1543 1542 previous_end = revstart + revsize;
1544 1543 }
1545 1544 if (num_gaps == 0) {
1546 1545 result = PyTuple_Pack(1, list_revs);
1547 1546 goto done;
1548 1547 }
1549 1548 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1550 1549
1551 1550 /* Slice the largest gaps first; they improve the density the most */
1552 1551 selected_indices =
1553 1552 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1554 1553 if (selected_indices == NULL) {
1555 1554 PyErr_NoMemory();
1556 1555 goto bail;
1557 1556 }
1558 1557
1559 1558 for (i = num_gaps - 1; i >= 0; i--) {
1560 1559 selected_indices[num_selected] = gaps[i].idx;
1561 1560 readdata -= gaps[i].size;
1562 1561 num_selected += 1;
1563 1562 if (readdata <= 0) {
1564 1563 density = 1.0;
1565 1564 } else {
1566 1565 density = (double)chainpayload / (double)readdata;
1567 1566 }
1568 1567 if (density >= targetdensity) {
1569 1568 break;
1570 1569 }
1571 1570 }
1572 1571 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1573 1572 &Py_ssize_t_compare);
1574 1573
1575 1574 /* create the resulting slice */
1576 1575 allchunks = PyList_New(0);
1577 1576 if (allchunks == NULL) {
1578 1577 goto bail;
1579 1578 }
1580 1579 previdx = 0;
1581 1580 selected_indices[num_selected] = num_revs;
1582 1581 for (i = 0; i <= num_selected; i++) {
1583 1582 Py_ssize_t idx = selected_indices[i];
1584 1583 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1585 1584 if (endidx < 0) {
1586 1585 goto bail;
1587 1586 }
1588 1587 if (previdx < endidx) {
1589 1588 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1590 1589 if (chunk == NULL) {
1591 1590 goto bail;
1592 1591 }
1593 1592 if (PyList_Append(allchunks, chunk) == -1) {
1594 1593 goto bail;
1595 1594 }
1596 1595 Py_DECREF(chunk);
1597 1596 chunk = NULL;
1598 1597 }
1599 1598 previdx = idx;
1600 1599 }
1601 1600 result = allchunks;
1602 1601 goto done;
1603 1602
1604 1603 bail:
1605 1604 Py_XDECREF(allchunks);
1606 1605 Py_XDECREF(chunk);
1607 1606 done:
1608 1607 free(revs);
1609 1608 free(gaps);
1610 1609 free(selected_indices);
1611 1610 return result;
1612 1611 }
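/*
 * Worked example (added for exposition; the numbers are made up): a chain
 * whose deltas total 100 bytes but span 1000 bytes on disk has density
 * 100/1000 = 0.1. If the largest gap is 600 bytes and exceeds mingapsize,
 * skipping it leaves readdata = 400 and density 100/400 = 0.25; gaps keep
 * being skipped, largest first, until targetdensity is reached, and the
 * chain is then sliced at the skipped gaps.
 */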
1613 1612
1614 1613 static inline int nt_level(const char *node, Py_ssize_t level)
1615 1614 {
1616 1615 int v = node[level >> 1];
1617 1616 if (!(level & 1))
1618 1617 v >>= 4;
1619 1618 return v & 0xf;
1620 1619 }
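/*
 * Example (added for exposition): if the first byte of a binary node is
 * 0xab, nt_level(node, 0) yields 0xa (the high nybble) and nt_level(node, 1)
 * yields 0xb (the low nybble); level 2 then moves on to the next byte.
 */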
1621 1620
1622 1621 /*
1623 1622 * Return values:
1624 1623 *
1625 1624 * -4: match is ambiguous (multiple candidates)
1626 1625 * -2: not found
1627 1626 * rest: valid rev
1628 1627 */
1629 1628 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1630 1629 int hex)
1631 1630 {
1632 1631 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1633 1632 int level, maxlevel, off;
1634 1633
1635 1634 /* If the input is binary, do a fast check for the nullid first. */
1636 1635 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1637 1636 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1638 1637 return -1;
1639 1638
1640 1639 if (hex)
1641 1640 maxlevel = nodelen;
1642 1641 else
1643 1642 maxlevel = 2 * nodelen;
1644 1643 if (maxlevel > 2 * self->nodelen)
1645 1644 maxlevel = 2 * self->nodelen;
1646 1645
1647 1646 for (level = off = 0; level < maxlevel; level++) {
1648 1647 int k = getnybble(node, level);
1649 1648 nodetreenode *n = &self->nodes[off];
1650 1649 int v = n->children[k];
1651 1650
1652 1651 if (v < 0) {
1653 1652 const char *n;
1654 1653 Py_ssize_t i;
1655 1654
1656 1655 v = -(v + 2);
1657 1656 n = index_node(self->index, v);
1658 1657 if (n == NULL)
1659 1658 return -2;
1660 1659 for (i = level; i < maxlevel; i++)
1661 1660 if (getnybble(node, i) != nt_level(n, i))
1662 1661 return -2;
1663 1662 return v;
1664 1663 }
1665 1664 if (v == 0)
1666 1665 return -2;
1667 1666 off = v;
1668 1667 }
1669 1668 /* multiple matches against an ambiguous prefix */
1670 1669 return -4;
1671 1670 }
1672 1671
1673 1672 static int nt_new(nodetree *self)
1674 1673 {
1675 1674 if (self->length == self->capacity) {
1676 1675 size_t newcapacity;
1677 1676 nodetreenode *newnodes;
1678 1677 newcapacity = self->capacity * 2;
1679 1678 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1680 1679 PyErr_SetString(PyExc_MemoryError,
1681 1680 "overflow in nt_new");
1682 1681 return -1;
1683 1682 }
1684 1683 newnodes =
1685 1684 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1686 1685 if (newnodes == NULL) {
1687 1686 PyErr_SetString(PyExc_MemoryError, "out of memory");
1688 1687 return -1;
1689 1688 }
1690 1689 self->capacity = newcapacity;
1691 1690 self->nodes = newnodes;
1692 1691 memset(&self->nodes[self->length], 0,
1693 1692 sizeof(nodetreenode) * (self->capacity - self->length));
1694 1693 }
1695 1694 return self->length++;
1696 1695 }
1697 1696
1698 1697 static int nt_insert(nodetree *self, const char *node, int rev)
1699 1698 {
1700 1699 int level = 0;
1701 1700 int off = 0;
1702 1701
1703 1702 while (level < 2 * self->nodelen) {
1704 1703 int k = nt_level(node, level);
1705 1704 nodetreenode *n;
1706 1705 int v;
1707 1706
1708 1707 n = &self->nodes[off];
1709 1708 v = n->children[k];
1710 1709
1711 1710 if (v == 0) {
1712 1711 n->children[k] = -rev - 2;
1713 1712 return 0;
1714 1713 }
1715 1714 if (v < 0) {
1716 1715 const char *oldnode =
1717 1716 index_node_existing(self->index, -(v + 2));
1718 1717 int noff;
1719 1718
1720 1719 if (oldnode == NULL)
1721 1720 return -1;
1722 1721 if (!memcmp(oldnode, node, self->nodelen)) {
1723 1722 n->children[k] = -rev - 2;
1724 1723 return 0;
1725 1724 }
1726 1725 noff = nt_new(self);
1727 1726 if (noff == -1)
1728 1727 return -1;
1729 1728 /* self->nodes may have been changed by realloc */
1730 1729 self->nodes[off].children[k] = noff;
1731 1730 off = noff;
1732 1731 n = &self->nodes[off];
1733 1732 n->children[nt_level(oldnode, ++level)] = v;
1734 1733 if (level > self->depth)
1735 1734 self->depth = level;
1736 1735 self->splits += 1;
1737 1736 } else {
1738 1737 level += 1;
1739 1738 off = v;
1740 1739 }
1741 1740 }
1742 1741
1743 1742 return -1;
1744 1743 }
1745 1744
1746 1745 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1747 1746 {
1748 1747 Py_ssize_t rev;
1749 1748 const char *node;
1750 1749 Py_ssize_t length;
1751 1750 if (!PyArg_ParseTuple(args, "n", &rev))
1752 1751 return NULL;
1753 1752 length = index_length(self->nt.index);
1754 1753 if (rev < 0 || rev >= length) {
1755 1754 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1756 1755 return NULL;
1757 1756 }
1758 1757 node = index_node_existing(self->nt.index, rev);
1759 1758 if (nt_insert(&self->nt, node, (int)rev) == -1)
1760 1759 return NULL;
1761 1760 Py_RETURN_NONE;
1762 1761 }
1763 1762
1764 1763 static int nt_delete_node(nodetree *self, const char *node)
1765 1764 {
1766 1765 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1767 1766 */
1768 1767 return nt_insert(self, node, -2);
1769 1768 }
1770 1769
1771 1770 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1772 1771 {
1773 1772 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1774 1773 self->nodes = NULL;
1775 1774
1776 1775 self->index = index;
1777 1776 /* The input capacity is in terms of revisions, while the field is in
1778 1777 * terms of nodetree nodes. */
1779 1778 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1780 1779 self->nodelen = index->nodelen;
1781 1780 self->depth = 0;
1782 1781 self->splits = 0;
1783 1782 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
1784 1783 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1785 1784 return -1;
1786 1785 }
1787 1786 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1788 1787 if (self->nodes == NULL) {
1789 1788 PyErr_NoMemory();
1790 1789 return -1;
1791 1790 }
1792 1791 self->length = 1;
1793 1792 return 0;
1794 1793 }
1795 1794
1796 1795 static int ntobj_init(nodetreeObject *self, PyObject *args)
1797 1796 {
1798 1797 PyObject *index;
1799 1798 unsigned capacity;
1800 1799 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1801 1800 &capacity))
1802 1801 return -1;
1803 1802 Py_INCREF(index);
1804 1803 return nt_init(&self->nt, (indexObject *)index, capacity);
1805 1804 }
1806 1805
1807 1806 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1808 1807 {
1809 1808 return nt_find(self, node, nodelen, 1);
1810 1809 }
1811 1810
1812 1811 /*
1813 1812 * Find the length of the shortest unique prefix of node.
1814 1813 *
1815 1814 * Return values:
1816 1815 *
1817 1816 * -3: error (exception set)
1818 1817 * -2: not found (no exception set)
1819 1818 * rest: length of shortest prefix
1820 1819 */
1821 1820 static int nt_shortest(nodetree *self, const char *node)
1822 1821 {
1823 1822 int level, off;
1824 1823
1825 1824 for (level = off = 0; level < 2 * self->nodelen; level++) {
1826 1825 int k, v;
1827 1826 nodetreenode *n = &self->nodes[off];
1828 1827 k = nt_level(node, level);
1829 1828 v = n->children[k];
1830 1829 if (v < 0) {
1831 1830 const char *n;
1832 1831 v = -(v + 2);
1833 1832 n = index_node_existing(self->index, v);
1834 1833 if (n == NULL)
1835 1834 return -3;
1836 1835 if (memcmp(node, n, self->nodelen) != 0)
1837 1836 /*
1838 1837 * Found a unique prefix, but it wasn't for the
1839 1838 * requested node (i.e. the requested node does
1840 1839 * not exist).
1841 1840 */
1842 1841 return -2;
1843 1842 return level + 1;
1844 1843 }
1845 1844 if (v == 0)
1846 1845 return -2;
1847 1846 off = v;
1848 1847 }
1849 1848 /*
1850 1849 * The node was still not unique after 40 hex digits, so this won't
1851 1850 * happen. Also, if we get here, then there's a programming error in
1852 1851 * this file that made us insert a node longer than 40 hex digits.
1853 1852 */
1854 1853 PyErr_SetString(PyExc_Exception, "broken node tree");
1855 1854 return -3;
1856 1855 }
1857 1856
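nt_shortest() answers the question "after how many hex digits is this node unambiguous against everything already in the tree?". Below is a hedged pure-Python restatement of that contract over plain hex strings; it is meant to compute the same length as the trie walk above, just by brute force, and the function name is made up.

def shortest_unique_prefix(target, known_hexnodes):
    """Smallest prefix length of target shared by no other known node."""
    others = [n for n in known_hexnodes if n != target]
    for length in range(1, len(target) + 1):
        if not any(o.startswith(target[:length]) for o in others):
            return length
    raise ValueError("target is a prefix of another known node")


# shortest_unique_prefix("1a2b", ["1a2b", "1a9f", "ff00"]) == 3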
1858 1857 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1859 1858 {
1860 1859 PyObject *val;
1861 1860 char *node;
1862 1861 int length;
1863 1862
1864 1863 if (!PyArg_ParseTuple(args, "O", &val))
1865 1864 return NULL;
1866 1865 if (node_check(self->nt.nodelen, val, &node) == -1)
1867 1866 return NULL;
1868 1867
1869 1868 length = nt_shortest(&self->nt, node);
1870 1869 if (length == -3)
1871 1870 return NULL;
1872 1871 if (length == -2) {
1873 1872 raise_revlog_error();
1874 1873 return NULL;
1875 1874 }
1876 1875 return PyInt_FromLong(length);
1877 1876 }
1878 1877
1879 1878 static void nt_dealloc(nodetree *self)
1880 1879 {
1881 1880 free(self->nodes);
1882 1881 self->nodes = NULL;
1883 1882 }
1884 1883
1885 1884 static void ntobj_dealloc(nodetreeObject *self)
1886 1885 {
1887 1886 Py_XDECREF(self->nt.index);
1888 1887 nt_dealloc(&self->nt);
1889 1888 PyObject_Del(self);
1890 1889 }
1891 1890
1892 1891 static PyMethodDef ntobj_methods[] = {
1893 1892 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1894 1893 "insert an index entry"},
1895 1894 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1896 1895 "find length of shortest hex nodeid of a binary ID"},
1897 1896 {NULL} /* Sentinel */
1898 1897 };
1899 1898
1900 1899 static PyTypeObject nodetreeType = {
1901 1900 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1902 1901 "parsers.nodetree", /* tp_name */
1903 1902 sizeof(nodetreeObject), /* tp_basicsize */
1904 1903 0, /* tp_itemsize */
1905 1904 (destructor)ntobj_dealloc, /* tp_dealloc */
1906 1905 0, /* tp_print */
1907 1906 0, /* tp_getattr */
1908 1907 0, /* tp_setattr */
1909 1908 0, /* tp_compare */
1910 1909 0, /* tp_repr */
1911 1910 0, /* tp_as_number */
1912 1911 0, /* tp_as_sequence */
1913 1912 0, /* tp_as_mapping */
1914 1913 0, /* tp_hash */
1915 1914 0, /* tp_call */
1916 1915 0, /* tp_str */
1917 1916 0, /* tp_getattro */
1918 1917 0, /* tp_setattro */
1919 1918 0, /* tp_as_buffer */
1920 1919 Py_TPFLAGS_DEFAULT, /* tp_flags */
1921 1920 "nodetree", /* tp_doc */
1922 1921 0, /* tp_traverse */
1923 1922 0, /* tp_clear */
1924 1923 0, /* tp_richcompare */
1925 1924 0, /* tp_weaklistoffset */
1926 1925 0, /* tp_iter */
1927 1926 0, /* tp_iternext */
1928 1927 ntobj_methods, /* tp_methods */
1929 1928 0, /* tp_members */
1930 1929 0, /* tp_getset */
1931 1930 0, /* tp_base */
1932 1931 0, /* tp_dict */
1933 1932 0, /* tp_descr_get */
1934 1933 0, /* tp_descr_set */
1935 1934 0, /* tp_dictoffset */
1936 1935 (initproc)ntobj_init, /* tp_init */
1937 1936 0, /* tp_alloc */
1938 1937 };
1939 1938
1940 1939 static int index_init_nt(indexObject *self)
1941 1940 {
1942 1941 if (!self->ntinitialized) {
1943 1942 if (nt_init(&self->nt, self, (int)self->length) == -1) {
1944 1943 nt_dealloc(&self->nt);
1945 1944 return -1;
1946 1945 }
1947 1946 if (nt_insert(&self->nt, nullid, -1) == -1) {
1948 1947 nt_dealloc(&self->nt);
1949 1948 return -1;
1950 1949 }
1951 1950 self->ntinitialized = 1;
1952 1951 self->ntrev = (int)index_length(self);
1953 1952 self->ntlookups = 1;
1954 1953 self->ntmisses = 0;
1955 1954 }
1956 1955 return 0;
1957 1956 }
1958 1957
1959 1958 /*
1960 1959 * Return values:
1961 1960 *
1962 1961 * -3: error (exception set)
1963 1962 * -2: not found (no exception set)
1964 1963 * rest: valid rev
1965 1964 */
1966 1965 static int index_find_node(indexObject *self, const char *node)
1967 1966 {
1968 1967 int rev;
1969 1968
1970 1969 if (index_init_nt(self) == -1)
1971 1970 return -3;
1972 1971
1973 1972 self->ntlookups++;
1974 1973 rev = nt_find(&self->nt, node, self->nodelen, 0);
1975 1974 if (rev >= -1)
1976 1975 return rev;
1977 1976
1978 1977 /*
1979 1978 * For the first handful of lookups, we scan the entire index,
1980 1979 * and cache only the matching nodes. This optimizes for cases
1981 1980 * like "hg tip", where only a few nodes are accessed.
1982 1981 *
1983 1982 * After that, we cache every node we visit, using a single
1984 1983 * scan amortized over multiple lookups. This gives the best
1985 1984 * bulk performance, e.g. for "hg log".
1986 1985 */
1987 1986 if (self->ntmisses++ < 4) {
1988 1987 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1989 1988 const char *n = index_node_existing(self, rev);
1990 1989 if (n == NULL)
1991 1990 return -3;
1992 1991 if (memcmp(node, n, self->nodelen) == 0) {
1993 1992 if (nt_insert(&self->nt, n, rev) == -1)
1994 1993 return -3;
1995 1994 break;
1996 1995 }
1997 1996 }
1998 1997 } else {
1999 1998 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2000 1999 const char *n = index_node_existing(self, rev);
2001 2000 if (n == NULL)
2002 2001 return -3;
2003 2002 if (nt_insert(&self->nt, n, rev) == -1) {
2004 2003 self->ntrev = rev + 1;
2005 2004 return -3;
2006 2005 }
2007 2006 if (memcmp(node, n, self->nodelen) == 0) {
2008 2007 break;
2009 2008 }
2010 2009 }
2011 2010 self->ntrev = rev;
2012 2011 }
2013 2012
2014 2013 if (rev >= 0)
2015 2014 return rev;
2016 2015 return -2;
2017 2016 }
2018 2017
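The caching comment above is easier to see in miniature. Here is a hedged Python sketch of the same two-phase strategy: index is any rev -> binary-node sequence, cache plays the role of the trie, and the ad hoc state dict stands in for the ntrev/ntmisses fields; none of these names come from Mercurial.

def find_node(index, cache, state, node):
    rev = cache.get(node)
    if rev is not None:
        return rev
    state['misses'] += 1
    if state['misses'] <= 4:
        # early misses: scan, but cache only the hit (the "hg tip" pattern)
        for rev in range(state['ntrev'] - 1, -1, -1):
            if index[rev] == node:
                cache[node] = rev
                return rev
    else:
        # later misses: cache every node walked past and remember how far the
        # scan got, amortizing one pass over many lookups (the "hg log" pattern)
        for rev in range(state['ntrev'] - 1, -1, -1):
            cache[index[rev]] = rev
            if index[rev] == node:
                state['ntrev'] = rev
                return rev
        state['ntrev'] = -1
    return None


# typical setup: cache = {}; state = {'ntrev': len(index), 'misses': 0}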
2019 2018 static PyObject *index_getitem(indexObject *self, PyObject *value)
2020 2019 {
2021 2020 char *node;
2022 2021 int rev;
2023 2022
2024 2023 if (PyInt_Check(value)) {
2025 2024 long idx;
2026 2025 if (!pylong_to_long(value, &idx)) {
2027 2026 return NULL;
2028 2027 }
2029 2028 return index_get(self, idx);
2030 2029 }
2031 2030
2032 2031 if (node_check(self->nodelen, value, &node) == -1)
2033 2032 return NULL;
2034 2033 rev = index_find_node(self, node);
2035 2034 if (rev >= -1)
2036 2035 return PyInt_FromLong(rev);
2037 2036 if (rev == -2)
2038 2037 raise_revlog_error();
2039 2038 return NULL;
2040 2039 }
2041 2040
2042 2041 /*
2043 2042 * Fully populate the radix tree.
2044 2043 */
2045 2044 static int index_populate_nt(indexObject *self)
2046 2045 {
2047 2046 int rev;
2048 2047 if (self->ntrev > 0) {
2049 2048 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2050 2049 const char *n = index_node_existing(self, rev);
2051 2050 if (n == NULL)
2052 2051 return -1;
2053 2052 if (nt_insert(&self->nt, n, rev) == -1)
2054 2053 return -1;
2055 2054 }
2056 2055 self->ntrev = -1;
2057 2056 }
2058 2057 return 0;
2059 2058 }
2060 2059
2061 2060 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2062 2061 {
2063 2062 const char *fullnode;
2064 2063 Py_ssize_t nodelen;
2065 2064 char *node;
2066 2065 int rev, i;
2067 2066
2068 2067 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
2069 2068 return NULL;
2070 2069
2071 2070 if (nodelen < 1) {
2072 2071 PyErr_SetString(PyExc_ValueError, "key too short");
2073 2072 return NULL;
2074 2073 }
2075 2074
2076 2075 if (nodelen > 2 * self->nodelen) {
2077 2076 PyErr_SetString(PyExc_ValueError, "key too long");
2078 2077 return NULL;
2079 2078 }
2080 2079
2081 2080 for (i = 0; i < nodelen; i++)
2082 2081 hexdigit(node, i);
2083 2082 if (PyErr_Occurred()) {
2084 2083 /* input contains non-hex characters */
2085 2084 PyErr_Clear();
2086 2085 Py_RETURN_NONE;
2087 2086 }
2088 2087
2089 2088 if (index_init_nt(self) == -1)
2090 2089 return NULL;
2091 2090 if (index_populate_nt(self) == -1)
2092 2091 return NULL;
2093 2092 rev = nt_partialmatch(&self->nt, node, nodelen);
2094 2093
2095 2094 switch (rev) {
2096 2095 case -4:
2097 2096 raise_revlog_error();
2098 2097 return NULL;
2099 2098 case -2:
2100 2099 Py_RETURN_NONE;
2101 2100 case -1:
2102 2101 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2103 2102 }
2104 2103
2105 2104 fullnode = index_node_existing(self, rev);
2106 2105 if (fullnode == NULL) {
2107 2106 return NULL;
2108 2107 }
2109 2108 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2110 2109 }
2111 2110
2112 2111 static PyObject *index_shortest(indexObject *self, PyObject *args)
2113 2112 {
2114 2113 PyObject *val;
2115 2114 char *node;
2116 2115 int length;
2117 2116
2118 2117 if (!PyArg_ParseTuple(args, "O", &val))
2119 2118 return NULL;
2120 2119 if (node_check(self->nodelen, val, &node) == -1)
2121 2120 return NULL;
2122 2121
2123 2122 self->ntlookups++;
2124 2123 if (index_init_nt(self) == -1)
2125 2124 return NULL;
2126 2125 if (index_populate_nt(self) == -1)
2127 2126 return NULL;
2128 2127 length = nt_shortest(&self->nt, node);
2129 2128 if (length == -3)
2130 2129 return NULL;
2131 2130 if (length == -2) {
2132 2131 raise_revlog_error();
2133 2132 return NULL;
2134 2133 }
2135 2134 return PyInt_FromLong(length);
2136 2135 }
2137 2136
2138 2137 static PyObject *index_m_get(indexObject *self, PyObject *args)
2139 2138 {
2140 2139 PyObject *val;
2141 2140 char *node;
2142 2141 int rev;
2143 2142
2144 2143 if (!PyArg_ParseTuple(args, "O", &val))
2145 2144 return NULL;
2146 2145 if (node_check(self->nodelen, val, &node) == -1)
2147 2146 return NULL;
2148 2147 rev = index_find_node(self, node);
2149 2148 if (rev == -3)
2150 2149 return NULL;
2151 2150 if (rev == -2)
2152 2151 Py_RETURN_NONE;
2153 2152 return PyInt_FromLong(rev);
2154 2153 }
2155 2154
2156 2155 static int index_contains(indexObject *self, PyObject *value)
2157 2156 {
2158 2157 char *node;
2159 2158
2160 2159 if (PyInt_Check(value)) {
2161 2160 long rev;
2162 2161 if (!pylong_to_long(value, &rev)) {
2163 2162 return -1;
2164 2163 }
2165 2164 return rev >= -1 && rev < index_length(self);
2166 2165 }
2167 2166
2168 2167 if (node_check(self->nodelen, value, &node) == -1)
2169 2168 return -1;
2170 2169
2171 2170 switch (index_find_node(self, node)) {
2172 2171 case -3:
2173 2172 return -1;
2174 2173 case -2:
2175 2174 return 0;
2176 2175 default:
2177 2176 return 1;
2178 2177 }
2179 2178 }
2180 2179
2181 2180 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2182 2181 {
2183 2182 int ret = index_contains(self, args);
2184 2183 if (ret < 0)
2185 2184 return NULL;
2186 2185 return PyBool_FromLong((long)ret);
2187 2186 }
2188 2187
2189 2188 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2190 2189 {
2191 2190 char *node;
2192 2191 int rev;
2193 2192
2194 2193 if (node_check(self->nodelen, val, &node) == -1)
2195 2194 return NULL;
2196 2195 rev = index_find_node(self, node);
2197 2196 if (rev >= -1)
2198 2197 return PyInt_FromLong(rev);
2199 2198 if (rev == -2)
2200 2199 raise_revlog_error();
2201 2200 return NULL;
2202 2201 }
2203 2202
2204 2203 typedef uint64_t bitmask;
2205 2204
2206 2205 /*
2207 2206 * Given a disjoint set of revs, return all candidates for the
2208 2207 * greatest common ancestor. In revset notation, this is the set
2209 2208 * "heads(::a and ::b and ...)"
2210 2209 */
2211 2210 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2212 2211 int revcount)
2213 2212 {
2214 2213 const bitmask allseen = (1ull << revcount) - 1;
2215 2214 const bitmask poison = 1ull << revcount;
2216 2215 PyObject *gca = PyList_New(0);
2217 2216 int i, v, interesting;
2218 2217 int maxrev = -1;
2219 2218 bitmask sp;
2220 2219 bitmask *seen;
2221 2220
2222 2221 if (gca == NULL)
2223 2222 return PyErr_NoMemory();
2224 2223
2225 2224 for (i = 0; i < revcount; i++) {
2226 2225 if (revs[i] > maxrev)
2227 2226 maxrev = revs[i];
2228 2227 }
2229 2228
2230 2229 seen = calloc(sizeof(*seen), maxrev + 1);
2231 2230 if (seen == NULL) {
2232 2231 Py_DECREF(gca);
2233 2232 return PyErr_NoMemory();
2234 2233 }
2235 2234
2236 2235 for (i = 0; i < revcount; i++)
2237 2236 seen[revs[i]] = 1ull << i;
2238 2237
2239 2238 interesting = revcount;
2240 2239
2241 2240 for (v = maxrev; v >= 0 && interesting; v--) {
2242 2241 bitmask sv = seen[v];
2243 2242 int parents[2];
2244 2243
2245 2244 if (!sv)
2246 2245 continue;
2247 2246
2248 2247 if (sv < poison) {
2249 2248 interesting -= 1;
2250 2249 if (sv == allseen) {
2251 2250 PyObject *obj = PyInt_FromLong(v);
2252 2251 if (obj == NULL)
2253 2252 goto bail;
2254 2253 if (PyList_Append(gca, obj) == -1) {
2255 2254 Py_DECREF(obj);
2256 2255 goto bail;
2257 2256 }
2258 2257 sv |= poison;
2259 2258 for (i = 0; i < revcount; i++) {
2260 2259 if (revs[i] == v)
2261 2260 goto done;
2262 2261 }
2263 2262 }
2264 2263 }
2265 2264 if (index_get_parents(self, v, parents, maxrev) < 0)
2266 2265 goto bail;
2267 2266
2268 2267 for (i = 0; i < 2; i++) {
2269 2268 int p = parents[i];
2270 2269 if (p == -1)
2271 2270 continue;
2272 2271 sp = seen[p];
2273 2272 if (sv < poison) {
2274 2273 if (sp == 0) {
2275 2274 seen[p] = sv;
2276 2275 interesting++;
2277 2276 } else if (sp != sv)
2278 2277 seen[p] |= sv;
2279 2278 } else {
2280 2279 if (sp && sp < poison)
2281 2280 interesting--;
2282 2281 seen[p] = sv;
2283 2282 }
2284 2283 }
2285 2284 }
2286 2285
2287 2286 done:
2288 2287 free(seen);
2289 2288 return gca;
2290 2289 bail:
2291 2290 free(seen);
2292 2291 Py_XDECREF(gca);
2293 2292 return NULL;
2294 2293 }
2295 2294
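Here is a hedged Python transcription of the bitmask walk above. parents(rev) must return the two parent revisions (-1 when absent), the input revs are assumed distinct and non-negative, and every name is mine rather than a Mercurial API.

def gca_candidates(parents, revs):
    allseen = (1 << len(revs)) - 1
    poison = 1 << len(revs)
    maxrev = max(revs)
    seen = [0] * (maxrev + 1)
    for i, r in enumerate(revs):
        seen[r] = 1 << i
    interesting = len(revs)
    gca = []
    for v in range(maxrev, -1, -1):
        sv = seen[v]
        if not sv:
            continue
        if sv < poison:
            interesting -= 1
            if sv == allseen:      # reachable from every input: a candidate
                gca.append(v)
                sv |= poison       # poison its ancestors
                if v in revs:
                    break
        for p in parents(v):
            if p == -1:
                continue
            sp = seen[p]
            if sv < poison:
                if sp == 0:
                    seen[p] = sv
                    interesting += 1
                elif sp != sv:
                    seen[p] = sp | sv
            else:                  # propagate the poison toward the root
                if sp and sp < poison:
                    interesting -= 1
                seen[p] = sv
        if not interesting:
            break
    return gca


# With parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}.get,
# gca_candidates(parents, [2, 3]) == [1]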
2296 2295 /*
2297 2296 * Given a disjoint set of revs, return the subset with the longest
2298 2297 * path to the root.
2299 2298 */
2300 2299 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2301 2300 {
2302 2301 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2303 2302 static const Py_ssize_t capacity = 24;
2304 2303 int *depth, *interesting = NULL;
2305 2304 int i, j, v, ninteresting;
2306 2305 PyObject *dict = NULL, *keys = NULL;
2307 2306 long *seen = NULL;
2308 2307 int maxrev = -1;
2309 2308 long final;
2310 2309
2311 2310 if (revcount > capacity) {
2312 2311 PyErr_Format(PyExc_OverflowError,
2313 2312 "bitset size (%ld) > capacity (%ld)",
2314 2313 (long)revcount, (long)capacity);
2315 2314 return NULL;
2316 2315 }
2317 2316
2318 2317 for (i = 0; i < revcount; i++) {
2319 2318 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2320 2319 if (n > maxrev)
2321 2320 maxrev = n;
2322 2321 }
2323 2322
2324 2323 depth = calloc(sizeof(*depth), maxrev + 1);
2325 2324 if (depth == NULL)
2326 2325 return PyErr_NoMemory();
2327 2326
2328 2327 seen = calloc(sizeof(*seen), maxrev + 1);
2329 2328 if (seen == NULL) {
2330 2329 PyErr_NoMemory();
2331 2330 goto bail;
2332 2331 }
2333 2332
2334 2333 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2335 2334 if (interesting == NULL) {
2336 2335 PyErr_NoMemory();
2337 2336 goto bail;
2338 2337 }
2339 2338
2340 2339 if (PyList_Sort(revs) == -1)
2341 2340 goto bail;
2342 2341
2343 2342 for (i = 0; i < revcount; i++) {
2344 2343 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2345 2344 long b = 1l << i;
2346 2345 depth[n] = 1;
2347 2346 seen[n] = b;
2348 2347 interesting[b] = 1;
2349 2348 }
2350 2349
2351 2350 /* invariant: ninteresting is the number of non-zero entries in
2352 2351 * interesting. */
2353 2352 ninteresting = (int)revcount;
2354 2353
2355 2354 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2356 2355 int dv = depth[v];
2357 2356 int parents[2];
2358 2357 long sv;
2359 2358
2360 2359 if (dv == 0)
2361 2360 continue;
2362 2361
2363 2362 sv = seen[v];
2364 2363 if (index_get_parents(self, v, parents, maxrev) < 0)
2365 2364 goto bail;
2366 2365
2367 2366 for (i = 0; i < 2; i++) {
2368 2367 int p = parents[i];
2369 2368 long sp;
2370 2369 int dp;
2371 2370
2372 2371 if (p == -1)
2373 2372 continue;
2374 2373
2375 2374 dp = depth[p];
2376 2375 sp = seen[p];
2377 2376 if (dp <= dv) {
2378 2377 depth[p] = dv + 1;
2379 2378 if (sp != sv) {
2380 2379 interesting[sv] += 1;
2381 2380 seen[p] = sv;
2382 2381 if (sp) {
2383 2382 interesting[sp] -= 1;
2384 2383 if (interesting[sp] == 0)
2385 2384 ninteresting -= 1;
2386 2385 }
2387 2386 }
2388 2387 } else if (dv == dp - 1) {
2389 2388 long nsp = sp | sv;
2390 2389 if (nsp == sp)
2391 2390 continue;
2392 2391 seen[p] = nsp;
2393 2392 interesting[sp] -= 1;
2394 2393 if (interesting[sp] == 0)
2395 2394 ninteresting -= 1;
2396 2395 if (interesting[nsp] == 0)
2397 2396 ninteresting += 1;
2398 2397 interesting[nsp] += 1;
2399 2398 }
2400 2399 }
2401 2400 interesting[sv] -= 1;
2402 2401 if (interesting[sv] == 0)
2403 2402 ninteresting -= 1;
2404 2403 }
2405 2404
2406 2405 final = 0;
2407 2406 j = ninteresting;
2408 2407 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2409 2408 if (interesting[i] == 0)
2410 2409 continue;
2411 2410 final |= i;
2412 2411 j -= 1;
2413 2412 }
2414 2413 if (final == 0) {
2415 2414 keys = PyList_New(0);
2416 2415 goto bail;
2417 2416 }
2418 2417
2419 2418 dict = PyDict_New();
2420 2419 if (dict == NULL)
2421 2420 goto bail;
2422 2421
2423 2422 for (i = 0; i < revcount; i++) {
2424 2423 PyObject *key;
2425 2424
2426 2425 if ((final & (1 << i)) == 0)
2427 2426 continue;
2428 2427
2429 2428 key = PyList_GET_ITEM(revs, i);
2430 2429 Py_INCREF(key);
2431 2430 Py_INCREF(Py_None);
2432 2431 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2433 2432 Py_DECREF(key);
2434 2433 Py_DECREF(Py_None);
2435 2434 goto bail;
2436 2435 }
2437 2436 }
2438 2437
2439 2438 keys = PyDict_Keys(dict);
2440 2439
2441 2440 bail:
2442 2441 free(depth);
2443 2442 free(seen);
2444 2443 free(interesting);
2445 2444 Py_XDECREF(dict);
2446 2445
2447 2446 return keys;
2448 2447 }
2449 2448
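find_deepest() is the second half of the GCA computation: among the candidate heads it keeps the ones farthest from the root. The C code gets there with the depth/seen/interesting bookkeeping above; the sketch below instead follows the comment's definition literally, recomputing every depth, so treat it as a statement of intent rather than a faithful port, and the names as made up.

def deepest(parents, candidates):
    maxrev = max(candidates)
    depth = [0] * (maxrev + 1)
    for v in range(maxrev + 1):
        ps = [p for p in parents(v) if p != -1]
        # length of the longest path from the root to v, counting v itself
        depth[v] = 1 + (max(depth[p] for p in ps) if ps else 0)
    best = max(depth[c] for c in candidates)
    return [c for c in candidates if depth[c] == best]


# deepest({0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (0, -1)}.get, [2, 3]) == [2]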
2450 2449 /*
2451 2450 * Given a (possibly overlapping) set of revs, return all the
2452 2451 * common ancestors heads: heads(::args[0] and ::args[1] and ...)
2453 2452 */
2454 2453 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2455 2454 {
2456 2455 PyObject *ret = NULL;
2457 2456 Py_ssize_t argcount, i, len;
2458 2457 bitmask repeat = 0;
2459 2458 int revcount = 0;
2460 2459 int *revs;
2461 2460
2462 2461 argcount = PySequence_Length(args);
2463 2462 revs = PyMem_Malloc(argcount * sizeof(*revs));
2464 2463 if (argcount > 0 && revs == NULL)
2465 2464 return PyErr_NoMemory();
2466 2465 len = index_length(self);
2467 2466
2468 2467 for (i = 0; i < argcount; i++) {
2469 2468 static const int capacity = 24;
2470 2469 PyObject *obj = PySequence_GetItem(args, i);
2471 2470 bitmask x;
2472 2471 long val;
2473 2472
2474 2473 if (!PyInt_Check(obj)) {
2475 2474 PyErr_SetString(PyExc_TypeError,
2476 2475 "arguments must all be ints");
2477 2476 Py_DECREF(obj);
2478 2477 goto bail;
2479 2478 }
2480 2479 val = PyInt_AsLong(obj);
2481 2480 Py_DECREF(obj);
2482 2481 if (val == -1) {
2483 2482 ret = PyList_New(0);
2484 2483 goto done;
2485 2484 }
2486 2485 if (val < 0 || val >= len) {
2487 2486 PyErr_SetString(PyExc_IndexError, "index out of range");
2488 2487 goto bail;
2489 2488 }
2490 2489 /* this cheesy bloom filter lets us avoid some more
2491 2490 * expensive duplicate checks in the common set-is-disjoint
2492 2491 * case */
2493 2492 x = 1ull << (val & 0x3f);
2494 2493 if (repeat & x) {
2495 2494 int k;
2496 2495 for (k = 0; k < revcount; k++) {
2497 2496 if (val == revs[k])
2498 2497 goto duplicate;
2499 2498 }
2500 2499 } else
2501 2500 repeat |= x;
2502 2501 if (revcount >= capacity) {
2503 2502 PyErr_Format(PyExc_OverflowError,
2504 2503 "bitset size (%d) > capacity (%d)",
2505 2504 revcount, capacity);
2506 2505 goto bail;
2507 2506 }
2508 2507 revs[revcount++] = (int)val;
2509 2508 duplicate:;
2510 2509 }
2511 2510
2512 2511 if (revcount == 0) {
2513 2512 ret = PyList_New(0);
2514 2513 goto done;
2515 2514 }
2516 2515 if (revcount == 1) {
2517 2516 PyObject *obj;
2518 2517 ret = PyList_New(1);
2519 2518 if (ret == NULL)
2520 2519 goto bail;
2521 2520 obj = PyInt_FromLong(revs[0]);
2522 2521 if (obj == NULL)
2523 2522 goto bail;
2524 2523 PyList_SET_ITEM(ret, 0, obj);
2525 2524 goto done;
2526 2525 }
2527 2526
2528 2527 ret = find_gca_candidates(self, revs, revcount);
2529 2528 if (ret == NULL)
2530 2529 goto bail;
2531 2530
2532 2531 done:
2533 2532 PyMem_Free(revs);
2534 2533 return ret;
2535 2534
2536 2535 bail:
2537 2536 PyMem_Free(revs);
2538 2537 Py_XDECREF(ret);
2539 2538 return NULL;
2540 2539 }
2541 2540
2542 2541 /*
2543 2542 * Given a (possibly overlapping) set of revs, return the greatest
2544 2543 * common ancestors: those with the longest path to the root.
2545 2544 */
2546 2545 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2547 2546 {
2548 2547 PyObject *ret;
2549 2548 PyObject *gca = index_commonancestorsheads(self, args);
2550 2549 if (gca == NULL)
2551 2550 return NULL;
2552 2551
2553 2552 if (PyList_GET_SIZE(gca) <= 1) {
2554 2553 return gca;
2555 2554 }
2556 2555
2557 2556 ret = find_deepest(self, gca);
2558 2557 Py_DECREF(gca);
2559 2558 return ret;
2560 2559 }
2561 2560
2562 2561 /*
2563 2562 * Invalidate any trie entries introduced by added revs.
2564 2563 */
2565 2564 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2566 2565 {
2567 2566 Py_ssize_t i, len;
2568 2567
2569 2568 len = self->length + self->new_length;
2570 2569 i = start - self->length;
2571 2570 if (i < 0)
2572 2571 return;
2573 2572
2574 2573 for (i = start; i < len; i++)
2575 2574 nt_delete_node(&self->nt, index_deref(self, i) + 32);
2576 2575
2577 2576 self->new_length = start - self->length;
2578 2577 }
2579 2578
2580 2579 /*
2581 2580 * Delete a numeric range of revs, which must be at the end of the
2582 2581 * index.
2583 2582 */
2584 2583 static int index_slice_del(indexObject *self, PyObject *item)
2585 2584 {
2586 2585 Py_ssize_t start, stop, step, slicelength;
2587 2586 Py_ssize_t length = index_length(self) + 1;
2588 2587 int ret = 0;
2589 2588
2590 2589 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2591 2590 #ifdef IS_PY3K
2592 2591 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2593 2592 &slicelength) < 0)
2594 2593 #else
2595 2594 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2596 2595 &step, &slicelength) < 0)
2597 2596 #endif
2598 2597 return -1;
2599 2598
2600 2599 if (slicelength <= 0)
2601 2600 return 0;
2602 2601
2603 2602 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2604 2603 stop = start;
2605 2604
2606 2605 if (step < 0) {
2607 2606 stop = start + 1;
2608 2607 start = stop + step * (slicelength - 1) - 1;
2609 2608 step = -step;
2610 2609 }
2611 2610
2612 2611 if (step != 1) {
2613 2612 PyErr_SetString(PyExc_ValueError,
2614 2613 "revlog index delete requires step size of 1");
2615 2614 return -1;
2616 2615 }
2617 2616
2618 2617 if (stop != length - 1) {
2619 2618 PyErr_SetString(PyExc_IndexError,
2620 2619 "revlog index deletion indices are invalid");
2621 2620 return -1;
2622 2621 }
2623 2622
2624 2623 if (start < self->length) {
2625 2624 if (self->ntinitialized) {
2626 2625 Py_ssize_t i;
2627 2626
2628 2627 for (i = start; i < self->length; i++) {
2629 2628 const char *node = index_node_existing(self, i);
2630 2629 if (node == NULL)
2631 2630 return -1;
2632 2631
2633 2632 nt_delete_node(&self->nt, node);
2634 2633 }
2635 2634 if (self->new_length)
2636 2635 index_invalidate_added(self, self->length);
2637 2636 if (self->ntrev > start)
2638 2637 self->ntrev = (int)start;
2639 2638 } else if (self->new_length) {
2640 2639 self->new_length = 0;
2641 2640 }
2642 2641
2643 2642 self->length = start;
2644 2643 goto done;
2645 2644 }
2646 2645
2647 2646 if (self->ntinitialized) {
2648 2647 index_invalidate_added(self, start);
2649 2648 if (self->ntrev > start)
2650 2649 self->ntrev = (int)start;
2651 2650 } else {
2652 2651 self->new_length = start - self->length;
2653 2652 }
2654 2653 done:
2655 2654 Py_CLEAR(self->headrevs);
2656 2655 return ret;
2657 2656 }
2658 2657
2659 2658 /*
2660 2659 * Supported ops:
2661 2660 *
2662 2661 * slice deletion
2663 2662 * string assignment (extend node->rev mapping)
2664 2663 * string deletion (shrink node->rev mapping)
2665 2664 */
2666 2665 static int index_assign_subscript(indexObject *self, PyObject *item,
2667 2666 PyObject *value)
2668 2667 {
2669 2668 char *node;
2670 2669 long rev;
2671 2670
2672 2671 if (PySlice_Check(item) && value == NULL)
2673 2672 return index_slice_del(self, item);
2674 2673
2675 2674 if (node_check(self->nodelen, item, &node) == -1)
2676 2675 return -1;
2677 2676
2678 2677 if (value == NULL)
2679 2678 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2680 2679 : 0;
2681 2680 rev = PyInt_AsLong(value);
2682 2681 if (rev > INT_MAX || rev < 0) {
2683 2682 if (!PyErr_Occurred())
2684 2683 PyErr_SetString(PyExc_ValueError, "rev out of range");
2685 2684 return -1;
2686 2685 }
2687 2686
2688 2687 if (index_init_nt(self) == -1)
2689 2688 return -1;
2690 2689 return nt_insert(&self->nt, node, (int)rev);
2691 2690 }
2692 2691
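In Python terms the three supported operations look like this; a hedged sketch in which index stands for a parsers.index instance, node is a full binary nodeid, and the slice form follows the "a:-1 with step 1" restriction that the pure-Python classes further down spell out in their error message.

# del index[start:-1]   # slice deletion: drop the trailing revisions >= start
# index[node] = rev     # string assignment: extend the node->rev mapping
# del index[node]       # string deletion: shrink the node->rev mapping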
2693 2692 /*
2694 2693 * Find all RevlogNG entries in an index that has inline data. Update
2695 2694 * the optional "offsets" table with those entries.
2696 2695 */
2697 2696 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2698 2697 {
2699 2698 const char *data = (const char *)self->buf.buf;
2700 2699 Py_ssize_t pos = 0;
2701 2700 Py_ssize_t end = self->buf.len;
2702 2701 long incr = self->entry_size;
2703 2702 Py_ssize_t len = 0;
2704 2703
2705 2704 while (pos + self->entry_size <= end && pos >= 0) {
2706 2705 uint32_t comp_len, sidedata_comp_len = 0;
2707 2706 /* 3rd element of header is length of compressed inline data */
2708 2707 comp_len = getbe32(data + pos + 8);
2709 2708 if (self->entry_size == v2_entry_size) {
2710 2709 sidedata_comp_len = getbe32(data + pos + 72);
2711 2710 }
2712 2711 incr = self->entry_size + comp_len + sidedata_comp_len;
2713 2712 if (offsets)
2714 2713 offsets[len] = data + pos;
2715 2714 len++;
2716 2715 pos += incr;
2717 2716 }
2718 2717
2719 2718 if (pos != end) {
2720 2719 if (!PyErr_Occurred())
2721 2720 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2722 2721 return -1;
2723 2722 }
2724 2723
2725 2724 return len;
2726 2725 }
2727 2726
2728 2727 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2729 2728 {
2730 2729 PyObject *data_obj, *inlined_obj, *revlogv2;
2731 2730 Py_ssize_t size;
2732 2731
2733 2732 static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
2734 2733
2735 2734 /* Initialize before argument-checking to avoid index_dealloc() crash.
2736 2735 */
2737 2736 self->added = NULL;
2738 2737 self->new_length = 0;
2739 2738 self->added_length = 0;
2740 2739 self->data = NULL;
2741 2740 memset(&self->buf, 0, sizeof(self->buf));
2742 2741 self->headrevs = NULL;
2743 2742 self->filteredrevs = Py_None;
2744 2743 Py_INCREF(Py_None);
2745 2744 self->ntinitialized = 0;
2746 2745 self->offsets = NULL;
2747 2746 self->nodelen = 20;
2748 2747 self->nullentry = NULL;
2749 2748
2750 2749 revlogv2 = NULL;
2751 2750 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
2752 2751 &data_obj, &inlined_obj, &revlogv2))
2753 2752 return -1;
2754 2753 if (!PyObject_CheckBuffer(data_obj)) {
2755 2754 PyErr_SetString(PyExc_TypeError,
2756 2755 "data does not support buffer interface");
2757 2756 return -1;
2758 2757 }
2759 2758 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
2760 2759 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
2761 2760 return -1;
2762 2761 }
2763 2762
2764 2763 if (revlogv2 && PyObject_IsTrue(revlogv2)) {
2765 2764 self->format_version = format_v2;
2766 2765 self->entry_size = v2_entry_size;
2767 2766 } else {
2768 2767 self->format_version = format_v1;
2769 2768 self->entry_size = v1_entry_size;
2770 2769 }
2771 2770
2772 if (self->format_version == format_v1) {
2773 self->nullentry =
2774 Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
2775 -1, -1, -1, nullid, self->nodelen);
2776 } else {
2777 self->nullentry =
2778 Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0,
2779 -1, -1, -1, -1, nullid, self->nodelen, 0, 0);
2780 }
2771 self->nullentry =
2772 Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0, -1, -1,
2773 -1, -1, nullid, self->nodelen, 0, 0);
2781 2774
2782 2775 if (!self->nullentry)
2783 2776 return -1;
2784 2777 PyObject_GC_UnTrack(self->nullentry);
2785 2778
2786 2779 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2787 2780 return -1;
2788 2781 size = self->buf.len;
2789 2782
2790 2783 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2791 2784 self->data = data_obj;
2792 2785
2793 2786 self->ntlookups = self->ntmisses = 0;
2794 2787 self->ntrev = -1;
2795 2788 Py_INCREF(self->data);
2796 2789
2797 2790 if (self->inlined) {
2798 2791 Py_ssize_t len = inline_scan(self, NULL);
2799 2792 if (len == -1)
2800 2793 goto bail;
2801 2794 self->length = len;
2802 2795 } else {
2803 2796 if (size % self->entry_size) {
2804 2797 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2805 2798 goto bail;
2806 2799 }
2807 2800 self->length = size / self->entry_size;
2808 2801 }
2809 2802
2810 2803 return 0;
2811 2804 bail:
2812 2805 return -1;
2813 2806 }
2814 2807
2815 2808 static PyObject *index_nodemap(indexObject *self)
2816 2809 {
2817 2810 Py_INCREF(self);
2818 2811 return (PyObject *)self;
2819 2812 }
2820 2813
2821 2814 static void _index_clearcaches(indexObject *self)
2822 2815 {
2823 2816 if (self->offsets) {
2824 2817 PyMem_Free((void *)self->offsets);
2825 2818 self->offsets = NULL;
2826 2819 }
2827 2820 if (self->ntinitialized) {
2828 2821 nt_dealloc(&self->nt);
2829 2822 }
2830 2823 self->ntinitialized = 0;
2831 2824 Py_CLEAR(self->headrevs);
2832 2825 }
2833 2826
2834 2827 static PyObject *index_clearcaches(indexObject *self)
2835 2828 {
2836 2829 _index_clearcaches(self);
2837 2830 self->ntrev = -1;
2838 2831 self->ntlookups = self->ntmisses = 0;
2839 2832 Py_RETURN_NONE;
2840 2833 }
2841 2834
2842 2835 static void index_dealloc(indexObject *self)
2843 2836 {
2844 2837 _index_clearcaches(self);
2845 2838 Py_XDECREF(self->filteredrevs);
2846 2839 if (self->buf.buf) {
2847 2840 PyBuffer_Release(&self->buf);
2848 2841 memset(&self->buf, 0, sizeof(self->buf));
2849 2842 }
2850 2843 Py_XDECREF(self->data);
2851 2844 PyMem_Free(self->added);
2852 2845 Py_XDECREF(self->nullentry);
2853 2846 PyObject_Del(self);
2854 2847 }
2855 2848
2856 2849 static PySequenceMethods index_sequence_methods = {
2857 2850 (lenfunc)index_length, /* sq_length */
2858 2851 0, /* sq_concat */
2859 2852 0, /* sq_repeat */
2860 2853 (ssizeargfunc)index_get, /* sq_item */
2861 2854 0, /* sq_slice */
2862 2855 0, /* sq_ass_item */
2863 2856 0, /* sq_ass_slice */
2864 2857 (objobjproc)index_contains, /* sq_contains */
2865 2858 };
2866 2859
2867 2860 static PyMappingMethods index_mapping_methods = {
2868 2861 (lenfunc)index_length, /* mp_length */
2869 2862 (binaryfunc)index_getitem, /* mp_subscript */
2870 2863 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2871 2864 };
2872 2865
2873 2866 static PyMethodDef index_methods[] = {
2874 2867 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2875 2868 "return the gca set of the given revs"},
2876 2869 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2877 2870 METH_VARARGS,
2878 2871 "return the heads of the common ancestors of the given revs"},
2879 2872 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2880 2873 "clear the index caches"},
2881 2874 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2882 2875 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
2883 2876 "return `rev` associated with a node or None"},
2884 2877 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2885 2878 "return True if the node exist in the index"},
2886 2879 {"rev", (PyCFunction)index_m_rev, METH_O,
2887 2880 "return `rev` associated with a node or raise RevlogError"},
2888 2881 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2889 2882 "compute phases"},
2890 2883 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2891 2884 "reachableroots"},
2892 2885 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
2893 2886 METH_VARARGS, "replace an existing index entry with a new value"},
2894 2887 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2895 2888 "get head revisions"}, /* Can do filtering since 3.2 */
2896 2889 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2897 2890 "get filtered head revisions"}, /* Can always do filtering */
2898 2891 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2899 2892 "True if the object is a snapshot"},
2900 2893 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2901 2894 "Gather snapshot data in a cache dict"},
2902 2895 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2903 2896 "determine revisions with deltas to reconstruct fulltext"},
2904 2897 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2905 2898 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2906 2899 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2907 2900 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2908 2901 "match a potentially ambiguous node ID"},
2909 2902 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2910 2903 "find length of shortest hex nodeid of a binary ID"},
2911 2904 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2912 2905 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
2913 2906 "return an entry in binary form"},
2914 2907 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
2915 2908 "pack the revlog header information into binary"},
2916 2909 {NULL} /* Sentinel */
2917 2910 };
2918 2911
2919 2912 static PyGetSetDef index_getset[] = {
2920 2913 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2921 2914 {NULL} /* Sentinel */
2922 2915 };
2923 2916
2924 2917 static PyMemberDef index_members[] = {
2925 2918 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
2926 2919 "size of an index entry"},
2927 2920 {NULL} /* Sentinel */
2928 2921 };
2929 2922
2930 2923 PyTypeObject HgRevlogIndex_Type = {
2931 2924 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2932 2925 "parsers.index", /* tp_name */
2933 2926 sizeof(indexObject), /* tp_basicsize */
2934 2927 0, /* tp_itemsize */
2935 2928 (destructor)index_dealloc, /* tp_dealloc */
2936 2929 0, /* tp_print */
2937 2930 0, /* tp_getattr */
2938 2931 0, /* tp_setattr */
2939 2932 0, /* tp_compare */
2940 2933 0, /* tp_repr */
2941 2934 0, /* tp_as_number */
2942 2935 &index_sequence_methods, /* tp_as_sequence */
2943 2936 &index_mapping_methods, /* tp_as_mapping */
2944 2937 0, /* tp_hash */
2945 2938 0, /* tp_call */
2946 2939 0, /* tp_str */
2947 2940 0, /* tp_getattro */
2948 2941 0, /* tp_setattro */
2949 2942 0, /* tp_as_buffer */
2950 2943 Py_TPFLAGS_DEFAULT, /* tp_flags */
2951 2944 "revlog index", /* tp_doc */
2952 2945 0, /* tp_traverse */
2953 2946 0, /* tp_clear */
2954 2947 0, /* tp_richcompare */
2955 2948 0, /* tp_weaklistoffset */
2956 2949 0, /* tp_iter */
2957 2950 0, /* tp_iternext */
2958 2951 index_methods, /* tp_methods */
2959 2952 index_members, /* tp_members */
2960 2953 index_getset, /* tp_getset */
2961 2954 0, /* tp_base */
2962 2955 0, /* tp_dict */
2963 2956 0, /* tp_descr_get */
2964 2957 0, /* tp_descr_set */
2965 2958 0, /* tp_dictoffset */
2966 2959 (initproc)index_init, /* tp_init */
2967 2960 0, /* tp_alloc */
2968 2961 };
2969 2962
2970 2963 /*
2971 2964 * returns a tuple of the form (index, cache) with elements as
2972 2965 * follows:
2973 2966 *
2974 2967 * index: an index object that lazily parses Revlog (v1 or v2) records
2975 2968 * cache: if data is inlined, a tuple (0, index_file_content), else None
2976 2969 * index_file_content could be a string, or a buffer
2977 2970 *
2978 2971 * added complications are for backwards compatibility
2979 2972 */
2980 2973 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
2981 2974 {
2982 2975 PyObject *cache = NULL;
2983 2976 indexObject *idx;
2984 2977 int ret;
2985 2978
2986 2979 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2987 2980 if (idx == NULL)
2988 2981 goto bail;
2989 2982
2990 2983 ret = index_init(idx, args, kwargs);
2991 2984 if (ret == -1)
2992 2985 goto bail;
2993 2986
2994 2987 if (idx->inlined) {
2995 2988 cache = Py_BuildValue("iO", 0, idx->data);
2996 2989 if (cache == NULL)
2997 2990 goto bail;
2998 2991 } else {
2999 2992 cache = Py_None;
3000 2993 Py_INCREF(cache);
3001 2994 }
3002 2995
3003 2996 return Py_BuildValue("NN", idx, cache);
3004 2997
3005 2998 bail:
3006 2999 Py_XDECREF(idx);
3007 3000 Py_XDECREF(cache);
3008 3001 return NULL;
3009 3002 }
3010 3003
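A hedged usage sketch of the (index, cache) contract described above. The module path and the keyword name follow the kwlist in index_init ("data", "inlined", "revlogv2"), but the wrapper is illustrative and assumes the C extension is built.

from mercurial.cext import parsers

def load_index(data, inlined, revlogv2=False):
    index, cache = parsers.parse_index2(data, inlined, revlogv2=revlogv2)
    # cache is (0, data) for an inlined revlog, None otherwise
    return index, cache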
3011 3004 static Revlog_CAPI CAPI = {
3012 3005 /* increment the abi_version field upon each change in the Revlog_CAPI
3013 3006 struct or in the ABI of the listed functions */
3014 3007 2,
3015 3008 index_length,
3016 3009 index_node,
3017 3010 HgRevlogIndex_GetParents,
3018 3011 };
3019 3012
3020 3013 void revlog_module_init(PyObject *mod)
3021 3014 {
3022 3015 PyObject *caps = NULL;
3023 3016 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3024 3017 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3025 3018 return;
3026 3019 Py_INCREF(&HgRevlogIndex_Type);
3027 3020 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3028 3021
3029 3022 nodetreeType.tp_new = PyType_GenericNew;
3030 3023 if (PyType_Ready(&nodetreeType) < 0)
3031 3024 return;
3032 3025 Py_INCREF(&nodetreeType);
3033 3026 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3034 3027
3035 3028 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3036 3029 if (caps != NULL)
3037 3030 PyModule_AddObject(mod, "revlog_CAPI", caps);
3038 3031 }
@@ -1,384 +1,391 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import (
14 14 nullrev,
15 15 sha1nodeconstants,
16 16 )
17 17 from .. import (
18 18 pycompat,
19 19 util,
20 20 )
21 21
22 22 from ..revlogutils import nodemap as nodemaputil
23 23 from ..revlogutils import constants as revlog_constants
24 24
25 25 stringio = pycompat.bytesio
26 26
27 27
28 28 _pack = struct.pack
29 29 _unpack = struct.unpack
30 30 _compress = zlib.compress
31 31 _decompress = zlib.decompress
32 32
33 33 # Some code below makes tuples directly because it's more convenient. However,
34 34 # code outside this module should always use dirstatetuple.
35 35 def dirstatetuple(*x):
36 36 # x is a tuple
37 37 return x
38 38
39 39
40 40 def gettype(q):
41 41 return int(q & 0xFFFF)
42 42
43 43
44 44 def offset_type(offset, type):
45 45 return int(int(offset) << 16 | type)
46 46
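offset_type() packs the byte offset and the 16 flag bits into a single integer, and gettype() recovers the flag half (the matching getoffset() lives in the revlogv0 hunk further down). A quick worked example exercising the two helpers just above, with made-up numbers:

packed = offset_type(4096, 0)   # 4096 << 16 | 0 == 268435456
assert gettype(packed) == 0
assert packed >> 16 == 4096     # what getoffset() computes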
47 47
48 48 class BaseIndexObject(object):
49 49 # Format of an index entry according to Python's `struct` language
50 50 index_format = revlog_constants.INDEX_ENTRY_V1
51 51 # Size of a C unsigned long long int, platform independent
52 52 big_int_size = struct.calcsize(b'>Q')
53 53 # Size of a C long int, platform independent
54 54 int_size = struct.calcsize(b'>i')
55 55 # An empty index entry, used as a default value to be overridden, or nullrev
56 null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
56 null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
57 57
58 58 @util.propertycache
59 59 def entry_size(self):
60 60 return self.index_format.size
61 61
62 62 @property
63 63 def nodemap(self):
64 64 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
65 65 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
66 66 return self._nodemap
67 67
68 68 @util.propertycache
69 69 def _nodemap(self):
70 70 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
71 71 for r in range(0, len(self)):
72 72 n = self[r][7]
73 73 nodemap[n] = r
74 74 return nodemap
75 75
76 76 def has_node(self, node):
77 77 """return True if the node exist in the index"""
78 78 return node in self._nodemap
79 79
80 80 def rev(self, node):
81 81 """return a revision for a node
82 82
83 83 If the node is unknown, raise a RevlogError"""
84 84 return self._nodemap[node]
85 85
86 86 def get_rev(self, node):
87 87 """return a revision for a node
88 88
89 89 If the node is unknown, return None"""
90 90 return self._nodemap.get(node)
91 91
92 92 def _stripnodes(self, start):
93 93 if '_nodemap' in vars(self):
94 94 for r in range(start, len(self)):
95 95 n = self[r][7]
96 96 del self._nodemap[n]
97 97
98 98 def clearcaches(self):
99 99 self.__dict__.pop('_nodemap', None)
100 100
101 101 def __len__(self):
102 102 return self._lgt + len(self._extra)
103 103
104 104 def append(self, tup):
105 105 if '_nodemap' in vars(self):
106 106 self._nodemap[tup[7]] = len(self)
107 107 data = self.index_format.pack(*tup)
108 108 self._extra.append(data)
109 109
110 110 def _check_index(self, i):
111 111 if not isinstance(i, int):
112 112 raise TypeError(b"expecting int indexes")
113 113 if i < 0 or i >= len(self):
114 114 raise IndexError
115 115
116 116 def __getitem__(self, i):
117 117 if i == -1:
118 118 return self.null_item
119 119 self._check_index(i)
120 120 if i >= self._lgt:
121 121 data = self._extra[i - self._lgt]
122 122 else:
123 123 index = self._calculate_index(i)
124 124 data = self._data[index : index + self.entry_size]
125 r = self.index_format.unpack(data)
125 r = self._unpack_entry(data)
126 126 if self._lgt and i == 0:
127 127 r = (offset_type(0, gettype(r[0])),) + r[1:]
128 128 return r
129 129
130 def _unpack_entry(self, data):
131 r = self.index_format.unpack(data)
132 r = r + (0, 0)
133 return r
134
130 135 def pack_header(self, header):
131 136 """pack header information as binary"""
132 137 v_fmt = revlog_constants.INDEX_HEADER
133 138 return v_fmt.pack(header)
134 139
135 140 def entry_binary(self, rev):
136 141 """return the raw binary string representing a revision"""
137 142 entry = self[rev]
138 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry)
143 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
139 144 if rev == 0:
140 145 p = p[revlog_constants.INDEX_HEADER.size :]
141 146 return p
142 147
143 148
144 149 class IndexObject(BaseIndexObject):
145 150 def __init__(self, data):
146 151 assert len(data) % self.entry_size == 0, (
147 152 len(data),
148 153 self.entry_size,
149 154 len(data) % self.entry_size,
150 155 )
151 156 self._data = data
152 157 self._lgt = len(data) // self.entry_size
153 158 self._extra = []
154 159
155 160 def _calculate_index(self, i):
156 161 return i * self.entry_size
157 162
158 163 def __delitem__(self, i):
159 164 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
160 165 raise ValueError(b"deleting slices only supports a:-1 with step 1")
161 166 i = i.start
162 167 self._check_index(i)
163 168 self._stripnodes(i)
164 169 if i < self._lgt:
165 170 self._data = self._data[: i * self.entry_size]
166 171 self._lgt = i
167 172 self._extra = []
168 173 else:
169 174 self._extra = self._extra[: i - self._lgt]
170 175
171 176
172 177 class PersistentNodeMapIndexObject(IndexObject):
173 178 """a Debug oriented class to test persistent nodemap
174 179
175 180 We need a simple python object to test API and higher level behavior. See
176 181 the Rust implementation for more serious usage. This should be used only
177 182 through the dedicated `devel.persistent-nodemap` config.
178 183 """
179 184
180 185 def nodemap_data_all(self):
181 186 """Return bytes containing a full serialization of a nodemap
182 187
183 188 The nodemap should be valid for the full set of revisions in the
184 189 index."""
185 190 return nodemaputil.persistent_data(self)
186 191
187 192 def nodemap_data_incremental(self):
188 193 """Return bytes containing a incremental update to persistent nodemap
189 194
190 195 This contains the data for an append-only update of the data provided
191 196 in the last call to `update_nodemap_data`.
192 197 """
193 198 if self._nm_root is None:
194 199 return None
195 200 docket = self._nm_docket
196 201 changed, data = nodemaputil.update_persistent_data(
197 202 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
198 203 )
199 204
200 205 self._nm_root = self._nm_max_idx = self._nm_docket = None
201 206 return docket, changed, data
202 207
203 208 def update_nodemap_data(self, docket, nm_data):
204 209 """provide full block of persisted binary data for a nodemap
205 210
206 211 The data are expected to come from disk. See `nodemap_data_all` for a
207 212 producer of such data."""
208 213 if nm_data is not None:
209 214 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
210 215 if self._nm_root:
211 216 self._nm_docket = docket
212 217 else:
213 218 self._nm_root = self._nm_max_idx = self._nm_docket = None
214 219
215 220
216 221 class InlinedIndexObject(BaseIndexObject):
217 222 def __init__(self, data, inline=0):
218 223 self._data = data
219 224 self._lgt = self._inline_scan(None)
220 225 self._inline_scan(self._lgt)
221 226 self._extra = []
222 227
223 228 def _inline_scan(self, lgt):
224 229 off = 0
225 230 if lgt is not None:
226 231 self._offsets = [0] * lgt
227 232 count = 0
228 233 while off <= len(self._data) - self.entry_size:
229 234 start = off + self.big_int_size
230 235 (s,) = struct.unpack(
231 236 b'>i',
232 237 self._data[start : start + self.int_size],
233 238 )
234 239 if lgt is not None:
235 240 self._offsets[count] = off
236 241 count += 1
237 242 off += self.entry_size + s
238 243 if off != len(self._data):
239 244 raise ValueError(b"corrupted data")
240 245 return count
241 246
242 247 def __delitem__(self, i):
243 248 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
244 249 raise ValueError(b"deleting slices only supports a:-1 with step 1")
245 250 i = i.start
246 251 self._check_index(i)
247 252 self._stripnodes(i)
248 253 if i < self._lgt:
249 254 self._offsets = self._offsets[:i]
250 255 self._lgt = i
251 256 self._extra = []
252 257 else:
253 258 self._extra = self._extra[: i - self._lgt]
254 259
255 260 def _calculate_index(self, i):
256 261 return self._offsets[i]
257 262
258 263
259 264 def parse_index2(data, inline, revlogv2=False):
260 265 if not inline:
261 266 cls = IndexObject2 if revlogv2 else IndexObject
262 267 return cls(data), None
263 268 cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
264 269 return cls(data, inline), (0, data)
265 270
266 271
267 272 class Index2Mixin(object):
268 273 index_format = revlog_constants.INDEX_ENTRY_V2
269 null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
270 274
271 275 def replace_sidedata_info(
272 276 self, i, sidedata_offset, sidedata_length, offset_flags
273 277 ):
274 278 """
275 279 Replace an existing index entry's sidedata offset and length with new
276 280 ones.
277 281 This cannot be used outside of the context of sidedata rewriting,
278 282 inside the transaction that creates the revision `i`.
279 283 """
280 284 if i < 0:
281 285 raise KeyError
282 286 self._check_index(i)
283 287 sidedata_format = b">Qi"
284 288 packed_size = struct.calcsize(sidedata_format)
285 289 if i >= self._lgt:
286 290 packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
287 291 old = self._extra[i - self._lgt]
288 292 offset_flags = struct.pack(b">Q", offset_flags)
289 293 new = offset_flags + old[8:64] + packed + old[64 + packed_size :]
290 294 self._extra[i - self._lgt] = new
291 295 else:
292 296 msg = b"cannot rewrite entries outside of this transaction"
293 297 raise KeyError(msg)
294 298
299 def _unpack_entry(self, data):
300 return self.index_format.unpack(data)
301
295 302 def entry_binary(self, rev):
296 303 """return the raw binary string representing a revision"""
297 304 entry = self[rev]
298 305 p = revlog_constants.INDEX_ENTRY_V2.pack(*entry)
299 306 if rev == 0:
300 307 p = p[revlog_constants.INDEX_HEADER.size :]
301 308 return p
302 309
303 310
304 311 class IndexObject2(Index2Mixin, IndexObject):
305 312 pass
306 313
307 314
308 315 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
309 316 def _inline_scan(self, lgt):
310 317 sidedata_length_pos = 72
311 318 off = 0
312 319 if lgt is not None:
313 320 self._offsets = [0] * lgt
314 321 count = 0
315 322 while off <= len(self._data) - self.entry_size:
316 323 start = off + self.big_int_size
317 324 (data_size,) = struct.unpack(
318 325 b'>i',
319 326 self._data[start : start + self.int_size],
320 327 )
321 328 start = off + sidedata_length_pos
322 329 (side_data_size,) = struct.unpack(
323 330 b'>i', self._data[start : start + self.int_size]
324 331 )
325 332 if lgt is not None:
326 333 self._offsets[count] = off
327 334 count += 1
328 335 off += self.entry_size + data_size + side_data_size
329 336 if off != len(self._data):
330 337 raise ValueError(b"corrupted data")
331 338 return count
332 339
333 340
334 341 def parse_index_devel_nodemap(data, inline):
335 342 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
336 343 return PersistentNodeMapIndexObject(data), None
337 344
338 345
339 346 def parse_dirstate(dmap, copymap, st):
340 347 parents = [st[:20], st[20:40]]
341 348 # dereference fields so they will be local in loop
342 349 format = b">cllll"
343 350 e_size = struct.calcsize(format)
344 351 pos1 = 40
345 352 l = len(st)
346 353
347 354 # the inner loop
348 355 while pos1 < l:
349 356 pos2 = pos1 + e_size
350 357 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
351 358 pos1 = pos2 + e[4]
352 359 f = st[pos2:pos1]
353 360 if b'\0' in f:
354 361 f, c = f.split(b'\0')
355 362 copymap[f] = c
356 363 dmap[f] = e[:4]
357 364 return parents
358 365
359 366
360 367 def pack_dirstate(dmap, copymap, pl, now):
361 368 now = int(now)
362 369 cs = stringio()
363 370 write = cs.write
364 371 write(b"".join(pl))
365 372 for f, e in pycompat.iteritems(dmap):
366 373 if e[0] == b'n' and e[3] == now:
367 374 # The file was last modified "simultaneously" with the current
368 375 # write to dirstate (i.e. within the same second for file-
369 376 # systems with a granularity of 1 sec). This commonly happens
370 377 # for at least a couple of files on 'update'.
371 378 # The user could change the file without changing its size
372 379 # within the same second. Invalidate the file's mtime in
373 380 # dirstate, forcing future 'status' calls to compare the
374 381 # contents of the file if the size is the same. This prevents
375 382 # mistakenly treating such files as clean.
376 383 e = dirstatetuple(e[0], e[1], e[2], -1)
377 384 dmap[f] = e
378 385
379 386 if f in copymap:
380 387 f = b"%s\0%s" % (f, copymap[f])
381 388 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
382 389 write(e)
383 390 write(f)
384 391 return cs.getvalue()
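For reference, each dirstate record handled by the two loops above is a b">cllll" header (state, mode, size, mtime, name length) followed by that many bytes of name, with a NUL-joined copy source appended to the name when the file was copied. A hedged round-trip with made-up values:

import struct

fmt = b">cllll"
name = b"dir/file.txt"
record = struct.pack(fmt, b"n", 0o644, 1234, 1600000000, len(name)) + name

header_size = struct.calcsize(fmt)  # 17 bytes
state, mode, size, mtime, namelen = struct.unpack(fmt, record[:header_size])
assert state == b"n" and record[header_size:header_size + namelen] == name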
@@ -1,144 +1,146 b''
1 1 # revlogv0 - code related to revlog format "V0"
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9
10 10 from ..node import sha1nodeconstants
11 11 from .constants import (
12 12 INDEX_ENTRY_V0,
13 13 )
14 14 from ..i18n import _
15 15
16 16 from .. import (
17 17 error,
18 18 node,
19 19 pycompat,
20 20 util,
21 21 )
22 22
23 23 from . import (
24 24 flagutil,
25 25 nodemap as nodemaputil,
26 26 )
27 27
28 28
29 29 def getoffset(q):
30 30 return int(q >> 16)
31 31
32 32
33 33 def gettype(q):
34 34 return int(q & 0xFFFF)
35 35
36 36
37 37 def offset_type(offset, type):
38 38 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
39 39 raise ValueError(b'unknown revlog index flags')
40 40 return int(int(offset) << 16 | type)
41 41
42 42
43 43 class revlogoldindex(list):
44 44 entry_size = INDEX_ENTRY_V0.size
45 45
46 46 @property
47 47 def nodemap(self):
48 48 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
49 49 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
50 50 return self._nodemap
51 51
52 52 @util.propertycache
53 53 def _nodemap(self):
54 54 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: node.nullrev})
55 55 for r in range(0, len(self)):
56 56 n = self[r][7]
57 57 nodemap[n] = r
58 58 return nodemap
59 59
60 60 def has_node(self, node):
61 61 """return True if the node exist in the index"""
62 62 return node in self._nodemap
63 63
64 64 def rev(self, node):
65 65 """return a revision for a node
66 66
67 67 If the node is unknown, raise a RevlogError"""
68 68 return self._nodemap[node]
69 69
70 70 def get_rev(self, node):
71 71 """return a revision for a node
72 72
73 73 If the node is unknown, return None"""
74 74 return self._nodemap.get(node)
75 75
76 76 def append(self, tup):
77 77 self._nodemap[tup[7]] = len(self)
78 78 super(revlogoldindex, self).append(tup)
79 79
80 80 def __delitem__(self, i):
81 81 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
82 82 raise ValueError(b"deleting slices only supports a:-1 with step 1")
83 83 for r in pycompat.xrange(i.start, len(self)):
84 84 del self._nodemap[self[r][7]]
85 85 super(revlogoldindex, self).__delitem__(i)
86 86
87 87 def clearcaches(self):
88 88 self.__dict__.pop('_nodemap', None)
89 89
90 90 def __getitem__(self, i):
91 91 if i == -1:
92 92 return (0, 0, 0, -1, -1, -1, -1, node.nullid)
93 93 return list.__getitem__(self, i)
94 94
95 95 def pack_header(self, header):
96 96 """pack header information in binary"""
97 97 return b''
98 98
99 99 def entry_binary(self, rev):
100 100 """return the raw binary string representing a revision"""
101 101 entry = self[rev]
102 102 if gettype(entry[0]):
103 103 raise error.RevlogError(
104 104 _(b'index entry flags need revlog version 1')
105 105 )
106 106 e2 = (
107 107 getoffset(entry[0]),
108 108 entry[1],
109 109 entry[3],
110 110 entry[4],
111 111 self[entry[5]][7],
112 112 self[entry[6]][7],
113 113 entry[7],
114 114 )
115 115 return INDEX_ENTRY_V0.pack(*e2)
116 116
117 117
118 118 def parse_index_v0(data, inline):
119 119 s = INDEX_ENTRY_V0.size
120 120 index = []
121 121 nodemap = nodemaputil.NodeMap({node.nullid: node.nullrev})
122 122 n = off = 0
123 123 l = len(data)
124 124 while off + s <= l:
125 125 cur = data[off : off + s]
126 126 off += s
127 127 e = INDEX_ENTRY_V0.unpack(cur)
128 128 # transform to revlogv1 format
129 129 e2 = (
130 130 offset_type(e[0], 0),
131 131 e[1],
132 132 -1,
133 133 e[2],
134 134 e[3],
135 135 nodemap.get(e[4], node.nullrev),
136 136 nodemap.get(e[5], node.nullrev),
137 137 e[6],
138 0, # no side data support
139 0, # no side data support
138 140 )
139 141 index.append(e2)
140 142 nodemap[e[6]] = n
141 143 n += 1
142 144
143 145 index = revlogoldindex(index)
144 146 return index, None
@@ -1,295 +1,306 b''
1 1 # unionrepo.py - repository class for viewing union of repository changesets
2 2 #
3 3 # Derived from bundlerepo.py
4 4 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
5 5 # Copyright 2013 Unity Technologies, Mads Kiilerich <madski@unity3d.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Repository class for "in-memory pull" of one local repository to another,
11 11 allowing operations like diff and log with revsets.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 from .i18n import _
17 17 from .pycompat import getattr
18 18
19 19 from . import (
20 20 changelog,
21 21 cmdutil,
22 22 encoding,
23 23 error,
24 24 filelog,
25 25 localrepo,
26 26 manifest,
27 27 mdiff,
28 28 pathutil,
29 29 revlog,
30 30 util,
31 31 vfs as vfsmod,
32 32 )
33 33
34 34
35 35 class unionrevlog(revlog.revlog):
36 36 def __init__(self, opener, indexfile, revlog2, linkmapper):
37 37 # How it works:
38 38 # To retrieve a revision, we just need to know the node id so we can
39 39 # look it up in revlog2.
40 40 #
41 41 # To differentiate a rev in the second revlog from a rev in the revlog,
42 42 # we check revision against repotiprev.
43 43 opener = vfsmod.readonlyvfs(opener)
44 44 target = getattr(revlog2, 'target', None)
45 45 if target is None:
46 46 # a revlog wrapper, eg: the manifestlog that is not an actual revlog
47 47 target = revlog2._revlog.target
48 48 revlog.revlog.__init__(self, opener, target=target, indexfile=indexfile)
49 49 self.revlog2 = revlog2
50 50
51 51 n = len(self)
52 52 self.repotiprev = n - 1
53 53 self.bundlerevs = set() # used by 'bundle()' revset expression
54 54 for rev2 in self.revlog2:
55 55 rev = self.revlog2.index[rev2]
56 56 # rev numbers - in revlog2, very different from self.rev
57 _start, _csize, rsize, base, linkrev, p1rev, p2rev, node = rev
57 (
58 _start,
59 _csize,
60 rsize,
61 base,
62 linkrev,
63 p1rev,
64 p2rev,
65 node,
66 _sdo,
67 _sds,
68 ) = rev
58 69 flags = _start & 0xFFFF
59 70
60 71 if linkmapper is None: # link is to same revlog
61 72 assert linkrev == rev2 # we never link back
62 73 link = n
63 74 else: # rev must be mapped from repo2 cl to unified cl by linkmapper
64 75 link = linkmapper(linkrev)
65 76
66 77 if linkmapper is not None: # the delta base is remapped through linkmapper as well
67 78 base = linkmapper(base)
68 79
69 80 this_rev = self.index.get_rev(node)
70 81 if this_rev is not None:
71 82 # this happens for the common revlog revisions
72 83 self.bundlerevs.add(this_rev)
73 84 continue
74 85
75 86 p1node = self.revlog2.node(p1rev)
76 87 p2node = self.revlog2.node(p2rev)
77 88
78 89 # TODO: it's probably wrong to set compressed length to -1, but
79 90 # I have no idea if csize is valid in the base revlog context.
80 91 e = (
81 92 flags,
82 93 -1,
83 94 rsize,
84 95 base,
85 96 link,
86 97 self.rev(p1node),
87 98 self.rev(p2node),
88 99 node,
89 100 )
90 101 self.index.append(e)
91 102 self.bundlerevs.add(n)
92 103 n += 1
93 104
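
Because every index entry now carries the two trailing side-data fields, the destructuring above had to grow from eight names to ten. An alternative, hedged sketch that ignores the pair by slicing instead of naming it (shown only for illustration, not the form used in the module):

_start, _csize, rsize, base, linkrev, p1rev, p2rev, node = rev[:8]  # drop the side-data fields
flags = _start & 0xFFFF                                             # low 16 bits hold the flags
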
94 105 def _chunk(self, rev):
95 106 if rev <= self.repotiprev:
96 107 return revlog.revlog._chunk(self, rev)
97 108 return self.revlog2._chunk(self.node(rev))
98 109
99 110 def revdiff(self, rev1, rev2):
100 111 """return or calculate a delta between two revisions"""
101 112 if rev1 > self.repotiprev and rev2 > self.repotiprev:
102 113 return self.revlog2.revdiff(
103 114 self.revlog2.rev(self.node(rev1)),
104 115 self.revlog2.rev(self.node(rev2)),
105 116 )
106 117 elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
107 118 return super(unionrevlog, self).revdiff(rev1, rev2)
108 119
109 120 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
110 121
111 122 def _revisiondata(self, nodeorrev, _df=None, raw=False):
112 123 if isinstance(nodeorrev, int):
113 124 rev = nodeorrev
114 125 node = self.node(rev)
115 126 else:
116 127 node = nodeorrev
117 128 rev = self.rev(node)
118 129
119 130 if rev > self.repotiprev:
120 131 # work around manifestrevlog NOT being a revlog
121 132 revlog2 = getattr(self.revlog2, '_revlog', self.revlog2)
122 133 func = revlog2._revisiondata
123 134 else:
124 135 func = super(unionrevlog, self)._revisiondata
125 136 return func(node, _df=_df, raw=raw)
126 137
127 138 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
128 139 raise NotImplementedError
129 140
130 141 def addgroup(
131 142 self,
132 143 deltas,
133 144 linkmapper,
134 145 transaction,
135 146 alwayscache=False,
136 147 addrevisioncb=None,
137 148 duplicaterevisioncb=None,
138 149 maybemissingparents=False,
139 150 ):
140 151 raise NotImplementedError
141 152
142 153 def strip(self, minlink, transaction):
143 154 raise NotImplementedError
144 155
145 156 def checksize(self):
146 157 raise NotImplementedError
147 158
148 159
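
All of unionrevlog dispatches on one numeric rule, as `_chunk` and `_revisiondata` above show: revisions at or below `repotiprev` live in the first revlog, anything higher is translated to a node and resolved in `revlog2`. A rough sketch of that rule in isolation (a hypothetical helper, not part of the module; it glosses over the manifestlog-wrapper case handled in `_revisiondata`):

def union_rawdata(ul, rev):
    """Fetch raw revision data from a union revlog `ul` (illustrative only)."""
    if rev <= ul.repotiprev:
        return ul.rawdata(rev)           # served natively by the first revlog
    node = ul.node(rev)                  # temporary rev -> node
    return ul.revlog2.rawdata(node)      # resolved in the second revlog
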
149 160 class unionchangelog(unionrevlog, changelog.changelog):
150 161 def __init__(self, opener, opener2):
151 162 changelog.changelog.__init__(self, opener)
152 163 linkmapper = None
153 164 changelog2 = changelog.changelog(opener2)
154 165 unionrevlog.__init__(
155 166 self, opener, self.indexfile, changelog2, linkmapper
156 167 )
157 168
158 169
159 170 class unionmanifest(unionrevlog, manifest.manifestrevlog):
160 171 def __init__(self, nodeconstants, opener, opener2, linkmapper):
161 172 manifest.manifestrevlog.__init__(self, nodeconstants, opener)
162 173 manifest2 = manifest.manifestrevlog(nodeconstants, opener2)
163 174 unionrevlog.__init__(
164 175 self, opener, self.indexfile, manifest2, linkmapper
165 176 )
166 177
167 178
168 179 class unionfilelog(filelog.filelog):
169 180 def __init__(self, opener, path, opener2, linkmapper, repo):
170 181 filelog.filelog.__init__(self, opener, path)
171 182 filelog2 = filelog.filelog(opener2, path)
172 183 self._revlog = unionrevlog(
173 184 opener, self.indexfile, filelog2._revlog, linkmapper
174 185 )
175 186 self._repo = repo
176 187 self.repotiprev = self._revlog.repotiprev
177 188 self.revlog2 = self._revlog.revlog2
178 189
179 190 def iscensored(self, rev):
180 191 """Check if a revision is censored."""
181 192 if rev <= self.repotiprev:
182 193 return filelog.filelog.iscensored(self, rev)
183 194 node = self.node(rev)
184 195 return self.revlog2.iscensored(self.revlog2.rev(node))
185 196
186 197
187 198 class unionpeer(localrepo.localpeer):
188 199 def canpush(self):
189 200 return False
190 201
191 202
192 203 class unionrepository(object):
193 204 """Represents the union of data in 2 repositories.
194 205
195 206 Instances are not usable if constructed directly. Use ``instance()``
196 207 or ``makeunionrepository()`` to create a usable instance.
197 208 """
198 209
199 210 def __init__(self, repo2, url):
200 211 self.repo2 = repo2
201 212 self._url = url
202 213
203 214 self.ui.setconfig(b'phases', b'publish', False, b'unionrepo')
204 215
205 216 @localrepo.unfilteredpropertycache
206 217 def changelog(self):
207 218 return unionchangelog(self.svfs, self.repo2.svfs)
208 219
209 220 @localrepo.unfilteredpropertycache
210 221 def manifestlog(self):
211 222 rootstore = unionmanifest(
212 223 self.nodeconstants,
213 224 self.svfs,
214 225 self.repo2.svfs,
215 226 self.unfiltered()._clrev,
216 227 )
217 228 return manifest.manifestlog(
218 229 self.svfs, self, rootstore, self.narrowmatch()
219 230 )
220 231
221 232 def _clrev(self, rev2):
222 233 """map from repo2 changelog rev to temporary rev in self.changelog"""
223 234 node = self.repo2.changelog.node(rev2)
224 235 return self.changelog.rev(node)
225 236
226 237 def url(self):
227 238 return self._url
228 239
229 240 def file(self, f):
230 241 return unionfilelog(
231 242 self.svfs, f, self.repo2.svfs, self.unfiltered()._clrev, self
232 243 )
233 244
234 245 def close(self):
235 246 self.repo2.close()
236 247
237 248 def cancopy(self):
238 249 return False
239 250
240 251 def peer(self):
241 252 return unionpeer(self)
242 253
243 254 def getcwd(self):
244 255 return encoding.getcwd() # always outside the repo
245 256
246 257
247 258 def instance(ui, path, create, intents=None, createopts=None):
248 259 if create:
249 260 raise error.Abort(_(b'cannot create new union repository'))
250 261 parentpath = ui.config(b"bundle", b"mainreporoot")
251 262 if not parentpath:
252 263 # try to find the correct path to the working directory repo
253 264 parentpath = cmdutil.findrepo(encoding.getcwd())
254 265 if parentpath is None:
255 266 parentpath = b''
256 267 if parentpath:
257 268 # Try to make the full path relative so we get a nice, short URL.
258 269 # In particular, we don't want temp dir names in test outputs.
259 270 cwd = encoding.getcwd()
260 271 if parentpath == cwd:
261 272 parentpath = b''
262 273 else:
263 274 cwd = pathutil.normasprefix(cwd)
264 275 if parentpath.startswith(cwd):
265 276 parentpath = parentpath[len(cwd) :]
266 277 if path.startswith(b'union:'):
267 278 s = path.split(b":", 1)[1].split(b"+", 1)
268 279 if len(s) == 1:
269 280 repopath, repopath2 = parentpath, s[0]
270 281 else:
271 282 repopath, repopath2 = s
272 283 else:
273 284 repopath, repopath2 = parentpath, path
274 285
275 286 return makeunionrepository(ui, repopath, repopath2)
276 287
277 288
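
The path handling in instance() above accepts either both repositories or only the second one, with the first defaulting to the enclosing repository; a hedged restatement of just that splitting step (a hypothetical helper, example paths made up):

def split_union_path(path, parentpath):
    # mirrors the parsing in instance() above
    if path.startswith(b'union:'):
        s = path.split(b":", 1)[1].split(b"+", 1)
        if len(s) == 1:
            return parentpath, s[0]
        return s[0], s[1]
    return parentpath, path

# split_union_path(b"union:/srv/r1+/srv/r2", b"/cwd/repo") == (b"/srv/r1", b"/srv/r2")
# split_union_path(b"union:/srv/r2", b"/cwd/repo")         == (b"/cwd/repo", b"/srv/r2")
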
278 289 def makeunionrepository(ui, repopath1, repopath2):
279 290 """Make a union repository object from 2 local repo paths."""
280 291 repo1 = localrepo.instance(ui, repopath1, create=False)
281 292 repo2 = localrepo.instance(ui, repopath2, create=False)
282 293
283 294 url = b'union:%s+%s' % (
284 295 util.expandpath(repopath1),
285 296 util.expandpath(repopath2),
286 297 )
287 298
288 299 class derivedunionrepository(unionrepository, repo1.__class__):
289 300 pass
290 301
291 302 repo = repo1
292 303 repo.__class__ = derivedunionrepository
293 304 unionrepository.__init__(repo1, repo2, url)
294 305
295 306 return repo
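
makeunionrepository relies on a dynamic mixin trick: it derives a class from both unionrepository and repo1's concrete class, then rebinds repo1.__class__ so the existing local repository object picks up the union behaviour in place. A generic, hedged sketch of that pattern with toy classes (nothing here is Mercurial API):

class Base(object):
    def greet(self):
        return "base"

class Mixin(object):
    def greet(self):
        return "mixin over " + super(Mixin, self).greet()

class Derived(Mixin, Base):
    pass

obj = Base()
obj.__class__ = Derived              # rebind the instance's class in place
assert obj.greet() == "mixin over base"
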
@@ -1,284 +1,286 b''
1 1 """This unit test primarily tests parsers.parse_index2().
2 2
3 3 It also checks certain aspects of the parsers module as a whole.
4 4 """
5 5
6 6 from __future__ import absolute_import, print_function
7 7
8 8 import os
9 9 import struct
10 10 import subprocess
11 11 import sys
12 12 import unittest
13 13
14 14 from mercurial.node import (
15 15 bin,
16 16 hex,
17 17 nullrev,
18 18 sha1nodeconstants,
19 19 )
20 20 from mercurial import (
21 21 policy,
22 22 pycompat,
23 23 )
24 24
25 25 parsers = policy.importmod('parsers')
26 26
27 27 # original python implementation
28 28 def gettype(q):
29 29 return int(q & 0xFFFF)
30 30
31 31
32 32 def offset_type(offset, type):
33 33 return int(int(offset) << 16 | type)
34 34
35 35
36 36 indexformatng = ">Qiiiiii20s12x"
37 37
38 38
39 39 def py_parseindex(data, inline):
40 40 s = 64
41 41 cache = None
42 42 index = []
43 43 nodemap = {sha1nodeconstants.nullid: nullrev}
44 44 n = off = 0
45 45
46 46 l = len(data) - s
47 47 append = index.append
48 48 if inline:
49 49 cache = (0, data)
50 50 while off <= l:
51 51 e = struct.unpack(indexformatng, data[off : off + s])
52 e = e + (0, 0)
52 53 nodemap[e[7]] = n
53 54 append(e)
54 55 n += 1
55 56 if e[1] < 0:
56 57 break
57 58 off += e[1] + s
58 59 else:
59 60 while off <= l:
60 61 e = struct.unpack(indexformatng, data[off : off + s])
62 e = e + (0, 0)
61 63 nodemap[e[7]] = n
62 64 append(e)
63 65 n += 1
64 66 off += s
65 67
66 68 e = list(index[0])
67 69 type = gettype(e[0])
68 70 e[0] = offset_type(0, type)
69 71 index[0] = tuple(e)
70 72
71 73 return index, cache
72 74
73 75
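
py_parseindex above unpacks each 64-byte v1 entry with the ">Qiiiiii20s12x" format and then pads it with (0, 0) so the pure-Python reference matches the 10-item tuples the C parser now returns. A small worked example of the offset/flag packing helpers defined at the top of the file (values are arbitrary):

packed = offset_type(1024, 3)        # (1024 << 16) | 3
assert gettype(packed) == 3          # the low 16 bits carry the flags
assert packed >> 16 == 1024          # the remaining bits carry the byte offset
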
74 76 data_inlined = (
75 77 b'\x00\x01\x00\x01\x00\x00\x00\x00\x00\x00\x01\x8c'
76 78 b'\x00\x00\x04\x07\x00\x00\x00\x00\x00\x00\x15\x15\xff\xff\xff'
77 79 b'\xff\xff\xff\xff\xff\xebG\x97\xb7\x1fB\x04\xcf\x13V\x81\tw\x1b'
78 80 b'w\xdduR\xda\xc6\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
79 81 b'x\x9c\x9d\x93?O\xc30\x10\xc5\xf7|\x8a\xdb\x9a\xa8m\x06\xd8*\x95'
80 82 b'\x81B\xa1\xa2\xa2R\xcb\x86Pd\x9a\x0b5$vd_\x04\xfd\xf6\x9c\xff@'
81 83 b'\x11!\x0b\xd9\xec\xf7\xbbw\xe7gG6\xad6\x04\xdaN\xc0\x92\xa0$)'
82 84 b'\xb1\x82\xa2\xd1%\x16\xa4\x8b7\xa9\xca\xd4-\xb2Y\x02\xfc\xc9'
83 85 b'\xcaS\xf9\xaeX\xed\xb6\xd77Q\x02\x83\xd4\x19\xf5--Y\xea\xe1W'
84 86 b'\xab\xed\x10\xceR\x0f_\xdf\xdf\r\xe1,\xf5\xf0\xcb\xf5 \xceR\x0f'
85 87 b'_\xdc\x0e\x0e\xc3R\x0f_\xae\x96\x9b!\x9e\xa5\x1e\xbf\xdb,\x06'
86 88 b'\xc7q\x9a/\x88\x82\xc3B\xea\xb5\xb4TJ\x93\xb6\x82\x0e\xe16\xe6'
87 89 b'KQ\xdb\xaf\xecG\xa3\xd1 \x01\xd3\x0b_^\xe8\xaa\xa0\xae\xad\xd1'
88 90 b'&\xbef\x1bz\x08\xb0|\xc9Xz\x06\xf6Z\x91\x90J\xaa\x17\x90\xaa'
89 91 b'\xd2\xa6\x11$5C\xcf\xba#\xa0\x03\x02*2\x92-\xfc\xb1\x94\xdf\xe2'
90 92 b'\xae\xb8\'m\x8ey0^\x85\xd3\x82\xb4\xf0`:\x9c\x00\x8a\xfd\x01'
91 93 b'\xb0\xc6\x86\x8b\xdd\xae\x80\xf3\xa9\x9fd\x16\n\x00R%\x1a\x06'
92 94 b'\xe9\xd8b\x98\x1d\xf4\xf3+\x9bf\x01\xd8p\x1b\xf3.\xed\x9f^g\xc3'
93 95 b'^\xd9W81T\xdb\xd5\x04sx|\xf2\xeb\xd6`%?x\xed"\x831\xbf\xf3\xdc'
94 96 b'b\xeb%gaY\xe1\xad\x9f\xb9f\'1w\xa9\xa5a\x83s\x82J\xb98\xbc4\x8b'
95 97 b'\x83\x00\x9f$z\xb8#\xa5\xb1\xdf\x98\xd9\xec\x1b\x89O\xe3Ts\x9a4'
96 98 b'\x17m\x8b\xfc\x8f\xa5\x95\x9a\xfc\xfa\xed,\xe5|\xa1\xfe\x15\xb9'
97 99 b'\xbc\xb2\x93\x1f\xf2\x95\xff\xdf,\x1a\xc5\xe7\x17*\x93Oz:>\x0e'
98 100 )
99 101
100 102 data_non_inlined = (
101 103 b'\x00\x00\x00\x01\x00\x00\x00\x00\x00\x01D\x19'
102 104 b'\x00\x07e\x12\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff'
103 105 b'\xff\xff\xff\xff\xd1\xf4\xbb\xb0\xbe\xfc\x13\xbd\x8c\xd3\x9d'
104 106 b'\x0f\xcd\xd9;\x8c\x07\x8cJ/\x00\x00\x00\x00\x00\x00\x00\x00\x00'
105 107 b'\x00\x00\x00\x00\x00\x00\x01D\x19\x00\x00\x00\x00\x00\xdf\x00'
106 108 b'\x00\x01q\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\xff'
107 109 b'\xff\xff\xff\xc1\x12\xb9\x04\x96\xa4Z1t\x91\xdfsJ\x90\xf0\x9bh'
108 110 b'\x07l&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
109 111 b'\x00\x01D\xf8\x00\x00\x00\x00\x01\x1b\x00\x00\x01\xb8\x00\x00'
110 112 b'\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\xff\xff\xff\xff\x02\n'
111 113 b'\x0e\xc6&\xa1\x92\xae6\x0b\x02i\xfe-\xe5\xbao\x05\xd1\xe7\x00'
112 114 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01F'
113 115 b'\x13\x00\x00\x00\x00\x01\xec\x00\x00\x03\x06\x00\x00\x00\x01'
114 116 b'\x00\x00\x00\x03\x00\x00\x00\x02\xff\xff\xff\xff\x12\xcb\xeby1'
115 117 b'\xb6\r\x98B\xcb\x07\xbd`\x8f\x92\xd9\xc4\x84\xbdK\x00\x00\x00'
116 118 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00'
117 119 )
118 120
119 121
120 122 def parse_index2(data, inline, revlogv2=False):
121 123 index, chunkcache = parsers.parse_index2(data, inline, revlogv2=revlogv2)
122 124 return list(index), chunkcache
123 125
124 126
125 127 def importparsers(hexversion):
126 128 """Import mercurial.parsers with the given sys.hexversion."""
127 129 # The file parsers.c inspects sys.hexversion to determine the version
128 130 # of the currently-running Python interpreter, so we monkey-patch
129 131 # sys.hexversion to simulate using different versions.
130 132 code = (
131 133 "import sys; sys.hexversion=%s; "
132 134 "import mercurial.cext.parsers" % hexversion
133 135 )
134 136 cmd = "\"%s\" -c \"%s\"" % (os.environ['PYTHON'], code)
135 137 # We need to do these tests inside a subprocess because parser.c's
136 138 # version-checking code happens inside the module init function, and
137 139 # when using reload() to reimport an extension module, "The init function
138 140 # of extension modules is not called a second time"
139 141 # (from http://docs.python.org/2/library/functions.html?#reload).
140 142 p = subprocess.Popen(
141 143 cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
142 144 )
143 145 return p.communicate() # returns stdout, stderr
144 146
145 147
146 148 def hexfailmsg(testnumber, hexversion, stdout, expected):
147 149 try:
148 150 hexstring = hex(hexversion)
149 151 except TypeError:
150 152 hexstring = None
151 153 return (
152 154 "FAILED: version test #%s with Python %s and patched "
153 155 "sys.hexversion %r (%r):\n Expected %s but got:\n-->'%s'\n"
154 156 % (
155 157 testnumber,
156 158 sys.version_info,
157 159 hexversion,
158 160 hexstring,
159 161 expected,
160 162 stdout,
161 163 )
162 164 )
163 165
164 166
165 167 def makehex(major, minor, micro):
166 168 return int("%x%02x%02x00" % (major, minor, micro), 16)
167 169
168 170
169 171 class parseindex2tests(unittest.TestCase):
170 172 def assertversionokay(self, testnumber, hexversion):
171 173 stdout, stderr = importparsers(hexversion)
172 174 self.assertFalse(
173 175 stdout, hexfailmsg(testnumber, hexversion, stdout, 'no stdout')
174 176 )
175 177
176 178 def assertversionfail(self, testnumber, hexversion):
177 179 stdout, stderr = importparsers(hexversion)
178 180 # We include versionerrortext to distinguish from other ImportErrors.
179 181 errtext = b"ImportError: %s" % pycompat.sysbytes(
180 182 parsers.versionerrortext
181 183 )
182 184 self.assertIn(
183 185 errtext,
184 186 stdout,
185 187 hexfailmsg(
186 188 testnumber,
187 189 hexversion,
188 190 stdout,
189 191 expected="stdout to contain %r" % errtext,
190 192 ),
191 193 )
192 194
193 195 def testversiondetection(self):
194 196 """Check the version-detection logic when importing parsers."""
195 197 # Only test the version-detection logic if it is present.
196 198 try:
197 199 parsers.versionerrortext
198 200 except AttributeError:
199 201 return
200 202 info = sys.version_info
201 203 major, minor, micro = info[0], info[1], info[2]
202 204 # Test same major-minor versions.
203 205 self.assertversionokay(1, makehex(major, minor, micro))
204 206 self.assertversionokay(2, makehex(major, minor, micro + 1))
205 207 # Test different major-minor versions.
206 208 self.assertversionfail(3, makehex(major + 1, minor, micro))
207 209 self.assertversionfail(4, makehex(major, minor + 1, micro))
208 210 self.assertversionfail(5, "'foo'")
209 211
210 212 def testbadargs(self):
211 213 # Check that parse_index2() raises TypeError on bad arguments.
212 214 with self.assertRaises(TypeError):
213 215 parse_index2(0, True)
214 216
215 217 def testparseindexfile(self):
216 218 # Check parsers.parse_index2() on an index file against the
217 219 # original Python implementation of parseindex, both with and
218 220 # without inlined data.
219 221
220 222 want = py_parseindex(data_inlined, True)
221 223 got = parse_index2(data_inlined, True)
222 224 self.assertEqual(want, got) # inline data
223 225
224 226 want = py_parseindex(data_non_inlined, False)
225 227 got = parse_index2(data_non_inlined, False)
226 228 self.assertEqual(want, got) # no inline data
227 229
228 230 ix = parsers.parse_index2(data_inlined, True)[0]
229 231 for i, r in enumerate(ix):
230 232 if r[7] == sha1nodeconstants.nullid:
231 233 i = -1
232 234 try:
233 235 self.assertEqual(
234 236 ix[r[7]],
235 237 i,
236 238 'Reverse lookup inconsistent for %r' % hex(r[7]),
237 239 )
238 240 except TypeError:
239 241 # pure version doesn't support this
240 242 break
241 243
242 244 def testminusone(self):
243 want = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
245 want = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
244 246 index, junk = parsers.parse_index2(data_inlined, True)
245 247 got = index[-1]
246 248 self.assertEqual(want, got) # inline data
247 249
248 250 index, junk = parsers.parse_index2(data_non_inlined, False)
249 251 got = index[-1]
250 252 self.assertEqual(want, got) # no inline data
251 253
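
testminusone above pins the new invariant directly on the sentinel entry: index[-1] is now the same 10-item tuple shape as every real revision. A hedged extra check in the same spirit (not one of the tests in this file):

index, _cache = parsers.parse_index2(data_non_inlined, False)
assert len(index[-1]) == 10                                   # the rev -1 sentinel
assert all(len(index[r]) == 10 for r in range(len(index)))    # and every real entry
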
252 254 def testdelitemwithoutnodetree(self):
253 255 index, _junk = parsers.parse_index2(data_non_inlined, False)
254 256
255 257 def hexrev(rev):
256 258 if rev == nullrev:
257 259 return b'\xff\xff\xff\xff'
258 260 else:
259 261 return bin('%08x' % rev)
260 262
261 263 def appendrev(p1, p2=nullrev):
262 264 # node won't matter for this test, let's just make sure
263 265 # they don't collide. Other data don't matter either.
264 266 node = hexrev(p1) + hexrev(p2) + b'.' * 12
265 267 index.append((0, 0, 12, 1, 34, p1, p2, node))
266 268
267 269 appendrev(4)
268 270 appendrev(5)
269 271 appendrev(6)
270 272 self.assertEqual(len(index), 7)
271 273
272 274 del index[1:-1]
273 275
274 276 # assertions that failed before correction
275 277 self.assertEqual(len(index), 1) # was 4
276 278 headrevs = getattr(index, 'headrevs', None)
277 279 if headrevs is not None: # not implemented in pure
278 280 self.assertEqual(index.headrevs(), [0]) # gave ValueError
279 281
280 282
281 283 if __name__ == '__main__':
282 284 import silenttestrunner
283 285
284 286 silenttestrunner.main(__name__)