##// END OF EJS Templates
rank: add a "rank" value to the revlog-entry tuple...
marmoute -
r49330:52034c42 default
parent child Browse files
Show More
@@ -1,3062 +1,3066 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
26 26 #ifdef IS_PY3K
27 27 /* The mapping of Python types is meant to be temporary to get Python
28 28 * 3 to compile. We should remove this once Python 3 support is fully
29 29 * supported and proper types are used in the extensions themselves. */
30 30 #define PyInt_Check PyLong_Check
31 31 #define PyInt_FromLong PyLong_FromLong
32 32 #define PyInt_FromSsize_t PyLong_FromSsize_t
33 33 #define PyInt_AsLong PyLong_AsLong
34 34 #endif
35 35
36 36 typedef struct indexObjectStruct indexObject;
37 37
/* One trie node: 16 child slots, one per hex nibble of a node hash. */
38 38 typedef struct {
39 39 int children[16];
40 40 } nodetreenode;
41 41
/* C-level API exposed to other extensions (e.g. rust bindings);
 * abi_version guards against mismatched function tables. */
42 42 typedef struct {
43 43 int abi_version;
44 44 Py_ssize_t (*index_length)(const indexObject *);
45 45 const char *(*index_node)(indexObject *, Py_ssize_t);
46 46 int (*index_parents)(PyObject *, int, int *);
47 47 } Revlog_CAPI;
48 48
49 49 /*
50 50 * A base-16 trie for fast node->rev mapping.
51 51 *
52 52 * Positive value is index of the next node in the trie
53 53 * Negative value is a leaf: -(rev + 2)
54 54 * Zero is empty
55 55 */
56 56 typedef struct {
57 57 indexObject *index;
58 58 nodetreenode *nodes;
59 59 Py_ssize_t nodelen;
60 60 size_t length; /* # nodes in use */
61 61 size_t capacity; /* # nodes allocated */
62 62 int depth; /* maximum depth of tree */
63 63 int splits; /* # splits performed */
64 64 } nodetree;
65 65
/* Python-object wrapper around a standalone nodetree. */
66 66 typedef struct {
67 67 PyObject_HEAD /* ; */
68 68 nodetree nt;
69 69 } nodetreeObject;
70 70
71 71 /*
72 72 * This class has two behaviors.
73 73 *
74 74 * When used in a list-like way (with integer keys), we decode an
75 75 * entry in a RevlogNG index file on demand. We have limited support for
76 76 * integer-keyed insert and delete, only at elements right before the
77 77 * end.
78 78 *
79 79 * With string keys, we lazily perform a reverse mapping from node to
80 80 * rev, using a base-16 trie.
81 81 */
82 82 struct indexObjectStruct {
83 83 PyObject_HEAD
84 84 /* Type-specific fields go here. */
85 85 PyObject *data; /* raw bytes of index */
86 86 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
87 87 PyObject *nullentry; /* fast path for references to null */
88 88 Py_buffer buf; /* buffer of data */
89 89 const char **offsets; /* populated on demand */
90 90 Py_ssize_t length; /* current on-disk number of elements */
91 91 unsigned new_length; /* number of added elements */
92 92 unsigned added_length; /* space reserved for added elements */
93 93 char *added; /* populated on demand */
94 94 PyObject *headrevs; /* cache, invalidated on changes */
95 95 PyObject *filteredrevs; /* filtered revs set */
96 96 nodetree nt; /* base-16 trie */
97 97 int ntinitialized; /* 0 or 1 */
98 98 int ntrev; /* last rev scanned */
99 99 int ntlookups; /* # lookups */
100 100 int ntmisses; /* # lookups that miss the cache */
/* nonzero when revision data is interleaved with the index (see
 * index_deref's offset table) */
101 101 int inlined;
102 102 long entry_size; /* size of index headers. Differs in v1 v.s. v2 format
103 103 */
104 104 long rust_ext_compat; /* compatibility with being used in rust
105 105 extensions */
/* which index format this is: compared against format_v1/format_v2 */
106 106 char format_version; /* size of index headers. Differs in v1 v.s. v2
107 107 format */
108 108 };
109 109
110 110 static Py_ssize_t index_length(const indexObject *self)
111 111 {
112 112 return self->length + self->new_length;
113 113 }
114 114
/* All-zero hash used for the null revision (large enough for SHA-256). */
115 115 static const char nullid[32] = {0};
116 116 static const Py_ssize_t nullrev = -1;
117 117
118 118 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
119 119
120 120 static int index_find_node(indexObject *self, const char *node);
121 121
/* Py_BuildValue/PyArg_ParseTuple format of an index entry tuple; the
 * trailing "i" is the rank field. The uppercase K/B variants are used
 * on 32-bit longs.
 * NOTE(review): this extracted view shows both the pre- and post-"rank"
 * definitions of tuple_format; the current code is the two-line form
 * ending in "i". Verify against the repository before editing. */
122 122 #if LONG_MAX == 0x7fffffffL
123 static const char *const tuple_format = PY23("Kiiiiiis#KiBB", "Kiiiiiiy#KiBB");
123 static const char *const tuple_format =
124 PY23("Kiiiiiis#KiBBi", "Kiiiiiiy#KiBBi");
124 125 #else
125 static const char *const tuple_format = PY23("kiiiiiis#kiBB", "kiiiiiiy#kiBB");
126 static const char *const tuple_format =
127 PY23("kiiiiiis#kiBBi", "kiiiiiiy#kiBBi");
126 128 #endif
127 129
128 130 /* A RevlogNG v1 index entry is 64 bytes long. */
129 131 static const long v1_entry_size = 64;
130 132
131 133 /* A Revlogv2 index entry is 96 bytes long. */
132 134 static const long v2_entry_size = 96;
133 135
134 136 static const long format_v1 = 1; /* Internal only, could be any number */
135 137 static const long format_v2 = 2; /* Internal only, could be any number */
136 138
137 139 static const char comp_mode_inline = 2;
/* sentinel rank for entries whose rank has not been computed */
140 static const char rank_unknown = -1;
138 141
139 142 static void raise_revlog_error(void)
140 143 {
141 144 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
142 145
143 146 mod = PyImport_ImportModule("mercurial.error");
144 147 if (mod == NULL) {
145 148 goto cleanup;
146 149 }
147 150
148 151 dict = PyModule_GetDict(mod);
149 152 if (dict == NULL) {
150 153 goto cleanup;
151 154 }
152 155 Py_INCREF(dict);
153 156
154 157 errclass = PyDict_GetItemString(dict, "RevlogError");
155 158 if (errclass == NULL) {
156 159 PyErr_SetString(PyExc_SystemError,
157 160 "could not find RevlogError");
158 161 goto cleanup;
159 162 }
160 163
161 164 /* value of exception is ignored by callers */
162 165 PyErr_SetString(errclass, "RevlogError");
163 166
164 167 cleanup:
165 168 Py_XDECREF(dict);
166 169 Py_XDECREF(mod);
167 170 }
168 171
169 172 /*
170 173 * Return a pointer to the beginning of a RevlogNG record.
 *
 * Returns NULL with a Python exception set on allocation or scan
 * failure. The returned pointer aliases internal storage and is only
 * valid until the index is mutated.
171 174 */
172 175 static const char *index_deref(indexObject *self, Py_ssize_t pos)
173 176 {
	/* Entries past the on-disk length live in the in-memory "added"
	 * buffer, one entry_size record each. */
174 177 if (pos >= self->length)
175 178 return self->added + (pos - self->length) * self->entry_size;
176 179
	/* Inlined revlogs interleave data with index entries, so records
	 * are not fixed-stride; build an offset table lazily via
	 * inline_scan on first access. */
177 180 if (self->inlined && pos > 0) {
178 181 if (self->offsets == NULL) {
179 182 Py_ssize_t ret;
180 183 self->offsets =
181 184 PyMem_Malloc(self->length * sizeof(*self->offsets));
182 185 if (self->offsets == NULL)
183 186 return (const char *)PyErr_NoMemory();
184 187 ret = inline_scan(self, self->offsets);
185 188 if (ret == -1) {
186 189 return NULL;
187 190 };
188 191 }
189 192 return self->offsets[pos];
190 193 }
191 194
	/* Non-inlined on-disk entries are fixed size: direct indexing. */
192 195 return (const char *)(self->buf.buf) + pos * self->entry_size;
193 196 }
194 197
195 198 /*
196 199 * Get parents of the given rev.
197 200 *
198 201 * The specified rev must be valid and must not be nullrev. A returned
199 202 * parent revision may be nullrev, but is guaranteed to be in valid range.
200 203 */
201 204 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
202 205 int maxrev)
203 206 {
204 207 const char *data = index_deref(self, rev);
205 208
206 209 ps[0] = getbe32(data + 24);
207 210 ps[1] = getbe32(data + 28);
208 211
209 212 /* If index file is corrupted, ps[] may point to invalid revisions. So
210 213 * there is a risk of buffer overflow to trust them unconditionally. */
211 214 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
212 215 PyErr_SetString(PyExc_ValueError, "parent out of range");
213 216 return -1;
214 217 }
215 218 return 0;
216 219 }
217 220
218 221 /*
219 222 * Get parents of the given rev.
220 223 *
221 224 * If the specified rev is out of range, IndexError will be raised. If the
222 225 * revlog entry is corrupted, ValueError may be raised.
223 226 *
224 227 * Returns 0 on success or -1 on failure.
225 228 */
226 229 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
227 230 {
228 231 int tiprev;
229 232 if (!op || !HgRevlogIndex_Check(op) || !ps) {
230 233 PyErr_BadInternalCall();
231 234 return -1;
232 235 }
233 236 tiprev = (int)index_length((indexObject *)op) - 1;
234 237 if (rev < -1 || rev > tiprev) {
235 238 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
236 239 return -1;
237 240 } else if (rev == -1) {
238 241 ps[0] = ps[1] = -1;
239 242 return 0;
240 243 } else {
241 244 return index_get_parents((indexObject *)op, rev, ps, tiprev);
242 245 }
243 246 }
244 247
245 248 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
246 249 {
247 250 const char *data;
248 251 uint64_t offset;
249 252
250 253 if (rev == nullrev)
251 254 return 0;
252 255
253 256 data = index_deref(self, rev);
254 257 offset = getbe32(data + 4);
255 258 if (rev == 0) {
256 259 /* mask out version number for the first entry */
257 260 offset &= 0xFFFF;
258 261 } else {
259 262 uint32_t offset_high = getbe32(data);
260 263 offset |= ((uint64_t)offset_high) << 32;
261 264 }
262 265 return (int64_t)(offset >> 16);
263 266 }
264 267
265 268 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
266 269 {
267 270 const char *data;
268 271 int tmp;
269 272
270 273 if (rev == nullrev)
271 274 return 0;
272 275
273 276 data = index_deref(self, rev);
274 277
275 278 tmp = (int)getbe32(data + 8);
276 279 if (tmp < 0) {
277 280 PyErr_Format(PyExc_OverflowError,
278 281 "revlog entry size out of bound (%d)", tmp);
279 282 return -1;
280 283 }
281 284 return tmp;
282 285 }
283 286
284 287 /*
285 288 * RevlogNG format (all in big endian, data may be inlined):
286 289 * 6 bytes: offset
287 290 * 2 bytes: flags
288 291 * 4 bytes: compressed length
289 292 * 4 bytes: uncompressed length
290 293 * 4 bytes: base revision
291 294 * 4 bytes: link revision
292 295 * 4 bytes: parent 1 revision
293 296 * 4 bytes: parent 2 revision
294 297 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
295 298 */
/* Decode entry `pos` into the Python tuple form described by
 * tuple_format. Returns a new reference, or NULL with IndexError for
 * out-of-range positions. */
296 299 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
297 300 {
298 301 uint64_t offset_flags, sidedata_offset;
299 302 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
300 303 sidedata_comp_len;
301 304 char data_comp_mode, sidedata_comp_mode;
302 305 const char *c_node_id;
303 306 const char *data;
304 307 Py_ssize_t length = index_length(self);
305 308
	/* nullrev is served from a prebuilt shared tuple */
306 309 if (pos == nullrev) {
307 310 Py_INCREF(self->nullentry);
308 311 return self->nullentry;
309 312 }
310 313
311 314 if (pos < 0 || pos >= length) {
312 315 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
313 316 return NULL;
314 317 }
315 318
316 319 data = index_deref(self, pos);
317 320 if (data == NULL)
318 321 return NULL;
319 322
320 323 offset_flags = getbe32(data + 4);
321 324 /*
322 325 * The first entry on-disk needs the version number masked out,
323 326 * but this doesn't apply if entries are added to an empty index.
324 327 */
325 328 if (self->length && pos == 0)
326 329 offset_flags &= 0xFFFF;
327 330 else {
328 331 uint32_t offset_high = getbe32(data);
329 332 offset_flags |= ((uint64_t)offset_high) << 32;
330 333 }
331 334
332 335 comp_len = getbe32(data + 8);
333 336 uncomp_len = getbe32(data + 12);
334 337 base_rev = getbe32(data + 16);
335 338 link_rev = getbe32(data + 20);
336 339 parent_1 = getbe32(data + 24);
337 340 parent_2 = getbe32(data + 28);
338 341 c_node_id = data + 32;
339 342
	/* v1 entries have no sidedata: report empty/inline defaults.
	 * v2 entries store sidedata metadata at bytes 64-76. */
340 343 if (self->format_version == format_v1) {
341 344 sidedata_offset = 0;
342 345 sidedata_comp_len = 0;
343 346 data_comp_mode = comp_mode_inline;
344 347 sidedata_comp_mode = comp_mode_inline;
345 348 } else {
346 349 sidedata_offset = getbe64(data + 64);
347 350 sidedata_comp_len = getbe32(data + 72);
348 351 data_comp_mode = data[76] & 3;
349 352 sidedata_comp_mode = ((data[76] >> 2) & 3);
350 353 }
351 354
	/* NOTE(review): this extracted view shows both the old 13-argument
	 * call and the current one ending in rank_unknown; the current
	 * code passes rank_unknown as the tuple's final "i" element. */
352 355 return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
353 356 base_rev, link_rev, parent_1, parent_2, c_node_id,
354 357 self->nodelen, sidedata_offset, sidedata_comp_len,
355 data_comp_mode, sidedata_comp_mode);
358 data_comp_mode, sidedata_comp_mode, rank_unknown);
356 359 }
357 360 /*
358 361 * Pack header information in binary
359 362 */
360 363 static PyObject *index_pack_header(indexObject *self, PyObject *args)
361 364 {
362 365 int header;
363 366 char out[4];
364 367 if (!PyArg_ParseTuple(args, "I", &header)) {
365 368 return NULL;
366 369 }
367 370 if (self->format_version != format_v1) {
368 371 PyErr_Format(PyExc_RuntimeError,
369 372 "version header should go in the docket, not the "
370 373 "index: %lu",
371 374 header);
372 375 return NULL;
373 376 }
374 377 putbe32(header, out);
375 378 return PyBytes_FromStringAndSize(out, 4);
376 379 }
377 380 /*
378 381 * Return the raw binary string representing a revision
379 382 */
380 383 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
381 384 {
382 385 long rev;
383 386 const char *data;
384 387 Py_ssize_t length = index_length(self);
385 388
386 389 if (!pylong_to_long(value, &rev)) {
387 390 return NULL;
388 391 }
389 392 if (rev < 0 || rev >= length) {
390 393 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
391 394 rev);
392 395 return NULL;
393 396 };
394 397
395 398 data = index_deref(self, rev);
396 399 if (data == NULL)
397 400 return NULL;
398 401 if (rev == 0 && self->format_version == format_v1) {
399 402 /* the header is eating the start of the first entry */
400 403 return PyBytes_FromStringAndSize(data + 4,
401 404 self->entry_size - 4);
402 405 }
403 406 return PyBytes_FromStringAndSize(data, self->entry_size);
404 407 }
405 408
406 409 /*
407 410 * Return the hash of node corresponding to the given rev.
408 411 */
409 412 static const char *index_node(indexObject *self, Py_ssize_t pos)
410 413 {
411 414 Py_ssize_t length = index_length(self);
412 415 const char *data;
413 416
414 417 if (pos == nullrev)
415 418 return nullid;
416 419
417 420 if (pos >= length)
418 421 return NULL;
419 422
420 423 data = index_deref(self, pos);
421 424 return data ? data + 32 : NULL;
422 425 }
423 426
424 427 /*
425 428 * Return the hash of the node corresponding to the given rev. The
426 429 * rev is assumed to be existing. If not, an exception is set.
427 430 */
428 431 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
429 432 {
430 433 const char *node = index_node(self, pos);
431 434 if (node == NULL) {
432 435 PyErr_Format(PyExc_IndexError, "could not access rev %d",
433 436 (int)pos);
434 437 }
435 438 return node;
436 439 }
437 440
438 441 static int nt_insert(nodetree *self, const char *node, int rev);
439 442
440 443 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
441 444 {
442 445 Py_ssize_t thisnodelen;
443 446 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
444 447 return -1;
445 448 if (nodelen == thisnodelen)
446 449 return 0;
447 450 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
448 451 thisnodelen, nodelen);
449 452 return -1;
450 453 }
451 454
/* Append one revision entry (given as the tuple described by
 * tuple_format) to the in-memory "added" buffer, growing it as needed.
 * Also feeds the node trie and invalidates the headrevs cache.
 * NOTE(review): this extracted view shows both the pre- and
 * post-"rank" sides of a diff hunk around the PyArg_ParseTuple call;
 * the current code parses a 12-tuple ending in `rank`. The parsed
 * rank is not written into the v1/v2 on-disk entry below. */
452 455 static PyObject *index_append(indexObject *self, PyObject *obj)
453 456 {
454 457 uint64_t offset_flags, sidedata_offset;
455 458 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
456 sidedata_comp_len;
459 sidedata_comp_len, rank;
457 460 char data_comp_mode, sidedata_comp_mode;
458 461 Py_ssize_t c_node_id_len;
459 462 const char *c_node_id;
460 463 char comp_field;
461 464 char *data;
462 465
463 466 if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
464 467 &uncomp_len, &base_rev, &link_rev, &parent_1,
465 468 &parent_2, &c_node_id, &c_node_id_len,
466 469 &sidedata_offset, &sidedata_comp_len,
467 &data_comp_mode, &sidedata_comp_mode)) {
468 PyErr_SetString(PyExc_TypeError, "11-tuple required");
470 &data_comp_mode, &sidedata_comp_mode, &rank)) {
471 PyErr_SetString(PyExc_TypeError, "12-tuple required");
469 472 return NULL;
470 473 }
471 474
472 475 if (c_node_id_len != self->nodelen) {
473 476 PyErr_SetString(PyExc_TypeError, "invalid node");
474 477 return NULL;
475 478 }
	/* v1 has no per-entry compression-mode fields, so both modes must
	 * be the implicit inline default. */
476 479 if (self->format_version == format_v1) {
477 480
478 481 if (data_comp_mode != comp_mode_inline) {
479 482 PyErr_Format(PyExc_ValueError,
480 483 "invalid data compression mode: %i",
481 484 data_comp_mode);
482 485 return NULL;
483 486 }
484 487 if (sidedata_comp_mode != comp_mode_inline) {
485 488 PyErr_Format(PyExc_ValueError,
486 489 "invalid sidedata compression mode: %i",
487 490 sidedata_comp_mode);
488 491 return NULL;
489 492 }
490 493 }
491 494
	/* Grow the added-entries buffer geometrically (start at 4096
	 * entries, then double). */
492 495 if (self->new_length == self->added_length) {
493 496 size_t new_added_length =
494 497 self->added_length ? self->added_length * 2 : 4096;
495 498 void *new_added = PyMem_Realloc(
496 499 self->added, new_added_length * self->entry_size);
497 500 if (!new_added)
498 501 return PyErr_NoMemory();
499 502 self->added = new_added;
500 503 self->added_length = new_added_length;
501 504 }
502 505 rev = self->length + self->new_length;
503 506 data = self->added + self->entry_size * self->new_length++;
	/* serialize the entry in big-endian RevlogNG layout */
504 507 putbe32(offset_flags >> 32, data);
505 508 putbe32(offset_flags & 0xffffffffU, data + 4);
506 509 putbe32(comp_len, data + 8);
507 510 putbe32(uncomp_len, data + 12);
508 511 putbe32(base_rev, data + 16);
509 512 putbe32(link_rev, data + 20);
510 513 putbe32(parent_1, data + 24);
511 514 putbe32(parent_2, data + 28);
512 515 memcpy(data + 32, c_node_id, c_node_id_len);
513 516 /* Padding since SHA-1 is only 20 bytes for now */
514 517 memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
515 518 if (self->format_version == format_v2) {
516 519 putbe64(sidedata_offset, data + 64);
517 520 putbe32(sidedata_comp_len, data + 72);
	/* bits 0-1: data compression mode, bits 2-3: sidedata mode */
518 521 comp_field = data_comp_mode & 3;
519 522 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
520 523 data[76] = comp_field;
521 524 /* Padding for 96 bytes alignment */
522 525 memset(data + 77, 0, self->entry_size - 77);
523 526 }
524 527
	/* keep the node->rev trie in sync if it has been built */
525 528 if (self->ntinitialized)
526 529 nt_insert(&self->nt, c_node_id, rev);
527 530
528 531 Py_CLEAR(self->headrevs);
529 532 Py_RETURN_NONE;
530 533 }
531 534
532 535 /* Replace an existing index entry's sidedata offset and length with new ones.
533 536 This cannot be used outside of the context of sidedata rewriting,
534 537 inside the transaction that creates the given revision. */
535 538 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
536 539 {
537 540 uint64_t offset_flags, sidedata_offset;
538 541 Py_ssize_t rev;
539 542 int sidedata_comp_len;
540 543 char comp_mode;
541 544 char *data;
	/* argument format mirrors the width split of tuple_format above */
542 545 #if LONG_MAX == 0x7fffffffL
543 546 const char *const sidedata_format = PY23("nKiKB", "nKiKB");
544 547 #else
545 548 const char *const sidedata_format = PY23("nkikB", "nkikB");
546 549 #endif
547 550
	/* v1 entries have no sidedata fields, and inline revlogs hit a
	 * known transaction bug — refuse both. */
548 551 if (self->entry_size == v1_entry_size || self->inlined) {
549 552 /*
550 553 There is a bug in the transaction handling when going from an
551 554 inline revlog to a separate index and data file. Turn it off until
552 555 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
553 556 See issue6485.
554 557 */
555 558 raise_revlog_error();
556 559 return NULL;
557 560 }
558 561
559 562 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
560 563 &sidedata_comp_len, &offset_flags, &comp_mode))
561 564 return NULL;
562 565
563 566 if (rev < 0 || rev >= index_length(self)) {
564 567 PyErr_SetString(PyExc_IndexError, "revision outside index");
565 568 return NULL;
566 569 }
	/* only entries added in this transaction (i.e. still in the
	 * in-memory buffer) may be rewritten */
567 570 if (rev < self->length) {
568 571 PyErr_SetString(
569 572 PyExc_IndexError,
570 573 "cannot rewrite entries outside of this transaction");
571 574 return NULL;
572 575 }
573 576
574 577 /* Find the newly added node, offset from the "already on-disk" length
575 578 */
576 579 data = self->added + self->entry_size * (rev - self->length);
577 580 putbe64(offset_flags, data);
578 581 putbe64(sidedata_offset, data + 64);
579 582 putbe32(sidedata_comp_len, data + 72);
	/* replace only the sidedata compression bits (2-3) of byte 76 */
580 583 data[76] = (data[76] & ~(3 << 2)) | ((comp_mode & 3) << 2);
581 584
582 585 Py_RETURN_NONE;
583 586 }
584 587
/* Build a dict of diagnostic counters (entry counts, node-trie stats)
 * for this index. Returns a new dict, or NULL on allocation failure. */
585 588 static PyObject *index_stats(indexObject *self)
586 589 {
587 590 PyObject *obj = PyDict_New();
588 591 PyObject *s = NULL;
589 592 PyObject *t = NULL;
590 593
591 594 if (obj == NULL)
592 595 return NULL;
593 596
/* istat(field, label): store self->field under `label` in the dict;
 * jumps to bail (releasing s/t/obj) on any failure. */
594 597 #define istat(__n, __d) \
595 598 do { \
596 599 s = PyBytes_FromString(__d); \
597 600 t = PyInt_FromSsize_t(self->__n); \
598 601 if (!s || !t) \
599 602 goto bail; \
600 603 if (PyDict_SetItem(obj, s, t) == -1) \
601 604 goto bail; \
602 605 Py_CLEAR(s); \
603 606 Py_CLEAR(t); \
604 607 } while (0)
605 608
606 609 if (self->added_length)
607 610 istat(new_length, "index entries added");
608 611 istat(length, "revs in memory");
609 612 istat(ntlookups, "node trie lookups");
610 613 istat(ntmisses, "node trie misses");
611 614 istat(ntrev, "node trie last rev scanned");
	/* trie statistics only exist once the trie has been built */
612 615 if (self->ntinitialized) {
613 616 istat(nt.capacity, "node trie capacity");
614 617 istat(nt.depth, "node trie depth");
615 618 istat(nt.length, "node trie count");
616 619 istat(nt.splits, "node trie splits");
617 620 }
618 621
619 622 #undef istat
620 623
621 624 return obj;
622 625
623 626 bail:
624 627 Py_XDECREF(obj);
625 628 Py_XDECREF(s);
626 629 Py_XDECREF(t);
627 630 return NULL;
628 631 }
629 632
630 633 /*
631 634 * When we cache a list, we want to be sure the caller can't mutate
632 635 * the cached copy.
633 636 */
634 637 static PyObject *list_copy(PyObject *list)
635 638 {
636 639 Py_ssize_t len = PyList_GET_SIZE(list);
637 640 PyObject *newlist = PyList_New(len);
638 641 Py_ssize_t i;
639 642
640 643 if (newlist == NULL)
641 644 return NULL;
642 645
643 646 for (i = 0; i < len; i++) {
644 647 PyObject *obj = PyList_GET_ITEM(list, i);
645 648 Py_INCREF(obj);
646 649 PyList_SET_ITEM(newlist, i, obj);
647 650 }
648 651
649 652 return newlist;
650 653 }
651 654
652 655 static int check_filter(PyObject *filter, Py_ssize_t arg)
653 656 {
654 657 if (filter) {
655 658 PyObject *arglist, *result;
656 659 int isfiltered;
657 660
658 661 arglist = Py_BuildValue("(n)", arg);
659 662 if (!arglist) {
660 663 return -1;
661 664 }
662 665
663 666 result = PyObject_Call(filter, arglist, NULL);
664 667 Py_DECREF(arglist);
665 668 if (!result) {
666 669 return -1;
667 670 }
668 671
669 672 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
670 673 * same as this function, so we can just return it directly.*/
671 674 isfiltered = PyObject_IsTrue(result);
672 675 Py_DECREF(result);
673 676 return isfiltered;
674 677 } else {
675 678 return 0;
676 679 }
677 680 }
678 681
679 682 static inline void set_phase_from_parents(char *phases, int parent_1,
680 683 int parent_2, Py_ssize_t i)
681 684 {
682 685 if (parent_1 >= 0 && phases[parent_1] > phases[i])
683 686 phases[i] = phases[parent_1];
684 687 if (parent_2 >= 0 && phases[parent_2] > phases[i])
685 688 phases[i] = phases[parent_2];
686 689 }
687 690
/* Compute which of `roots` are reachable from `heads` by walking
 * parent links backwards (never below `minroot`). If includepath is
 * true, also return every revision lying on a path between a
 * reachable root and a head. Returns a new list of revision numbers,
 * or NULL with an exception set. */
688 691 static PyObject *reachableroots2(indexObject *self, PyObject *args)
689 692 {
690 693
691 694 /* Input */
692 695 long minroot;
693 696 PyObject *includepatharg = NULL;
694 697 int includepath = 0;
695 698 /* heads and roots are lists */
696 699 PyObject *heads = NULL;
697 700 PyObject *roots = NULL;
698 701 PyObject *reachable = NULL;
699 702
700 703 PyObject *val;
701 704 Py_ssize_t len = index_length(self);
702 705 long revnum;
703 706 Py_ssize_t k;
704 707 Py_ssize_t i;
705 708 Py_ssize_t l;
706 709 int r;
707 710 int parents[2];
708 711
709 712 /* Internal data structure:
710 713 * tovisit: array of length len+1 (all revs + nullrev), filled upto
711 714 * lentovisit
712 715 *
713 716 * revstates: array of length len+1 (all revs + nullrev) */
714 717 int *tovisit = NULL;
715 718 long lentovisit = 0;
716 719 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
717 720 char *revstates = NULL;
718 721
719 722 /* Get arguments */
720 723 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
721 724 &PyList_Type, &roots, &PyBool_Type,
722 725 &includepatharg))
723 726 goto bail;
724 727
725 728 if (includepatharg == Py_True)
726 729 includepath = 1;
727 730
728 731 /* Initialize return set */
729 732 reachable = PyList_New(0);
730 733 if (reachable == NULL)
731 734 goto bail;
732 735
733 736 /* Initialize internal datastructures */
734 737 tovisit = (int *)malloc((len + 1) * sizeof(int));
735 738 if (tovisit == NULL) {
736 739 PyErr_NoMemory();
737 740 goto bail;
738 741 }
739 742
740 743 revstates = (char *)calloc(len + 1, 1);
741 744 if (revstates == NULL) {
742 745 PyErr_NoMemory();
743 746 goto bail;
744 747 }
745 748
	/* Mark every in-range root; revstates is indexed by rev+1 so
	 * that nullrev (-1) maps to slot 0. */
746 749 l = PyList_GET_SIZE(roots);
747 750 for (i = 0; i < l; i++) {
748 751 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
749 752 if (revnum == -1 && PyErr_Occurred())
750 753 goto bail;
751 754 /* If root is out of range, e.g. wdir(), it must be unreachable
752 755 * from heads. So we can just ignore it. */
753 756 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
754 757 continue;
755 758 revstates[revnum + 1] |= RS_ROOT;
756 759 }
757 760
758 761 /* Populate tovisit with all the heads */
759 762 l = PyList_GET_SIZE(heads);
760 763 for (i = 0; i < l; i++) {
761 764 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
762 765 if (revnum == -1 && PyErr_Occurred())
763 766 goto bail;
764 767 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
765 768 PyErr_SetString(PyExc_IndexError, "head out of range");
766 769 goto bail;
767 770 }
768 771 if (!(revstates[revnum + 1] & RS_SEEN)) {
769 772 tovisit[lentovisit++] = (int)revnum;
770 773 revstates[revnum + 1] |= RS_SEEN;
771 774 }
772 775 }
773 776
774 777 /* Visit the tovisit list and find the reachable roots */
775 778 k = 0;
776 779 while (k < lentovisit) {
777 780 /* Add the node to reachable if it is a root*/
778 781 revnum = tovisit[k++];
779 782 if (revstates[revnum + 1] & RS_ROOT) {
780 783 revstates[revnum + 1] |= RS_REACHABLE;
781 784 val = PyInt_FromLong(revnum);
782 785 if (val == NULL)
783 786 goto bail;
784 787 r = PyList_Append(reachable, val);
785 788 Py_DECREF(val);
786 789 if (r < 0)
787 790 goto bail;
	/* without includepath, stop descending below a root */
788 791 if (includepath == 0)
789 792 continue;
790 793 }
791 794
792 795 /* Add its parents to the list of nodes to visit */
793 796 if (revnum == nullrev)
794 797 continue;
795 798 r = index_get_parents(self, revnum, parents, (int)len - 1);
796 799 if (r < 0)
797 800 goto bail;
798 801 for (i = 0; i < 2; i++) {
799 802 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
800 803 parents[i] >= minroot) {
801 804 tovisit[lentovisit++] = parents[i];
802 805 revstates[parents[i] + 1] |= RS_SEEN;
803 806 }
804 807 }
805 808 }
806 809
807 810 /* Find all the nodes in between the roots we found and the heads
808 811 * and add them to the reachable set */
	/* forward sweep: a seen rev with a REACHABLE parent is itself on
	 * a root-to-head path */
809 812 if (includepath == 1) {
810 813 long minidx = minroot;
811 814 if (minidx < 0)
812 815 minidx = 0;
813 816 for (i = minidx; i < len; i++) {
814 817 if (!(revstates[i + 1] & RS_SEEN))
815 818 continue;
816 819 r = index_get_parents(self, i, parents, (int)len - 1);
817 820 /* Corrupted index file, error is set from
818 821 * index_get_parents */
819 822 if (r < 0)
820 823 goto bail;
821 824 if (((revstates[parents[0] + 1] |
822 825 revstates[parents[1] + 1]) &
823 826 RS_REACHABLE) &&
824 827 !(revstates[i + 1] & RS_REACHABLE)) {
825 828 revstates[i + 1] |= RS_REACHABLE;
826 829 val = PyInt_FromSsize_t(i);
827 830 if (val == NULL)
828 831 goto bail;
829 832 r = PyList_Append(reachable, val);
830 833 Py_DECREF(val);
831 834 if (r < 0)
832 835 goto bail;
833 836 }
834 837 }
835 838 }
836 839
837 840 free(revstates);
838 841 free(tovisit);
839 842 return reachable;
840 843 bail:
841 844 Py_XDECREF(reachable);
842 845 free(revstates);
843 846 free(tovisit);
844 847 return NULL;
845 848 }
846 849
/* Mark every revision whose node appears in the set `roots` with
 * `phase` in the phases array, and return the smallest such revision
 * (-1 if none matched). Returns -2 with an exception set on error. */
847 850 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
848 851 char phase)
849 852 {
850 853 Py_ssize_t len = index_length(self);
851 854 PyObject *item;
852 855 PyObject *iterator;
853 856 int rev, minrev = -1;
854 857 char *node;
855 858
856 859 if (!PySet_Check(roots)) {
857 860 PyErr_SetString(PyExc_TypeError,
858 861 "roots must be a set of nodes");
859 862 return -2;
860 863 }
861 864 iterator = PyObject_GetIter(roots);
862 865 if (iterator == NULL)
863 866 return -2;
864 867 while ((item = PyIter_Next(iterator))) {
865 868 if (node_check(self->nodelen, item, &node) == -1)
866 869 goto failed;
867 870 rev = index_find_node(self, node);
868 871 /* null is implicitly public, so negative is invalid */
869 872 if (rev < 0 || rev >= len)
870 873 goto failed;
871 874 phases[rev] = phase;
872 875 if (minrev == -1 || minrev > rev)
873 876 minrev = rev;
874 877 Py_DECREF(item);
875 878 }
	/* NOTE(review): PyIter_Next returning NULL can also signal an
	 * iteration error, which is not checked here before returning
	 * minrev — confirm whether callers tolerate that. */
876 879 Py_DECREF(iterator);
877 880 return minrev;
878 881 failed:
879 882 Py_DECREF(iterator);
880 883 Py_DECREF(item);
881 884 return -2;
882 885 }
883 886
/* Compute the full phase of every revision from the phase-root dict
 * {phase-number: set-of-root-nodes}. Returns a 2-tuple
 * (len, {phase: set-of-revs}) covering the non-public tracked phases,
 * or NULL with an exception set. */
884 887 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
885 888 {
886 889 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
887 890 96: internal */
888 891 static const char trackedphases[] = {1, 2, 32, 96};
889 892 PyObject *roots = Py_None;
890 893 PyObject *phasesetsdict = NULL;
891 894 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
892 895 Py_ssize_t len = index_length(self);
893 896 char *phases = NULL;
894 897 int minphaserev = -1, rev, i;
895 898 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
896 899
897 900 if (!PyArg_ParseTuple(args, "O", &roots))
898 901 return NULL;
899 902 if (roots == NULL || !PyDict_Check(roots)) {
900 903 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
901 904 return NULL;
902 905 }
903 906
	/* one phase byte per revision, zero-initialized (= public) */
904 907 phases = calloc(len, 1);
905 908 if (phases == NULL) {
906 909 PyErr_NoMemory();
907 910 return NULL;
908 911 }
909 912
	/* seed the phases array from each phase's root nodes and track
	 * the lowest rev that has a non-public phase */
910 913 for (i = 0; i < numphases; ++i) {
911 914 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
912 915 PyObject *phaseroots = NULL;
913 916 if (pyphase == NULL)
914 917 goto release;
915 918 phaseroots = PyDict_GetItem(roots, pyphase);
916 919 Py_DECREF(pyphase);
917 920 if (phaseroots == NULL)
918 921 continue;
919 922 rev = add_roots_get_min(self, phaseroots, phases,
920 923 trackedphases[i]);
921 924 if (rev == -2)
922 925 goto release;
923 926 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
924 927 minphaserev = rev;
925 928 }
926 929
927 930 for (i = 0; i < numphases; ++i) {
928 931 phasesets[i] = PySet_New(NULL);
929 932 if (phasesets[i] == NULL)
930 933 goto release;
931 934 }
932 935
	/* propagate phases forward from the earliest non-public rev and
	 * bucket each rev into the set for its final phase */
933 936 if (minphaserev == -1)
934 937 minphaserev = len;
935 938 for (rev = minphaserev; rev < len; ++rev) {
936 939 PyObject *pyphase = NULL;
937 940 PyObject *pyrev = NULL;
938 941 int parents[2];
939 942 /*
940 943 * The parent lookup could be skipped for phaseroots, but
941 944 * phase --force would historically not recompute them
942 945 * correctly, leaving descendents with a lower phase around.
943 946 * As such, unconditionally recompute the phase.
944 947 */
945 948 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
946 949 goto release;
947 950 set_phase_from_parents(phases, parents[0], parents[1], rev);
948 951 switch (phases[rev]) {
949 952 case 0:
950 953 continue;
951 954 case 1:
952 955 pyphase = phasesets[0];
953 956 break;
954 957 case 2:
955 958 pyphase = phasesets[1];
956 959 break;
957 960 case 32:
958 961 pyphase = phasesets[2];
959 962 break;
960 963 case 96:
961 964 pyphase = phasesets[3];
962 965 break;
963 966 default:
964 967 /* this should never happen since the phase number is
965 968 * specified by this function. */
966 969 PyErr_SetString(PyExc_SystemError,
967 970 "bad phase number in internal list");
968 971 goto release;
969 972 }
970 973 pyrev = PyInt_FromLong(rev);
971 974 if (pyrev == NULL)
972 975 goto release;
973 976 if (PySet_Add(pyphase, pyrev) == -1) {
974 977 Py_DECREF(pyrev);
975 978 goto release;
976 979 }
977 980 Py_DECREF(pyrev);
978 981 }
979 982
980 983 phasesetsdict = _dict_new_presized(numphases);
981 984 if (phasesetsdict == NULL)
982 985 goto release;
983 986 for (i = 0; i < numphases; ++i) {
984 987 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
985 988 if (pyphase == NULL)
986 989 goto release;
	/* NOTE(review): on the success path pyphase is never
	 * Py_DECREF'd after PyDict_SetItem (which does not steal the
	 * reference) — looks like a small per-call leak; confirm. */
987 990 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
988 991 -1) {
989 992 Py_DECREF(pyphase);
990 993 goto release;
991 994 }
992 995 Py_DECREF(phasesets[i]);
993 996 phasesets[i] = NULL;
994 997 }
995 998
996 999 free(phases);
	/* "N" steals the phasesetsdict reference */
997 1000 return Py_BuildValue("nN", len, phasesetsdict);
998 1001
999 1002 release:
1000 1003 for (i = 0; i < numphases; ++i)
1001 1004 Py_XDECREF(phasesets[i]);
1002 1005 Py_XDECREF(phasesetsdict);
1003 1006
1004 1007 free(phases);
1005 1008 return NULL;
1006 1009 }
1007 1010
1008 1011 static PyObject *index_headrevs(indexObject *self, PyObject *args)
1009 1012 {
1010 1013 Py_ssize_t i, j, len;
1011 1014 char *nothead = NULL;
1012 1015 PyObject *heads = NULL;
1013 1016 PyObject *filter = NULL;
1014 1017 PyObject *filteredrevs = Py_None;
1015 1018
1016 1019 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
1017 1020 return NULL;
1018 1021 }
1019 1022
1020 1023 if (self->headrevs && filteredrevs == self->filteredrevs)
1021 1024 return list_copy(self->headrevs);
1022 1025
1023 1026 Py_DECREF(self->filteredrevs);
1024 1027 self->filteredrevs = filteredrevs;
1025 1028 Py_INCREF(filteredrevs);
1026 1029
1027 1030 if (filteredrevs != Py_None) {
1028 1031 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
1029 1032 if (!filter) {
1030 1033 PyErr_SetString(
1031 1034 PyExc_TypeError,
1032 1035 "filteredrevs has no attribute __contains__");
1033 1036 goto bail;
1034 1037 }
1035 1038 }
1036 1039
1037 1040 len = index_length(self);
1038 1041 heads = PyList_New(0);
1039 1042 if (heads == NULL)
1040 1043 goto bail;
1041 1044 if (len == 0) {
1042 1045 PyObject *nullid = PyInt_FromLong(-1);
1043 1046 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1044 1047 Py_XDECREF(nullid);
1045 1048 goto bail;
1046 1049 }
1047 1050 goto done;
1048 1051 }
1049 1052
1050 1053 nothead = calloc(len, 1);
1051 1054 if (nothead == NULL) {
1052 1055 PyErr_NoMemory();
1053 1056 goto bail;
1054 1057 }
1055 1058
1056 1059 for (i = len - 1; i >= 0; i--) {
1057 1060 int isfiltered;
1058 1061 int parents[2];
1059 1062
1060 1063 /* If nothead[i] == 1, it means we've seen an unfiltered child
1061 1064 * of this node already, and therefore this node is not
1062 1065 * filtered. So we can skip the expensive check_filter step.
1063 1066 */
1064 1067 if (nothead[i] != 1) {
1065 1068 isfiltered = check_filter(filter, i);
1066 1069 if (isfiltered == -1) {
1067 1070 PyErr_SetString(PyExc_TypeError,
1068 1071 "unable to check filter");
1069 1072 goto bail;
1070 1073 }
1071 1074
1072 1075 if (isfiltered) {
1073 1076 nothead[i] = 1;
1074 1077 continue;
1075 1078 }
1076 1079 }
1077 1080
1078 1081 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1079 1082 goto bail;
1080 1083 for (j = 0; j < 2; j++) {
1081 1084 if (parents[j] >= 0)
1082 1085 nothead[parents[j]] = 1;
1083 1086 }
1084 1087 }
1085 1088
1086 1089 for (i = 0; i < len; i++) {
1087 1090 PyObject *head;
1088 1091
1089 1092 if (nothead[i])
1090 1093 continue;
1091 1094 head = PyInt_FromSsize_t(i);
1092 1095 if (head == NULL || PyList_Append(heads, head) == -1) {
1093 1096 Py_XDECREF(head);
1094 1097 goto bail;
1095 1098 }
1096 1099 }
1097 1100
1098 1101 done:
1099 1102 self->headrevs = heads;
1100 1103 Py_XDECREF(filter);
1101 1104 free(nothead);
1102 1105 return list_copy(self->headrevs);
1103 1106 bail:
1104 1107 Py_XDECREF(filter);
1105 1108 Py_XDECREF(heads);
1106 1109 free(nothead);
1107 1110 return NULL;
1108 1111 }
1109 1112
1110 1113 /**
1111 1114 * Obtain the base revision index entry.
1112 1115 *
1113 1116 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1114 1117 */
1115 1118 static inline int index_baserev(indexObject *self, int rev)
1116 1119 {
1117 1120 const char *data;
1118 1121 int result;
1119 1122
1120 1123 data = index_deref(self, rev);
1121 1124 if (data == NULL)
1122 1125 return -2;
1123 1126 result = getbe32(data + 16);
1124 1127
1125 1128 if (result > rev) {
1126 1129 PyErr_Format(
1127 1130 PyExc_ValueError,
1128 1131 "corrupted revlog, revision base above revision: %d, %d",
1129 1132 rev, result);
1130 1133 return -2;
1131 1134 }
1132 1135 if (result < -1) {
1133 1136 PyErr_Format(
1134 1137 PyExc_ValueError,
1135 1138 "corrupted revlog, revision base out of range: %d, %d", rev,
1136 1139 result);
1137 1140 return -2;
1138 1141 }
1139 1142 return result;
1140 1143 }
1141 1144
1142 1145 /**
1143 1146 * Find if a revision is a snapshot or not
1144 1147 *
1145 1148 * Only relevant for sparse-revlog case.
1146 1149 * Callers must ensure that rev is in a valid range.
1147 1150 */
1148 1151 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1149 1152 {
1150 1153 int ps[2];
1151 1154 Py_ssize_t base;
1152 1155 while (rev >= 0) {
1153 1156 base = (Py_ssize_t)index_baserev(self, rev);
1154 1157 if (base == rev) {
1155 1158 base = -1;
1156 1159 }
1157 1160 if (base == -2) {
1158 1161 assert(PyErr_Occurred());
1159 1162 return -1;
1160 1163 }
1161 1164 if (base == -1) {
1162 1165 return 1;
1163 1166 }
1164 1167 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1165 1168 assert(PyErr_Occurred());
1166 1169 return -1;
1167 1170 };
1168 1171 if (base == ps[0] || base == ps[1]) {
1169 1172 return 0;
1170 1173 }
1171 1174 rev = base;
1172 1175 }
1173 1176 return rev == -1;
1174 1177 }
1175 1178
1176 1179 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1177 1180 {
1178 1181 long rev;
1179 1182 int issnap;
1180 1183 Py_ssize_t length = index_length(self);
1181 1184
1182 1185 if (!pylong_to_long(value, &rev)) {
1183 1186 return NULL;
1184 1187 }
1185 1188 if (rev < -1 || rev >= length) {
1186 1189 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1187 1190 rev);
1188 1191 return NULL;
1189 1192 };
1190 1193 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1191 1194 if (issnap < 0) {
1192 1195 return NULL;
1193 1196 };
1194 1197 return PyBool_FromLong((long)issnap);
1195 1198 }
1196 1199
1197 1200 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1198 1201 {
1199 1202 Py_ssize_t start_rev;
1200 1203 PyObject *cache;
1201 1204 Py_ssize_t base;
1202 1205 Py_ssize_t rev;
1203 1206 PyObject *key = NULL;
1204 1207 PyObject *value = NULL;
1205 1208 const Py_ssize_t length = index_length(self);
1206 1209 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1207 1210 return NULL;
1208 1211 }
1209 1212 for (rev = start_rev; rev < length; rev++) {
1210 1213 int issnap;
1211 1214 PyObject *allvalues = NULL;
1212 1215 issnap = index_issnapshotrev(self, rev);
1213 1216 if (issnap < 0) {
1214 1217 goto bail;
1215 1218 }
1216 1219 if (issnap == 0) {
1217 1220 continue;
1218 1221 }
1219 1222 base = (Py_ssize_t)index_baserev(self, rev);
1220 1223 if (base == rev) {
1221 1224 base = -1;
1222 1225 }
1223 1226 if (base == -2) {
1224 1227 assert(PyErr_Occurred());
1225 1228 goto bail;
1226 1229 }
1227 1230 key = PyInt_FromSsize_t(base);
1228 1231 allvalues = PyDict_GetItem(cache, key);
1229 1232 if (allvalues == NULL && PyErr_Occurred()) {
1230 1233 goto bail;
1231 1234 }
1232 1235 if (allvalues == NULL) {
1233 1236 int r;
1234 1237 allvalues = PyList_New(0);
1235 1238 if (!allvalues) {
1236 1239 goto bail;
1237 1240 }
1238 1241 r = PyDict_SetItem(cache, key, allvalues);
1239 1242 Py_DECREF(allvalues);
1240 1243 if (r < 0) {
1241 1244 goto bail;
1242 1245 }
1243 1246 }
1244 1247 value = PyInt_FromSsize_t(rev);
1245 1248 if (PyList_Append(allvalues, value)) {
1246 1249 goto bail;
1247 1250 }
1248 1251 Py_CLEAR(key);
1249 1252 Py_CLEAR(value);
1250 1253 }
1251 1254 Py_RETURN_NONE;
1252 1255 bail:
1253 1256 Py_XDECREF(key);
1254 1257 Py_XDECREF(value);
1255 1258 return NULL;
1256 1259 }
1257 1260
1258 1261 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1259 1262 {
1260 1263 int rev, generaldelta;
1261 1264 PyObject *stoparg;
1262 1265 int stoprev, iterrev, baserev = -1;
1263 1266 int stopped;
1264 1267 PyObject *chain = NULL, *result = NULL;
1265 1268 const Py_ssize_t length = index_length(self);
1266 1269
1267 1270 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1268 1271 return NULL;
1269 1272 }
1270 1273
1271 1274 if (PyInt_Check(stoparg)) {
1272 1275 stoprev = (int)PyInt_AsLong(stoparg);
1273 1276 if (stoprev == -1 && PyErr_Occurred()) {
1274 1277 return NULL;
1275 1278 }
1276 1279 } else if (stoparg == Py_None) {
1277 1280 stoprev = -2;
1278 1281 } else {
1279 1282 PyErr_SetString(PyExc_ValueError,
1280 1283 "stoprev must be integer or None");
1281 1284 return NULL;
1282 1285 }
1283 1286
1284 1287 if (rev < 0 || rev >= length) {
1285 1288 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1286 1289 return NULL;
1287 1290 }
1288 1291
1289 1292 chain = PyList_New(0);
1290 1293 if (chain == NULL) {
1291 1294 return NULL;
1292 1295 }
1293 1296
1294 1297 baserev = index_baserev(self, rev);
1295 1298
1296 1299 /* This should never happen. */
1297 1300 if (baserev <= -2) {
1298 1301 /* Error should be set by index_deref() */
1299 1302 assert(PyErr_Occurred());
1300 1303 goto bail;
1301 1304 }
1302 1305
1303 1306 iterrev = rev;
1304 1307
1305 1308 while (iterrev != baserev && iterrev != stoprev) {
1306 1309 PyObject *value = PyInt_FromLong(iterrev);
1307 1310 if (value == NULL) {
1308 1311 goto bail;
1309 1312 }
1310 1313 if (PyList_Append(chain, value)) {
1311 1314 Py_DECREF(value);
1312 1315 goto bail;
1313 1316 }
1314 1317 Py_DECREF(value);
1315 1318
1316 1319 if (generaldelta) {
1317 1320 iterrev = baserev;
1318 1321 } else {
1319 1322 iterrev--;
1320 1323 }
1321 1324
1322 1325 if (iterrev < 0) {
1323 1326 break;
1324 1327 }
1325 1328
1326 1329 if (iterrev >= length) {
1327 1330 PyErr_SetString(PyExc_IndexError,
1328 1331 "revision outside index");
1329 1332 return NULL;
1330 1333 }
1331 1334
1332 1335 baserev = index_baserev(self, iterrev);
1333 1336
1334 1337 /* This should never happen. */
1335 1338 if (baserev <= -2) {
1336 1339 /* Error should be set by index_deref() */
1337 1340 assert(PyErr_Occurred());
1338 1341 goto bail;
1339 1342 }
1340 1343 }
1341 1344
1342 1345 if (iterrev == stoprev) {
1343 1346 stopped = 1;
1344 1347 } else {
1345 1348 PyObject *value = PyInt_FromLong(iterrev);
1346 1349 if (value == NULL) {
1347 1350 goto bail;
1348 1351 }
1349 1352 if (PyList_Append(chain, value)) {
1350 1353 Py_DECREF(value);
1351 1354 goto bail;
1352 1355 }
1353 1356 Py_DECREF(value);
1354 1357
1355 1358 stopped = 0;
1356 1359 }
1357 1360
1358 1361 if (PyList_Reverse(chain)) {
1359 1362 goto bail;
1360 1363 }
1361 1364
1362 1365 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1363 1366 Py_DECREF(chain);
1364 1367 return result;
1365 1368
1366 1369 bail:
1367 1370 Py_DECREF(chain);
1368 1371 return NULL;
1369 1372 }
1370 1373
1371 1374 static inline int64_t
1372 1375 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1373 1376 {
1374 1377 int64_t start_offset;
1375 1378 int64_t end_offset;
1376 1379 int end_size;
1377 1380 start_offset = index_get_start(self, start_rev);
1378 1381 if (start_offset < 0) {
1379 1382 return -1;
1380 1383 }
1381 1384 end_offset = index_get_start(self, end_rev);
1382 1385 if (end_offset < 0) {
1383 1386 return -1;
1384 1387 }
1385 1388 end_size = index_get_length(self, end_rev);
1386 1389 if (end_size < 0) {
1387 1390 return -1;
1388 1391 }
1389 1392 if (end_offset < start_offset) {
1390 1393 PyErr_Format(PyExc_ValueError,
1391 1394 "corrupted revlog index: inconsistent offset "
1392 1395 "between revisions (%zd) and (%zd)",
1393 1396 start_rev, end_rev);
1394 1397 return -1;
1395 1398 }
1396 1399 return (end_offset - start_offset) + (int64_t)end_size;
1397 1400 }
1398 1401
1399 1402 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1400 1403 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1401 1404 Py_ssize_t startidx, Py_ssize_t endidx)
1402 1405 {
1403 1406 int length;
1404 1407 while (endidx > 1 && endidx > startidx) {
1405 1408 length = index_get_length(self, revs[endidx - 1]);
1406 1409 if (length < 0) {
1407 1410 return -1;
1408 1411 }
1409 1412 if (length != 0) {
1410 1413 break;
1411 1414 }
1412 1415 endidx -= 1;
1413 1416 }
1414 1417 return endidx;
1415 1418 }
1416 1419
1417 1420 struct Gap {
1418 1421 int64_t size;
1419 1422 Py_ssize_t idx;
1420 1423 };
1421 1424
1422 1425 static int gap_compare(const void *left, const void *right)
1423 1426 {
1424 1427 const struct Gap *l_left = ((const struct Gap *)left);
1425 1428 const struct Gap *l_right = ((const struct Gap *)right);
1426 1429 if (l_left->size < l_right->size) {
1427 1430 return -1;
1428 1431 } else if (l_left->size > l_right->size) {
1429 1432 return 1;
1430 1433 }
1431 1434 return 0;
1432 1435 }
1433 1436 static int Py_ssize_t_compare(const void *left, const void *right)
1434 1437 {
1435 1438 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1436 1439 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1437 1440 if (l_left < l_right) {
1438 1441 return -1;
1439 1442 } else if (l_left > l_right) {
1440 1443 return 1;
1441 1444 }
1442 1445 return 0;
1443 1446 }
1444 1447
/*
 * Slice a delta chain into chunks dense enough to be read efficiently.
 *
 * Arguments: (list_revs, targetdensity, mingapsize). Returns a sequence
 * of lists of revisions; gaps larger than `mingapsize` are skipped until
 * the ratio of payload over read bytes reaches `targetdensity`.
 */
static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
{
	/* method arguments */
	PyObject *list_revs = NULL; /* revisions in the chain */
	double targetdensity = 0; /* min density to achieve */
	Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */

	/* other core variables */
	Py_ssize_t idxlen = index_length(self);
	Py_ssize_t i; /* used for various iteration */
	PyObject *result = NULL; /* the final return of the function */

	/* generic information about the delta chain being slice */
	Py_ssize_t num_revs = 0; /* size of the full delta chain */
	Py_ssize_t *revs = NULL; /* native array of revision in the chain */
	int64_t chainpayload = 0; /* sum of all delta in the chain */
	int64_t deltachainspan = 0; /* distance from first byte to last byte */

	/* variable used for slicing the delta chain */
	int64_t readdata = 0; /* amount of data currently planned to be read */
	double density = 0; /* ration of payload data compared to read ones */
	int64_t previous_end;
	struct Gap *gaps = NULL; /* array of notable gap in the chain */
	Py_ssize_t num_gaps =
	    0; /* total number of notable gap recorded so far */
	Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
	Py_ssize_t num_selected = 0; /* number of gaps skipped */
	PyObject *chunk = NULL; /* individual slice */
	PyObject *allchunks = NULL; /* all slices */
	Py_ssize_t previdx;

	/* parsing argument */
	if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
	                      &targetdensity, &mingapsize)) {
		goto bail;
	}

	/* If the delta chain contains a single element, we do not need slicing
	 */
	num_revs = PyList_GET_SIZE(list_revs);
	if (num_revs <= 1) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* Turn the python list into a native integer array (for efficiency) */
	revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
	if (revs == NULL) {
		PyErr_NoMemory();
		goto bail;
	}
	for (i = 0; i < num_revs; i++) {
		Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
		if (revnum == -1 && PyErr_Occurred()) {
			goto bail;
		}
		if (revnum < nullrev || revnum >= idxlen) {
			PyErr_Format(PyExc_IndexError,
			             "index out of range: %zd", revnum);
			goto bail;
		}
		revs[i] = revnum;
	}

	/* Compute and check various property of the unsliced delta chain */
	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
	if (deltachainspan < 0) {
		goto bail;
	}

	/* a chain smaller than the gap threshold is read in one piece */
	if (deltachainspan <= mingapsize) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	chainpayload = 0;
	for (i = 0; i < num_revs; i++) {
		int tmp = index_get_length(self, revs[i]);
		if (tmp < 0) {
			goto bail;
		}
		chainpayload += tmp;
	}

	readdata = deltachainspan;
	density = 1.0;

	if (0 < deltachainspan) {
		density = (double)chainpayload / (double)deltachainspan;
	}

	/* already dense enough: no slicing needed */
	if (density >= targetdensity) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* if chain is too sparse, look for relevant gaps */
	gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
	if (gaps == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	previous_end = -1;
	for (i = 0; i < num_revs; i++) {
		int64_t revstart;
		int revsize;
		revstart = index_get_start(self, revs[i]);
		if (revstart < 0) {
			goto bail;
		};
		revsize = index_get_length(self, revs[i]);
		if (revsize < 0) {
			goto bail;
		};
		if (revsize == 0) {
			continue;
		}
		if (previous_end >= 0) {
			int64_t gapsize = revstart - previous_end;
			if (gapsize > mingapsize) {
				gaps[num_gaps].size = gapsize;
				gaps[num_gaps].idx = i;
				num_gaps += 1;
			}
		}
		previous_end = revstart + revsize;
	}
	if (num_gaps == 0) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);

	/* Slice the largest gap first, they improve the density the most */
	selected_indices =
	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
	if (selected_indices == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = num_gaps - 1; i >= 0; i--) {
		selected_indices[num_selected] = gaps[i].idx;
		readdata -= gaps[i].size;
		num_selected += 1;
		if (readdata <= 0) {
			density = 1.0;
		} else {
			density = (double)chainpayload / (double)readdata;
		}
		if (density >= targetdensity) {
			break;
		}
	}
	/* re-sort the kept cut points so slices come out in revision order */
	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
	      &Py_ssize_t_compare);

	/* create the resulting slice */
	allchunks = PyList_New(0);
	if (allchunks == NULL) {
		goto bail;
	}
	previdx = 0;
	/* sentinel so the final slice runs to the end of the chain */
	selected_indices[num_selected] = num_revs;
	for (i = 0; i <= num_selected; i++) {
		Py_ssize_t idx = selected_indices[i];
		Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
		if (endidx < 0) {
			goto bail;
		}
		if (previdx < endidx) {
			chunk = PyList_GetSlice(list_revs, previdx, endidx);
			if (chunk == NULL) {
				goto bail;
			}
			if (PyList_Append(allchunks, chunk) == -1) {
				goto bail;
			}
			Py_DECREF(chunk);
			chunk = NULL;
		}
		previdx = idx;
	}
	result = allchunks;
	goto done;

bail:
	Py_XDECREF(allchunks);
	Py_XDECREF(chunk);
done:
	free(revs);
	free(gaps);
	free(selected_indices);
	return result;
}
1640 1643
1641 1644 static inline int nt_level(const char *node, Py_ssize_t level)
1642 1645 {
1643 1646 int v = node[level >> 1];
1644 1647 if (!(level & 1))
1645 1648 v >>= 4;
1646 1649 return v & 0xf;
1647 1650 }
1648 1651
1649 1652 /*
1650 1653 * Return values:
1651 1654 *
1652 1655 * -4: match is ambiguous (multiple candidates)
1653 1656 * -2: not found
1654 1657 * rest: valid rev
1655 1658 */
1656 1659 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1657 1660 int hex)
1658 1661 {
1659 1662 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1660 1663 int level, maxlevel, off;
1661 1664
1662 1665 /* If the input is binary, do a fast check for the nullid first. */
1663 1666 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1664 1667 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1665 1668 return -1;
1666 1669
1667 1670 if (hex)
1668 1671 maxlevel = nodelen;
1669 1672 else
1670 1673 maxlevel = 2 * nodelen;
1671 1674 if (maxlevel > 2 * self->nodelen)
1672 1675 maxlevel = 2 * self->nodelen;
1673 1676
1674 1677 for (level = off = 0; level < maxlevel; level++) {
1675 1678 int k = getnybble(node, level);
1676 1679 nodetreenode *n = &self->nodes[off];
1677 1680 int v = n->children[k];
1678 1681
1679 1682 if (v < 0) {
1680 1683 const char *n;
1681 1684 Py_ssize_t i;
1682 1685
1683 1686 v = -(v + 2);
1684 1687 n = index_node(self->index, v);
1685 1688 if (n == NULL)
1686 1689 return -2;
1687 1690 for (i = level; i < maxlevel; i++)
1688 1691 if (getnybble(node, i) != nt_level(n, i))
1689 1692 return -2;
1690 1693 return v;
1691 1694 }
1692 1695 if (v == 0)
1693 1696 return -2;
1694 1697 off = v;
1695 1698 }
1696 1699 /* multiple matches against an ambiguous prefix */
1697 1700 return -4;
1698 1701 }
1699 1702
/*
 * Allocate a fresh trie node, growing the backing array (doubling its
 * capacity) when full.
 *
 * Returns the index of the new node, or -1 with an exception set.
 */
static int nt_new(nodetree *self)
{
	if (self->length == self->capacity) {
		size_t newcapacity;
		nodetreenode *newnodes;
		newcapacity = self->capacity * 2;
		/* guard the multiplication below against overflow */
		if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
			PyErr_SetString(PyExc_MemoryError,
			                "overflow in nt_new");
			return -1;
		}
		newnodes =
		    realloc(self->nodes, newcapacity * sizeof(nodetreenode));
		if (newnodes == NULL) {
			PyErr_SetString(PyExc_MemoryError, "out of memory");
			return -1;
		}
		self->capacity = newcapacity;
		self->nodes = newnodes;
		/* zero the newly added tail so fresh nodes start empty */
		memset(&self->nodes[self->length], 0,
		       sizeof(nodetreenode) * (self->capacity - self->length));
	}
	return self->length++;
}
1724 1727
/*
 * Insert `node` -> `rev` into the trie, splitting leaf nodes on prefix
 * collisions. Children encode: 0 = empty, >0 = interior node index,
 * <0 = leaf storing -(rev + 2).
 *
 * Returns 0 on success, -1 on error.
 */
static int nt_insert(nodetree *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 2 * self->nodelen) {
		int k = nt_level(node, level);
		nodetreenode *n;
		int v;

		n = &self->nodes[off];
		v = n->children[k];

		if (v == 0) {
			/* empty slot: store the rev as a leaf */
			n->children[k] = -rev - 2;
			return 0;
		}
		if (v < 0) {
			/* leaf collision: either the same node (overwrite)
			 * or split by pushing the old leaf one level down */
			const char *oldnode =
			    index_node_existing(self->index, -(v + 2));
			int noff;

			if (oldnode == NULL)
				return -1;
			if (!memcmp(oldnode, node, self->nodelen)) {
				n->children[k] = -rev - 2;
				return 0;
			}
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nodes may have been changed by realloc */
			self->nodes[off].children[k] = noff;
			off = noff;
			n = &self->nodes[off];
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->depth)
				self->depth = level;
			self->splits += 1;
		} else {
			level += 1;
			off = v;
		}
	}

	return -1;
}
1772 1775
1773 1776 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1774 1777 {
1775 1778 Py_ssize_t rev;
1776 1779 const char *node;
1777 1780 Py_ssize_t length;
1778 1781 if (!PyArg_ParseTuple(args, "n", &rev))
1779 1782 return NULL;
1780 1783 length = index_length(self->nt.index);
1781 1784 if (rev < 0 || rev >= length) {
1782 1785 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1783 1786 return NULL;
1784 1787 }
1785 1788 node = index_node_existing(self->nt.index, rev);
1786 1789 if (nt_insert(&self->nt, node, (int)rev) == -1)
1787 1790 return NULL;
1788 1791 Py_RETURN_NONE;
1789 1792 }
1790 1793
/* Remove `node` from the trie. Returns 0 on success, -1 on error. */
static int nt_delete_node(nodetree *self, const char *node)
{
	/* rev==-2 happens to get encoded as 0, which is interpreted as not set
	 */
	return nt_insert(self, node, -2);
}
1797 1800
/*
 * Initialize a nodetree over `index` with room for roughly `capacity`
 * revisions. Returns 0 on success, -1 with an exception set.
 *
 * Note: does not take a reference on `index`; the caller owns that.
 */
static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
{
	/* Initialize before overflow-checking to avoid nt_dealloc() crash. */
	self->nodes = NULL;

	self->index = index;
	/* The input capacity is in terms of revisions, while the field is in
	 * terms of nodetree nodes. */
	self->capacity = (capacity < 4 ? 4 : capacity / 2);
	self->nodelen = index->nodelen;
	self->depth = 0;
	self->splits = 0;
	if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
		PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
		return -1;
	}
	self->nodes = calloc(self->capacity, sizeof(nodetreenode));
	if (self->nodes == NULL) {
		PyErr_NoMemory();
		return -1;
	}
	/* node 0 is the (always present) root */
	self->length = 1;
	return 0;
}
1822 1825
1823 1826 static int ntobj_init(nodetreeObject *self, PyObject *args)
1824 1827 {
1825 1828 PyObject *index;
1826 1829 unsigned capacity;
1827 1830 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1828 1831 &capacity))
1829 1832 return -1;
1830 1833 Py_INCREF(index);
1831 1834 return nt_init(&self->nt, (indexObject *)index, capacity);
1832 1835 }
1833 1836
/* Look up a hex-digit prefix of length `nodelen`; same return values as
 * nt_find (-4 ambiguous, -2 not found, otherwise the rev). */
static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
{
	return nt_find(self, node, nodelen, 1);
}
1838 1841
1839 1842 /*
1840 1843 * Find the length of the shortest unique prefix of node.
1841 1844 *
1842 1845 * Return values:
1843 1846 *
1844 1847 * -3: error (exception set)
1845 1848 * -2: not found (no exception set)
1846 1849 * rest: length of shortest prefix
1847 1850 */
1848 1851 static int nt_shortest(nodetree *self, const char *node)
1849 1852 {
1850 1853 int level, off;
1851 1854
1852 1855 for (level = off = 0; level < 2 * self->nodelen; level++) {
1853 1856 int k, v;
1854 1857 nodetreenode *n = &self->nodes[off];
1855 1858 k = nt_level(node, level);
1856 1859 v = n->children[k];
1857 1860 if (v < 0) {
1858 1861 const char *n;
1859 1862 v = -(v + 2);
1860 1863 n = index_node_existing(self->index, v);
1861 1864 if (n == NULL)
1862 1865 return -3;
1863 1866 if (memcmp(node, n, self->nodelen) != 0)
1864 1867 /*
1865 1868 * Found a unique prefix, but it wasn't for the
1866 1869 * requested node (i.e the requested node does
1867 1870 * not exist).
1868 1871 */
1869 1872 return -2;
1870 1873 return level + 1;
1871 1874 }
1872 1875 if (v == 0)
1873 1876 return -2;
1874 1877 off = v;
1875 1878 }
1876 1879 /*
1877 1880 * The node was still not unique after 40 hex digits, so this won't
1878 1881 * happen. Also, if we get here, then there's a programming error in
1879 1882 * this file that made us insert a node longer than 40 hex digits.
1880 1883 */
1881 1884 PyErr_SetString(PyExc_Exception, "broken node tree");
1882 1885 return -3;
1883 1886 }
1884 1887
1885 1888 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1886 1889 {
1887 1890 PyObject *val;
1888 1891 char *node;
1889 1892 int length;
1890 1893
1891 1894 if (!PyArg_ParseTuple(args, "O", &val))
1892 1895 return NULL;
1893 1896 if (node_check(self->nt.nodelen, val, &node) == -1)
1894 1897 return NULL;
1895 1898
1896 1899 length = nt_shortest(&self->nt, node);
1897 1900 if (length == -3)
1898 1901 return NULL;
1899 1902 if (length == -2) {
1900 1903 raise_revlog_error();
1901 1904 return NULL;
1902 1905 }
1903 1906 return PyInt_FromLong(length);
1904 1907 }
1905 1908
/* Release the trie's node array; safe to call on a partially-built tree. */
static void nt_dealloc(nodetree *self)
{
	free(self->nodes);
	self->nodes = NULL;
}
1911 1914
/* tp_dealloc for parsers.nodetree: drop the index reference and free the
 * trie storage before releasing the object itself. */
static void ntobj_dealloc(nodetreeObject *self)
{
	Py_XDECREF(self->nt.index);
	nt_dealloc(&self->nt);
	PyObject_Del(self);
}
1918 1921
/* Methods exposed on the parsers.nodetree Python type. */
static PyMethodDef ntobj_methods[] = {
    {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
     "insert an index entry"},
    {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {NULL} /* Sentinel */
};
1926 1929
/* Type object for parsers.nodetree (node -> rev radix tree). */
static PyTypeObject nodetreeType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.nodetree",            /* tp_name */
    sizeof(nodetreeObject),        /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)ntobj_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    0,                             /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "nodetree",                    /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    ntobj_methods,                 /* tp_methods */
    0,                             /* tp_members */
    0,                             /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)ntobj_init,          /* tp_init */
    0,                             /* tp_alloc */
};
1966 1969
/*
 * Lazily build the index's embedded nodetree: create the trie, seed it
 * with the nullid, and reset the lookup statistics.
 *
 * Returns 0 on success (including when already initialized), -1 on error.
 */
static int index_init_nt(indexObject *self)
{
	if (!self->ntinitialized) {
		if (nt_init(&self->nt, self, (int)self->length) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		if (nt_insert(&self->nt, nullid, -1) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		self->ntinitialized = 1;
		/* revisions >= ntrev are not in the trie yet; they are added
		 * lazily by index_find_node / index_populate_nt */
		self->ntrev = (int)index_length(self);
		self->ntlookups = 1;
		self->ntmisses = 0;
	}
	return 0;
}
1985 1988
1986 1989 /*
1987 1990 * Return values:
1988 1991 *
1989 1992 * -3: error (exception set)
1990 1993 * -2: not found (no exception set)
1991 1994 * rest: valid rev
1992 1995 */
1993 1996 static int index_find_node(indexObject *self, const char *node)
1994 1997 {
1995 1998 int rev;
1996 1999
1997 2000 if (index_init_nt(self) == -1)
1998 2001 return -3;
1999 2002
2000 2003 self->ntlookups++;
2001 2004 rev = nt_find(&self->nt, node, self->nodelen, 0);
2002 2005 if (rev >= -1)
2003 2006 return rev;
2004 2007
2005 2008 /*
2006 2009 * For the first handful of lookups, we scan the entire index,
2007 2010 * and cache only the matching nodes. This optimizes for cases
2008 2011 * like "hg tip", where only a few nodes are accessed.
2009 2012 *
2010 2013 * After that, we cache every node we visit, using a single
2011 2014 * scan amortized over multiple lookups. This gives the best
2012 2015 * bulk performance, e.g. for "hg log".
2013 2016 */
2014 2017 if (self->ntmisses++ < 4) {
2015 2018 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2016 2019 const char *n = index_node_existing(self, rev);
2017 2020 if (n == NULL)
2018 2021 return -3;
2019 2022 if (memcmp(node, n, self->nodelen) == 0) {
2020 2023 if (nt_insert(&self->nt, n, rev) == -1)
2021 2024 return -3;
2022 2025 break;
2023 2026 }
2024 2027 }
2025 2028 } else {
2026 2029 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2027 2030 const char *n = index_node_existing(self, rev);
2028 2031 if (n == NULL)
2029 2032 return -3;
2030 2033 if (nt_insert(&self->nt, n, rev) == -1) {
2031 2034 self->ntrev = rev + 1;
2032 2035 return -3;
2033 2036 }
2034 2037 if (memcmp(node, n, self->nodelen) == 0) {
2035 2038 break;
2036 2039 }
2037 2040 }
2038 2041 self->ntrev = rev;
2039 2042 }
2040 2043
2041 2044 if (rev >= 0)
2042 2045 return rev;
2043 2046 return -2;
2044 2047 }
2045 2048
2046 2049 static PyObject *index_getitem(indexObject *self, PyObject *value)
2047 2050 {
2048 2051 char *node;
2049 2052 int rev;
2050 2053
2051 2054 if (PyInt_Check(value)) {
2052 2055 long idx;
2053 2056 if (!pylong_to_long(value, &idx)) {
2054 2057 return NULL;
2055 2058 }
2056 2059 return index_get(self, idx);
2057 2060 }
2058 2061
2059 2062 if (node_check(self->nodelen, value, &node) == -1)
2060 2063 return NULL;
2061 2064 rev = index_find_node(self, node);
2062 2065 if (rev >= -1)
2063 2066 return PyInt_FromLong(rev);
2064 2067 if (rev == -2)
2065 2068 raise_revlog_error();
2066 2069 return NULL;
2067 2070 }
2068 2071
2069 2072 /*
2070 2073 * Fully populate the radix tree.
2071 2074 */
2072 2075 static int index_populate_nt(indexObject *self)
2073 2076 {
2074 2077 int rev;
2075 2078 if (self->ntrev > 0) {
2076 2079 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2077 2080 const char *n = index_node_existing(self, rev);
2078 2081 if (n == NULL)
2079 2082 return -1;
2080 2083 if (nt_insert(&self->nt, n, rev) == -1)
2081 2084 return -1;
2082 2085 }
2083 2086 self->ntrev = -1;
2084 2087 }
2085 2088 return 0;
2086 2089 }
2087 2090
/*
 * Resolve a hexadecimal node prefix to a full binary node.
 *
 * Returns the unique matching binary node, None when the input contains
 * non-hex characters or matches nothing, and raises ValueError for
 * empty/over-long prefixes or RevlogError for ambiguous ones.
 */
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	Py_ssize_t nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
		return NULL;

	if (nodelen < 1) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	/* a hex prefix cannot be longer than twice the binary node size */
	if (nodelen > 2 * self->nodelen) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	/* hexdigit() sets an exception on invalid characters */
	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	/* prefix matching needs the complete node->rev mapping */
	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	rev = nt_partialmatch(&self->nt, node, nodelen);

	switch (rev) {
	case -4:
		/* NOTE(review): -4 appears to mean "ambiguous prefix";
		 * confirm against nt_partialmatch, defined elsewhere */
		raise_revlog_error();
		return NULL;
	case -2:
		/* no match */
		Py_RETURN_NONE;
	case -1:
		/* prefix of the null node */
		return PyBytes_FromStringAndSize(nullid, self->nodelen);
	}

	fullnode = index_node_existing(self, rev);
	if (fullnode == NULL) {
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, self->nodelen);
}
2138 2141
2139 2142 static PyObject *index_shortest(indexObject *self, PyObject *args)
2140 2143 {
2141 2144 PyObject *val;
2142 2145 char *node;
2143 2146 int length;
2144 2147
2145 2148 if (!PyArg_ParseTuple(args, "O", &val))
2146 2149 return NULL;
2147 2150 if (node_check(self->nodelen, val, &node) == -1)
2148 2151 return NULL;
2149 2152
2150 2153 self->ntlookups++;
2151 2154 if (index_init_nt(self) == -1)
2152 2155 return NULL;
2153 2156 if (index_populate_nt(self) == -1)
2154 2157 return NULL;
2155 2158 length = nt_shortest(&self->nt, node);
2156 2159 if (length == -3)
2157 2160 return NULL;
2158 2161 if (length == -2) {
2159 2162 raise_revlog_error();
2160 2163 return NULL;
2161 2164 }
2162 2165 return PyInt_FromLong(length);
2163 2166 }
2164 2167
2165 2168 static PyObject *index_m_get(indexObject *self, PyObject *args)
2166 2169 {
2167 2170 PyObject *val;
2168 2171 char *node;
2169 2172 int rev;
2170 2173
2171 2174 if (!PyArg_ParseTuple(args, "O", &val))
2172 2175 return NULL;
2173 2176 if (node_check(self->nodelen, val, &node) == -1)
2174 2177 return NULL;
2175 2178 rev = index_find_node(self, node);
2176 2179 if (rev == -3)
2177 2180 return NULL;
2178 2181 if (rev == -2)
2179 2182 Py_RETURN_NONE;
2180 2183 return PyInt_FromLong(rev);
2181 2184 }
2182 2185
2183 2186 static int index_contains(indexObject *self, PyObject *value)
2184 2187 {
2185 2188 char *node;
2186 2189
2187 2190 if (PyInt_Check(value)) {
2188 2191 long rev;
2189 2192 if (!pylong_to_long(value, &rev)) {
2190 2193 return -1;
2191 2194 }
2192 2195 return rev >= -1 && rev < index_length(self);
2193 2196 }
2194 2197
2195 2198 if (node_check(self->nodelen, value, &node) == -1)
2196 2199 return -1;
2197 2200
2198 2201 switch (index_find_node(self, node)) {
2199 2202 case -3:
2200 2203 return -1;
2201 2204 case -2:
2202 2205 return 0;
2203 2206 default:
2204 2207 return 1;
2205 2208 }
2206 2209 }
2207 2210
2208 2211 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2209 2212 {
2210 2213 int ret = index_contains(self, args);
2211 2214 if (ret < 0)
2212 2215 return NULL;
2213 2216 return PyBool_FromLong((long)ret);
2214 2217 }
2215 2218
2216 2219 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2217 2220 {
2218 2221 char *node;
2219 2222 int rev;
2220 2223
2221 2224 if (node_check(self->nodelen, val, &node) == -1)
2222 2225 return NULL;
2223 2226 rev = index_find_node(self, node);
2224 2227 if (rev >= -1)
2225 2228 return PyInt_FromLong(rev);
2226 2229 if (rev == -2)
2227 2230 raise_revlog_error();
2228 2231 return NULL;
2229 2232 }
2230 2233
2231 2234 typedef uint64_t bitmask;
2232 2235
2233 2236 /*
2234 2237 * Given a disjoint set of revs, return all candidates for the
2235 2238 * greatest common ancestor. In revset notation, this is the set
2236 2239 * "heads(::a and ::b and ...)"
2237 2240 */
2238 2241 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2239 2242 int revcount)
2240 2243 {
2241 2244 const bitmask allseen = (1ull << revcount) - 1;
2242 2245 const bitmask poison = 1ull << revcount;
2243 2246 PyObject *gca = PyList_New(0);
2244 2247 int i, v, interesting;
2245 2248 int maxrev = -1;
2246 2249 bitmask sp;
2247 2250 bitmask *seen;
2248 2251
2249 2252 if (gca == NULL)
2250 2253 return PyErr_NoMemory();
2251 2254
2252 2255 for (i = 0; i < revcount; i++) {
2253 2256 if (revs[i] > maxrev)
2254 2257 maxrev = revs[i];
2255 2258 }
2256 2259
2257 2260 seen = calloc(sizeof(*seen), maxrev + 1);
2258 2261 if (seen == NULL) {
2259 2262 Py_DECREF(gca);
2260 2263 return PyErr_NoMemory();
2261 2264 }
2262 2265
2263 2266 for (i = 0; i < revcount; i++)
2264 2267 seen[revs[i]] = 1ull << i;
2265 2268
2266 2269 interesting = revcount;
2267 2270
2268 2271 for (v = maxrev; v >= 0 && interesting; v--) {
2269 2272 bitmask sv = seen[v];
2270 2273 int parents[2];
2271 2274
2272 2275 if (!sv)
2273 2276 continue;
2274 2277
2275 2278 if (sv < poison) {
2276 2279 interesting -= 1;
2277 2280 if (sv == allseen) {
2278 2281 PyObject *obj = PyInt_FromLong(v);
2279 2282 if (obj == NULL)
2280 2283 goto bail;
2281 2284 if (PyList_Append(gca, obj) == -1) {
2282 2285 Py_DECREF(obj);
2283 2286 goto bail;
2284 2287 }
2285 2288 sv |= poison;
2286 2289 for (i = 0; i < revcount; i++) {
2287 2290 if (revs[i] == v)
2288 2291 goto done;
2289 2292 }
2290 2293 }
2291 2294 }
2292 2295 if (index_get_parents(self, v, parents, maxrev) < 0)
2293 2296 goto bail;
2294 2297
2295 2298 for (i = 0; i < 2; i++) {
2296 2299 int p = parents[i];
2297 2300 if (p == -1)
2298 2301 continue;
2299 2302 sp = seen[p];
2300 2303 if (sv < poison) {
2301 2304 if (sp == 0) {
2302 2305 seen[p] = sv;
2303 2306 interesting++;
2304 2307 } else if (sp != sv)
2305 2308 seen[p] |= sv;
2306 2309 } else {
2307 2310 if (sp && sp < poison)
2308 2311 interesting--;
2309 2312 seen[p] = sv;
2310 2313 }
2311 2314 }
2312 2315 }
2313 2316
2314 2317 done:
2315 2318 free(seen);
2316 2319 return gca;
2317 2320 bail:
2318 2321 free(seen);
2319 2322 Py_XDECREF(gca);
2320 2323 return NULL;
2321 2324 }
2322 2325
2323 2326 /*
2324 2327 * Given a disjoint set of revs, return the subset with the longest
2325 2328 * path to the root.
2326 2329 */
2327 2330 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2328 2331 {
2329 2332 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2330 2333 static const Py_ssize_t capacity = 24;
2331 2334 int *depth, *interesting = NULL;
2332 2335 int i, j, v, ninteresting;
2333 2336 PyObject *dict = NULL, *keys = NULL;
2334 2337 long *seen = NULL;
2335 2338 int maxrev = -1;
2336 2339 long final;
2337 2340
2338 2341 if (revcount > capacity) {
2339 2342 PyErr_Format(PyExc_OverflowError,
2340 2343 "bitset size (%ld) > capacity (%ld)",
2341 2344 (long)revcount, (long)capacity);
2342 2345 return NULL;
2343 2346 }
2344 2347
2345 2348 for (i = 0; i < revcount; i++) {
2346 2349 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2347 2350 if (n > maxrev)
2348 2351 maxrev = n;
2349 2352 }
2350 2353
2351 2354 depth = calloc(sizeof(*depth), maxrev + 1);
2352 2355 if (depth == NULL)
2353 2356 return PyErr_NoMemory();
2354 2357
2355 2358 seen = calloc(sizeof(*seen), maxrev + 1);
2356 2359 if (seen == NULL) {
2357 2360 PyErr_NoMemory();
2358 2361 goto bail;
2359 2362 }
2360 2363
2361 2364 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2362 2365 if (interesting == NULL) {
2363 2366 PyErr_NoMemory();
2364 2367 goto bail;
2365 2368 }
2366 2369
2367 2370 if (PyList_Sort(revs) == -1)
2368 2371 goto bail;
2369 2372
2370 2373 for (i = 0; i < revcount; i++) {
2371 2374 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2372 2375 long b = 1l << i;
2373 2376 depth[n] = 1;
2374 2377 seen[n] = b;
2375 2378 interesting[b] = 1;
2376 2379 }
2377 2380
2378 2381 /* invariant: ninteresting is the number of non-zero entries in
2379 2382 * interesting. */
2380 2383 ninteresting = (int)revcount;
2381 2384
2382 2385 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2383 2386 int dv = depth[v];
2384 2387 int parents[2];
2385 2388 long sv;
2386 2389
2387 2390 if (dv == 0)
2388 2391 continue;
2389 2392
2390 2393 sv = seen[v];
2391 2394 if (index_get_parents(self, v, parents, maxrev) < 0)
2392 2395 goto bail;
2393 2396
2394 2397 for (i = 0; i < 2; i++) {
2395 2398 int p = parents[i];
2396 2399 long sp;
2397 2400 int dp;
2398 2401
2399 2402 if (p == -1)
2400 2403 continue;
2401 2404
2402 2405 dp = depth[p];
2403 2406 sp = seen[p];
2404 2407 if (dp <= dv) {
2405 2408 depth[p] = dv + 1;
2406 2409 if (sp != sv) {
2407 2410 interesting[sv] += 1;
2408 2411 seen[p] = sv;
2409 2412 if (sp) {
2410 2413 interesting[sp] -= 1;
2411 2414 if (interesting[sp] == 0)
2412 2415 ninteresting -= 1;
2413 2416 }
2414 2417 }
2415 2418 } else if (dv == dp - 1) {
2416 2419 long nsp = sp | sv;
2417 2420 if (nsp == sp)
2418 2421 continue;
2419 2422 seen[p] = nsp;
2420 2423 interesting[sp] -= 1;
2421 2424 if (interesting[sp] == 0)
2422 2425 ninteresting -= 1;
2423 2426 if (interesting[nsp] == 0)
2424 2427 ninteresting += 1;
2425 2428 interesting[nsp] += 1;
2426 2429 }
2427 2430 }
2428 2431 interesting[sv] -= 1;
2429 2432 if (interesting[sv] == 0)
2430 2433 ninteresting -= 1;
2431 2434 }
2432 2435
2433 2436 final = 0;
2434 2437 j = ninteresting;
2435 2438 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2436 2439 if (interesting[i] == 0)
2437 2440 continue;
2438 2441 final |= i;
2439 2442 j -= 1;
2440 2443 }
2441 2444 if (final == 0) {
2442 2445 keys = PyList_New(0);
2443 2446 goto bail;
2444 2447 }
2445 2448
2446 2449 dict = PyDict_New();
2447 2450 if (dict == NULL)
2448 2451 goto bail;
2449 2452
2450 2453 for (i = 0; i < revcount; i++) {
2451 2454 PyObject *key;
2452 2455
2453 2456 if ((final & (1 << i)) == 0)
2454 2457 continue;
2455 2458
2456 2459 key = PyList_GET_ITEM(revs, i);
2457 2460 Py_INCREF(key);
2458 2461 Py_INCREF(Py_None);
2459 2462 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2460 2463 Py_DECREF(key);
2461 2464 Py_DECREF(Py_None);
2462 2465 goto bail;
2463 2466 }
2464 2467 }
2465 2468
2466 2469 keys = PyDict_Keys(dict);
2467 2470
2468 2471 bail:
2469 2472 free(depth);
2470 2473 free(seen);
2471 2474 free(interesting);
2472 2475 Py_XDECREF(dict);
2473 2476
2474 2477 return keys;
2475 2478 }
2476 2479
/*
 * Given a (possibly overlapping) set of revs, return all the
 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
 */
static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
{
	PyObject *ret = NULL;
	Py_ssize_t argcount, i, len;
	bitmask repeat = 0;
	int revcount = 0;
	int *revs;

	argcount = PySequence_Length(args);
	revs = PyMem_Malloc(argcount * sizeof(*revs));
	if (argcount > 0 && revs == NULL)
		return PyErr_NoMemory();
	len = index_length(self);

	for (i = 0; i < argcount; i++) {
		static const int capacity = 24;
		PyObject *obj = PySequence_GetItem(args, i);
		bitmask x;
		long val;

		if (!PyInt_Check(obj)) {
			PyErr_SetString(PyExc_TypeError,
			                "arguments must all be ints");
			Py_DECREF(obj);
			goto bail;
		}
		val = PyInt_AsLong(obj);
		Py_DECREF(obj);
		/* -1 is nullrev: the common-ancestor set is empty.
		 * NOTE(review): PyInt_AsLong also returns -1 on conversion
		 * error (e.g. overflow), which would be silently treated as
		 * nullrev here — confirm callers never pass such values. */
		if (val == -1) {
			ret = PyList_New(0);
			goto done;
		}
		if (val < 0 || val >= len) {
			PyErr_SetString(PyExc_IndexError, "index out of range");
			goto bail;
		}
		/* this cheesy bloom filter lets us avoid some more
		 * expensive duplicate checks in the common set-is-disjoint
		 * case */
		x = 1ull << (val & 0x3f);
		if (repeat & x) {
			int k;
			for (k = 0; k < revcount; k++) {
				if (val == revs[k])
					goto duplicate;
			}
		} else
			repeat |= x;
		if (revcount >= capacity) {
			PyErr_Format(PyExc_OverflowError,
			             "bitset size (%d) > capacity (%d)",
			             revcount, capacity);
			goto bail;
		}
		revs[revcount++] = (int)val;
	duplicate:;
	}

	/* trivial cases: zero or one distinct input rev */
	if (revcount == 0) {
		ret = PyList_New(0);
		goto done;
	}
	if (revcount == 1) {
		PyObject *obj;
		ret = PyList_New(1);
		if (ret == NULL)
			goto bail;
		obj = PyInt_FromLong(revs[0]);
		if (obj == NULL)
			goto bail;
		PyList_SET_ITEM(ret, 0, obj);
		goto done;
	}

	ret = find_gca_candidates(self, revs, revcount);
	if (ret == NULL)
		goto bail;

done:
	PyMem_Free(revs);
	return ret;

bail:
	PyMem_Free(revs);
	Py_XDECREF(ret);
	return NULL;
}
2568 2571
2569 2572 /*
2570 2573 * Given a (possibly overlapping) set of revs, return the greatest
2571 2574 * common ancestors: those with the longest path to the root.
2572 2575 */
2573 2576 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2574 2577 {
2575 2578 PyObject *ret;
2576 2579 PyObject *gca = index_commonancestorsheads(self, args);
2577 2580 if (gca == NULL)
2578 2581 return NULL;
2579 2582
2580 2583 if (PyList_GET_SIZE(gca) <= 1) {
2581 2584 return gca;
2582 2585 }
2583 2586
2584 2587 ret = find_deepest(self, gca);
2585 2588 Py_DECREF(gca);
2586 2589 return ret;
2587 2590 }
2588 2591
/*
 * Invalidate any trie entries introduced by added revs.
 */
static void index_invalidate_added(indexObject *self, Py_ssize_t start)
{
	Py_ssize_t i, len;

	len = self->length + self->new_length;
	i = start - self->length;
	/* nothing to do when `start` falls inside the on-disk portion */
	if (i < 0)
		return;

	/* remove each added entry's node from the radix tree.
	 * NOTE(review): the node hash is assumed to sit at offset 32 of
	 * the raw entry returned by index_deref — confirm against the
	 * v1/v2 record layouts. */
	for (i = start; i < len; i++)
		nt_delete_node(&self->nt, index_deref(self, i) + 32);

	self->new_length = start - self->length;
}
2606 2609
/*
 * Delete a numeric range of revs, which must be at the end of the
 * range.
 */
static int index_slice_del(indexObject *self, PyObject *item)
{
	Py_ssize_t start, stop, step, slicelength;
	Py_ssize_t length = index_length(self) + 1;
	int ret = 0;

	/* Argument changed from PySliceObject* to PyObject* in Python 3. */
#ifdef IS_PY3K
	if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
	                         &slicelength) < 0)
#else
	if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
	                         &step, &slicelength) < 0)
#endif
		return -1;

	if (slicelength <= 0)
		return 0;

	if ((step < 0 && start < stop) || (step > 0 && start > stop))
		stop = start;

	/* normalize a reversed slice into a forward one */
	if (step < 0) {
		stop = start + 1;
		start = stop + step * (slicelength - 1) - 1;
		step = -step;
	}

	if (step != 1) {
		PyErr_SetString(PyExc_ValueError,
		                "revlog index delete requires step size of 1");
		return -1;
	}

	/* only truncation up to the very end is supported */
	if (stop != length - 1) {
		PyErr_SetString(PyExc_IndexError,
		                "revlog index deletion indices are invalid");
		return -1;
	}

	if (start < self->length) {
		/* truncation reaches into the on-disk part of the index */
		if (self->ntinitialized) {
			Py_ssize_t i;

			/* drop every truncated on-disk node from the tree */
			for (i = start; i < self->length; i++) {
				const char *node = index_node_existing(self, i);
				if (node == NULL)
					return -1;

				nt_delete_node(&self->nt, node);
			}
			if (self->new_length)
				index_invalidate_added(self, self->length);
			if (self->ntrev > start)
				self->ntrev = (int)start;
		} else if (self->new_length) {
			self->new_length = 0;
		}

		self->length = start;
		goto done;
	}

	/* truncation only affects revisions added in memory */
	if (self->ntinitialized) {
		index_invalidate_added(self, start);
		if (self->ntrev > start)
			self->ntrev = (int)start;
	} else {
		self->new_length = start - self->length;
	}
done:
	/* the cached head list is stale after any truncation */
	Py_CLEAR(self->headrevs);
	return ret;
}
2685 2688
/*
 * Supported ops:
 *
 * slice deletion
 * string assignment (extend node->rev mapping)
 * string deletion (shrink node->rev mapping)
 */
static int index_assign_subscript(indexObject *self, PyObject *item,
                                  PyObject *value)
{
	char *node;
	long rev;

	if (PySlice_Check(item) && value == NULL)
		return index_slice_del(self, item);

	if (node_check(self->nodelen, item, &node) == -1)
		return -1;

	/* deleting a node mapping is a no-op if the tree was never built */
	if (value == NULL)
		return self->ntinitialized ? nt_delete_node(&self->nt, node)
		                           : 0;
	rev = PyInt_AsLong(value);
	/* rev must fit in an int and be non-negative; PyInt_AsLong
	 * returning -1 with an exception set is also caught here */
	if (rev > INT_MAX || rev < 0) {
		if (!PyErr_Occurred())
			PyErr_SetString(PyExc_ValueError, "rev out of range");
		return -1;
	}

	if (index_init_nt(self) == -1)
		return -1;
	return nt_insert(&self->nt, node, (int)rev);
}
2719 2722
/*
 * Find all RevlogNG entries in an index that has inline data. Update
 * the optional "offsets" table with those entries.
 *
 * Returns the number of entries found, or -1 on corruption.
 */
static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
{
	const char *data = (const char *)self->buf.buf;
	Py_ssize_t pos = 0;
	Py_ssize_t end = self->buf.len;
	long incr = self->entry_size;
	Py_ssize_t len = 0;

	while (pos + self->entry_size <= end && pos >= 0) {
		uint32_t comp_len, sidedata_comp_len = 0;
		/* 3rd element of header is length of compressed inline data */
		comp_len = getbe32(data + pos + 8);
		if (self->entry_size == v2_entry_size) {
			/* v2 records also carry compressed sidedata inline */
			sidedata_comp_len = getbe32(data + pos + 72);
		}
		/* skip the fixed-size record plus its inline payload */
		incr = self->entry_size + comp_len + sidedata_comp_len;
		if (offsets)
			offsets[len] = data + pos;
		len++;
		pos += incr;
	}

	/* not landing exactly on the end means truncated/garbage data */
	if (pos != end) {
		if (!PyErr_Occurred())
			PyErr_SetString(PyExc_ValueError, "corrupt index file");
		return -1;
	}

	return len;
}
2754 2757
2755 2758 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2756 2759 {
2757 2760 PyObject *data_obj, *inlined_obj, *revlogv2;
2758 2761 Py_ssize_t size;
2759 2762
2760 2763 static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
2761 2764
2762 2765 /* Initialize before argument-checking to avoid index_dealloc() crash.
2763 2766 */
2764 2767 self->added = NULL;
2765 2768 self->new_length = 0;
2766 2769 self->added_length = 0;
2767 2770 self->data = NULL;
2768 2771 memset(&self->buf, 0, sizeof(self->buf));
2769 2772 self->headrevs = NULL;
2770 2773 self->filteredrevs = Py_None;
2771 2774 Py_INCREF(Py_None);
2772 2775 self->ntinitialized = 0;
2773 2776 self->offsets = NULL;
2774 2777 self->nodelen = 20;
2775 2778 self->nullentry = NULL;
2776 2779 self->rust_ext_compat = 1;
2777 2780
2778 2781 revlogv2 = NULL;
2779 2782 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
2780 2783 &data_obj, &inlined_obj, &revlogv2))
2781 2784 return -1;
2782 2785 if (!PyObject_CheckBuffer(data_obj)) {
2783 2786 PyErr_SetString(PyExc_TypeError,
2784 2787 "data does not support buffer interface");
2785 2788 return -1;
2786 2789 }
2787 2790 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
2788 2791 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
2789 2792 return -1;
2790 2793 }
2791 2794
2792 2795 if (revlogv2 && PyObject_IsTrue(revlogv2)) {
2793 2796 self->format_version = format_v2;
2794 2797 self->entry_size = v2_entry_size;
2795 2798 } else {
2796 2799 self->format_version = format_v1;
2797 2800 self->entry_size = v1_entry_size;
2798 2801 }
2799 2802
2800 self->nullentry = Py_BuildValue(
2801 PY23("iiiiiiis#iiBB", "iiiiiiiy#iiBB"), 0, 0, 0, -1, -1, -1, -1,
2802 nullid, self->nodelen, 0, 0, comp_mode_inline, comp_mode_inline);
2803 self->nullentry =
2804 Py_BuildValue(PY23("iiiiiiis#iiBBi", "iiiiiiiy#iiBBi"), 0, 0, 0, -1,
2805 -1, -1, -1, nullid, self->nodelen, 0, 0,
2806 comp_mode_inline, comp_mode_inline, rank_unknown);
2803 2807
2804 2808 if (!self->nullentry)
2805 2809 return -1;
2806 2810 PyObject_GC_UnTrack(self->nullentry);
2807 2811
2808 2812 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2809 2813 return -1;
2810 2814 size = self->buf.len;
2811 2815
2812 2816 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2813 2817 self->data = data_obj;
2814 2818
2815 2819 self->ntlookups = self->ntmisses = 0;
2816 2820 self->ntrev = -1;
2817 2821 Py_INCREF(self->data);
2818 2822
2819 2823 if (self->inlined) {
2820 2824 Py_ssize_t len = inline_scan(self, NULL);
2821 2825 if (len == -1)
2822 2826 goto bail;
2823 2827 self->length = len;
2824 2828 } else {
2825 2829 if (size % self->entry_size) {
2826 2830 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2827 2831 goto bail;
2828 2832 }
2829 2833 self->length = size / self->entry_size;
2830 2834 }
2831 2835
2832 2836 return 0;
2833 2837 bail:
2834 2838 return -1;
2835 2839 }
2836 2840
/*
 * The index object itself implements node -> rev lookup (via
 * __getitem__), so the `nodemap` attribute simply returns a new
 * reference to self.
 */
static PyObject *index_nodemap(indexObject *self)
{
	Py_INCREF(self);
	return (PyObject *)self;
}
2842 2846
2843 2847 static void _index_clearcaches(indexObject *self)
2844 2848 {
2845 2849 if (self->offsets) {
2846 2850 PyMem_Free((void *)self->offsets);
2847 2851 self->offsets = NULL;
2848 2852 }
2849 2853 if (self->ntinitialized) {
2850 2854 nt_dealloc(&self->nt);
2851 2855 }
2852 2856 self->ntinitialized = 0;
2853 2857 Py_CLEAR(self->headrevs);
2854 2858 }
2855 2859
2856 2860 static PyObject *index_clearcaches(indexObject *self)
2857 2861 {
2858 2862 _index_clearcaches(self);
2859 2863 self->ntrev = -1;
2860 2864 self->ntlookups = self->ntmisses = 0;
2861 2865 Py_RETURN_NONE;
2862 2866 }
2863 2867
/* tp_dealloc: release caches, the data buffer and all owned references. */
static void index_dealloc(indexObject *self)
{
	_index_clearcaches(self);
	Py_XDECREF(self->filteredrevs);
	if (self->buf.buf) {
		PyBuffer_Release(&self->buf);
		memset(&self->buf, 0, sizeof(self->buf));
	}
	Py_XDECREF(self->data);
	PyMem_Free(self->added);
	Py_XDECREF(self->nullentry);
	PyObject_Del(self);
}
2877 2881
/* Sequence protocol: len(index), index[rev] and `node in index`. */
static PySequenceMethods index_sequence_methods = {
    (lenfunc)index_length,      /* sq_length */
    0,                          /* sq_concat */
    0,                          /* sq_repeat */
    (ssizeargfunc)index_get,    /* sq_item */
    0,                          /* sq_slice */
    0,                          /* sq_ass_item */
    0,                          /* sq_ass_slice */
    (objobjproc)index_contains, /* sq_contains */
};
2888 2892
/* Mapping protocol: rev/node subscripting and slice deletion. */
static PyMappingMethods index_mapping_methods = {
    (lenfunc)index_length,                 /* mp_length */
    (binaryfunc)index_getitem,             /* mp_subscript */
    (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};
2894 2898
/* Methods exposed on parsers.index objects. */
static PyMethodDef index_methods[] = {
    {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
     "return the gca set of the given revs"},
    {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
     METH_VARARGS,
     "return the heads of the common ancestors of the given revs"},
    {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
     "clear the index caches"},
    {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
    {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
     "return `rev` associated with a node or None"},
    {"has_node", (PyCFunction)index_m_has_node, METH_O,
     "return True if the node exist in the index"},
    {"rev", (PyCFunction)index_m_rev, METH_O,
     "return `rev` associated with a node or raise RevlogError"},
    {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
     "compute phases"},
    {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
     "reachableroots"},
    {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
     METH_VARARGS, "replace an existing index entry with a new value"},
    {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
     "get head revisions"}, /* Can do filtering since 3.2 */
    {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
     "get filtered head revisions"}, /* Can always do filtering */
    {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
     "True if the object is a snapshot"},
    {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
     "Gather snapshot data in a cache dict"},
    {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
     "determine revisions with deltas to reconstruct fulltext"},
    {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
     METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
    {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
    {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
     "match a potentially ambiguous node ID"},
    {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
    {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
     "return an entry in binary form"},
    {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
     "pack the revlog header information into binary"},
    {NULL} /* Sentinel */
};
2940 2944
/* Attribute descriptors: `nodemap` returns the index object itself. */
static PyGetSetDef index_getset[] = {
    {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
    {NULL} /* Sentinel */
};
2945 2949
2946 2950 static PyMemberDef index_members[] = {
2947 2951 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
2948 2952 "size of an index entry"},
2949 2953 {"rust_ext_compat", T_LONG, offsetof(indexObject, rust_ext_compat), 0,
2950 2954 "size of an index entry"},
2951 2955 {NULL} /* Sentinel */
2952 2956 };
2953 2957
/* Type object for parsers.index (the revlog index). */
PyTypeObject HgRevlogIndex_Type = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.index",               /* tp_name */
    sizeof(indexObject),           /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)index_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    &index_sequence_methods,       /* tp_as_sequence */
    &index_mapping_methods,        /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "revlog index",                /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    index_methods,                 /* tp_methods */
    index_members,                 /* tp_members */
    index_getset,                  /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)index_init,          /* tp_init */
    0,                             /* tp_alloc */
};
2993 2997
/*
 * returns a tuple of the form (index, cache) with elements as
 * follows:
 *
 * index: an index object that lazily parses Revlog (v1 or v2) records
 * cache: if data is inlined, a tuple (0, index_file_content), else None
 *        index_file_content could be a string, or a buffer
 *
 * added complications are for backwards compatibility
 */
PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
{
	PyObject *cache = NULL;
	indexObject *idx;
	int ret;

	idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
	if (idx == NULL)
		goto bail;

	ret = index_init(idx, args, kwargs);
	if (ret == -1)
		goto bail;

	if (idx->inlined) {
		cache = Py_BuildValue("iO", 0, idx->data);
		if (cache == NULL)
			goto bail;
	} else {
		cache = Py_None;
		Py_INCREF(cache);
	}

	/* the "N" format units steal both references on success */
	return Py_BuildValue("NN", idx, cache);

bail:
	Py_XDECREF(idx);
	Py_XDECREF(cache);
	return NULL;
}
3034 3038
/* C API table published to other extension modules via a PyCapsule in
 * revlog_module_init(); layout must stay in sync with Revlog_CAPI. */
static Revlog_CAPI CAPI = {
    /* increment the abi_version field upon each change in the Revlog_CAPI
       struct or in the ABI of the listed functions */
    2,
    index_length,
    index_node,
    HgRevlogIndex_GetParents,
};
3043 3047
/*
 * Module setup: ready and expose the index and nodetree types, and
 * publish the C API capsule for external consumers.
 */
void revlog_module_init(PyObject *mod)
{
	PyObject *caps = NULL;
	HgRevlogIndex_Type.tp_new = PyType_GenericNew;
	if (PyType_Ready(&HgRevlogIndex_Type) < 0)
		return;
	Py_INCREF(&HgRevlogIndex_Type);
	PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);

	nodetreeType.tp_new = PyType_GenericNew;
	if (PyType_Ready(&nodetreeType) < 0)
		return;
	Py_INCREF(&nodetreeType);
	PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);

	/* NOTE(review): PyModule_AddObject steals a reference only on
	 * success; failures here would leak, but module-init failures are
	 * not recoverable anyway. */
	caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
	if (caps != NULL)
		PyModule_AddObject(mod, "revlog_CAPI", caps);
}
@@ -1,966 +1,969 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import stat
11 11 import struct
12 12 import zlib
13 13
14 14 from ..node import (
15 15 nullrev,
16 16 sha1nodeconstants,
17 17 )
18 18 from ..thirdparty import attr
19 19 from .. import (
20 20 error,
21 21 pycompat,
22 22 revlogutils,
23 23 util,
24 24 )
25 25
26 26 from ..revlogutils import nodemap as nodemaputil
27 27 from ..revlogutils import constants as revlog_constants
28 28
29 29 stringio = pycompat.bytesio
30 30
31 31
32 32 _pack = struct.pack
33 33 _unpack = struct.unpack
34 34 _compress = zlib.compress
35 35 _decompress = zlib.decompress
36 36
37 37
38 38 # a special value used internally for `size` if the file come from the other parent
39 39 FROM_P2 = -2
40 40
41 41 # a special value used internally for `size` if the file is modified/merged/added
42 42 NONNORMAL = -1
43 43
44 44 # a special value used internally for `time` if the time is ambigeous
45 45 AMBIGUOUS_TIME = -1
46 46
47 47 # Bits of the `flags` byte inside a node in the file format
48 48 DIRSTATE_V2_WDIR_TRACKED = 1 << 0
49 49 DIRSTATE_V2_P1_TRACKED = 1 << 1
50 50 DIRSTATE_V2_P2_INFO = 1 << 2
51 51 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 3
52 52 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 4
53 53 DIRSTATE_V2_HAS_FALLBACK_EXEC = 1 << 5
54 54 DIRSTATE_V2_FALLBACK_EXEC = 1 << 6
55 55 DIRSTATE_V2_HAS_FALLBACK_SYMLINK = 1 << 7
56 56 DIRSTATE_V2_FALLBACK_SYMLINK = 1 << 8
57 57 DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED = 1 << 9
58 58 DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 10
59 59 DIRSTATE_V2_HAS_MTIME = 1 << 11
60 60 DIRSTATE_V2_MTIME_SECOND_AMBIGUOUS = 1 << 12
61 61 DIRSTATE_V2_DIRECTORY = 1 << 13
62 62 DIRSTATE_V2_ALL_UNKNOWN_RECORDED = 1 << 14
63 63 DIRSTATE_V2_ALL_IGNORED_RECORDED = 1 << 15
64 64
65 65
66 66 @attr.s(slots=True, init=False)
67 67 class DirstateItem(object):
68 68 """represent a dirstate entry
69 69
70 70 It hold multiple attributes
71 71
72 72 # about file tracking
73 73 - wc_tracked: is the file tracked by the working copy
74 74 - p1_tracked: is the file tracked in working copy first parent
75 75 - p2_info: the file has been involved in some merge operation. Either
76 76 because it was actually merged, or because the p2 version was
77 77 ahead, or because some rename moved it there. In either case
78 78 `hg status` will want it displayed as modified.
79 79
80 80 # about the file state expected from p1 manifest:
81 81 - mode: the file mode in p1
82 82 - size: the file size in p1
83 83
84 84 These value can be set to None, which mean we don't have a meaningful value
85 85 to compare with. Either because we don't really care about them as there
86 86 `status` is known without having to look at the disk or because we don't
87 87 know these right now and a full comparison will be needed to find out if
88 88 the file is clean.
89 89
90 90 # about the file state on disk last time we saw it:
91 91 - mtime: the last known clean mtime for the file.
92 92
93 93 This value can be set to None if no cachable state exist. Either because we
94 94 do not care (see previous section) or because we could not cache something
95 95 yet.
96 96 """
97 97
98 98 _wc_tracked = attr.ib()
99 99 _p1_tracked = attr.ib()
100 100 _p2_info = attr.ib()
101 101 _mode = attr.ib()
102 102 _size = attr.ib()
103 103 _mtime_s = attr.ib()
104 104 _mtime_ns = attr.ib()
105 105 _fallback_exec = attr.ib()
106 106 _fallback_symlink = attr.ib()
107 107 _mtime_second_ambiguous = attr.ib()
108 108
109 109 def __init__(
110 110 self,
111 111 wc_tracked=False,
112 112 p1_tracked=False,
113 113 p2_info=False,
114 114 has_meaningful_data=True,
115 115 has_meaningful_mtime=True,
116 116 parentfiledata=None,
117 117 fallback_exec=None,
118 118 fallback_symlink=None,
119 119 ):
120 120 self._wc_tracked = wc_tracked
121 121 self._p1_tracked = p1_tracked
122 122 self._p2_info = p2_info
123 123
124 124 self._fallback_exec = fallback_exec
125 125 self._fallback_symlink = fallback_symlink
126 126
127 127 self._mode = None
128 128 self._size = None
129 129 self._mtime_s = None
130 130 self._mtime_ns = None
131 131 self._mtime_second_ambiguous = False
132 132 if parentfiledata is None:
133 133 has_meaningful_mtime = False
134 134 has_meaningful_data = False
135 135 elif parentfiledata[2] is None:
136 136 has_meaningful_mtime = False
137 137 if has_meaningful_data:
138 138 self._mode = parentfiledata[0]
139 139 self._size = parentfiledata[1]
140 140 if has_meaningful_mtime:
141 141 (
142 142 self._mtime_s,
143 143 self._mtime_ns,
144 144 self._mtime_second_ambiguous,
145 145 ) = parentfiledata[2]
146 146
147 147 @classmethod
148 148 def from_v2_data(cls, flags, size, mtime_s, mtime_ns):
149 149 """Build a new DirstateItem object from V2 data"""
150 150 has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
151 151 has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_MTIME)
152 152 mode = None
153 153
154 154 if flags & +DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED:
155 155 # we do not have support for this flag in the code yet,
156 156 # force a lookup for this file.
157 157 has_mode_size = False
158 158 has_meaningful_mtime = False
159 159
160 160 fallback_exec = None
161 161 if flags & DIRSTATE_V2_HAS_FALLBACK_EXEC:
162 162 fallback_exec = flags & DIRSTATE_V2_FALLBACK_EXEC
163 163
164 164 fallback_symlink = None
165 165 if flags & DIRSTATE_V2_HAS_FALLBACK_SYMLINK:
166 166 fallback_symlink = flags & DIRSTATE_V2_FALLBACK_SYMLINK
167 167
168 168 if has_mode_size:
169 169 assert stat.S_IXUSR == 0o100
170 170 if flags & DIRSTATE_V2_MODE_EXEC_PERM:
171 171 mode = 0o755
172 172 else:
173 173 mode = 0o644
174 174 if flags & DIRSTATE_V2_MODE_IS_SYMLINK:
175 175 mode |= stat.S_IFLNK
176 176 else:
177 177 mode |= stat.S_IFREG
178 178
179 179 second_ambiguous = flags & DIRSTATE_V2_MTIME_SECOND_AMBIGUOUS
180 180 return cls(
181 181 wc_tracked=bool(flags & DIRSTATE_V2_WDIR_TRACKED),
182 182 p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED),
183 183 p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
184 184 has_meaningful_data=has_mode_size,
185 185 has_meaningful_mtime=has_meaningful_mtime,
186 186 parentfiledata=(mode, size, (mtime_s, mtime_ns, second_ambiguous)),
187 187 fallback_exec=fallback_exec,
188 188 fallback_symlink=fallback_symlink,
189 189 )
190 190
191 191 @classmethod
192 192 def from_v1_data(cls, state, mode, size, mtime):
193 193 """Build a new DirstateItem object from V1 data
194 194
195 195 Since the dirstate-v1 format is frozen, the signature of this function
196 196 is not expected to change, unlike the __init__ one.
197 197 """
198 198 if state == b'm':
199 199 return cls(wc_tracked=True, p1_tracked=True, p2_info=True)
200 200 elif state == b'a':
201 201 return cls(wc_tracked=True)
202 202 elif state == b'r':
203 203 if size == NONNORMAL:
204 204 p1_tracked = True
205 205 p2_info = True
206 206 elif size == FROM_P2:
207 207 p1_tracked = False
208 208 p2_info = True
209 209 else:
210 210 p1_tracked = True
211 211 p2_info = False
212 212 return cls(p1_tracked=p1_tracked, p2_info=p2_info)
213 213 elif state == b'n':
214 214 if size == FROM_P2:
215 215 return cls(wc_tracked=True, p2_info=True)
216 216 elif size == NONNORMAL:
217 217 return cls(wc_tracked=True, p1_tracked=True)
218 218 elif mtime == AMBIGUOUS_TIME:
219 219 return cls(
220 220 wc_tracked=True,
221 221 p1_tracked=True,
222 222 has_meaningful_mtime=False,
223 223 parentfiledata=(mode, size, (42, 0, False)),
224 224 )
225 225 else:
226 226 return cls(
227 227 wc_tracked=True,
228 228 p1_tracked=True,
229 229 parentfiledata=(mode, size, (mtime, 0, False)),
230 230 )
231 231 else:
232 232 raise RuntimeError(b'unknown state: %s' % state)
233 233
234 234 def set_possibly_dirty(self):
235 235 """Mark a file as "possibly dirty"
236 236
237 237 This means the next status call will have to actually check its content
238 238 to make sure it is correct.
239 239 """
240 240 self._mtime_s = None
241 241 self._mtime_ns = None
242 242
243 243 def set_clean(self, mode, size, mtime):
244 244 """mark a file as "clean" cancelling potential "possibly dirty call"
245 245
246 246 Note: this function is a descendant of `dirstate.normal` and is
247 247 currently expected to be call on "normal" entry only. There are not
248 248 reason for this to not change in the future as long as the ccode is
249 249 updated to preserve the proper state of the non-normal files.
250 250 """
251 251 self._wc_tracked = True
252 252 self._p1_tracked = True
253 253 self._mode = mode
254 254 self._size = size
255 255 self._mtime_s, self._mtime_ns, self._mtime_second_ambiguous = mtime
256 256
257 257 def set_tracked(self):
258 258 """mark a file as tracked in the working copy
259 259
260 260 This will ultimately be called by command like `hg add`.
261 261 """
262 262 self._wc_tracked = True
263 263 # `set_tracked` is replacing various `normallookup` call. So we mark
264 264 # the files as needing lookup
265 265 #
266 266 # Consider dropping this in the future in favor of something less broad.
267 267 self._mtime_s = None
268 268 self._mtime_ns = None
269 269
270 270 def set_untracked(self):
271 271 """mark a file as untracked in the working copy
272 272
273 273 This will ultimately be called by command like `hg remove`.
274 274 """
275 275 self._wc_tracked = False
276 276 self._mode = None
277 277 self._size = None
278 278 self._mtime_s = None
279 279 self._mtime_ns = None
280 280
281 281 def drop_merge_data(self):
282 282 """remove all "merge-only" from a DirstateItem
283 283
284 284 This is to be call by the dirstatemap code when the second parent is dropped
285 285 """
286 286 if self._p2_info:
287 287 self._p2_info = False
288 288 self._mode = None
289 289 self._size = None
290 290 self._mtime_s = None
291 291 self._mtime_ns = None
292 292
293 293 @property
294 294 def mode(self):
295 295 return self.v1_mode()
296 296
297 297 @property
298 298 def size(self):
299 299 return self.v1_size()
300 300
301 301 @property
302 302 def mtime(self):
303 303 return self.v1_mtime()
304 304
305 305 def mtime_likely_equal_to(self, other_mtime):
306 306 self_sec = self._mtime_s
307 307 if self_sec is None:
308 308 return False
309 309 self_ns = self._mtime_ns
310 310 other_sec, other_ns, second_ambiguous = other_mtime
311 311 if self_sec != other_sec:
312 312 # seconds are different theses mtime are definitly not equal
313 313 return False
314 314 elif other_ns == 0 or self_ns == 0:
315 315 # at least one side as no nano-seconds information
316 316
317 317 if self._mtime_second_ambiguous:
318 318 # We cannot trust the mtime in this case
319 319 return False
320 320 else:
321 321 # the "seconds" value was reliable on its own. We are good to go.
322 322 return True
323 323 else:
324 324 # We have nano second information, let us use them !
325 325 return self_ns == other_ns
326 326
327 327 @property
328 328 def state(self):
329 329 """
330 330 States are:
331 331 n normal
332 332 m needs merging
333 333 r marked for removal
334 334 a marked for addition
335 335
336 336 XXX This "state" is a bit obscure and mostly a direct expression of the
337 337 dirstatev1 format. It would make sense to ultimately deprecate it in
338 338 favor of the more "semantic" attributes.
339 339 """
340 340 if not self.any_tracked:
341 341 return b'?'
342 342 return self.v1_state()
343 343
344 344 @property
345 345 def has_fallback_exec(self):
346 346 """True if "fallback" information are available for the "exec" bit
347 347
348 348 Fallback information can be stored in the dirstate to keep track of
349 349 filesystem attribute tracked by Mercurial when the underlying file
350 350 system or operating system does not support that property, (e.g.
351 351 Windows).
352 352
353 353 Not all version of the dirstate on-disk storage support preserving this
354 354 information.
355 355 """
356 356 return self._fallback_exec is not None
357 357
358 358 @property
359 359 def fallback_exec(self):
360 360 """ "fallback" information for the executable bit
361 361
362 362 True if the file should be considered executable when we cannot get
363 363 this information from the files system. False if it should be
364 364 considered non-executable.
365 365
366 366 See has_fallback_exec for details."""
367 367 return self._fallback_exec
368 368
369 369 @fallback_exec.setter
370 370 def set_fallback_exec(self, value):
371 371 """control "fallback" executable bit
372 372
373 373 Set to:
374 374 - True if the file should be considered executable,
375 375 - False if the file should be considered non-executable,
376 376 - None if we do not have valid fallback data.
377 377
378 378 See has_fallback_exec for details."""
379 379 if value is None:
380 380 self._fallback_exec = None
381 381 else:
382 382 self._fallback_exec = bool(value)
383 383
384 384 @property
385 385 def has_fallback_symlink(self):
386 386 """True if "fallback" information are available for symlink status
387 387
388 388 Fallback information can be stored in the dirstate to keep track of
389 389 filesystem attribute tracked by Mercurial when the underlying file
390 390 system or operating system does not support that property, (e.g.
391 391 Windows).
392 392
393 393 Not all version of the dirstate on-disk storage support preserving this
394 394 information."""
395 395 return self._fallback_symlink is not None
396 396
397 397 @property
398 398 def fallback_symlink(self):
399 399 """ "fallback" information for symlink status
400 400
401 401 True if the file should be considered executable when we cannot get
402 402 this information from the files system. False if it should be
403 403 considered non-executable.
404 404
405 405 See has_fallback_exec for details."""
406 406 return self._fallback_symlink
407 407
408 408 @fallback_symlink.setter
409 409 def set_fallback_symlink(self, value):
410 410 """control "fallback" symlink status
411 411
412 412 Set to:
413 413 - True if the file should be considered a symlink,
414 414 - False if the file should be considered not a symlink,
415 415 - None if we do not have valid fallback data.
416 416
417 417 See has_fallback_symlink for details."""
418 418 if value is None:
419 419 self._fallback_symlink = None
420 420 else:
421 421 self._fallback_symlink = bool(value)
422 422
423 423 @property
424 424 def tracked(self):
425 425 """True is the file is tracked in the working copy"""
426 426 return self._wc_tracked
427 427
428 428 @property
429 429 def any_tracked(self):
430 430 """True is the file is tracked anywhere (wc or parents)"""
431 431 return self._wc_tracked or self._p1_tracked or self._p2_info
432 432
433 433 @property
434 434 def added(self):
435 435 """True if the file has been added"""
436 436 return self._wc_tracked and not (self._p1_tracked or self._p2_info)
437 437
438 438 @property
439 439 def maybe_clean(self):
440 440 """True if the file has a chance to be in the "clean" state"""
441 441 if not self._wc_tracked:
442 442 return False
443 443 elif not self._p1_tracked:
444 444 return False
445 445 elif self._p2_info:
446 446 return False
447 447 return True
448 448
449 449 @property
450 450 def p1_tracked(self):
451 451 """True if the file is tracked in the first parent manifest"""
452 452 return self._p1_tracked
453 453
454 454 @property
455 455 def p2_info(self):
456 456 """True if the file needed to merge or apply any input from p2
457 457
458 458 See the class documentation for details.
459 459 """
460 460 return self._wc_tracked and self._p2_info
461 461
462 462 @property
463 463 def removed(self):
464 464 """True if the file has been removed"""
465 465 return not self._wc_tracked and (self._p1_tracked or self._p2_info)
466 466
467 467 def v2_data(self):
468 468 """Returns (flags, mode, size, mtime) for v2 serialization"""
469 469 flags = 0
470 470 if self._wc_tracked:
471 471 flags |= DIRSTATE_V2_WDIR_TRACKED
472 472 if self._p1_tracked:
473 473 flags |= DIRSTATE_V2_P1_TRACKED
474 474 if self._p2_info:
475 475 flags |= DIRSTATE_V2_P2_INFO
476 476 if self._mode is not None and self._size is not None:
477 477 flags |= DIRSTATE_V2_HAS_MODE_AND_SIZE
478 478 if self.mode & stat.S_IXUSR:
479 479 flags |= DIRSTATE_V2_MODE_EXEC_PERM
480 480 if stat.S_ISLNK(self.mode):
481 481 flags |= DIRSTATE_V2_MODE_IS_SYMLINK
482 482 if self._mtime_s is not None:
483 483 flags |= DIRSTATE_V2_HAS_MTIME
484 484 if self._mtime_second_ambiguous:
485 485 flags |= DIRSTATE_V2_MTIME_SECOND_AMBIGUOUS
486 486
487 487 if self._fallback_exec is not None:
488 488 flags |= DIRSTATE_V2_HAS_FALLBACK_EXEC
489 489 if self._fallback_exec:
490 490 flags |= DIRSTATE_V2_FALLBACK_EXEC
491 491
492 492 if self._fallback_symlink is not None:
493 493 flags |= DIRSTATE_V2_HAS_FALLBACK_SYMLINK
494 494 if self._fallback_symlink:
495 495 flags |= DIRSTATE_V2_FALLBACK_SYMLINK
496 496
497 497 # Note: we do not need to do anything regarding
498 498 # DIRSTATE_V2_ALL_UNKNOWN_RECORDED and DIRSTATE_V2_ALL_IGNORED_RECORDED
499 499 # since we never set _DIRSTATE_V2_HAS_DIRCTORY_MTIME
500 500 return (flags, self._size or 0, self._mtime_s or 0, self._mtime_ns or 0)
501 501
502 502 def v1_state(self):
503 503 """return a "state" suitable for v1 serialization"""
504 504 if not self.any_tracked:
505 505 # the object has no state to record, this is -currently-
506 506 # unsupported
507 507 raise RuntimeError('untracked item')
508 508 elif self.removed:
509 509 return b'r'
510 510 elif self._p1_tracked and self._p2_info:
511 511 return b'm'
512 512 elif self.added:
513 513 return b'a'
514 514 else:
515 515 return b'n'
516 516
517 517 def v1_mode(self):
518 518 """return a "mode" suitable for v1 serialization"""
519 519 return self._mode if self._mode is not None else 0
520 520
521 521 def v1_size(self):
522 522 """return a "size" suitable for v1 serialization"""
523 523 if not self.any_tracked:
524 524 # the object has no state to record, this is -currently-
525 525 # unsupported
526 526 raise RuntimeError('untracked item')
527 527 elif self.removed and self._p1_tracked and self._p2_info:
528 528 return NONNORMAL
529 529 elif self._p2_info:
530 530 return FROM_P2
531 531 elif self.removed:
532 532 return 0
533 533 elif self.added:
534 534 return NONNORMAL
535 535 elif self._size is None:
536 536 return NONNORMAL
537 537 else:
538 538 return self._size
539 539
540 540 def v1_mtime(self):
541 541 """return a "mtime" suitable for v1 serialization"""
542 542 if not self.any_tracked:
543 543 # the object has no state to record, this is -currently-
544 544 # unsupported
545 545 raise RuntimeError('untracked item')
546 546 elif self.removed:
547 547 return 0
548 548 elif self._mtime_s is None:
549 549 return AMBIGUOUS_TIME
550 550 elif self._p2_info:
551 551 return AMBIGUOUS_TIME
552 552 elif not self._p1_tracked:
553 553 return AMBIGUOUS_TIME
554 554 elif self._mtime_second_ambiguous:
555 555 return AMBIGUOUS_TIME
556 556 else:
557 557 return self._mtime_s
558 558
559 559
560 560 def gettype(q):
561 561 return int(q & 0xFFFF)
562 562
563 563
564 564 class BaseIndexObject(object):
565 565 # Can I be passed to an algorithme implemented in Rust ?
566 566 rust_ext_compat = 0
567 567 # Format of an index entry according to Python's `struct` language
568 568 index_format = revlog_constants.INDEX_ENTRY_V1
569 569 # Size of a C unsigned long long int, platform independent
570 570 big_int_size = struct.calcsize(b'>Q')
571 571 # Size of a C long int, platform independent
572 572 int_size = struct.calcsize(b'>i')
573 573 # An empty index entry, used as a default value to be overridden, or nullrev
574 574 null_item = (
575 575 0,
576 576 0,
577 577 0,
578 578 -1,
579 579 -1,
580 580 -1,
581 581 -1,
582 582 sha1nodeconstants.nullid,
583 583 0,
584 584 0,
585 585 revlog_constants.COMP_MODE_INLINE,
586 586 revlog_constants.COMP_MODE_INLINE,
587 revlog_constants.RANK_UNKNOWN,
587 588 )
588 589
589 590 @util.propertycache
590 591 def entry_size(self):
591 592 return self.index_format.size
592 593
593 594 @property
594 595 def nodemap(self):
595 596 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
596 597 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
597 598 return self._nodemap
598 599
599 600 @util.propertycache
600 601 def _nodemap(self):
601 602 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
602 603 for r in range(0, len(self)):
603 604 n = self[r][7]
604 605 nodemap[n] = r
605 606 return nodemap
606 607
607 608 def has_node(self, node):
608 609 """return True if the node exist in the index"""
609 610 return node in self._nodemap
610 611
611 612 def rev(self, node):
612 613 """return a revision for a node
613 614
614 615 If the node is unknown, raise a RevlogError"""
615 616 return self._nodemap[node]
616 617
617 618 def get_rev(self, node):
618 619 """return a revision for a node
619 620
620 621 If the node is unknown, return None"""
621 622 return self._nodemap.get(node)
622 623
623 624 def _stripnodes(self, start):
624 625 if '_nodemap' in vars(self):
625 626 for r in range(start, len(self)):
626 627 n = self[r][7]
627 628 del self._nodemap[n]
628 629
629 630 def clearcaches(self):
630 631 self.__dict__.pop('_nodemap', None)
631 632
632 633 def __len__(self):
633 634 return self._lgt + len(self._extra)
634 635
635 636 def append(self, tup):
636 637 if '_nodemap' in vars(self):
637 638 self._nodemap[tup[7]] = len(self)
638 639 data = self._pack_entry(len(self), tup)
639 640 self._extra.append(data)
640 641
641 642 def _pack_entry(self, rev, entry):
642 643 assert entry[8] == 0
643 644 assert entry[9] == 0
644 645 return self.index_format.pack(*entry[:8])
645 646
646 647 def _check_index(self, i):
647 648 if not isinstance(i, int):
648 649 raise TypeError(b"expecting int indexes")
649 650 if i < 0 or i >= len(self):
650 651 raise IndexError(i)
651 652
652 653 def __getitem__(self, i):
653 654 if i == -1:
654 655 return self.null_item
655 656 self._check_index(i)
656 657 if i >= self._lgt:
657 658 data = self._extra[i - self._lgt]
658 659 else:
659 660 index = self._calculate_index(i)
660 661 data = self._data[index : index + self.entry_size]
661 662 r = self._unpack_entry(i, data)
662 663 if self._lgt and i == 0:
663 664 offset = revlogutils.offset_type(0, gettype(r[0]))
664 665 r = (offset,) + r[1:]
665 666 return r
666 667
667 668 def _unpack_entry(self, rev, data):
668 669 r = self.index_format.unpack(data)
669 670 r = r + (
670 671 0,
671 672 0,
672 673 revlog_constants.COMP_MODE_INLINE,
673 674 revlog_constants.COMP_MODE_INLINE,
675 revlog_constants.RANK_UNKNOWN,
674 676 )
675 677 return r
676 678
677 679 def pack_header(self, header):
678 680 """pack header information as binary"""
679 681 v_fmt = revlog_constants.INDEX_HEADER
680 682 return v_fmt.pack(header)
681 683
682 684 def entry_binary(self, rev):
683 685 """return the raw binary string representing a revision"""
684 686 entry = self[rev]
685 687 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
686 688 if rev == 0:
687 689 p = p[revlog_constants.INDEX_HEADER.size :]
688 690 return p
689 691
690 692
691 693 class IndexObject(BaseIndexObject):
692 694 def __init__(self, data):
693 695 assert len(data) % self.entry_size == 0, (
694 696 len(data),
695 697 self.entry_size,
696 698 len(data) % self.entry_size,
697 699 )
698 700 self._data = data
699 701 self._lgt = len(data) // self.entry_size
700 702 self._extra = []
701 703
702 704 def _calculate_index(self, i):
703 705 return i * self.entry_size
704 706
705 707 def __delitem__(self, i):
706 708 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
707 709 raise ValueError(b"deleting slices only supports a:-1 with step 1")
708 710 i = i.start
709 711 self._check_index(i)
710 712 self._stripnodes(i)
711 713 if i < self._lgt:
712 714 self._data = self._data[: i * self.entry_size]
713 715 self._lgt = i
714 716 self._extra = []
715 717 else:
716 718 self._extra = self._extra[: i - self._lgt]
717 719
718 720
719 721 class PersistentNodeMapIndexObject(IndexObject):
720 722 """a Debug oriented class to test persistent nodemap
721 723
722 724 We need a simple python object to test API and higher level behavior. See
723 725 the Rust implementation for more serious usage. This should be used only
724 726 through the dedicated `devel.persistent-nodemap` config.
725 727 """
726 728
727 729 def nodemap_data_all(self):
728 730 """Return bytes containing a full serialization of a nodemap
729 731
730 732 The nodemap should be valid for the full set of revisions in the
731 733 index."""
732 734 return nodemaputil.persistent_data(self)
733 735
734 736 def nodemap_data_incremental(self):
735 737 """Return bytes containing a incremental update to persistent nodemap
736 738
737 739 This containst the data for an append-only update of the data provided
738 740 in the last call to `update_nodemap_data`.
739 741 """
740 742 if self._nm_root is None:
741 743 return None
742 744 docket = self._nm_docket
743 745 changed, data = nodemaputil.update_persistent_data(
744 746 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
745 747 )
746 748
747 749 self._nm_root = self._nm_max_idx = self._nm_docket = None
748 750 return docket, changed, data
749 751
750 752 def update_nodemap_data(self, docket, nm_data):
751 753 """provide full block of persisted binary data for a nodemap
752 754
753 755 The data are expected to come from disk. See `nodemap_data_all` for a
754 756 produceur of such data."""
755 757 if nm_data is not None:
756 758 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
757 759 if self._nm_root:
758 760 self._nm_docket = docket
759 761 else:
760 762 self._nm_root = self._nm_max_idx = self._nm_docket = None
761 763
762 764
763 765 class InlinedIndexObject(BaseIndexObject):
764 766 def __init__(self, data, inline=0):
765 767 self._data = data
766 768 self._lgt = self._inline_scan(None)
767 769 self._inline_scan(self._lgt)
768 770 self._extra = []
769 771
770 772 def _inline_scan(self, lgt):
771 773 off = 0
772 774 if lgt is not None:
773 775 self._offsets = [0] * lgt
774 776 count = 0
775 777 while off <= len(self._data) - self.entry_size:
776 778 start = off + self.big_int_size
777 779 (s,) = struct.unpack(
778 780 b'>i',
779 781 self._data[start : start + self.int_size],
780 782 )
781 783 if lgt is not None:
782 784 self._offsets[count] = off
783 785 count += 1
784 786 off += self.entry_size + s
785 787 if off != len(self._data):
786 788 raise ValueError(b"corrupted data")
787 789 return count
788 790
789 791 def __delitem__(self, i):
790 792 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
791 793 raise ValueError(b"deleting slices only supports a:-1 with step 1")
792 794 i = i.start
793 795 self._check_index(i)
794 796 self._stripnodes(i)
795 797 if i < self._lgt:
796 798 self._offsets = self._offsets[:i]
797 799 self._lgt = i
798 800 self._extra = []
799 801 else:
800 802 self._extra = self._extra[: i - self._lgt]
801 803
802 804 def _calculate_index(self, i):
803 805 return self._offsets[i]
804 806
805 807
806 808 def parse_index2(data, inline, revlogv2=False):
807 809 if not inline:
808 810 cls = IndexObject2 if revlogv2 else IndexObject
809 811 return cls(data), None
810 812 cls = InlinedIndexObject
811 813 return cls(data, inline), (0, data)
812 814
813 815
814 816 def parse_index_cl_v2(data):
815 817 return IndexChangelogV2(data), None
816 818
817 819
818 820 class IndexObject2(IndexObject):
819 821 index_format = revlog_constants.INDEX_ENTRY_V2
820 822
821 823 def replace_sidedata_info(
822 824 self,
823 825 rev,
824 826 sidedata_offset,
825 827 sidedata_length,
826 828 offset_flags,
827 829 compression_mode,
828 830 ):
829 831 """
830 832 Replace an existing index entry's sidedata offset and length with new
831 833 ones.
832 834 This cannot be used outside of the context of sidedata rewriting,
833 835 inside the transaction that creates the revision `rev`.
834 836 """
835 837 if rev < 0:
836 838 raise KeyError
837 839 self._check_index(rev)
838 840 if rev < self._lgt:
839 841 msg = b"cannot rewrite entries outside of this transaction"
840 842 raise KeyError(msg)
841 843 else:
842 844 entry = list(self[rev])
843 845 entry[0] = offset_flags
844 846 entry[8] = sidedata_offset
845 847 entry[9] = sidedata_length
846 848 entry[11] = compression_mode
847 849 entry = tuple(entry)
848 850 new = self._pack_entry(rev, entry)
849 851 self._extra[rev - self._lgt] = new
850 852
851 853 def _unpack_entry(self, rev, data):
852 854 data = self.index_format.unpack(data)
853 855 entry = data[:10]
854 856 data_comp = data[10] & 3
855 857 sidedata_comp = (data[10] & (3 << 2)) >> 2
856 return entry + (data_comp, sidedata_comp)
858 return entry + (data_comp, sidedata_comp, revlog_constants.RANK_UNKNOWN)
857 859
858 860 def _pack_entry(self, rev, entry):
859 861 data = entry[:10]
860 862 data_comp = entry[10] & 3
861 863 sidedata_comp = (entry[11] & 3) << 2
862 864 data += (data_comp | sidedata_comp,)
863 865
864 866 return self.index_format.pack(*data)
865 867
866 868 def entry_binary(self, rev):
867 869 """return the raw binary string representing a revision"""
868 870 entry = self[rev]
869 871 return self._pack_entry(rev, entry)
870 872
871 873 def pack_header(self, header):
872 874 """pack header information as binary"""
873 875 msg = 'version header should go in the docket, not the index: %d'
874 876 msg %= header
875 877 raise error.ProgrammingError(msg)
876 878
877 879
878 880 class IndexChangelogV2(IndexObject2):
879 881 index_format = revlog_constants.INDEX_ENTRY_CL_V2
880 882
881 883 def _unpack_entry(self, rev, data, r=True):
882 884 items = self.index_format.unpack(data)
883 885 return (
884 886 items[revlog_constants.INDEX_ENTRY_V2_IDX_OFFSET],
885 887 items[revlog_constants.INDEX_ENTRY_V2_IDX_COMPRESSED_LENGTH],
886 888 items[revlog_constants.INDEX_ENTRY_V2_IDX_UNCOMPRESSED_LENGTH],
887 889 rev,
888 890 rev,
889 891 items[revlog_constants.INDEX_ENTRY_V2_IDX_PARENT_1],
890 892 items[revlog_constants.INDEX_ENTRY_V2_IDX_PARENT_2],
891 893 items[revlog_constants.INDEX_ENTRY_V2_IDX_NODEID],
892 894 items[revlog_constants.INDEX_ENTRY_V2_IDX_SIDEDATA_OFFSET],
893 895 items[
894 896 revlog_constants.INDEX_ENTRY_V2_IDX_SIDEDATA_COMPRESSED_LENGTH
895 897 ],
896 898 items[revlog_constants.INDEX_ENTRY_V2_IDX_COMPRESSION_MODE] & 3,
897 899 (items[revlog_constants.INDEX_ENTRY_V2_IDX_COMPRESSION_MODE] >> 2)
898 900 & 3,
901 revlog_constants.RANK_UNKNOWN,
899 902 )
900 903
901 904 def _pack_entry(self, rev, entry):
902 905
903 906 base = entry[revlog_constants.ENTRY_DELTA_BASE]
904 907 link_rev = entry[revlog_constants.ENTRY_LINK_REV]
905 908 assert base == rev, (base, rev)
906 909 assert link_rev == rev, (link_rev, rev)
907 910 data = (
908 911 entry[revlog_constants.ENTRY_DATA_OFFSET],
909 912 entry[revlog_constants.ENTRY_DATA_COMPRESSED_LENGTH],
910 913 entry[revlog_constants.ENTRY_DATA_UNCOMPRESSED_LENGTH],
911 914 entry[revlog_constants.ENTRY_PARENT_1],
912 915 entry[revlog_constants.ENTRY_PARENT_2],
913 916 entry[revlog_constants.ENTRY_NODE_ID],
914 917 entry[revlog_constants.ENTRY_SIDEDATA_OFFSET],
915 918 entry[revlog_constants.ENTRY_SIDEDATA_COMPRESSED_LENGTH],
916 919 entry[revlog_constants.ENTRY_DATA_COMPRESSION_MODE] & 3
917 920 | (entry[revlog_constants.ENTRY_SIDEDATA_COMPRESSION_MODE] & 3)
918 921 << 2,
919 922 )
920 923 return self.index_format.pack(*data)
921 924
922 925
923 926 def parse_index_devel_nodemap(data, inline):
924 927 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
925 928 return PersistentNodeMapIndexObject(data), None
926 929
927 930
928 931 def parse_dirstate(dmap, copymap, st):
929 932 parents = [st[:20], st[20:40]]
930 933 # dereference fields so they will be local in loop
931 934 format = b">cllll"
932 935 e_size = struct.calcsize(format)
933 936 pos1 = 40
934 937 l = len(st)
935 938
936 939 # the inner loop
937 940 while pos1 < l:
938 941 pos2 = pos1 + e_size
939 942 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
940 943 pos1 = pos2 + e[4]
941 944 f = st[pos2:pos1]
942 945 if b'\0' in f:
943 946 f, c = f.split(b'\0')
944 947 copymap[f] = c
945 948 dmap[f] = DirstateItem.from_v1_data(*e[:4])
946 949 return parents
947 950
948 951
949 952 def pack_dirstate(dmap, copymap, pl):
950 953 cs = stringio()
951 954 write = cs.write
952 955 write(b"".join(pl))
953 956 for f, e in pycompat.iteritems(dmap):
954 957 if f in copymap:
955 958 f = b"%s\0%s" % (f, copymap[f])
956 959 e = _pack(
957 960 b">cllll",
958 961 e.v1_state(),
959 962 e.v1_mode(),
960 963 e.v1_size(),
961 964 e.v1_mtime(),
962 965 len(f),
963 966 )
964 967 write(e)
965 968 write(f)
966 969 return cs.getvalue()
@@ -1,80 +1,83 b''
1 1 # mercurial.revlogutils -- basic utilities for revlog
2 2 #
3 3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from ..thirdparty import attr
11 11 from ..interfaces import repository
12 12
13 13 # See mercurial.revlogutils.constants for doc
14 14 COMP_MODE_INLINE = 2
15 RANK_UNKNOWN = -1
15 16
16 17
def offset_type(offset, type):
    """Pack an offset and a flag field into a single integer.

    The flags occupy the low 16 bits; the offset sits above them.
    Raises ValueError if `type` carries bits outside the known flag set.
    """
    if type & ~repository.REVISION_FLAGS_KNOWN:
        raise ValueError(b'unknown revlog index flags: %d' % type)
    return int((int(offset) << 16) | type)
21 22
22 23
def entry(
    data_offset,
    data_compressed_length,
    data_delta_base,
    link_rev,
    parent_rev_1,
    parent_rev_2,
    node_id,
    flags=0,
    data_uncompressed_length=-1,
    data_compression_mode=COMP_MODE_INLINE,
    sidedata_offset=0,
    sidedata_compressed_length=0,
    sidedata_compression_mode=COMP_MODE_INLINE,
    rank=RANK_UNKNOWN,
):
    """Build one index-entry tuple from symbolic names.

    This is useful to abstract the actual detail of how we build the entry
    tuple for callers who don't care about it.

    This should always be called using keyword arguments.  Some arguments
    have default values; these match the values used by index versions that
    do not store such data.

    The tuple positions correspond to the ENTRY_* constants declared in
    ``revlogutils.constants``.
    """
    return (
        # offset and flags share a single field (flags in the low 16 bits)
        offset_type(data_offset, flags),
        data_compressed_length,
        data_uncompressed_length,
        data_delta_base,
        link_rev,
        parent_rev_1,
        parent_rev_2,
        node_id,
        sidedata_offset,
        sidedata_compressed_length,
        data_compression_mode,
        sidedata_compression_mode,
        # RANK_UNKNOWN (-1) when the index version does not track ranks
        rank,
    )
60 63
61 64
@attr.s(slots=True, frozen=True)
class revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    textlen:    length of the revision fulltext
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
@@ -1,291 +1,302 b''
1 1 # revlogdeltas.py - constant used for revlog logic.
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import struct
13 13
14 14 from ..interfaces import repository
15 15 from .. import revlogutils
16 16
17 17 ### Internal utility constants
18 18
19 19 KIND_CHANGELOG = 1001 # over 256 to not be comparable with a bytes
20 20 KIND_MANIFESTLOG = 1002
21 21 KIND_FILELOG = 1003
22 22 KIND_OTHER = 1004
23 23
24 24 ALL_KINDS = {
25 25 KIND_CHANGELOG,
26 26 KIND_MANIFESTLOG,
27 27 KIND_FILELOG,
28 28 KIND_OTHER,
29 29 }
30 30
31 31 ### Index entry key
32 32 #
33 33 #
34 34 # Internal details
35 35 # ----------------
36 36 #
37 37 # A large part of the revlog logic deals with revisions' "index entries", tuple
38 38 # objects that contains the same "items" whatever the revlog version.
39 39 # Different versions will have different ways of storing these items (sometimes
40 40 # not having them at all), but the tuple will always be the same. New fields
41 41 # are usually added at the end to avoid breaking existing code that relies
42 42 # on the existing order. The field are defined as follows:
43 43
44 44 # [0] offset:
45 45 # The byte index of the start of revision data chunk.
46 46 # That value is shifted up by 16 bits. use "offset = field >> 16" to
47 47 # retrieve it.
48 48 #
49 49 # flags:
50 50 # A flag field that carries special information or changes the behavior
51 51 # of the revision. (see `REVIDX_*` constants for details)
52 52 # The flag field only occupies the first 16 bits of this field,
53 53 # use "flags = field & 0xFFFF" to retrieve the value.
54 54 ENTRY_DATA_OFFSET = 0
55 55
56 56 # [1] compressed length:
57 57 # The size, in bytes, of the chunk on disk
58 58 ENTRY_DATA_COMPRESSED_LENGTH = 1
59 59
60 60 # [2] uncompressed length:
61 61 # The size, in bytes, of the full revision once reconstructed.
62 62 ENTRY_DATA_UNCOMPRESSED_LENGTH = 2
63 63
64 64 # [3] base rev:
65 65 # Either the base of the revision delta chain (without general
66 66 # delta), or the base of the delta (stored in the data chunk)
67 67 # with general delta.
68 68 ENTRY_DELTA_BASE = 3
69 69
70 70 # [4] link rev:
71 71 # Changelog revision number of the changeset introducing this
72 72 # revision.
73 73 ENTRY_LINK_REV = 4
74 74
75 75 # [5] parent 1 rev:
76 76 # Revision number of the first parent
77 77 ENTRY_PARENT_1 = 5
78 78
79 79 # [6] parent 2 rev:
80 80 # Revision number of the second parent
81 81 ENTRY_PARENT_2 = 6
82 82
83 83 # [7] node id:
84 84 # The node id of the current revision
85 85 ENTRY_NODE_ID = 7
86 86
87 87 # [8] sidedata offset:
88 88 # The byte index of the start of the revision's side-data chunk.
89 89 ENTRY_SIDEDATA_OFFSET = 8
90 90
91 91 # [9] sidedata chunk length:
92 92 # The size, in bytes, of the revision's side-data chunk.
93 93 ENTRY_SIDEDATA_COMPRESSED_LENGTH = 9
94 94
95 95 # [10] data compression mode:
96 96 # two bits that detail the way the data chunk is compressed on disk.
97 97 # (see "COMP_MODE_*" constants for details). For revlog version 0 and
98 98 # 1 this will always be COMP_MODE_INLINE.
99 99 ENTRY_DATA_COMPRESSION_MODE = 10
100 100
101 101 # [11] side-data compression mode:
102 102 # two bits that detail the way the sidedata chunk is compressed on disk.
103 103 # (see "COMP_MODE_*" constants for details)
104 104 ENTRY_SIDEDATA_COMPRESSION_MODE = 11
105 105
# [12] Revision rank:
#      The number of revisions "under" this one, the revision itself
#      included.
#
#      Formally this is defined as: rank(X) = len(ancestors(X)) + 1
#
#      If rank == RANK_UNKNOWN (-1), the information is not available.
#      Only `null` has a rank of 0.
ENTRY_RANK = 12

# Re-exported from `revlogutils` so there is a single source of truth for
# the "unknown rank" sentinel value (-1).
RANK_UNKNOWN = revlogutils.RANK_UNKNOWN
116
106 117 ### main revlog header
107 118
108 119 # We cannot rely on Struct.format: it is inconsistent for Python <= 3.6 versus above
109 120 INDEX_HEADER_FMT = b">I"
110 121 INDEX_HEADER = struct.Struct(INDEX_HEADER_FMT)
111 122
112 123 ## revlog version
113 124 REVLOGV0 = 0
114 125 REVLOGV1 = 1
115 126 # Dummy value until file format is finalized.
116 127 REVLOGV2 = 0xDEAD
117 128 # Dummy value until file format is finalized.
118 129 CHANGELOGV2 = 0xD34D
119 130
120 131 ## global revlog header flags
121 132 # Shared across v1 and v2.
122 133 FLAG_INLINE_DATA = 1 << 16
123 134 # Only used by v1, implied by v2.
124 135 FLAG_GENERALDELTA = 1 << 17
125 136 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
126 137 REVLOG_DEFAULT_FORMAT = REVLOGV1
127 138 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
128 139 REVLOGV0_FLAGS = 0
129 140 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
130 141 REVLOGV2_FLAGS = FLAG_INLINE_DATA
131 142 CHANGELOGV2_FLAGS = 0
132 143
133 144 ### individual entry
134 145
135 146 ## index v0:
136 147 # 4 bytes: offset
137 148 # 4 bytes: compressed length
138 149 # 4 bytes: base rev
139 150 # 4 bytes: link rev
140 151 # 20 bytes: parent 1 nodeid
141 152 # 20 bytes: parent 2 nodeid
142 153 # 20 bytes: nodeid
143 154 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
144 155
145 156 ## index v1
146 157 # 6 bytes: offset
147 158 # 2 bytes: flags
148 159 # 4 bytes: compressed length
149 160 # 4 bytes: uncompressed length
150 161 # 4 bytes: base rev
151 162 # 4 bytes: link rev
152 163 # 4 bytes: parent 1 rev
153 164 # 4 bytes: parent 2 rev
154 165 # 32 bytes: nodeid
155 166 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
156 167 assert INDEX_ENTRY_V1.size == 32 * 2
157 168
158 169 # 6 bytes: offset
159 170 # 2 bytes: flags
160 171 # 4 bytes: compressed length
161 172 # 4 bytes: uncompressed length
162 173 # 4 bytes: base rev
163 174 # 4 bytes: link rev
164 175 # 4 bytes: parent 1 rev
165 176 # 4 bytes: parent 2 rev
166 177 # 32 bytes: nodeid
167 178 # 8 bytes: sidedata offset
168 179 # 4 bytes: sidedata compressed length
169 180 # 1 bytes: compression mode (2 lower bit are data_compression_mode)
170 181 # 19 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
171 182 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
172 183 assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size
173 184
174 185 # 6 bytes: offset
175 186 # 2 bytes: flags
176 187 # 4 bytes: compressed length
177 188 # 4 bytes: uncompressed length
178 189 # 4 bytes: parent 1 rev
179 190 # 4 bytes: parent 2 rev
180 191 # 32 bytes: nodeid
181 192 # 8 bytes: sidedata offset
182 193 # 4 bytes: sidedata compressed length
183 194 # 1 bytes: compression mode (2 lower bit are data_compression_mode)
184 195 # 27 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
185 196 INDEX_ENTRY_CL_V2 = struct.Struct(b">Qiiii20s12xQiB27x")
186 197 assert INDEX_ENTRY_CL_V2.size == 32 * 3, INDEX_ENTRY_CL_V2.size
187 198 INDEX_ENTRY_V2_IDX_OFFSET = 0
188 199 INDEX_ENTRY_V2_IDX_COMPRESSED_LENGTH = 1
189 200 INDEX_ENTRY_V2_IDX_UNCOMPRESSED_LENGTH = 2
190 201 INDEX_ENTRY_V2_IDX_PARENT_1 = 3
191 202 INDEX_ENTRY_V2_IDX_PARENT_2 = 4
192 203 INDEX_ENTRY_V2_IDX_NODEID = 5
193 204 INDEX_ENTRY_V2_IDX_SIDEDATA_OFFSET = 6
194 205 INDEX_ENTRY_V2_IDX_SIDEDATA_COMPRESSED_LENGTH = 7
195 206 INDEX_ENTRY_V2_IDX_COMPRESSION_MODE = 8
196 207
197 208 # revlog index flags
198 209
199 210 # For historical reasons, revlog's internal flags were exposed via the
200 211 # wire protocol and are even exposed in parts of the storage APIs.
201 212
202 213 # revision has censor metadata, must be verified
203 214 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
204 215 # revision hash does not match data (narrowhg)
205 216 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
206 217 # revision data is stored externally
207 218 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
208 219 # revision changes files in a way that could affect copy tracing.
209 220 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
210 221 REVIDX_DEFAULT_FLAGS = 0
211 222 # stable order in which flags need to be processed and their processors applied
212 223 REVIDX_FLAGS_ORDER = [
213 224 REVIDX_ISCENSORED,
214 225 REVIDX_ELLIPSIS,
215 226 REVIDX_EXTSTORED,
216 227 REVIDX_HASCOPIESINFO,
217 228 ]
218 229
219 230 # bitmark for flags that could cause rawdata content change
220 231 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
221 232
222 233 ## chunk compression mode constants:
223 234 # These constants are used in revlog version >=2 to denote the compression used
224 235 # for a chunk.
225 236
226 237 # Chunk use no compression, the data stored on disk can be directly use as
227 238 # chunk value. Without any header information prefixed.
228 239 COMP_MODE_PLAIN = 0
229 240
230 241 # Chunk use the "default compression" for the revlog (usually defined in the
231 242 # revlog docket). A header is still used.
232 243 #
233 244 # XXX: keeping a header is probably not useful and we should probably drop it.
234 245 #
235 246 # XXX: The value of allow mixed type of compression in the revlog is unclear
236 247 # and we should consider making PLAIN/DEFAULT the only available mode for
237 248 # revlog v2, disallowing INLINE mode.
238 249 COMP_MODE_DEFAULT = 1
239 250
240 251 # Chunk use a compression mode stored "inline" at the start of the chunk
241 252 # itself. This is the mode always used for revlog version "0" and "1"
242 253 COMP_MODE_INLINE = revlogutils.COMP_MODE_INLINE
243 254
244 255 SUPPORTED_FLAGS = {
245 256 REVLOGV0: REVLOGV0_FLAGS,
246 257 REVLOGV1: REVLOGV1_FLAGS,
247 258 REVLOGV2: REVLOGV2_FLAGS,
248 259 CHANGELOGV2: CHANGELOGV2_FLAGS,
249 260 }
250 261
251 262 _no = lambda flags: False
252 263 _yes = lambda flags: True
253 264
254 265
255 266 def _from_flag(flag):
256 267 return lambda flags: bool(flags & flag)
257 268
258 269
259 270 FEATURES_BY_VERSION = {
260 271 REVLOGV0: {
261 272 b'inline': _no,
262 273 b'generaldelta': _no,
263 274 b'sidedata': False,
264 275 b'docket': False,
265 276 },
266 277 REVLOGV1: {
267 278 b'inline': _from_flag(FLAG_INLINE_DATA),
268 279 b'generaldelta': _from_flag(FLAG_GENERALDELTA),
269 280 b'sidedata': False,
270 281 b'docket': False,
271 282 },
272 283 REVLOGV2: {
273 284 # The point of inline-revlog is to reduce the number of files used in
274 285 # the store. Using a docket defeat this purpose. So we needs other
275 286 # means to reduce the number of files for revlogv2.
276 287 b'inline': _no,
277 288 b'generaldelta': _yes,
278 289 b'sidedata': True,
279 290 b'docket': True,
280 291 },
281 292 CHANGELOGV2: {
282 293 b'inline': _no,
283 294 # General delta is useless for changelog since we don't do any delta
284 295 b'generaldelta': _no,
285 296 b'sidedata': True,
286 297 b'docket': True,
287 298 },
288 299 }
289 300
290 301
291 302 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
@@ -1,314 +1,316 b''
1 1 # unionrepo.py - repository class for viewing union of repository changesets
2 2 #
3 3 # Derived from bundlerepo.py
4 4 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
5 5 # Copyright 2013 Unity Technologies, Mads Kiilerich <madski@unity3d.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Repository class for "in-memory pull" of one local repository to another,
11 11 allowing operations like diff and log with revsets.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 from .i18n import _
17 17 from .pycompat import getattr
18 18
19 19 from . import (
20 20 changelog,
21 21 cmdutil,
22 22 encoding,
23 23 error,
24 24 filelog,
25 25 localrepo,
26 26 manifest,
27 27 mdiff,
28 28 pathutil,
29 29 revlog,
30 30 util,
31 31 vfs as vfsmod,
32 32 )
33 33
34 34 from .revlogutils import (
35 35 constants as revlog_constants,
36 36 )
37 37
38 38
class unionrevlog(revlog.revlog):
    """A revlog presenting the union of a local revlog and ``revlog2``.

    Revisions with rev <= ``repotiprev`` belong to the local revlog;
    higher revs are resolved by node through ``revlog2``.
    """

    def __init__(self, opener, radix, revlog2, linkmapper):
        # How it works:
        # To retrieve a revision, we just need to know the node id so we can
        # look it up in revlog2.
        #
        # To differentiate a rev in the second revlog from a rev in the revlog,
        # we check revision against repotiprev.
        opener = vfsmod.readonlyvfs(opener)
        target = getattr(revlog2, 'target', None)
        if target is None:
            # a revlog wrapper, eg: the manifestlog that is not an actual revlog
            target = revlog2._revlog.target
        revlog.revlog.__init__(self, opener, target=target, radix=radix)
        self.revlog2 = revlog2

        n = len(self)
        self.repotiprev = n - 1  # last rev that lives in the local revlog
        self.bundlerevs = set()  # used by 'bundle()' revset expression
        for rev2 in self.revlog2:
            rev = self.revlog2.index[rev2]
            # rev numbers - in revlog2, very different from self.rev
            (
                _start,
                _csize,
                rsize,
                base,
                linkrev,
                p1rev,
                p2rev,
                node,
                _sdo,
                _sds,
                _dcm,
                _sdcm,
                rank,
            ) = rev
            # low 16 bits of the packed offset/flags field hold the flags
            flags = _start & 0xFFFF

            if linkmapper is None:  # link is to same revlog
                assert linkrev == rev2  # we never link back
                link = n
            else:  # rev must be mapped from repo2 cl to unified cl by linkmapper
                link = linkmapper(linkrev)

            # NOTE(review): this comment was identical to the `is None`
            # branch above ("link is to same revlog"), which looks like a
            # copy-paste; here linkmapper is set and `base` is remapped
            # through it -- confirm this is intended.
            if linkmapper is not None:
                base = linkmapper(base)

            this_rev = self.index.get_rev(node)
            if this_rev is not None:
                # this happens for the common revlog revisions
                self.bundlerevs.add(this_rev)
                continue

            p1node = self.revlog2.node(p1rev)
            p2node = self.revlog2.node(p2rev)

            # TODO: it's probably wrong to set compressed length to -1, but
            # I have no idea if csize is valid in the base revlog context.
            e = (
                # packed offset/flags field: offset left at 0 for these entries
                flags,
                -1,
                rsize,
                base,
                link,
                self.rev(p1node),
                self.rev(p2node),
                node,
                0,  # sidedata offset
                0,  # sidedata size
                revlog_constants.COMP_MODE_INLINE,
                revlog_constants.COMP_MODE_INLINE,
                rank,
            )
            self.index.append(e)
            self.bundlerevs.add(n)
            n += 1

    def _chunk(self, rev):
        # local revs are served directly; union-only revs go through revlog2
        if rev <= self.repotiprev:
            return revlog.revlog._chunk(self, rev)
        return self.revlog2._chunk(self.node(rev))

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 > self.repotiprev and rev2 > self.repotiprev:
            # both revisions come from revlog2; delegate entirely
            return self.revlog2.revdiff(
                self.revlog2.rev(self.node(rev1)),
                self.revlog2.rev(self.node(rev2)),
            )
        elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
            return super(unionrevlog, self).revdiff(rev1, rev2)

        # revisions straddle the two revlogs: diff the full texts
        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # accept either a rev number or a node id
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = self.rev(node)

        if rev > self.repotiprev:
            # work around manifestrevlog NOT being a revlog
            revlog2 = getattr(self.revlog2, '_revlog', self.revlog2)
            func = revlog2._revisiondata
        else:
            func = super(unionrevlog, self)._revisiondata
        return func(node, _df=_df, raw=raw)

    # The union view is read-only: all mutating operations are rejected.
    def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
        raise NotImplementedError

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        maybemissingparents=False,
    ):
        raise NotImplementedError

    def strip(self, minlink, transaction):
        raise NotImplementedError

    def checksize(self):
        raise NotImplementedError
168 170
169 171
class unionchangelog(unionrevlog, changelog.changelog):
    """Changelog presenting the union of two repositories' changelogs."""

    def __init__(self, opener, opener2):
        changelog.changelog.__init__(self, opener)
        # the changelog is the linkrev target itself, so no mapping is needed
        linkmapper = None
        changelog2 = changelog.changelog(opener2)
        unionrevlog.__init__(self, opener, self.radix, changelog2, linkmapper)
176 178
177 179
class unionmanifest(unionrevlog, manifest.manifestrevlog):
    """Manifest revlog backed by two repositories' manifest stores."""

    def __init__(self, nodeconstants, opener, opener2, linkmapper):
        manifest.manifestrevlog.__init__(self, nodeconstants, opener)
        manifest2 = manifest.manifestrevlog(nodeconstants, opener2)
        unionrevlog.__init__(
            self, opener, self._revlog.radix, manifest2, linkmapper
        )
185 187
186 188
class unionfilelog(filelog.filelog):
    """Filelog whose revisions may come from either of two repositories."""

    def __init__(self, opener, path, opener2, linkmapper, repo):
        filelog.filelog.__init__(self, opener, path)
        filelog2 = filelog.filelog(opener2, path)
        # swap in a union revlog combining both repositories' file stores
        self._revlog = unionrevlog(
            opener, self._revlog.radix, filelog2._revlog, linkmapper
        )
        self._repo = repo
        self.repotiprev = self._revlog.repotiprev
        self.revlog2 = self._revlog.revlog2

    def iscensored(self, rev):
        """Check if a revision is censored."""
        if rev <= self.repotiprev:
            return filelog.filelog.iscensored(self, rev)
        # the revision lives in the second repository; ask its revlog
        node = self.node(rev)
        return self.revlog2.iscensored(self.revlog2.rev(node))
204 206
205 207
class unionpeer(localrepo.localpeer):
    """Local peer for a union repository; pushing is never allowed."""

    def canpush(self):
        return False
209 211
210 212
class unionrepository(object):
    """Represents the union of data in 2 repositories.

    Instances are not usable if constructed directly. Use ``instance()``
    or ``makeunionrepository()`` to create a usable instance.
    """

    def __init__(self, repo2, url):
        # NOTE: attributes such as self.ui / self.svfs come from the
        # localrepo class this mixin is combined with (see
        # makeunionrepository), not from this class itself.
        self.repo2 = repo2
        self._url = url

        self.ui.setconfig(b'phases', b'publish', False, b'unionrepo')

    @localrepo.unfilteredpropertycache
    def changelog(self):
        # union of both repositories' changelogs, cached unfiltered
        return unionchangelog(self.svfs, self.repo2.svfs)

    @localrepo.unfilteredpropertycache
    def manifestlog(self):
        rootstore = unionmanifest(
            self.nodeconstants,
            self.svfs,
            self.repo2.svfs,
            self.unfiltered()._clrev,
        )
        return manifest.manifestlog(
            self.svfs, self, rootstore, self.narrowmatch()
        )

    def _clrev(self, rev2):
        """map from repo2 changelog rev to temporary rev in self.changelog"""
        node = self.repo2.changelog.node(rev2)
        return self.changelog.rev(node)

    def url(self):
        return self._url

    def file(self, f):
        # filelog backed by both repositories' stores
        return unionfilelog(
            self.svfs, f, self.repo2.svfs, self.unfiltered()._clrev, self
        )

    def close(self):
        self.repo2.close()

    def cancopy(self):
        return False

    def peer(self):
        return unionpeer(self)

    def getcwd(self):
        return encoding.getcwd()  # always outside the repo
264 266
265 267
def instance(ui, path, create, intents=None, createopts=None):
    """Create a union repository from a ``union:repo1+repo2`` path.

    ``create`` is rejected: union repositories are read-only views.  When
    only one repository is named, the other defaults to the repository
    containing the current working directory (or ``bundle.mainreporoot``).
    """
    if create:
        raise error.Abort(_(b'cannot create new union repository'))
    parentpath = ui.config(b"bundle", b"mainreporoot")
    if not parentpath:
        # try to find the correct path to the working directory repo
        parentpath = cmdutil.findrepo(encoding.getcwd())
    if parentpath is None:
        parentpath = b''
    if parentpath:
        # Try to make the full path relative so we get a nice, short URL.
        # In particular, we don't want temp dir names in test outputs.
        cwd = encoding.getcwd()
        if parentpath == cwd:
            parentpath = b''
        else:
            cwd = pathutil.normasprefix(cwd)
            if parentpath.startswith(cwd):
                parentpath = parentpath[len(cwd) :]
    if path.startswith(b'union:'):
        # "union:A+B" names both repos; "union:B" unions against parentpath
        s = path.split(b":", 1)[1].split(b"+", 1)
        if len(s) == 1:
            repopath, repopath2 = parentpath, s[0]
        else:
            repopath, repopath2 = s
    else:
        repopath, repopath2 = parentpath, path

    return makeunionrepository(ui, repopath, repopath2)
295 297
296 298
def makeunionrepository(ui, repopath1, repopath2):
    """Make a union repository object from 2 local repo paths."""
    repo1 = localrepo.instance(ui, repopath1, create=False)
    repo2 = localrepo.instance(ui, repopath2, create=False)

    url = b'union:%s+%s' % (
        util.expandpath(repopath1),
        util.expandpath(repopath2),
    )

    # Mix unionrepository into repo1's concrete class so repo1's behavior
    # is kept while unionrepository's overrides take precedence.
    class derivedunionrepository(unionrepository, repo1.__class__):
        pass

    repo = repo1
    repo.__class__ = derivedunionrepository
    unionrepository.__init__(repo1, repo2, url)

    return repo
@@ -1,326 +1,330 b''
1 1 """This unit test primarily tests parsers.parse_index2().
2 2
3 3 It also checks certain aspects of the parsers module as a whole.
4 4 """
5 5
6 6 from __future__ import absolute_import, print_function
7 7
8 8 import os
9 9 import struct
10 10 import subprocess
11 11 import sys
12 12 import unittest
13 13
14 14 from mercurial.node import (
15 15 bin,
16 16 hex,
17 17 nullrev,
18 18 sha1nodeconstants,
19 19 )
20 20 from mercurial import (
21 21 policy,
22 22 pycompat,
23 23 )
24 24 from mercurial.revlogutils import (
25 25 constants,
26 26 )
27 27
28 28 parsers = policy.importmod('parsers')
29 29
30 30 # original python implementation
def gettype(q):
    """Extract the low 16 bits (the type/flags field) of a packed value."""
    mask = 0xFFFF
    return int(q & mask)
33 33
34 34
def offset_type(offset, type):
    """Pack an offset and a 16-bit type field into a single integer."""
    shifted = int(offset) << 16
    return int(shifted | type)
37 37
38 38
39 39 indexformatng = ">Qiiiiii20s12x"
40 40
41 41
def py_parseindex(data, inline):
    """Reference (pure Python) parser for a v1 revlog index.

    Mirrors parsers.parse_index2 so the C implementation can be checked
    against it.  Returns (index, cache) where cache is (0, data) for
    inline revlogs and None otherwise.
    """
    s = 64  # on-disk size of one v1 index entry, in bytes
    cache = None
    index = []
    nodemap = {sha1nodeconstants.nullid: nullrev}
    n = off = 0

    l = len(data) - s
    append = index.append
    if inline:
        cache = (0, data)
        while off <= l:
            e = struct.unpack(indexformatng, data[off : off + s])
            # extend the on-disk tuple with in-memory-only fields:
            # sidedata offset/length, both compression modes, and rank
            e = e + (
                0,
                0,
                constants.COMP_MODE_INLINE,
                constants.COMP_MODE_INLINE,
                constants.RANK_UNKNOWN,
            )
            nodemap[e[7]] = n
            append(e)
            n += 1
            if e[1] < 0:
                break
            # inline: revision data follows the entry, skip over it
            off += e[1] + s
    else:
        while off <= l:
            e = struct.unpack(indexformatng, data[off : off + s])
            e = e + (
                0,
                0,
                constants.COMP_MODE_INLINE,
                constants.COMP_MODE_INLINE,
                constants.RANK_UNKNOWN,
            )
            nodemap[e[7]] = n
            append(e)
            n += 1
            off += s

    # the first entry's offset field stores the revlog header; reset it to 0
    e = list(index[0])
    type = gettype(e[0])
    e[0] = offset_type(0, type)
    index[0] = tuple(e)

    return index, cache
87 89
88 90
89 91 data_inlined = (
90 92 b'\x00\x01\x00\x01\x00\x00\x00\x00\x00\x00\x01\x8c'
91 93 b'\x00\x00\x04\x07\x00\x00\x00\x00\x00\x00\x15\x15\xff\xff\xff'
92 94 b'\xff\xff\xff\xff\xff\xebG\x97\xb7\x1fB\x04\xcf\x13V\x81\tw\x1b'
93 95 b'w\xdduR\xda\xc6\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
94 96 b'x\x9c\x9d\x93?O\xc30\x10\xc5\xf7|\x8a\xdb\x9a\xa8m\x06\xd8*\x95'
95 97 b'\x81B\xa1\xa2\xa2R\xcb\x86Pd\x9a\x0b5$vd_\x04\xfd\xf6\x9c\xff@'
96 98 b'\x11!\x0b\xd9\xec\xf7\xbbw\xe7gG6\xad6\x04\xdaN\xc0\x92\xa0$)'
97 99 b'\xb1\x82\xa2\xd1%\x16\xa4\x8b7\xa9\xca\xd4-\xb2Y\x02\xfc\xc9'
98 100 b'\xcaS\xf9\xaeX\xed\xb6\xd77Q\x02\x83\xd4\x19\xf5--Y\xea\xe1W'
99 101 b'\xab\xed\x10\xceR\x0f_\xdf\xdf\r\xe1,\xf5\xf0\xcb\xf5 \xceR\x0f'
100 102 b'_\xdc\x0e\x0e\xc3R\x0f_\xae\x96\x9b!\x9e\xa5\x1e\xbf\xdb,\x06'
101 103 b'\xc7q\x9a/\x88\x82\xc3B\xea\xb5\xb4TJ\x93\xb6\x82\x0e\xe16\xe6'
102 104 b'KQ\xdb\xaf\xecG\xa3\xd1 \x01\xd3\x0b_^\xe8\xaa\xa0\xae\xad\xd1'
103 105 b'&\xbef\x1bz\x08\xb0|\xc9Xz\x06\xf6Z\x91\x90J\xaa\x17\x90\xaa'
104 106 b'\xd2\xa6\x11$5C\xcf\xba#\xa0\x03\x02*2\x92-\xfc\xb1\x94\xdf\xe2'
105 107 b'\xae\xb8\'m\x8ey0^\x85\xd3\x82\xb4\xf0`:\x9c\x00\x8a\xfd\x01'
106 108 b'\xb0\xc6\x86\x8b\xdd\xae\x80\xf3\xa9\x9fd\x16\n\x00R%\x1a\x06'
107 109 b'\xe9\xd8b\x98\x1d\xf4\xf3+\x9bf\x01\xd8p\x1b\xf3.\xed\x9f^g\xc3'
108 110 b'^\xd9W81T\xdb\xd5\x04sx|\xf2\xeb\xd6`%?x\xed"\x831\xbf\xf3\xdc'
109 111 b'b\xeb%gaY\xe1\xad\x9f\xb9f\'1w\xa9\xa5a\x83s\x82J\xb98\xbc4\x8b'
110 112 b'\x83\x00\x9f$z\xb8#\xa5\xb1\xdf\x98\xd9\xec\x1b\x89O\xe3Ts\x9a4'
111 113 b'\x17m\x8b\xfc\x8f\xa5\x95\x9a\xfc\xfa\xed,\xe5|\xa1\xfe\x15\xb9'
112 114 b'\xbc\xb2\x93\x1f\xf2\x95\xff\xdf,\x1a\xc5\xe7\x17*\x93Oz:>\x0e'
113 115 )
114 116
115 117 data_non_inlined = (
116 118 b'\x00\x00\x00\x01\x00\x00\x00\x00\x00\x01D\x19'
117 119 b'\x00\x07e\x12\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff'
118 120 b'\xff\xff\xff\xff\xd1\xf4\xbb\xb0\xbe\xfc\x13\xbd\x8c\xd3\x9d'
119 121 b'\x0f\xcd\xd9;\x8c\x07\x8cJ/\x00\x00\x00\x00\x00\x00\x00\x00\x00'
120 122 b'\x00\x00\x00\x00\x00\x00\x01D\x19\x00\x00\x00\x00\x00\xdf\x00'
121 123 b'\x00\x01q\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\xff'
122 124 b'\xff\xff\xff\xc1\x12\xb9\x04\x96\xa4Z1t\x91\xdfsJ\x90\xf0\x9bh'
123 125 b'\x07l&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
124 126 b'\x00\x01D\xf8\x00\x00\x00\x00\x01\x1b\x00\x00\x01\xb8\x00\x00'
125 127 b'\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\xff\xff\xff\xff\x02\n'
126 128 b'\x0e\xc6&\xa1\x92\xae6\x0b\x02i\xfe-\xe5\xbao\x05\xd1\xe7\x00'
127 129 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01F'
128 130 b'\x13\x00\x00\x00\x00\x01\xec\x00\x00\x03\x06\x00\x00\x00\x01'
129 131 b'\x00\x00\x00\x03\x00\x00\x00\x02\xff\xff\xff\xff\x12\xcb\xeby1'
130 132 b'\xb6\r\x98B\xcb\x07\xbd`\x8f\x92\xd9\xc4\x84\xbdK\x00\x00\x00'
131 133 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00'
132 134 )
133 135
134 136
def parse_index2(data, inline, revlogv2=False):
    """Run parsers.parse_index2 and materialize its index as a plain list."""
    result = parsers.parse_index2(data, inline, revlogv2=revlogv2)
    return list(result[0]), result[1]
138 140
139 141
def importparsers(hexversion):
    """Import mercurial.parsers with the given sys.hexversion.

    Returns (stdout, stderr) from the subprocess; stderr is merged into
    stdout, and stdout is non-empty only when the import failed.
    """
    # The file parsers.c inspects sys.hexversion to determine the version
    # of the currently-running Python interpreter, so we monkey-patch
    # sys.hexversion to simulate using different versions.
    code = (
        "import sys; sys.hexversion=%s; "
        "import mercurial.cext.parsers" % hexversion
    )
    cmd = "\"%s\" -c \"%s\"" % (os.environ['PYTHON'], code)
    # We need to do these tests inside a subprocess because parser.c's
    # version-checking code happens inside the module init function, and
    # when using reload() to reimport an extension module, "The init function
    # of extension modules is not called a second time"
    # (from http://docs.python.org/2/library/functions.html?#reload).
    p = subprocess.Popen(
        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
    )
    return p.communicate()  # returns stdout, stderr
159 161
160 162
def hexfailmsg(testnumber, hexversion, stdout, expected):
    """Format a diagnostic message for a failed version-detection test."""
    try:
        hexstring = hex(hexversion)
    except TypeError:
        # non-numeric input: report the raw value with no hex form
        hexstring = None
    template = (
        "FAILED: version test #%s with Python %s and patched "
        "sys.hexversion %r (%r):\n Expected %s but got:\n-->'%s'\n"
    )
    args = (
        testnumber,
        sys.version_info,
        hexversion,
        hexstring,
        expected,
        stdout,
    )
    return template % args
178 180
179 181
def makehex(major, minor, micro):
    """Assemble a sys.hexversion-style integer from version components."""
    text = "%x%02x%02x00" % (major, minor, micro)
    return int(text, 16)
182 184
183 185
class parseindex2tests(unittest.TestCase):
    """Exercise the C revlog-index parser (parsers.parse_index2) against the
    pure-Python reference implementation and its version-detection logic."""

    def assertversionokay(self, testnumber, hexversion):
        # Importing parsers under a compatible hexversion must stay silent.
        out, _err = importparsers(hexversion)
        self.assertFalse(
            out, hexfailmsg(testnumber, hexversion, out, 'no stdout')
        )

    def assertversionfail(self, testnumber, hexversion):
        # Importing parsers under an incompatible hexversion must raise the
        # module's dedicated ImportError.
        out, _err = importparsers(hexversion)
        # We include versionerrortext to distinguish from other ImportErrors.
        expected = b"ImportError: %s" % pycompat.sysbytes(
            parsers.versionerrortext
        )
        failmsg = hexfailmsg(
            testnumber,
            hexversion,
            out,
            expected="stdout to contain %r" % expected,
        )
        self.assertIn(expected, out, failmsg)

    def testversiondetection(self):
        """Check the version-detection logic when importing parsers."""
        # Only test the version-detection logic if it is present.
        try:
            parsers.versionerrortext
        except AttributeError:
            return
        major, minor, micro = sys.version_info[0], sys.version_info[1], sys.version_info[2]
        # Same major-minor version is accepted regardless of micro.
        self.assertversionokay(1, makehex(major, minor, micro))
        self.assertversionokay(2, makehex(major, minor, micro + 1))
        # A different major or minor version (or a non-integer) is rejected.
        self.assertversionfail(3, makehex(major + 1, minor, micro))
        self.assertversionfail(4, makehex(major, minor + 1, micro))
        self.assertversionfail(5, "'foo'")

    def testbadargs(self):
        # Check that parse_index2() raises TypeError on bad arguments.
        with self.assertRaises(TypeError):
            parse_index2(0, True)

    def testparseindexfile(self):
        # Check parsers.parse_index2() on an index file against the
        # original Python implementation of parseindex, both with and
        # without inlined data.
        for data, inline in ((data_inlined, True), (data_non_inlined, False)):
            expected = py_parseindex(data, inline)
            actual = parse_index2(data, inline)
            self.assertEqual(expected, actual)

        # Reverse (node -> rev) lookups must agree with forward iteration;
        # the null node always maps back to rev -1.
        ix = parsers.parse_index2(data_inlined, True)[0]
        for rev, entry in enumerate(ix):
            node = entry[7]
            wanted = -1 if node == sha1nodeconstants.nullid else rev
            try:
                self.assertEqual(
                    ix[node],
                    wanted,
                    'Reverse lookup inconsistent for %r' % hex(node),
                )
            except TypeError:
                # pure version doesn't support this
                break

    def testminusone(self):
        # index[-1] is the synthetic null revision; it must be the same
        # all-null entry for both inline and non-inline indexes.
        nullentry = (
            0,
            0,
            0,
            -1,
            -1,
            -1,
            -1,
            sha1nodeconstants.nullid,
            0,
            0,
            constants.COMP_MODE_INLINE,
            constants.COMP_MODE_INLINE,
            constants.RANK_UNKNOWN,
        )
        for data, inline in ((data_inlined, True), (data_non_inlined, False)):
            index, _cache = parsers.parse_index2(data, inline)
            self.assertEqual(nullentry, index[-1])

    def testdelitemwithoutnodetree(self):
        # Slice deletion must work even before the C index has built its
        # internal node tree.
        index, _junk = parsers.parse_index2(data_non_inlined, False)

        def revbytes(rev):
            # 4-byte big-endian encoding of a rev; nullrev becomes all-ones.
            if rev == nullrev:
                return b'\xff\xff\xff\xff'
            return bin('%08x' % rev)

        def addrev(p1, p2=nullrev):
            # node won't matter for this test, let's just make sure
            # they don't collide. Other data don't matter either.
            node = revbytes(p1) + revbytes(p2) + b'.' * 12
            entry = (
                0,
                0,
                12,
                1,
                34,
                p1,
                p2,
                node,
                0,
                0,
                constants.COMP_MODE_INLINE,
                constants.COMP_MODE_INLINE,
                constants.RANK_UNKNOWN,
            )
            index.append(entry)

        for parent in (4, 5, 6):
            addrev(parent)
        self.assertEqual(len(index), 7)

        del index[1:-1]

        # assertions that failed before correction
        self.assertEqual(len(index), 1)  # was 4
        headrevs = getattr(index, 'headrevs', None)
        if headrevs is not None:  # not implemented in pure
            self.assertEqual(index.headrevs(), [0])  # gave ValueError
321 325
322 326
if __name__ == '__main__':
    # Run the tests quietly via Mercurial's test harness helper.
    import silenttestrunner

    silenttestrunner.main(__name__)
General Comments 0
You need to be logged in to leave comments. Login now