sidedata: enable sidedata computers to optionally rewrite flags...
Raphaël Gomès
r47844:223b4723 default
@@ -1,3028 +1,3029 @@
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
26 26 #ifdef IS_PY3K
27 27 /* The mapping of Python types is meant to be temporary to get Python
28 28 * 3 to compile. We should remove this once Python 3 is fully
29 29 * supported and proper types are used in the extensions themselves. */
30 30 #define PyInt_Check PyLong_Check
31 31 #define PyInt_FromLong PyLong_FromLong
32 32 #define PyInt_FromSsize_t PyLong_FromSsize_t
33 33 #define PyInt_AsLong PyLong_AsLong
34 34 #endif
35 35
36 36 typedef struct indexObjectStruct indexObject;
37 37
38 38 typedef struct {
39 39 int children[16];
40 40 } nodetreenode;
41 41
42 42 typedef struct {
43 43 int abi_version;
44 44 Py_ssize_t (*index_length)(const indexObject *);
45 45 const char *(*index_node)(indexObject *, Py_ssize_t);
46 46 int (*index_parents)(PyObject *, int, int *);
47 47 } Revlog_CAPI;
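/*
 * Illustrative sketch (not part of the original source): a consumer
 * would typically obtain this struct through a Python capsule and
 * validate abi_version before calling through it. The capsule name
 * and the EXPECTED_REVLOG_ABI constant below are assumptions for
 * illustration only, hence the #if 0 guard.
 */
#if 0
static Revlog_CAPI *get_revlog_api(void)
{
	Revlog_CAPI *api = (Revlog_CAPI *)PyCapsule_Import(
	    "mercurial.cext.parsers.revlog_CAPI", 0);
	if (api != NULL && api->abi_version != EXPECTED_REVLOG_ABI) {
		PyErr_SetString(PyExc_RuntimeError, "revlog ABI mismatch");
		return NULL;
	}
	return api;
}
#endif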
48 48
49 49 /*
50 50 * A base-16 trie for fast node->rev mapping.
51 51 *
52 52 * Positive value is index of the next node in the trie
53 53 * Negative value is a leaf: -(rev + 2)
54 54 * Zero is empty
55 55 */
56 56 typedef struct {
57 57 indexObject *index;
58 58 nodetreenode *nodes;
59 59 Py_ssize_t nodelen;
60 60 size_t length; /* # nodes in use */
61 61 size_t capacity; /* # nodes allocated */
62 62 int depth; /* maximum depth of tree */
63 63 int splits; /* # splits performed */
64 64 } nodetree;
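/*
 * Illustrative helpers (not in the original source): how a leaf value
 * round-trips through the -(rev + 2) encoding described above, e.g.
 * rev 0 is stored as -2 and rev 5 as -7, leaving 0 free to mean
 * "empty" and positive values free to index interior nodes.
 */
static inline int nt_leaf_encode(int rev)
{
	return -rev - 2;
}
static inline int nt_leaf_decode(int v)
{
	return -(v + 2);
}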
65 65
66 66 typedef struct {
67 67 PyObject_HEAD /* ; */
68 68 nodetree nt;
69 69 } nodetreeObject;
70 70
71 71 /*
72 72 * This class has two behaviors.
73 73 *
74 74 * When used in a list-like way (with integer keys), we decode an
75 75 * entry in a RevlogNG index file on demand. We have limited support for
76 76 * integer-keyed insert and delete, only at elements right before the
77 77 * end.
78 78 *
79 79 * With string keys, we lazily perform a reverse mapping from node to
80 80 * rev, using a base-16 trie.
81 81 */
82 82 struct indexObjectStruct {
83 83 PyObject_HEAD
84 84 /* Type-specific fields go here. */
85 85 PyObject *data; /* raw bytes of index */
86 86 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
87 87 PyObject *nullentry; /* fast path for references to null */
88 88 Py_buffer buf; /* buffer of data */
89 89 const char **offsets; /* populated on demand */
90 90 Py_ssize_t length; /* current on-disk number of elements */
91 91 unsigned new_length; /* number of added elements */
92 92 unsigned added_length; /* space reserved for added elements */
93 93 char *added; /* populated on demand */
94 94 PyObject *headrevs; /* cache, invalidated on changes */
95 95 PyObject *filteredrevs; /* filtered revs set */
96 96 nodetree nt; /* base-16 trie */
97 97 int ntinitialized; /* 0 or 1 */
98 98 int ntrev; /* last rev scanned */
99 99 int ntlookups; /* # lookups */
100 100 int ntmisses; /* # lookups that miss the cache */
101 101 int inlined;
102 102 long hdrsize; /* size of index headers; differs between v1 and v2 formats */
103 103 };
104 104
105 105 static Py_ssize_t index_length(const indexObject *self)
106 106 {
107 107 return self->length + self->new_length;
108 108 }
109 109
110 110 static const char nullid[32] = {0};
111 111 static const Py_ssize_t nullrev = -1;
112 112
113 113 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
114 114
115 115 static int index_find_node(indexObject *self, const char *node);
116 116
117 117 #if LONG_MAX == 0x7fffffffL
118 118 static const char *const v1_tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
119 119 static const char *const v2_tuple_format = PY23("Kiiiiiis#Ki", "Kiiiiiiy#Ki");
120 120 #else
121 121 static const char *const v1_tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
122 122 static const char *const v2_tuple_format = PY23("kiiiiiis#ki", "kiiiiiiy#ki");
123 123 #endif
124 124
125 125 /* A RevlogNG v1 index entry is 64 bytes long. */
126 126 static const long v1_hdrsize = 64;
127 127
128 128 /* A Revlogv2 index entry is 96 bytes long. */
129 129 static const long v2_hdrsize = 96;
130 130
131 131 static void raise_revlog_error(void)
132 132 {
133 133 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
134 134
135 135 mod = PyImport_ImportModule("mercurial.error");
136 136 if (mod == NULL) {
137 137 goto cleanup;
138 138 }
139 139
140 140 dict = PyModule_GetDict(mod);
141 141 if (dict == NULL) {
142 142 goto cleanup;
143 143 }
144 144 Py_INCREF(dict);
145 145
146 146 errclass = PyDict_GetItemString(dict, "RevlogError");
147 147 if (errclass == NULL) {
148 148 PyErr_SetString(PyExc_SystemError,
149 149 "could not find RevlogError");
150 150 goto cleanup;
151 151 }
152 152
153 153 /* value of exception is ignored by callers */
154 154 PyErr_SetString(errclass, "RevlogError");
155 155
156 156 cleanup:
157 157 Py_XDECREF(dict);
158 158 Py_XDECREF(mod);
159 159 }
160 160
161 161 /*
162 162 * Return a pointer to the beginning of a RevlogNG record.
163 163 */
164 164 static const char *index_deref(indexObject *self, Py_ssize_t pos)
165 165 {
166 166 if (pos >= self->length)
167 167 return self->added + (pos - self->length) * self->hdrsize;
168 168
169 169 if (self->inlined && pos > 0) {
170 170 if (self->offsets == NULL) {
171 171 Py_ssize_t ret;
172 172 self->offsets =
173 173 PyMem_Malloc(self->length * sizeof(*self->offsets));
174 174 if (self->offsets == NULL)
175 175 return (const char *)PyErr_NoMemory();
176 176 ret = inline_scan(self, self->offsets);
177 177 if (ret == -1) {
178 178 return NULL;
179 179 }
180 180 }
181 181 return self->offsets[pos];
182 182 }
183 183
184 184 return (const char *)(self->buf.buf) + pos * self->hdrsize;
185 185 }
186 186
187 187 /*
188 188 * Get parents of the given rev.
189 189 *
190 190 * The specified rev must be valid and must not be nullrev. A returned
191 191 * parent revision may be nullrev, but is guaranteed to be in valid range.
192 192 */
193 193 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
194 194 int maxrev)
195 195 {
196 196 const char *data = index_deref(self, rev);
197 197
198 198 ps[0] = getbe32(data + 24);
199 199 ps[1] = getbe32(data + 28);
200 200
201 201 /* If the index file is corrupted, ps[] may point to invalid revisions,
202 202 * so trusting them unconditionally risks a buffer overflow. */
203 203 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
204 204 PyErr_SetString(PyExc_ValueError, "parent out of range");
205 205 return -1;
206 206 }
207 207 return 0;
208 208 }
209 209
210 210 /*
211 211 * Get parents of the given rev.
212 212 *
213 213 * If the specified rev is out of range, IndexError will be raised. If the
214 214 * revlog entry is corrupted, ValueError may be raised.
215 215 *
216 216 * Returns 0 on success or -1 on failure.
217 217 */
218 218 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
219 219 {
220 220 int tiprev;
221 221 if (!op || !HgRevlogIndex_Check(op) || !ps) {
222 222 PyErr_BadInternalCall();
223 223 return -1;
224 224 }
225 225 tiprev = (int)index_length((indexObject *)op) - 1;
226 226 if (rev < -1 || rev > tiprev) {
227 227 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
228 228 return -1;
229 229 } else if (rev == -1) {
230 230 ps[0] = ps[1] = -1;
231 231 return 0;
232 232 } else {
233 233 return index_get_parents((indexObject *)op, rev, ps, tiprev);
234 234 }
235 235 }
236 236
237 237 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
238 238 {
239 239 const char *data;
240 240 uint64_t offset;
241 241
242 242 if (rev == nullrev)
243 243 return 0;
244 244
245 245 data = index_deref(self, rev);
246 246 offset = getbe32(data + 4);
247 247 if (rev == 0) {
248 248 /* mask out version number for the first entry */
249 249 offset &= 0xFFFF;
250 250 } else {
251 251 uint32_t offset_high = getbe32(data);
252 252 offset |= ((uint64_t)offset_high) << 32;
253 253 }
254 254 return (int64_t)(offset >> 16);
255 255 }
256 256
257 257 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
258 258 {
259 259 const char *data;
260 260 int tmp;
261 261
262 262 if (rev == nullrev)
263 263 return 0;
264 264
265 265 data = index_deref(self, rev);
266 266
267 267 tmp = (int)getbe32(data + 8);
268 268 if (tmp < 0) {
269 269 PyErr_Format(PyExc_OverflowError,
270 270 "revlog entry size out of bound (%d)", tmp);
271 271 return -1;
272 272 }
273 273 return tmp;
274 274 }
275 275
276 276 /*
277 277 * RevlogNG format (all in big endian, data may be inlined):
278 278 * 6 bytes: offset
279 279 * 2 bytes: flags
280 280 * 4 bytes: compressed length
281 281 * 4 bytes: uncompressed length
282 282 * 4 bytes: base revision
283 283 * 4 bytes: link revision
284 284 * 4 bytes: parent 1 revision
285 285 * 4 bytes: parent 2 revision
286 286 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
287 287 */
288 288 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
289 289 {
290 290 uint64_t offset_flags, sidedata_offset;
291 291 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
292 292 sidedata_comp_len;
293 293 const char *c_node_id;
294 294 const char *data;
295 295 Py_ssize_t length = index_length(self);
296 296
297 297 if (pos == nullrev) {
298 298 Py_INCREF(self->nullentry);
299 299 return self->nullentry;
300 300 }
301 301
302 302 if (pos < 0 || pos >= length) {
303 303 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
304 304 return NULL;
305 305 }
306 306
307 307 data = index_deref(self, pos);
308 308 if (data == NULL)
309 309 return NULL;
310 310
311 311 offset_flags = getbe32(data + 4);
312 312 /*
313 313 * The first entry on-disk needs the version number masked out,
314 314 * but this doesn't apply if entries are added to an empty index.
315 315 */
316 316 if (self->length && pos == 0)
317 317 offset_flags &= 0xFFFF;
318 318 else {
319 319 uint32_t offset_high = getbe32(data);
320 320 offset_flags |= ((uint64_t)offset_high) << 32;
321 321 }
322 322
323 323 comp_len = getbe32(data + 8);
324 324 uncomp_len = getbe32(data + 12);
325 325 base_rev = getbe32(data + 16);
326 326 link_rev = getbe32(data + 20);
327 327 parent_1 = getbe32(data + 24);
328 328 parent_2 = getbe32(data + 28);
329 329 c_node_id = data + 32;
330 330
331 331 if (self->hdrsize == v1_hdrsize) {
332 332 return Py_BuildValue(v1_tuple_format, offset_flags, comp_len,
333 333 uncomp_len, base_rev, link_rev, parent_1,
334 334 parent_2, c_node_id, self->nodelen);
335 335 } else {
336 336 sidedata_offset = getbe64(data + 64);
337 337 sidedata_comp_len = getbe32(data + 72);
338 338
339 339 return Py_BuildValue(v2_tuple_format, offset_flags, comp_len,
340 340 uncomp_len, base_rev, link_rev, parent_1,
341 341 parent_2, c_node_id, self->nodelen,
342 342 sidedata_offset, sidedata_comp_len);
343 343 }
344 344 }
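/*
 * Illustrative helpers (not in the original source): how the packed
 * 8-byte offset_flags value used above splits into its components,
 * per the 6-byte offset / 2-byte flags layout documented earlier.
 */
static inline int64_t entry_offset(uint64_t offset_flags)
{
	return (int64_t)(offset_flags >> 16); /* upper 6 bytes */
}
static inline uint16_t entry_flags(uint64_t offset_flags)
{
	return (uint16_t)(offset_flags & 0xFFFF); /* lower 2 bytes */
}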
345 345 /*
346 346 * Pack header information in binary
347 347 */
348 348 static PyObject *index_pack_header(indexObject *self, PyObject *args)
349 349 {
350 350 int header;
351 351 char out[4];
352 352 if (!PyArg_ParseTuple(args, "I", &header)) {
353 353 return NULL;
354 354 }
355 355 putbe32(header, out);
356 356 return PyBytes_FromStringAndSize(out, 4);
357 357 }
358 358 /*
359 359 * Return the raw binary string representing a revision
360 360 */
361 361 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
362 362 {
363 363 long rev;
364 364 const char *data;
365 365 Py_ssize_t length = index_length(self);
366 366
367 367 if (!pylong_to_long(value, &rev)) {
368 368 return NULL;
369 369 }
370 370 if (rev < 0 || rev >= length) {
371 371 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
372 372 rev);
373 373 return NULL;
374 374 }
375 375
376 376 data = index_deref(self, rev);
377 377 if (data == NULL)
378 378 return NULL;
379 379 if (rev == 0) {
380 380 /* the header overlaps the start of the first entry */
381 381 return PyBytes_FromStringAndSize(data + 4, self->hdrsize - 4);
382 382 }
383 383 return PyBytes_FromStringAndSize(data, self->hdrsize);
384 384 }
385 385
386 386 /*
387 387 * Return the hash of node corresponding to the given rev.
388 388 */
389 389 static const char *index_node(indexObject *self, Py_ssize_t pos)
390 390 {
391 391 Py_ssize_t length = index_length(self);
392 392 const char *data;
393 393
394 394 if (pos == nullrev)
395 395 return nullid;
396 396
397 397 if (pos >= length)
398 398 return NULL;
399 399
400 400 data = index_deref(self, pos);
401 401 return data ? data + 32 : NULL;
402 402 }
403 403
404 404 /*
405 405 * Return the hash of the node corresponding to the given rev. The
406 406 * rev is assumed to exist. If not, an exception is set.
407 407 */
408 408 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
409 409 {
410 410 const char *node = index_node(self, pos);
411 411 if (node == NULL) {
412 412 PyErr_Format(PyExc_IndexError, "could not access rev %d",
413 413 (int)pos);
414 414 }
415 415 return node;
416 416 }
417 417
418 418 static int nt_insert(nodetree *self, const char *node, int rev);
419 419
420 420 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
421 421 {
422 422 Py_ssize_t thisnodelen;
423 423 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
424 424 return -1;
425 425 if (nodelen == thisnodelen)
426 426 return 0;
427 427 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
428 428 thisnodelen, nodelen);
429 429 return -1;
430 430 }
431 431
432 432 static PyObject *index_append(indexObject *self, PyObject *obj)
433 433 {
434 434 uint64_t offset_flags, sidedata_offset;
435 435 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
436 436 Py_ssize_t c_node_id_len, sidedata_comp_len;
437 437 const char *c_node_id;
438 438 char *data;
439 439
440 440 if (self->hdrsize == v1_hdrsize) {
441 441 if (!PyArg_ParseTuple(obj, v1_tuple_format, &offset_flags,
442 442 &comp_len, &uncomp_len, &base_rev,
443 443 &link_rev, &parent_1, &parent_2,
444 444 &c_node_id, &c_node_id_len)) {
445 445 PyErr_SetString(PyExc_TypeError, "8-tuple required");
446 446 return NULL;
447 447 }
448 448 } else {
449 449 if (!PyArg_ParseTuple(obj, v2_tuple_format, &offset_flags,
450 450 &comp_len, &uncomp_len, &base_rev,
451 451 &link_rev, &parent_1, &parent_2,
452 452 &c_node_id, &c_node_id_len,
453 453 &sidedata_offset, &sidedata_comp_len)) {
454 454 PyErr_SetString(PyExc_TypeError, "10-tuple required");
455 455 return NULL;
456 456 }
457 457 }
458 458
459 459 if (c_node_id_len != self->nodelen) {
460 460 PyErr_SetString(PyExc_TypeError, "invalid node");
461 461 return NULL;
462 462 }
463 463
464 464 if (self->new_length == self->added_length) {
465 465 size_t new_added_length =
466 466 self->added_length ? self->added_length * 2 : 4096;
467 467 void *new_added = PyMem_Realloc(self->added, new_added_length *
468 468 self->hdrsize);
469 469 if (!new_added)
470 470 return PyErr_NoMemory();
471 471 self->added = new_added;
472 472 self->added_length = new_added_length;
473 473 }
474 474 rev = self->length + self->new_length;
475 475 data = self->added + self->hdrsize * self->new_length++;
476 476 putbe32(offset_flags >> 32, data);
477 477 putbe32(offset_flags & 0xffffffffU, data + 4);
478 478 putbe32(comp_len, data + 8);
479 479 putbe32(uncomp_len, data + 12);
480 480 putbe32(base_rev, data + 16);
481 481 putbe32(link_rev, data + 20);
482 482 putbe32(parent_1, data + 24);
483 483 putbe32(parent_2, data + 28);
484 484 memcpy(data + 32, c_node_id, c_node_id_len);
485 485 /* Padding since SHA-1 is only 20 bytes for now */
486 486 memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
487 487 if (self->hdrsize != v1_hdrsize) {
488 488 putbe64(sidedata_offset, data + 64);
489 489 putbe32(sidedata_comp_len, data + 72);
490 490 /* Padding for 96-byte alignment */
491 491 memset(data + 76, 0, self->hdrsize - 76);
492 492 }
493 493
494 494 if (self->ntinitialized)
495 495 nt_insert(&self->nt, c_node_id, rev);
496 496
497 497 Py_CLEAR(self->headrevs);
498 498 Py_RETURN_NONE;
499 499 }
500 500
501 501 /* Replace an existing index entry's sidedata offset and length with new ones.
502 502 This can only be used within the context of sidedata rewriting,
503 503 inside the transaction that creates the given revision. */
504 504 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
505 505 {
506 uint64_t sidedata_offset;
506 uint64_t offset_flags, sidedata_offset;
507 507 int rev;
508 508 Py_ssize_t sidedata_comp_len;
509 509 char *data;
510 510 #if LONG_MAX == 0x7fffffffL
511 const char *const sidedata_format = PY23("nKi", "nKi");
511 const char *const sidedata_format = PY23("nKiK", "nKiK");
512 512 #else
513 const char *const sidedata_format = PY23("nki", "nki");
513 const char *const sidedata_format = PY23("nkik", "nkik");
514 514 #endif
515 515
516 516 if (self->hdrsize == v1_hdrsize || self->inlined) {
517 517 /*
518 518 There is a bug in the transaction handling when going from an
519 519 inline revlog to a separate index and data file. Turn it off until
520 520 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
521 521 See issue6485.
522 522 */
523 523 raise_revlog_error();
524 524 return NULL;
525 525 }
526 526
527 527 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
528 &sidedata_comp_len))
528 &sidedata_comp_len, &offset_flags))
529 529 return NULL;
530 530
531 531 if (rev < 0 || rev >= index_length(self)) {
532 532 PyErr_SetString(PyExc_IndexError, "revision outside index");
533 533 return NULL;
534 534 }
535 535 if (rev < self->length) {
536 536 PyErr_SetString(
537 537 PyExc_IndexError,
538 538 "cannot rewrite entries outside of this transaction");
539 539 return NULL;
540 540 }
541 541
542 542 /* Find the newly added node, offset from the "already on-disk" length
543 543 */
544 544 data = self->added + self->hdrsize * (rev - self->length);
545 putbe64(offset_flags, data);
545 546 putbe64(sidedata_offset, data + 64);
546 547 putbe32(sidedata_comp_len, data + 72);
547 548
548 549 Py_RETURN_NONE;
549 550 }
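/*
 * Illustrative note (not in the original source): with the
 * offset_flags argument added by this change, a Python-level caller
 * now supplies four values, roughly:
 *
 *     index.replace_sidedata_info(rev, sidedata_offset,
 *                                 sidedata_comp_len, offset_flags)
 *
 * assuming the function is exposed under that name in the method
 * table, which lies outside this hunk.
 */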
550 551
551 552 static PyObject *index_stats(indexObject *self)
552 553 {
553 554 PyObject *obj = PyDict_New();
554 555 PyObject *s = NULL;
555 556 PyObject *t = NULL;
556 557
557 558 if (obj == NULL)
558 559 return NULL;
559 560
560 561 #define istat(__n, __d) \
561 562 do { \
562 563 s = PyBytes_FromString(__d); \
563 564 t = PyInt_FromSsize_t(self->__n); \
564 565 if (!s || !t) \
565 566 goto bail; \
566 567 if (PyDict_SetItem(obj, s, t) == -1) \
567 568 goto bail; \
568 569 Py_CLEAR(s); \
569 570 Py_CLEAR(t); \
570 571 } while (0)
571 572
572 573 if (self->added_length)
573 574 istat(new_length, "index entries added");
574 575 istat(length, "revs in memory");
575 576 istat(ntlookups, "node trie lookups");
576 577 istat(ntmisses, "node trie misses");
577 578 istat(ntrev, "node trie last rev scanned");
578 579 if (self->ntinitialized) {
579 580 istat(nt.capacity, "node trie capacity");
580 581 istat(nt.depth, "node trie depth");
581 582 istat(nt.length, "node trie count");
582 583 istat(nt.splits, "node trie splits");
583 584 }
584 585
585 586 #undef istat
586 587
587 588 return obj;
588 589
589 590 bail:
590 591 Py_XDECREF(obj);
591 592 Py_XDECREF(s);
592 593 Py_XDECREF(t);
593 594 return NULL;
594 595 }
595 596
596 597 /*
597 598 * When we cache a list, we want to be sure the caller can't mutate
598 599 * the cached copy.
599 600 */
600 601 static PyObject *list_copy(PyObject *list)
601 602 {
602 603 Py_ssize_t len = PyList_GET_SIZE(list);
603 604 PyObject *newlist = PyList_New(len);
604 605 Py_ssize_t i;
605 606
606 607 if (newlist == NULL)
607 608 return NULL;
608 609
609 610 for (i = 0; i < len; i++) {
610 611 PyObject *obj = PyList_GET_ITEM(list, i);
611 612 Py_INCREF(obj);
612 613 PyList_SET_ITEM(newlist, i, obj);
613 614 }
614 615
615 616 return newlist;
616 617 }
617 618
618 619 static int check_filter(PyObject *filter, Py_ssize_t arg)
619 620 {
620 621 if (filter) {
621 622 PyObject *arglist, *result;
622 623 int isfiltered;
623 624
624 625 arglist = Py_BuildValue("(n)", arg);
625 626 if (!arglist) {
626 627 return -1;
627 628 }
628 629
629 630 result = PyObject_Call(filter, arglist, NULL);
630 631 Py_DECREF(arglist);
631 632 if (!result) {
632 633 return -1;
633 634 }
634 635
635 636 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
636 637 * same as this function, so we can just return it directly. */
637 638 isfiltered = PyObject_IsTrue(result);
638 639 Py_DECREF(result);
639 640 return isfiltered;
640 641 } else {
641 642 return 0;
642 643 }
643 644 }
644 645
645 646 static inline void set_phase_from_parents(char *phases, int parent_1,
646 647 int parent_2, Py_ssize_t i)
647 648 {
648 649 if (parent_1 >= 0 && phases[parent_1] > phases[i])
649 650 phases[i] = phases[parent_1];
650 651 if (parent_2 >= 0 && phases[parent_2] > phases[i])
651 652 phases[i] = phases[parent_2];
652 653 }
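/*
 * Illustrative example (not in the original source): with parent_1 in
 * draft (1) and parent_2 in secret (2), a public (0) child is raised
 * to secret, since a revision is at least as private as either of its
 * parents.
 */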
653 654
654 655 static PyObject *reachableroots2(indexObject *self, PyObject *args)
655 656 {
656 657
657 658 /* Input */
658 659 long minroot;
659 660 PyObject *includepatharg = NULL;
660 661 int includepath = 0;
661 662 /* heads and roots are lists */
662 663 PyObject *heads = NULL;
663 664 PyObject *roots = NULL;
664 665 PyObject *reachable = NULL;
665 666
666 667 PyObject *val;
667 668 Py_ssize_t len = index_length(self);
668 669 long revnum;
669 670 Py_ssize_t k;
670 671 Py_ssize_t i;
671 672 Py_ssize_t l;
672 673 int r;
673 674 int parents[2];
674 675
675 676 /* Internal data structure:
676 677 * tovisit: array of length len+1 (all revs + nullrev), filled up to
677 678 * lentovisit
678 679 *
679 680 * revstates: array of length len+1 (all revs + nullrev) */
680 681 int *tovisit = NULL;
681 682 long lentovisit = 0;
682 683 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
683 684 char *revstates = NULL;
684 685
685 686 /* Get arguments */
686 687 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
687 688 &PyList_Type, &roots, &PyBool_Type,
688 689 &includepatharg))
689 690 goto bail;
690 691
691 692 if (includepatharg == Py_True)
692 693 includepath = 1;
693 694
694 695 /* Initialize return set */
695 696 reachable = PyList_New(0);
696 697 if (reachable == NULL)
697 698 goto bail;
698 699
699 700 /* Initialize internal data structures */
700 701 tovisit = (int *)malloc((len + 1) * sizeof(int));
701 702 if (tovisit == NULL) {
702 703 PyErr_NoMemory();
703 704 goto bail;
704 705 }
705 706
706 707 revstates = (char *)calloc(len + 1, 1);
707 708 if (revstates == NULL) {
708 709 PyErr_NoMemory();
709 710 goto bail;
710 711 }
711 712
712 713 l = PyList_GET_SIZE(roots);
713 714 for (i = 0; i < l; i++) {
714 715 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
715 716 if (revnum == -1 && PyErr_Occurred())
716 717 goto bail;
717 718 /* If root is out of range, e.g. wdir(), it must be unreachable
718 719 * from heads. So we can just ignore it. */
719 720 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
720 721 continue;
721 722 revstates[revnum + 1] |= RS_ROOT;
722 723 }
723 724
724 725 /* Populate tovisit with all the heads */
725 726 l = PyList_GET_SIZE(heads);
726 727 for (i = 0; i < l; i++) {
727 728 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
728 729 if (revnum == -1 && PyErr_Occurred())
729 730 goto bail;
730 731 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
731 732 PyErr_SetString(PyExc_IndexError, "head out of range");
732 733 goto bail;
733 734 }
734 735 if (!(revstates[revnum + 1] & RS_SEEN)) {
735 736 tovisit[lentovisit++] = (int)revnum;
736 737 revstates[revnum + 1] |= RS_SEEN;
737 738 }
738 739 }
739 740
740 741 /* Visit the tovisit list and find the reachable roots */
741 742 k = 0;
742 743 while (k < lentovisit) {
743 744 /* Add the node to reachable if it is a root */
744 745 revnum = tovisit[k++];
745 746 if (revstates[revnum + 1] & RS_ROOT) {
746 747 revstates[revnum + 1] |= RS_REACHABLE;
747 748 val = PyInt_FromLong(revnum);
748 749 if (val == NULL)
749 750 goto bail;
750 751 r = PyList_Append(reachable, val);
751 752 Py_DECREF(val);
752 753 if (r < 0)
753 754 goto bail;
754 755 if (includepath == 0)
755 756 continue;
756 757 }
757 758
758 759 /* Add its parents to the list of nodes to visit */
759 760 if (revnum == nullrev)
760 761 continue;
761 762 r = index_get_parents(self, revnum, parents, (int)len - 1);
762 763 if (r < 0)
763 764 goto bail;
764 765 for (i = 0; i < 2; i++) {
765 766 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
766 767 parents[i] >= minroot) {
767 768 tovisit[lentovisit++] = parents[i];
768 769 revstates[parents[i] + 1] |= RS_SEEN;
769 770 }
770 771 }
771 772 }
772 773
773 774 /* Find all the nodes in between the roots we found and the heads
774 775 * and add them to the reachable set */
775 776 if (includepath == 1) {
776 777 long minidx = minroot;
777 778 if (minidx < 0)
778 779 minidx = 0;
779 780 for (i = minidx; i < len; i++) {
780 781 if (!(revstates[i + 1] & RS_SEEN))
781 782 continue;
782 783 r = index_get_parents(self, i, parents, (int)len - 1);
783 784 /* Corrupted index file, error is set from
784 785 * index_get_parents */
785 786 if (r < 0)
786 787 goto bail;
787 788 if (((revstates[parents[0] + 1] |
788 789 revstates[parents[1] + 1]) &
789 790 RS_REACHABLE) &&
790 791 !(revstates[i + 1] & RS_REACHABLE)) {
791 792 revstates[i + 1] |= RS_REACHABLE;
792 793 val = PyInt_FromSsize_t(i);
793 794 if (val == NULL)
794 795 goto bail;
795 796 r = PyList_Append(reachable, val);
796 797 Py_DECREF(val);
797 798 if (r < 0)
798 799 goto bail;
799 800 }
800 801 }
801 802 }
802 803
803 804 free(revstates);
804 805 free(tovisit);
805 806 return reachable;
806 807 bail:
807 808 Py_XDECREF(reachable);
808 809 free(revstates);
809 810 free(tovisit);
810 811 return NULL;
811 812 }
812 813
813 814 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
814 815 char phase)
815 816 {
816 817 Py_ssize_t len = index_length(self);
817 818 PyObject *item;
818 819 PyObject *iterator;
819 820 int rev, minrev = -1;
820 821 char *node;
821 822
822 823 if (!PySet_Check(roots)) {
823 824 PyErr_SetString(PyExc_TypeError,
824 825 "roots must be a set of nodes");
825 826 return -2;
826 827 }
827 828 iterator = PyObject_GetIter(roots);
828 829 if (iterator == NULL)
829 830 return -2;
830 831 while ((item = PyIter_Next(iterator))) {
831 832 if (node_check(self->nodelen, item, &node) == -1)
832 833 goto failed;
833 834 rev = index_find_node(self, node);
834 835 /* null is implicitly public, so negative is invalid */
835 836 if (rev < 0 || rev >= len)
836 837 goto failed;
837 838 phases[rev] = phase;
838 839 if (minrev == -1 || minrev > rev)
839 840 minrev = rev;
840 841 Py_DECREF(item);
841 842 }
842 843 Py_DECREF(iterator);
843 844 return minrev;
844 845 failed:
845 846 Py_DECREF(iterator);
846 847 Py_DECREF(item);
847 848 return -2;
848 849 }
849 850
850 851 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
851 852 {
852 853 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
853 854 96: internal */
854 855 static const char trackedphases[] = {1, 2, 32, 96};
855 856 PyObject *roots = Py_None;
856 857 PyObject *phasesetsdict = NULL;
857 858 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
858 859 Py_ssize_t len = index_length(self);
859 860 char *phases = NULL;
860 861 int minphaserev = -1, rev, i;
861 862 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
862 863
863 864 if (!PyArg_ParseTuple(args, "O", &roots))
864 865 return NULL;
865 866 if (roots == NULL || !PyDict_Check(roots)) {
866 867 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
867 868 return NULL;
868 869 }
869 870
870 871 phases = calloc(len, 1);
871 872 if (phases == NULL) {
872 873 PyErr_NoMemory();
873 874 return NULL;
874 875 }
875 876
876 877 for (i = 0; i < numphases; ++i) {
877 878 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
878 879 PyObject *phaseroots = NULL;
879 880 if (pyphase == NULL)
880 881 goto release;
881 882 phaseroots = PyDict_GetItem(roots, pyphase);
882 883 Py_DECREF(pyphase);
883 884 if (phaseroots == NULL)
884 885 continue;
885 886 rev = add_roots_get_min(self, phaseroots, phases,
886 887 trackedphases[i]);
887 888 if (rev == -2)
888 889 goto release;
889 890 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
890 891 minphaserev = rev;
891 892 }
892 893
893 894 for (i = 0; i < numphases; ++i) {
894 895 phasesets[i] = PySet_New(NULL);
895 896 if (phasesets[i] == NULL)
896 897 goto release;
897 898 }
898 899
899 900 if (minphaserev == -1)
900 901 minphaserev = len;
901 902 for (rev = minphaserev; rev < len; ++rev) {
902 903 PyObject *pyphase = NULL;
903 904 PyObject *pyrev = NULL;
904 905 int parents[2];
905 906 /*
906 907 * The parent lookup could be skipped for phaseroots, but
907 908 * phase --force would historically not recompute them
908 909 * correctly, leaving descendants with a lower phase around.
909 910 * As such, unconditionally recompute the phase.
910 911 */
911 912 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
912 913 goto release;
913 914 set_phase_from_parents(phases, parents[0], parents[1], rev);
914 915 switch (phases[rev]) {
915 916 case 0:
916 917 continue;
917 918 case 1:
918 919 pyphase = phasesets[0];
919 920 break;
920 921 case 2:
921 922 pyphase = phasesets[1];
922 923 break;
923 924 case 32:
924 925 pyphase = phasesets[2];
925 926 break;
926 927 case 96:
927 928 pyphase = phasesets[3];
928 929 break;
929 930 default:
930 931 /* this should never happen since the phase number is
931 932 * specified by this function. */
932 933 PyErr_SetString(PyExc_SystemError,
933 934 "bad phase number in internal list");
934 935 goto release;
935 936 }
936 937 pyrev = PyInt_FromLong(rev);
937 938 if (pyrev == NULL)
938 939 goto release;
939 940 if (PySet_Add(pyphase, pyrev) == -1) {
940 941 Py_DECREF(pyrev);
941 942 goto release;
942 943 }
943 944 Py_DECREF(pyrev);
944 945 }
945 946
946 947 phasesetsdict = _dict_new_presized(numphases);
947 948 if (phasesetsdict == NULL)
948 949 goto release;
949 950 for (i = 0; i < numphases; ++i) {
950 951 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
951 952 if (pyphase == NULL)
952 953 goto release;
953 954 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
954 955 -1) {
955 956 Py_DECREF(pyphase);
956 957 goto release;
957 958 }
958 959 Py_DECREF(phasesets[i]);
959 960 phasesets[i] = NULL;
960 961 }
961 962
962 963 return Py_BuildValue("nN", len, phasesetsdict);
963 964
964 965 release:
965 966 for (i = 0; i < numphases; ++i)
966 967 Py_XDECREF(phasesets[i]);
967 968 Py_XDECREF(phasesetsdict);
968 969
969 970 free(phases);
970 971 return NULL;
971 972 }
972 973
973 974 static PyObject *index_headrevs(indexObject *self, PyObject *args)
974 975 {
975 976 Py_ssize_t i, j, len;
976 977 char *nothead = NULL;
977 978 PyObject *heads = NULL;
978 979 PyObject *filter = NULL;
979 980 PyObject *filteredrevs = Py_None;
980 981
981 982 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
982 983 return NULL;
983 984 }
984 985
985 986 if (self->headrevs && filteredrevs == self->filteredrevs)
986 987 return list_copy(self->headrevs);
987 988
988 989 Py_DECREF(self->filteredrevs);
989 990 self->filteredrevs = filteredrevs;
990 991 Py_INCREF(filteredrevs);
991 992
992 993 if (filteredrevs != Py_None) {
993 994 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
994 995 if (!filter) {
995 996 PyErr_SetString(
996 997 PyExc_TypeError,
997 998 "filteredrevs has no attribute __contains__");
998 999 goto bail;
999 1000 }
1000 1001 }
1001 1002
1002 1003 len = index_length(self);
1003 1004 heads = PyList_New(0);
1004 1005 if (heads == NULL)
1005 1006 goto bail;
1006 1007 if (len == 0) {
1007 1008 PyObject *nullid = PyInt_FromLong(-1);
1008 1009 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1009 1010 Py_XDECREF(nullid);
1010 1011 goto bail;
1011 1012 }
1012 1013 goto done;
1013 1014 }
1014 1015
1015 1016 nothead = calloc(len, 1);
1016 1017 if (nothead == NULL) {
1017 1018 PyErr_NoMemory();
1018 1019 goto bail;
1019 1020 }
1020 1021
1021 1022 for (i = len - 1; i >= 0; i--) {
1022 1023 int isfiltered;
1023 1024 int parents[2];
1024 1025
1025 1026 /* If nothead[i] == 1, it means we've seen an unfiltered child
1026 1027 * of this node already, and therefore this node is not
1027 1028 * filtered. So we can skip the expensive check_filter step.
1028 1029 */
1029 1030 if (nothead[i] != 1) {
1030 1031 isfiltered = check_filter(filter, i);
1031 1032 if (isfiltered == -1) {
1032 1033 PyErr_SetString(PyExc_TypeError,
1033 1034 "unable to check filter");
1034 1035 goto bail;
1035 1036 }
1036 1037
1037 1038 if (isfiltered) {
1038 1039 nothead[i] = 1;
1039 1040 continue;
1040 1041 }
1041 1042 }
1042 1043
1043 1044 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1044 1045 goto bail;
1045 1046 for (j = 0; j < 2; j++) {
1046 1047 if (parents[j] >= 0)
1047 1048 nothead[parents[j]] = 1;
1048 1049 }
1049 1050 }
1050 1051
1051 1052 for (i = 0; i < len; i++) {
1052 1053 PyObject *head;
1053 1054
1054 1055 if (nothead[i])
1055 1056 continue;
1056 1057 head = PyInt_FromSsize_t(i);
1057 1058 if (head == NULL || PyList_Append(heads, head) == -1) {
1058 1059 Py_XDECREF(head);
1059 1060 goto bail;
1060 1061 }
1061 1062 }
1062 1063
1063 1064 done:
1064 1065 self->headrevs = heads;
1065 1066 Py_XDECREF(filter);
1066 1067 free(nothead);
1067 1068 return list_copy(self->headrevs);
1068 1069 bail:
1069 1070 Py_XDECREF(filter);
1070 1071 Py_XDECREF(heads);
1071 1072 free(nothead);
1072 1073 return NULL;
1073 1074 }
1074 1075
1075 1076 /**
1076 1077 * Obtain the base revision index entry.
1077 1078 *
1078 1079 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1079 1080 */
1080 1081 static inline int index_baserev(indexObject *self, int rev)
1081 1082 {
1082 1083 const char *data;
1083 1084 int result;
1084 1085
1085 1086 data = index_deref(self, rev);
1086 1087 if (data == NULL)
1087 1088 return -2;
1088 1089 result = getbe32(data + 16);
1089 1090
1090 1091 if (result > rev) {
1091 1092 PyErr_Format(
1092 1093 PyExc_ValueError,
1093 1094 "corrupted revlog, revision base above revision: %d, %d",
1094 1095 rev, result);
1095 1096 return -2;
1096 1097 }
1097 1098 if (result < -1) {
1098 1099 PyErr_Format(
1099 1100 PyExc_ValueError,
1100 1101 "corrupted revlog, revision base out of range: %d, %d", rev,
1101 1102 result);
1102 1103 return -2;
1103 1104 }
1104 1105 return result;
1105 1106 }
1106 1107
1107 1108 /**
1108 1109 * Determine whether a revision is a snapshot
1109 1110 *
1110 1111 * Only relevant for sparse-revlog case.
1111 1112 * Callers must ensure that rev is in a valid range.
1112 1113 */
1113 1114 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1114 1115 {
1115 1116 int ps[2];
1116 1117 Py_ssize_t base;
1117 1118 while (rev >= 0) {
1118 1119 base = (Py_ssize_t)index_baserev(self, rev);
1119 1120 if (base == rev) {
1120 1121 base = -1;
1121 1122 }
1122 1123 if (base == -2) {
1123 1124 assert(PyErr_Occurred());
1124 1125 return -1;
1125 1126 }
1126 1127 if (base == -1) {
1127 1128 return 1;
1128 1129 }
1129 1130 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1130 1131 assert(PyErr_Occurred());
1131 1132 return -1;
1132 1133 }
1133 1134 if (base == ps[0] || base == ps[1]) {
1134 1135 return 0;
1135 1136 }
1136 1137 rev = base;
1137 1138 }
1138 1139 return rev == -1;
1139 1140 }
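/*
 * Illustrative example (not in the original source): if rev 7 deltas
 * against base 4 and rev 4 against -1 (a full snapshot), and neither
 * base is also a parent of its rev, both 4 and 7 count as snapshots;
 * a delta taken against a parent yields a regular delta instead.
 */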
1140 1141
1141 1142 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1142 1143 {
1143 1144 long rev;
1144 1145 int issnap;
1145 1146 Py_ssize_t length = index_length(self);
1146 1147
1147 1148 if (!pylong_to_long(value, &rev)) {
1148 1149 return NULL;
1149 1150 }
1150 1151 if (rev < -1 || rev >= length) {
1151 1152 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1152 1153 rev);
1153 1154 return NULL;
1154 1155 }
1155 1156 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1156 1157 if (issnap < 0) {
1157 1158 return NULL;
1158 1159 }
1159 1160 return PyBool_FromLong((long)issnap);
1160 1161 }
1161 1162
1162 1163 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1163 1164 {
1164 1165 Py_ssize_t start_rev;
1165 1166 PyObject *cache;
1166 1167 Py_ssize_t base;
1167 1168 Py_ssize_t rev;
1168 1169 PyObject *key = NULL;
1169 1170 PyObject *value = NULL;
1170 1171 const Py_ssize_t length = index_length(self);
1171 1172 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1172 1173 return NULL;
1173 1174 }
1174 1175 for (rev = start_rev; rev < length; rev++) {
1175 1176 int issnap;
1176 1177 PyObject *allvalues = NULL;
1177 1178 issnap = index_issnapshotrev(self, rev);
1178 1179 if (issnap < 0) {
1179 1180 goto bail;
1180 1181 }
1181 1182 if (issnap == 0) {
1182 1183 continue;
1183 1184 }
1184 1185 base = (Py_ssize_t)index_baserev(self, rev);
1185 1186 if (base == rev) {
1186 1187 base = -1;
1187 1188 }
1188 1189 if (base == -2) {
1189 1190 assert(PyErr_Occurred());
1190 1191 goto bail;
1191 1192 }
1192 1193 key = PyInt_FromSsize_t(base);
1193 1194 allvalues = PyDict_GetItem(cache, key);
1194 1195 if (allvalues == NULL && PyErr_Occurred()) {
1195 1196 goto bail;
1196 1197 }
1197 1198 if (allvalues == NULL) {
1198 1199 int r;
1199 1200 allvalues = PyList_New(0);
1200 1201 if (!allvalues) {
1201 1202 goto bail;
1202 1203 }
1203 1204 r = PyDict_SetItem(cache, key, allvalues);
1204 1205 Py_DECREF(allvalues);
1205 1206 if (r < 0) {
1206 1207 goto bail;
1207 1208 }
1208 1209 }
1209 1210 value = PyInt_FromSsize_t(rev);
1210 1211 if (PyList_Append(allvalues, value)) {
1211 1212 goto bail;
1212 1213 }
1213 1214 Py_CLEAR(key);
1214 1215 Py_CLEAR(value);
1215 1216 }
1216 1217 Py_RETURN_NONE;
1217 1218 bail:
1218 1219 Py_XDECREF(key);
1219 1220 Py_XDECREF(value);
1220 1221 return NULL;
1221 1222 }
1222 1223
1223 1224 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1224 1225 {
1225 1226 int rev, generaldelta;
1226 1227 PyObject *stoparg;
1227 1228 int stoprev, iterrev, baserev = -1;
1228 1229 int stopped;
1229 1230 PyObject *chain = NULL, *result = NULL;
1230 1231 const Py_ssize_t length = index_length(self);
1231 1232
1232 1233 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1233 1234 return NULL;
1234 1235 }
1235 1236
1236 1237 if (PyInt_Check(stoparg)) {
1237 1238 stoprev = (int)PyInt_AsLong(stoparg);
1238 1239 if (stoprev == -1 && PyErr_Occurred()) {
1239 1240 return NULL;
1240 1241 }
1241 1242 } else if (stoparg == Py_None) {
1242 1243 stoprev = -2;
1243 1244 } else {
1244 1245 PyErr_SetString(PyExc_ValueError,
1245 1246 "stoprev must be integer or None");
1246 1247 return NULL;
1247 1248 }
1248 1249
1249 1250 if (rev < 0 || rev >= length) {
1250 1251 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1251 1252 return NULL;
1252 1253 }
1253 1254
1254 1255 chain = PyList_New(0);
1255 1256 if (chain == NULL) {
1256 1257 return NULL;
1257 1258 }
1258 1259
1259 1260 baserev = index_baserev(self, rev);
1260 1261
1261 1262 /* This should never happen. */
1262 1263 if (baserev <= -2) {
1263 1264 /* Error should be set by index_deref() */
1264 1265 assert(PyErr_Occurred());
1265 1266 goto bail;
1266 1267 }
1267 1268
1268 1269 iterrev = rev;
1269 1270
1270 1271 while (iterrev != baserev && iterrev != stoprev) {
1271 1272 PyObject *value = PyInt_FromLong(iterrev);
1272 1273 if (value == NULL) {
1273 1274 goto bail;
1274 1275 }
1275 1276 if (PyList_Append(chain, value)) {
1276 1277 Py_DECREF(value);
1277 1278 goto bail;
1278 1279 }
1279 1280 Py_DECREF(value);
1280 1281
1281 1282 if (generaldelta) {
1282 1283 iterrev = baserev;
1283 1284 } else {
1284 1285 iterrev--;
1285 1286 }
1286 1287
1287 1288 if (iterrev < 0) {
1288 1289 break;
1289 1290 }
1290 1291
1291 1292 if (iterrev >= length) {
1292 1293 PyErr_SetString(PyExc_IndexError,
1293 1294 "revision outside index");
1294 1295 goto bail;
1295 1296 }
1296 1297
1297 1298 baserev = index_baserev(self, iterrev);
1298 1299
1299 1300 /* This should never happen. */
1300 1301 if (baserev <= -2) {
1301 1302 /* Error should be set by index_deref() */
1302 1303 assert(PyErr_Occurred());
1303 1304 goto bail;
1304 1305 }
1305 1306 }
1306 1307
1307 1308 if (iterrev == stoprev) {
1308 1309 stopped = 1;
1309 1310 } else {
1310 1311 PyObject *value = PyInt_FromLong(iterrev);
1311 1312 if (value == NULL) {
1312 1313 goto bail;
1313 1314 }
1314 1315 if (PyList_Append(chain, value)) {
1315 1316 Py_DECREF(value);
1316 1317 goto bail;
1317 1318 }
1318 1319 Py_DECREF(value);
1319 1320
1320 1321 stopped = 0;
1321 1322 }
1322 1323
1323 1324 if (PyList_Reverse(chain)) {
1324 1325 goto bail;
1325 1326 }
1326 1327
1327 1328 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1328 1329 Py_DECREF(chain);
1329 1330 return result;
1330 1331
1331 1332 bail:
1332 1333 Py_DECREF(chain);
1333 1334 return NULL;
1334 1335 }
1335 1336
1336 1337 static inline int64_t
1337 1338 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1338 1339 {
1339 1340 int64_t start_offset;
1340 1341 int64_t end_offset;
1341 1342 int end_size;
1342 1343 start_offset = index_get_start(self, start_rev);
1343 1344 if (start_offset < 0) {
1344 1345 return -1;
1345 1346 }
1346 1347 end_offset = index_get_start(self, end_rev);
1347 1348 if (end_offset < 0) {
1348 1349 return -1;
1349 1350 }
1350 1351 end_size = index_get_length(self, end_rev);
1351 1352 if (end_size < 0) {
1352 1353 return -1;
1353 1354 }
1354 1355 if (end_offset < start_offset) {
1355 1356 PyErr_Format(PyExc_ValueError,
1356 1357 "corrupted revlog index: inconsistent offset "
1357 1358 "between revisions (%zd) and (%zd)",
1358 1359 start_rev, end_rev);
1359 1360 return -1;
1360 1361 }
1361 1362 return (end_offset - start_offset) + (int64_t)end_size;
1362 1363 }
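/*
 * Illustrative example (not in the original source): with
 * start_offset == 100, end_offset == 400 and end_size == 50, the
 * segment covers bytes [100, 450), i.e. a span of 350.
 */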
1363 1364
1364 1365 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1365 1366 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1366 1367 Py_ssize_t startidx, Py_ssize_t endidx)
1367 1368 {
1368 1369 int length;
1369 1370 while (endidx > 1 && endidx > startidx) {
1370 1371 length = index_get_length(self, revs[endidx - 1]);
1371 1372 if (length < 0) {
1372 1373 return -1;
1373 1374 }
1374 1375 if (length != 0) {
1375 1376 break;
1376 1377 }
1377 1378 endidx -= 1;
1378 1379 }
1379 1380 return endidx;
1380 1381 }
1381 1382
1382 1383 struct Gap {
1383 1384 int64_t size;
1384 1385 Py_ssize_t idx;
1385 1386 };
1386 1387
1387 1388 static int gap_compare(const void *left, const void *right)
1388 1389 {
1389 1390 const struct Gap *l_left = ((const struct Gap *)left);
1390 1391 const struct Gap *l_right = ((const struct Gap *)right);
1391 1392 if (l_left->size < l_right->size) {
1392 1393 return -1;
1393 1394 } else if (l_left->size > l_right->size) {
1394 1395 return 1;
1395 1396 }
1396 1397 return 0;
1397 1398 }
1398 1399 static int Py_ssize_t_compare(const void *left, const void *right)
1399 1400 {
1400 1401 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1401 1402 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1402 1403 if (l_left < l_right) {
1403 1404 return -1;
1404 1405 } else if (l_left > l_right) {
1405 1406 return 1;
1406 1407 }
1407 1408 return 0;
1408 1409 }
1409 1410
1410 1411 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1411 1412 {
1412 1413 /* method arguments */
1413 1414 PyObject *list_revs = NULL; /* revisions in the chain */
1414 1415 double targetdensity = 0; /* min density to achieve */
1415 1416 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1416 1417
1417 1418 /* other core variables */
1418 1419 Py_ssize_t idxlen = index_length(self);
1419 1420 Py_ssize_t i; /* used for various iteration */
1420 1421 PyObject *result = NULL; /* the final return of the function */
1421 1422
1422 1423 /* generic information about the delta chain being sliced */
1423 1424 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1424 1425 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1425 1426 int64_t chainpayload = 0; /* sum of all delta in the chain */
1426 1427 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1427 1428
1428 1429 /* variable used for slicing the delta chain */
1429 1430 int64_t readdata = 0; /* amount of data currently planned to be read */
1430 1431 double density = 0; /* ratio of payload data to data read */
1431 1432 int64_t previous_end;
1432 1433 struct Gap *gaps = NULL; /* array of notable gaps in the chain */
1433 1434 Py_ssize_t num_gaps =
1434 1435 0; /* total number of notable gaps recorded so far */
1435 1436 Py_ssize_t *selected_indices = NULL; /* indices of gaps skipped over */
1436 1437 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1437 1438 PyObject *chunk = NULL; /* individual slice */
1438 1439 PyObject *allchunks = NULL; /* all slices */
1439 1440 Py_ssize_t previdx;
1440 1441
1441 1442 /* parsing argument */
1442 1443 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1443 1444 &targetdensity, &mingapsize)) {
1444 1445 goto bail;
1445 1446 }
1446 1447
1447 1448 /* If the delta chain contains a single element, we do not need slicing
1448 1449 */
1449 1450 num_revs = PyList_GET_SIZE(list_revs);
1450 1451 if (num_revs <= 1) {
1451 1452 result = PyTuple_Pack(1, list_revs);
1452 1453 goto done;
1453 1454 }
1454 1455
1455 1456 /* Turn the python list into a native integer array (for efficiency) */
1456 1457 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1457 1458 if (revs == NULL) {
1458 1459 PyErr_NoMemory();
1459 1460 goto bail;
1460 1461 }
1461 1462 for (i = 0; i < num_revs; i++) {
1462 1463 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1463 1464 if (revnum == -1 && PyErr_Occurred()) {
1464 1465 goto bail;
1465 1466 }
1466 1467 if (revnum < nullrev || revnum >= idxlen) {
1467 1468 PyErr_Format(PyExc_IndexError,
1468 1469 "index out of range: %zd", revnum);
1469 1470 goto bail;
1470 1471 }
1471 1472 revs[i] = revnum;
1472 1473 }
1473 1474
1474 1475 /* Compute and check various properties of the unsliced delta chain */
1475 1476 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1476 1477 if (deltachainspan < 0) {
1477 1478 goto bail;
1478 1479 }
1479 1480
1480 1481 if (deltachainspan <= mingapsize) {
1481 1482 result = PyTuple_Pack(1, list_revs);
1482 1483 goto done;
1483 1484 }
1484 1485 chainpayload = 0;
1485 1486 for (i = 0; i < num_revs; i++) {
1486 1487 int tmp = index_get_length(self, revs[i]);
1487 1488 if (tmp < 0) {
1488 1489 goto bail;
1489 1490 }
1490 1491 chainpayload += tmp;
1491 1492 }
1492 1493
1493 1494 readdata = deltachainspan;
1494 1495 density = 1.0;
1495 1496
1496 1497 if (0 < deltachainspan) {
1497 1498 density = (double)chainpayload / (double)deltachainspan;
1498 1499 }
1499 1500
1500 1501 if (density >= targetdensity) {
1501 1502 result = PyTuple_Pack(1, list_revs);
1502 1503 goto done;
1503 1504 }
1504 1505
1505 1506 /* if chain is too sparse, look for relevant gaps */
1506 1507 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1507 1508 if (gaps == NULL) {
1508 1509 PyErr_NoMemory();
1509 1510 goto bail;
1510 1511 }
1511 1512
1512 1513 previous_end = -1;
1513 1514 for (i = 0; i < num_revs; i++) {
1514 1515 int64_t revstart;
1515 1516 int revsize;
1516 1517 revstart = index_get_start(self, revs[i]);
1517 1518 if (revstart < 0) {
1518 1519 goto bail;
1519 1520 }
1520 1521 revsize = index_get_length(self, revs[i]);
1521 1522 if (revsize < 0) {
1522 1523 goto bail;
1523 1524 }
1524 1525 if (revsize == 0) {
1525 1526 continue;
1526 1527 }
1527 1528 if (previous_end >= 0) {
1528 1529 int64_t gapsize = revstart - previous_end;
1529 1530 if (gapsize > mingapsize) {
1530 1531 gaps[num_gaps].size = gapsize;
1531 1532 gaps[num_gaps].idx = i;
1532 1533 num_gaps += 1;
1533 1534 }
1534 1535 }
1535 1536 previous_end = revstart + revsize;
1536 1537 }
1537 1538 if (num_gaps == 0) {
1538 1539 result = PyTuple_Pack(1, list_revs);
1539 1540 goto done;
1540 1541 }
1541 1542 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1542 1543
1543 1544 /* Slice the largest gaps first, as they improve the density the most */
1544 1545 selected_indices =
1545 1546 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1546 1547 if (selected_indices == NULL) {
1547 1548 PyErr_NoMemory();
1548 1549 goto bail;
1549 1550 }
1550 1551
1551 1552 for (i = num_gaps - 1; i >= 0; i--) {
1552 1553 selected_indices[num_selected] = gaps[i].idx;
1553 1554 readdata -= gaps[i].size;
1554 1555 num_selected += 1;
1555 1556 if (readdata <= 0) {
1556 1557 density = 1.0;
1557 1558 } else {
1558 1559 density = (double)chainpayload / (double)readdata;
1559 1560 }
1560 1561 if (density >= targetdensity) {
1561 1562 break;
1562 1563 }
1563 1564 }
1564 1565 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1565 1566 &Py_ssize_t_compare);
1566 1567
1567 1568 /* create the resulting slice */
1568 1569 allchunks = PyList_New(0);
1569 1570 if (allchunks == NULL) {
1570 1571 goto bail;
1571 1572 }
1572 1573 previdx = 0;
1573 1574 selected_indices[num_selected] = num_revs;
1574 1575 for (i = 0; i <= num_selected; i++) {
1575 1576 Py_ssize_t idx = selected_indices[i];
1576 1577 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1577 1578 if (endidx < 0) {
1578 1579 goto bail;
1579 1580 }
1580 1581 if (previdx < endidx) {
1581 1582 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1582 1583 if (chunk == NULL) {
1583 1584 goto bail;
1584 1585 }
1585 1586 if (PyList_Append(allchunks, chunk) == -1) {
1586 1587 goto bail;
1587 1588 }
1588 1589 Py_DECREF(chunk);
1589 1590 chunk = NULL;
1590 1591 }
1591 1592 previdx = idx;
1592 1593 }
1593 1594 result = allchunks;
1594 1595 goto done;
1595 1596
1596 1597 bail:
1597 1598 Py_XDECREF(allchunks);
1598 1599 Py_XDECREF(chunk);
1599 1600 done:
1600 1601 free(revs);
1601 1602 free(gaps);
1602 1603 free(selected_indices);
1603 1604 return result;
1604 1605 }
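/*
 * Illustrative worked example (not in the original source): a chain
 * with 300 bytes of payload spread over a 1000-byte span has density
 * 0.3; skipping one 600-byte gap reduces readdata to 400 and raises
 * the density to 300/400 = 0.75, which may already satisfy
 * targetdensity without further slicing.
 */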
1605 1606
1606 1607 static inline int nt_level(const char *node, Py_ssize_t level)
1607 1608 {
1608 1609 int v = node[level >> 1];
1609 1610 if (!(level & 1))
1610 1611 v >>= 4;
1611 1612 return v & 0xf;
1612 1613 }
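/*
 * Illustrative example (not in the original source): for a node whose
 * first byte is 0xab, nt_level(node, 0) == 0xa and
 * nt_level(node, 1) == 0xb -- even levels select the high nybble,
 * odd levels the low one.
 */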
1613 1614
1614 1615 /*
1615 1616 * Return values:
1616 1617 *
1617 1618 * -4: match is ambiguous (multiple candidates)
1618 1619 * -2: not found
1619 1620 * rest: valid rev
1620 1621 */
1621 1622 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1622 1623 int hex)
1623 1624 {
1624 1625 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1625 1626 int level, maxlevel, off;
1626 1627
1627 1628 /* If the input is binary, do a fast check for the nullid first. */
1628 1629 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1629 1630 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1630 1631 return -1;
1631 1632
1632 1633 if (hex)
1633 1634 maxlevel = nodelen;
1634 1635 else
1635 1636 maxlevel = 2 * nodelen;
1636 1637 if (maxlevel > 2 * self->nodelen)
1637 1638 maxlevel = 2 * self->nodelen;
1638 1639
1639 1640 for (level = off = 0; level < maxlevel; level++) {
1640 1641 int k = getnybble(node, level);
1641 1642 nodetreenode *n = &self->nodes[off];
1642 1643 int v = n->children[k];
1643 1644
1644 1645 if (v < 0) {
1645 1646 const char *n;
1646 1647 Py_ssize_t i;
1647 1648
1648 1649 v = -(v + 2);
1649 1650 n = index_node(self->index, v);
1650 1651 if (n == NULL)
1651 1652 return -2;
1652 1653 for (i = level; i < maxlevel; i++)
1653 1654 if (getnybble(node, i) != nt_level(n, i))
1654 1655 return -2;
1655 1656 return v;
1656 1657 }
1657 1658 if (v == 0)
1658 1659 return -2;
1659 1660 off = v;
1660 1661 }
1661 1662 /* multiple matches against an ambiguous prefix */
1662 1663 return -4;
1663 1664 }
1664 1665
1665 1666 static int nt_new(nodetree *self)
1666 1667 {
1667 1668 if (self->length == self->capacity) {
1668 1669 size_t newcapacity;
1669 1670 nodetreenode *newnodes;
1670 1671 newcapacity = self->capacity * 2;
1671 1672 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1672 1673 PyErr_SetString(PyExc_MemoryError,
1673 1674 "overflow in nt_new");
1674 1675 return -1;
1675 1676 }
1676 1677 newnodes =
1677 1678 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1678 1679 if (newnodes == NULL) {
1679 1680 PyErr_SetString(PyExc_MemoryError, "out of memory");
1680 1681 return -1;
1681 1682 }
1682 1683 self->capacity = newcapacity;
1683 1684 self->nodes = newnodes;
1684 1685 memset(&self->nodes[self->length], 0,
1685 1686 sizeof(nodetreenode) * (self->capacity - self->length));
1686 1687 }
1687 1688 return self->length++;
1688 1689 }
1689 1690
1690 1691 static int nt_insert(nodetree *self, const char *node, int rev)
1691 1692 {
1692 1693 int level = 0;
1693 1694 int off = 0;
1694 1695
1695 1696 while (level < 2 * self->nodelen) {
1696 1697 int k = nt_level(node, level);
1697 1698 nodetreenode *n;
1698 1699 int v;
1699 1700
1700 1701 n = &self->nodes[off];
1701 1702 v = n->children[k];
1702 1703
1703 1704 if (v == 0) {
1704 1705 n->children[k] = -rev - 2;
1705 1706 return 0;
1706 1707 }
1707 1708 if (v < 0) {
1708 1709 const char *oldnode =
1709 1710 index_node_existing(self->index, -(v + 2));
1710 1711 int noff;
1711 1712
1712 1713 if (oldnode == NULL)
1713 1714 return -1;
1714 1715 if (!memcmp(oldnode, node, self->nodelen)) {
1715 1716 n->children[k] = -rev - 2;
1716 1717 return 0;
1717 1718 }
1718 1719 noff = nt_new(self);
1719 1720 if (noff == -1)
1720 1721 return -1;
1721 1722 /* self->nodes may have been changed by realloc */
1722 1723 self->nodes[off].children[k] = noff;
1723 1724 off = noff;
1724 1725 n = &self->nodes[off];
1725 1726 n->children[nt_level(oldnode, ++level)] = v;
1726 1727 if (level > self->depth)
1727 1728 self->depth = level;
1728 1729 self->splits += 1;
1729 1730 } else {
1730 1731 level += 1;
1731 1732 off = v;
1732 1733 }
1733 1734 }
1734 1735
1735 1736 return -1;
1736 1737 }
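/*
 * Illustrative example (not in the original source): inserting a node
 * starting 0xac when a leaf for a node starting 0xab already occupies
 * the 0xa slot allocates a fresh interior node, pushes the old leaf
 * down to its level-1 nybble (0xb), records the split, and then
 * continues with the new node's nybble (0xc).
 */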
1737 1738
1738 1739 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1739 1740 {
1740 1741 Py_ssize_t rev;
1741 1742 const char *node;
1742 1743 Py_ssize_t length;
1743 1744 if (!PyArg_ParseTuple(args, "n", &rev))
1744 1745 return NULL;
1745 1746 length = index_length(self->nt.index);
1746 1747 if (rev < 0 || rev >= length) {
1747 1748 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1748 1749 return NULL;
1749 1750 }
1750 1751 node = index_node_existing(self->nt.index, rev);
1751 1752 if (nt_insert(&self->nt, node, (int)rev) == -1)
1752 1753 return NULL;
1753 1754 Py_RETURN_NONE;
1754 1755 }
1755 1756
1756 1757 static int nt_delete_node(nodetree *self, const char *node)
1757 1758 {
1758 1759 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1759 1760 */
1760 1761 return nt_insert(self, node, -2);
1761 1762 }
1762 1763
1763 1764 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1764 1765 {
1765 1766 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1766 1767 self->nodes = NULL;
1767 1768
1768 1769 self->index = index;
1769 1770 /* The input capacity is in terms of revisions, while the field is in
1770 1771 * terms of nodetree nodes. */
1771 1772 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1772 1773 self->nodelen = index->nodelen;
1773 1774 self->depth = 0;
1774 1775 self->splits = 0;
1775 1776 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
1776 1777 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1777 1778 return -1;
1778 1779 }
1779 1780 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1780 1781 if (self->nodes == NULL) {
1781 1782 PyErr_NoMemory();
1782 1783 return -1;
1783 1784 }
1784 1785 self->length = 1;
1785 1786 return 0;
1786 1787 }
1787 1788
1788 1789 static int ntobj_init(nodetreeObject *self, PyObject *args)
1789 1790 {
1790 1791 PyObject *index;
1791 1792 unsigned capacity;
1792 1793 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1793 1794 &capacity))
1794 1795 return -1;
1795 1796 Py_INCREF(index);
1796 1797 return nt_init(&self->nt, (indexObject *)index, capacity);
1797 1798 }
1798 1799
1799 1800 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1800 1801 {
1801 1802 return nt_find(self, node, nodelen, 1);
1802 1803 }
1803 1804
1804 1805 /*
1805 1806 * Find the length of the shortest unique prefix of node.
1806 1807 *
1807 1808 * Return values:
1808 1809 *
1809 1810 * -3: error (exception set)
1810 1811 * -2: not found (no exception set)
1811 1812 * rest: length of shortest prefix
1812 1813 */
1813 1814 static int nt_shortest(nodetree *self, const char *node)
1814 1815 {
1815 1816 int level, off;
1816 1817
1817 1818 for (level = off = 0; level < 2 * self->nodelen; level++) {
1818 1819 int k, v;
1819 1820 nodetreenode *n = &self->nodes[off];
1820 1821 k = nt_level(node, level);
1821 1822 v = n->children[k];
1822 1823 if (v < 0) {
1823 1824 const char *n;
1824 1825 v = -(v + 2);
1825 1826 n = index_node_existing(self->index, v);
1826 1827 if (n == NULL)
1827 1828 return -3;
1828 1829 if (memcmp(node, n, self->nodelen) != 0)
1829 1830 /*
1830 1831 * Found a unique prefix, but it wasn't for the
1831 1832 * requested node (i.e the requested node does
1832 1833 * not exist).
1833 1834 */
1834 1835 return -2;
1835 1836 return level + 1;
1836 1837 }
1837 1838 if (v == 0)
1838 1839 return -2;
1839 1840 off = v;
1840 1841 }
1841 1842 /*
1842 1843 * The node was still not unique after 40 hex digits, which cannot
1843 1844 * happen for nodes read from the index. If we get here, a programming
1844 1845 * error in this file made us insert a node longer than 40 hex digits.
1845 1846 */
1846 1847 PyErr_SetString(PyExc_Exception, "broken node tree");
1847 1848 return -3;
1848 1849 }
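
/*
 * Editor's sketch (assumption: this mirrors the nt_level() helper defined
 * earlier in this file): each trie level consumes one hex nibble of the
 * binary node, high nibble first, so for a node whose first byte is 0xab,
 * level 0 yields 0xa and level 1 yields 0xb. This is why the loops above
 * run to 2 * nodelen levels.
 */
static int sketch_nt_level(const char *node, int level)
{
int v = node[level >> 1];
if (!(level & 1))
v >>= 4;
return v & 0xf;
}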
1849 1850
1850 1851 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1851 1852 {
1852 1853 PyObject *val;
1853 1854 char *node;
1854 1855 int length;
1855 1856
1856 1857 if (!PyArg_ParseTuple(args, "O", &val))
1857 1858 return NULL;
1858 1859 if (node_check(self->nt.nodelen, val, &node) == -1)
1859 1860 return NULL;
1860 1861
1861 1862 length = nt_shortest(&self->nt, node);
1862 1863 if (length == -3)
1863 1864 return NULL;
1864 1865 if (length == -2) {
1865 1866 raise_revlog_error();
1866 1867 return NULL;
1867 1868 }
1868 1869 return PyInt_FromLong(length);
1869 1870 }
1870 1871
1871 1872 static void nt_dealloc(nodetree *self)
1872 1873 {
1873 1874 free(self->nodes);
1874 1875 self->nodes = NULL;
1875 1876 }
1876 1877
1877 1878 static void ntobj_dealloc(nodetreeObject *self)
1878 1879 {
1879 1880 Py_XDECREF(self->nt.index);
1880 1881 nt_dealloc(&self->nt);
1881 1882 PyObject_Del(self);
1882 1883 }
1883 1884
1884 1885 static PyMethodDef ntobj_methods[] = {
1885 1886 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1886 1887 "insert an index entry"},
1887 1888 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1888 1889 "find length of shortest hex nodeid of a binary ID"},
1889 1890 {NULL} /* Sentinel */
1890 1891 };
1891 1892
1892 1893 static PyTypeObject nodetreeType = {
1893 1894 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1894 1895 "parsers.nodetree", /* tp_name */
1895 1896 sizeof(nodetreeObject), /* tp_basicsize */
1896 1897 0, /* tp_itemsize */
1897 1898 (destructor)ntobj_dealloc, /* tp_dealloc */
1898 1899 0, /* tp_print */
1899 1900 0, /* tp_getattr */
1900 1901 0, /* tp_setattr */
1901 1902 0, /* tp_compare */
1902 1903 0, /* tp_repr */
1903 1904 0, /* tp_as_number */
1904 1905 0, /* tp_as_sequence */
1905 1906 0, /* tp_as_mapping */
1906 1907 0, /* tp_hash */
1907 1908 0, /* tp_call */
1908 1909 0, /* tp_str */
1909 1910 0, /* tp_getattro */
1910 1911 0, /* tp_setattro */
1911 1912 0, /* tp_as_buffer */
1912 1913 Py_TPFLAGS_DEFAULT, /* tp_flags */
1913 1914 "nodetree", /* tp_doc */
1914 1915 0, /* tp_traverse */
1915 1916 0, /* tp_clear */
1916 1917 0, /* tp_richcompare */
1917 1918 0, /* tp_weaklistoffset */
1918 1919 0, /* tp_iter */
1919 1920 0, /* tp_iternext */
1920 1921 ntobj_methods, /* tp_methods */
1921 1922 0, /* tp_members */
1922 1923 0, /* tp_getset */
1923 1924 0, /* tp_base */
1924 1925 0, /* tp_dict */
1925 1926 0, /* tp_descr_get */
1926 1927 0, /* tp_descr_set */
1927 1928 0, /* tp_dictoffset */
1928 1929 (initproc)ntobj_init, /* tp_init */
1929 1930 0, /* tp_alloc */
1930 1931 };
1931 1932
1932 1933 static int index_init_nt(indexObject *self)
1933 1934 {
1934 1935 if (!self->ntinitialized) {
1935 1936 if (nt_init(&self->nt, self, (int)self->length) == -1) {
1936 1937 nt_dealloc(&self->nt);
1937 1938 return -1;
1938 1939 }
1939 1940 if (nt_insert(&self->nt, nullid, -1) == -1) {
1940 1941 nt_dealloc(&self->nt);
1941 1942 return -1;
1942 1943 }
1943 1944 self->ntinitialized = 1;
1944 1945 self->ntrev = (int)index_length(self);
1945 1946 self->ntlookups = 1;
1946 1947 self->ntmisses = 0;
1947 1948 }
1948 1949 return 0;
1949 1950 }
1950 1951
1951 1952 /*
1952 1953 * Return values:
1953 1954 *
1954 1955 * -3: error (exception set)
1955 1956 * -2: not found (no exception set)
1956 1957 * rest: valid rev
1957 1958 */
1958 1959 static int index_find_node(indexObject *self, const char *node)
1959 1960 {
1960 1961 int rev;
1961 1962
1962 1963 if (index_init_nt(self) == -1)
1963 1964 return -3;
1964 1965
1965 1966 self->ntlookups++;
1966 1967 rev = nt_find(&self->nt, node, self->nodelen, 0);
1967 1968 if (rev >= -1)
1968 1969 return rev;
1969 1970
1970 1971 /*
1971 1972 * For the first handful of lookups, we scan the entire index,
1972 1973 * and cache only the matching nodes. This optimizes for cases
1973 1974 * like "hg tip", where only a few nodes are accessed.
1974 1975 *
1975 1976 * After that, we cache every node we visit, using a single
1976 1977 * scan amortized over multiple lookups. This gives the best
1977 1978 * bulk performance, e.g. for "hg log".
1978 1979 */
1979 1980 if (self->ntmisses++ < 4) {
1980 1981 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1981 1982 const char *n = index_node_existing(self, rev);
1982 1983 if (n == NULL)
1983 1984 return -3;
1984 1985 if (memcmp(node, n, self->nodelen) == 0) {
1985 1986 if (nt_insert(&self->nt, n, rev) == -1)
1986 1987 return -3;
1987 1988 break;
1988 1989 }
1989 1990 }
1990 1991 } else {
1991 1992 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1992 1993 const char *n = index_node_existing(self, rev);
1993 1994 if (n == NULL)
1994 1995 return -3;
1995 1996 if (nt_insert(&self->nt, n, rev) == -1) {
1996 1997 self->ntrev = rev + 1;
1997 1998 return -3;
1998 1999 }
1999 2000 if (memcmp(node, n, self->nodelen) == 0) {
2000 2001 break;
2001 2002 }
2002 2003 }
2003 2004 self->ntrev = rev;
2004 2005 }
2005 2006
2006 2007 if (rev >= 0)
2007 2008 return rev;
2008 2009 return -2;
2009 2010 }
2010 2011
2011 2012 static PyObject *index_getitem(indexObject *self, PyObject *value)
2012 2013 {
2013 2014 char *node;
2014 2015 int rev;
2015 2016
2016 2017 if (PyInt_Check(value)) {
2017 2018 long idx;
2018 2019 if (!pylong_to_long(value, &idx)) {
2019 2020 return NULL;
2020 2021 }
2021 2022 return index_get(self, idx);
2022 2023 }
2023 2024
2024 2025 if (node_check(self->nodelen, value, &node) == -1)
2025 2026 return NULL;
2026 2027 rev = index_find_node(self, node);
2027 2028 if (rev >= -1)
2028 2029 return PyInt_FromLong(rev);
2029 2030 if (rev == -2)
2030 2031 raise_revlog_error();
2031 2032 return NULL;
2032 2033 }
2033 2034
2034 2035 /*
2035 2036 * Fully populate the radix tree.
2036 2037 */
2037 2038 static int index_populate_nt(indexObject *self)
2038 2039 {
2039 2040 int rev;
2040 2041 if (self->ntrev > 0) {
2041 2042 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2042 2043 const char *n = index_node_existing(self, rev);
2043 2044 if (n == NULL)
2044 2045 return -1;
2045 2046 if (nt_insert(&self->nt, n, rev) == -1)
2046 2047 return -1;
2047 2048 }
2048 2049 self->ntrev = -1;
2049 2050 }
2050 2051 return 0;
2051 2052 }
2052 2053
2053 2054 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2054 2055 {
2055 2056 const char *fullnode;
2056 2057 Py_ssize_t nodelen;
2057 2058 char *node;
2058 2059 int rev, i;
2059 2060
2060 2061 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
2061 2062 return NULL;
2062 2063
2063 2064 if (nodelen < 1) {
2064 2065 PyErr_SetString(PyExc_ValueError, "key too short");
2065 2066 return NULL;
2066 2067 }
2067 2068
2068 2069 if (nodelen > 2 * self->nodelen) {
2069 2070 PyErr_SetString(PyExc_ValueError, "key too long");
2070 2071 return NULL;
2071 2072 }
2072 2073
2073 2074 for (i = 0; i < nodelen; i++)
2074 2075 hexdigit(node, i);
2075 2076 if (PyErr_Occurred()) {
2076 2077 /* input contains non-hex characters */
2077 2078 PyErr_Clear();
2078 2079 Py_RETURN_NONE;
2079 2080 }
2080 2081
2081 2082 if (index_init_nt(self) == -1)
2082 2083 return NULL;
2083 2084 if (index_populate_nt(self) == -1)
2084 2085 return NULL;
2085 2086 rev = nt_partialmatch(&self->nt, node, nodelen);
2086 2087
2087 2088 switch (rev) {
2088 2089 case -4:
2089 2090 raise_revlog_error();
2090 2091 return NULL;
2091 2092 case -2:
2092 2093 Py_RETURN_NONE;
2093 2094 case -1:
2094 2095 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2095 2096 }
2096 2097
2097 2098 fullnode = index_node_existing(self, rev);
2098 2099 if (fullnode == NULL) {
2099 2100 return NULL;
2100 2101 }
2101 2102 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2102 2103 }
2103 2104
2104 2105 static PyObject *index_shortest(indexObject *self, PyObject *args)
2105 2106 {
2106 2107 PyObject *val;
2107 2108 char *node;
2108 2109 int length;
2109 2110
2110 2111 if (!PyArg_ParseTuple(args, "O", &val))
2111 2112 return NULL;
2112 2113 if (node_check(self->nodelen, val, &node) == -1)
2113 2114 return NULL;
2114 2115
2115 2116 self->ntlookups++;
2116 2117 if (index_init_nt(self) == -1)
2117 2118 return NULL;
2118 2119 if (index_populate_nt(self) == -1)
2119 2120 return NULL;
2120 2121 length = nt_shortest(&self->nt, node);
2121 2122 if (length == -3)
2122 2123 return NULL;
2123 2124 if (length == -2) {
2124 2125 raise_revlog_error();
2125 2126 return NULL;
2126 2127 }
2127 2128 return PyInt_FromLong(length);
2128 2129 }
2129 2130
2130 2131 static PyObject *index_m_get(indexObject *self, PyObject *args)
2131 2132 {
2132 2133 PyObject *val;
2133 2134 char *node;
2134 2135 int rev;
2135 2136
2136 2137 if (!PyArg_ParseTuple(args, "O", &val))
2137 2138 return NULL;
2138 2139 if (node_check(self->nodelen, val, &node) == -1)
2139 2140 return NULL;
2140 2141 rev = index_find_node(self, node);
2141 2142 if (rev == -3)
2142 2143 return NULL;
2143 2144 if (rev == -2)
2144 2145 Py_RETURN_NONE;
2145 2146 return PyInt_FromLong(rev);
2146 2147 }
2147 2148
2148 2149 static int index_contains(indexObject *self, PyObject *value)
2149 2150 {
2150 2151 char *node;
2151 2152
2152 2153 if (PyInt_Check(value)) {
2153 2154 long rev;
2154 2155 if (!pylong_to_long(value, &rev)) {
2155 2156 return -1;
2156 2157 }
2157 2158 return rev >= -1 && rev < index_length(self);
2158 2159 }
2159 2160
2160 2161 if (node_check(self->nodelen, value, &node) == -1)
2161 2162 return -1;
2162 2163
2163 2164 switch (index_find_node(self, node)) {
2164 2165 case -3:
2165 2166 return -1;
2166 2167 case -2:
2167 2168 return 0;
2168 2169 default:
2169 2170 return 1;
2170 2171 }
2171 2172 }
2172 2173
2173 2174 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2174 2175 {
2175 2176 int ret = index_contains(self, args);
2176 2177 if (ret < 0)
2177 2178 return NULL;
2178 2179 return PyBool_FromLong((long)ret);
2179 2180 }
2180 2181
2181 2182 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2182 2183 {
2183 2184 char *node;
2184 2185 int rev;
2185 2186
2186 2187 if (node_check(self->nodelen, val, &node) == -1)
2187 2188 return NULL;
2188 2189 rev = index_find_node(self, node);
2189 2190 if (rev >= -1)
2190 2191 return PyInt_FromLong(rev);
2191 2192 if (rev == -2)
2192 2193 raise_revlog_error();
2193 2194 return NULL;
2194 2195 }
2195 2196
2196 2197 typedef uint64_t bitmask;
2197 2198
2198 2199 /*
2199 2200 * Given a disjoint set of revs, return all candidates for the
2200 2201 * greatest common ancestor. In revset notation, this is the set
2201 2202 * "heads(::a and ::b and ...)"
2202 2203 */
2203 2204 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2204 2205 int revcount)
2205 2206 {
2206 2207 const bitmask allseen = (1ull << revcount) - 1;
2207 2208 const bitmask poison = 1ull << revcount;
2208 2209 PyObject *gca = PyList_New(0);
2209 2210 int i, v, interesting;
2210 2211 int maxrev = -1;
2211 2212 bitmask sp;
2212 2213 bitmask *seen;
2213 2214
2214 2215 if (gca == NULL)
2215 2216 return PyErr_NoMemory();
2216 2217
2217 2218 for (i = 0; i < revcount; i++) {
2218 2219 if (revs[i] > maxrev)
2219 2220 maxrev = revs[i];
2220 2221 }
2221 2222
2222 2223 seen = calloc(sizeof(*seen), maxrev + 1);
2223 2224 if (seen == NULL) {
2224 2225 Py_DECREF(gca);
2225 2226 return PyErr_NoMemory();
2226 2227 }
2227 2228
2228 2229 for (i = 0; i < revcount; i++)
2229 2230 seen[revs[i]] = 1ull << i;
2230 2231
2231 2232 interesting = revcount;
2232 2233
2233 2234 for (v = maxrev; v >= 0 && interesting; v--) {
2234 2235 bitmask sv = seen[v];
2235 2236 int parents[2];
2236 2237
2237 2238 if (!sv)
2238 2239 continue;
2239 2240
2240 2241 if (sv < poison) {
2241 2242 interesting -= 1;
2242 2243 if (sv == allseen) {
2243 2244 PyObject *obj = PyInt_FromLong(v);
2244 2245 if (obj == NULL)
2245 2246 goto bail;
2246 2247 if (PyList_Append(gca, obj) == -1) {
2247 2248 Py_DECREF(obj);
2248 2249 goto bail;
2249 2250 }
2250 2251 sv |= poison;
2251 2252 for (i = 0; i < revcount; i++) {
2252 2253 if (revs[i] == v)
2253 2254 goto done;
2254 2255 }
2255 2256 }
2256 2257 }
2257 2258 if (index_get_parents(self, v, parents, maxrev) < 0)
2258 2259 goto bail;
2259 2260
2260 2261 for (i = 0; i < 2; i++) {
2261 2262 int p = parents[i];
2262 2263 if (p == -1)
2263 2264 continue;
2264 2265 sp = seen[p];
2265 2266 if (sv < poison) {
2266 2267 if (sp == 0) {
2267 2268 seen[p] = sv;
2268 2269 interesting++;
2269 2270 } else if (sp != sv)
2270 2271 seen[p] |= sv;
2271 2272 } else {
2272 2273 if (sp && sp < poison)
2273 2274 interesting--;
2274 2275 seen[p] = sv;
2275 2276 }
2276 2277 }
2277 2278 }
2278 2279
2279 2280 done:
2280 2281 free(seen);
2281 2282 return gca;
2282 2283 bail:
2283 2284 free(seen);
2284 2285 Py_XDECREF(gca);
2285 2286 return NULL;
2286 2287 }
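
/*
 * Editor's sketch, not part of the original file: the bitmask sweep of
 * find_gca_candidates() on a hand-built DAG, with parents supplied as a
 * plain array and the "interesting" early-exit bookkeeping dropped for
 * clarity. Each input rev i owns bit (1 << i); a rev whose seen-set
 * equals "allseen" before poisoning is a candidate, and the poison bit
 * keeps its own ancestors from also reporting. Assumes maxrev < 16.
 */
#include <stdio.h>

static void sketch_gca(const int (*parents)[2], int maxrev, const int *revs,
int revcount)
{
unsigned allseen = (1u << revcount) - 1;
unsigned poison = 1u << revcount;
unsigned seen[16] = {0};
int i, v;

for (i = 0; i < revcount; i++)
seen[revs[i]] = 1u << i;

for (v = maxrev; v >= 0; v--) {
unsigned sv = seen[v];
if (!sv)
continue;
if (sv < poison && sv == allseen) {
printf("gca candidate: %d\n", v);
sv |= poison;
}
for (i = 0; i < 2; i++) {
int p = parents[v][i];
if (p != -1)
seen[p] |= sv;
}
}
}

/*
 * Example: with parents {{-1,-1}, {-1,-1}, {0,1}, {2,-1}} (revs 0 and 1
 * are roots, rev 2 merges them, rev 3 is a child of 2), calling
 * sketch_gca(parents, 3, (int[]){2, 3}, 2) prints "gca candidate: 2".
 */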
2287 2288
2288 2289 /*
2289 2290 * Given a disjoint set of revs, return the subset with the longest
2290 2291 * path to the root.
2291 2292 */
2292 2293 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2293 2294 {
2294 2295 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2295 2296 static const Py_ssize_t capacity = 24;
2296 2297 int *depth, *interesting = NULL;
2297 2298 int i, j, v, ninteresting;
2298 2299 PyObject *dict = NULL, *keys = NULL;
2299 2300 long *seen = NULL;
2300 2301 int maxrev = -1;
2301 2302 long final;
2302 2303
2303 2304 if (revcount > capacity) {
2304 2305 PyErr_Format(PyExc_OverflowError,
2305 2306 "bitset size (%ld) > capacity (%ld)",
2306 2307 (long)revcount, (long)capacity);
2307 2308 return NULL;
2308 2309 }
2309 2310
2310 2311 for (i = 0; i < revcount; i++) {
2311 2312 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2312 2313 if (n > maxrev)
2313 2314 maxrev = n;
2314 2315 }
2315 2316
2316 2317 depth = calloc(sizeof(*depth), maxrev + 1);
2317 2318 if (depth == NULL)
2318 2319 return PyErr_NoMemory();
2319 2320
2320 2321 seen = calloc(sizeof(*seen), maxrev + 1);
2321 2322 if (seen == NULL) {
2322 2323 PyErr_NoMemory();
2323 2324 goto bail;
2324 2325 }
2325 2326
2326 2327 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2327 2328 if (interesting == NULL) {
2328 2329 PyErr_NoMemory();
2329 2330 goto bail;
2330 2331 }
2331 2332
2332 2333 if (PyList_Sort(revs) == -1)
2333 2334 goto bail;
2334 2335
2335 2336 for (i = 0; i < revcount; i++) {
2336 2337 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2337 2338 long b = 1l << i;
2338 2339 depth[n] = 1;
2339 2340 seen[n] = b;
2340 2341 interesting[b] = 1;
2341 2342 }
2342 2343
2343 2344 /* invariant: ninteresting is the number of non-zero entries in
2344 2345 * interesting. */
2345 2346 ninteresting = (int)revcount;
2346 2347
2347 2348 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2348 2349 int dv = depth[v];
2349 2350 int parents[2];
2350 2351 long sv;
2351 2352
2352 2353 if (dv == 0)
2353 2354 continue;
2354 2355
2355 2356 sv = seen[v];
2356 2357 if (index_get_parents(self, v, parents, maxrev) < 0)
2357 2358 goto bail;
2358 2359
2359 2360 for (i = 0; i < 2; i++) {
2360 2361 int p = parents[i];
2361 2362 long sp;
2362 2363 int dp;
2363 2364
2364 2365 if (p == -1)
2365 2366 continue;
2366 2367
2367 2368 dp = depth[p];
2368 2369 sp = seen[p];
2369 2370 if (dp <= dv) {
2370 2371 depth[p] = dv + 1;
2371 2372 if (sp != sv) {
2372 2373 interesting[sv] += 1;
2373 2374 seen[p] = sv;
2374 2375 if (sp) {
2375 2376 interesting[sp] -= 1;
2376 2377 if (interesting[sp] == 0)
2377 2378 ninteresting -= 1;
2378 2379 }
2379 2380 }
2380 2381 } else if (dv == dp - 1) {
2381 2382 long nsp = sp | sv;
2382 2383 if (nsp == sp)
2383 2384 continue;
2384 2385 seen[p] = nsp;
2385 2386 interesting[sp] -= 1;
2386 2387 if (interesting[sp] == 0)
2387 2388 ninteresting -= 1;
2388 2389 if (interesting[nsp] == 0)
2389 2390 ninteresting += 1;
2390 2391 interesting[nsp] += 1;
2391 2392 }
2392 2393 }
2393 2394 interesting[sv] -= 1;
2394 2395 if (interesting[sv] == 0)
2395 2396 ninteresting -= 1;
2396 2397 }
2397 2398
2398 2399 final = 0;
2399 2400 j = ninteresting;
2400 2401 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2401 2402 if (interesting[i] == 0)
2402 2403 continue;
2403 2404 final |= i;
2404 2405 j -= 1;
2405 2406 }
2406 2407 if (final == 0) {
2407 2408 keys = PyList_New(0);
2408 2409 goto bail;
2409 2410 }
2410 2411
2411 2412 dict = PyDict_New();
2412 2413 if (dict == NULL)
2413 2414 goto bail;
2414 2415
2415 2416 for (i = 0; i < revcount; i++) {
2416 2417 PyObject *key;
2417 2418
2418 2419 if ((final & (1 << i)) == 0)
2419 2420 continue;
2420 2421
2421 2422 key = PyList_GET_ITEM(revs, i);
2422 2423 Py_INCREF(key);
2423 2424 Py_INCREF(Py_None);
2424 2425 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2425 2426 Py_DECREF(key);
2426 2427 Py_DECREF(Py_None);
2427 2428 goto bail;
2428 2429 }
2429 2430 }
2430 2431
2431 2432 keys = PyDict_Keys(dict);
2432 2433
2433 2434 bail:
2434 2435 free(depth);
2435 2436 free(seen);
2436 2437 free(interesting);
2437 2438 Py_XDECREF(dict);
2438 2439
2439 2440 return keys;
2440 2441 }
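
/*
 * Editor's note, not part of the original file: the depth-propagation
 * rule of find_deepest() in isolation. Sweeping from high to low revs,
 * a parent's depth becomes (child depth + 1) whenever that is deeper
 * than what the parent already has, which is what the "dp <= dv" branch
 * above implements.
 */
static void sketch_propagate_depth(int *depth, int child, int parent)
{
if (depth[parent] <= depth[child])
depth[parent] = depth[child] + 1;
}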
2441 2442
2442 2443 /*
2443 2444 * Given a (possibly overlapping) set of revs, return all the
2444 2445 * common ancestor heads: heads(::args[0] and ::args[1] and ...)
2445 2446 */
2446 2447 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2447 2448 {
2448 2449 PyObject *ret = NULL;
2449 2450 Py_ssize_t argcount, i, len;
2450 2451 bitmask repeat = 0;
2451 2452 int revcount = 0;
2452 2453 int *revs;
2453 2454
2454 2455 argcount = PySequence_Length(args);
2455 2456 revs = PyMem_Malloc(argcount * sizeof(*revs));
2456 2457 if (argcount > 0 && revs == NULL)
2457 2458 return PyErr_NoMemory();
2458 2459 len = index_length(self);
2459 2460
2460 2461 for (i = 0; i < argcount; i++) {
2461 2462 static const int capacity = 24;
2462 2463 PyObject *obj = PySequence_GetItem(args, i);
2463 2464 bitmask x;
2464 2465 long val;
2465 2466
2466 2467 if (!PyInt_Check(obj)) {
2467 2468 PyErr_SetString(PyExc_TypeError,
2468 2469 "arguments must all be ints");
2469 2470 Py_DECREF(obj);
2470 2471 goto bail;
2471 2472 }
2472 2473 val = PyInt_AsLong(obj);
2473 2474 Py_DECREF(obj);
2474 2475 if (val == -1) {
2475 2476 ret = PyList_New(0);
2476 2477 goto done;
2477 2478 }
2478 2479 if (val < 0 || val >= len) {
2479 2480 PyErr_SetString(PyExc_IndexError, "index out of range");
2480 2481 goto bail;
2481 2482 }
2482 2483 /* this cheesy bloom filter lets us avoid some more
2483 2484 * expensive duplicate checks in the common set-is-disjoint
2484 2485 * case */
2485 2486 x = 1ull << (val & 0x3f);
2486 2487 if (repeat & x) {
2487 2488 int k;
2488 2489 for (k = 0; k < revcount; k++) {
2489 2490 if (val == revs[k])
2490 2491 goto duplicate;
2491 2492 }
2492 2493 } else
2493 2494 repeat |= x;
2494 2495 if (revcount >= capacity) {
2495 2496 PyErr_Format(PyExc_OverflowError,
2496 2497 "bitset size (%d) > capacity (%d)",
2497 2498 revcount, capacity);
2498 2499 goto bail;
2499 2500 }
2500 2501 revs[revcount++] = (int)val;
2501 2502 duplicate:;
2502 2503 }
2503 2504
2504 2505 if (revcount == 0) {
2505 2506 ret = PyList_New(0);
2506 2507 goto done;
2507 2508 }
2508 2509 if (revcount == 1) {
2509 2510 PyObject *obj;
2510 2511 ret = PyList_New(1);
2511 2512 if (ret == NULL)
2512 2513 goto bail;
2513 2514 obj = PyInt_FromLong(revs[0]);
2514 2515 if (obj == NULL)
2515 2516 goto bail;
2516 2517 PyList_SET_ITEM(ret, 0, obj);
2517 2518 goto done;
2518 2519 }
2519 2520
2520 2521 ret = find_gca_candidates(self, revs, revcount);
2521 2522 if (ret == NULL)
2522 2523 goto bail;
2523 2524
2524 2525 done:
2525 2526 PyMem_Free(revs);
2526 2527 return ret;
2527 2528
2528 2529 bail:
2529 2530 PyMem_Free(revs);
2530 2531 Py_XDECREF(ret);
2531 2532 return NULL;
2532 2533 }
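
/*
 * Editor's sketch, not part of the original file: the one-word bloom
 * filter used in index_commonancestorsheads() above. A value hashes to
 * one of 64 bits via its low six bits (val & 0x3f); a clear bit proves
 * the value was never added, while a set bit only means "maybe seen",
 * so the explicit duplicate scan still runs in that case.
 */
#include <stdint.h>

static int sketch_maybe_seen(uint64_t *filter, long val)
{
uint64_t bit = 1ull << (val & 0x3f);
int maybe = (*filter & bit) != 0;
*filter |= bit;
return maybe;
}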
2533 2534
2534 2535 /*
2535 2536 * Given a (possibly overlapping) set of revs, return the greatest
2536 2537 * common ancestors: those with the longest path to the root.
2537 2538 */
2538 2539 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2539 2540 {
2540 2541 PyObject *ret;
2541 2542 PyObject *gca = index_commonancestorsheads(self, args);
2542 2543 if (gca == NULL)
2543 2544 return NULL;
2544 2545
2545 2546 if (PyList_GET_SIZE(gca) <= 1) {
2546 2547 return gca;
2547 2548 }
2548 2549
2549 2550 ret = find_deepest(self, gca);
2550 2551 Py_DECREF(gca);
2551 2552 return ret;
2552 2553 }
2553 2554
2554 2555 /*
2555 2556 * Invalidate any trie entries introduced by added revs.
2556 2557 */
2557 2558 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2558 2559 {
2559 2560 Py_ssize_t i, len;
2560 2561
2561 2562 len = self->length + self->new_length;
2562 2563 i = start - self->length;
2563 2564 if (i < 0)
2564 2565 return;
2565 2566
2566 2567 for (i = start; i < len; i++)
2567 2568 nt_delete_node(&self->nt, index_deref(self, i) + 32);
2568 2569
2569 2570 self->new_length = start - self->length;
2570 2571 }
2571 2572
2572 2573 /*
2573 2574 * Delete a numeric range of revs, which must be at the end of the
2574 2575 * index.
2575 2576 */
2576 2577 static int index_slice_del(indexObject *self, PyObject *item)
2577 2578 {
2578 2579 Py_ssize_t start, stop, step, slicelength;
2579 2580 Py_ssize_t length = index_length(self) + 1;
2580 2581 int ret = 0;
2581 2582
2582 2583 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2583 2584 #ifdef IS_PY3K
2584 2585 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2585 2586 &slicelength) < 0)
2586 2587 #else
2587 2588 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2588 2589 &step, &slicelength) < 0)
2589 2590 #endif
2590 2591 return -1;
2591 2592
2592 2593 if (slicelength <= 0)
2593 2594 return 0;
2594 2595
2595 2596 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2596 2597 stop = start;
2597 2598
2598 2599 if (step < 0) {
2599 2600 stop = start + 1;
2600 2601 start = stop + step * (slicelength - 1) - 1;
2601 2602 step = -step;
2602 2603 }
2603 2604
2604 2605 if (step != 1) {
2605 2606 PyErr_SetString(PyExc_ValueError,
2606 2607 "revlog index delete requires step size of 1");
2607 2608 return -1;
2608 2609 }
2609 2610
2610 2611 if (stop != length - 1) {
2611 2612 PyErr_SetString(PyExc_IndexError,
2612 2613 "revlog index deletion indices are invalid");
2613 2614 return -1;
2614 2615 }
2615 2616
2616 2617 if (start < self->length) {
2617 2618 if (self->ntinitialized) {
2618 2619 Py_ssize_t i;
2619 2620
2620 2621 for (i = start; i < self->length; i++) {
2621 2622 const char *node = index_node_existing(self, i);
2622 2623 if (node == NULL)
2623 2624 return -1;
2624 2625
2625 2626 nt_delete_node(&self->nt, node);
2626 2627 }
2627 2628 if (self->new_length)
2628 2629 index_invalidate_added(self, self->length);
2629 2630 if (self->ntrev > start)
2630 2631 self->ntrev = (int)start;
2631 2632 } else if (self->new_length) {
2632 2633 self->new_length = 0;
2633 2634 }
2634 2635
2635 2636 self->length = start;
2636 2637 goto done;
2637 2638 }
2638 2639
2639 2640 if (self->ntinitialized) {
2640 2641 index_invalidate_added(self, start);
2641 2642 if (self->ntrev > start)
2642 2643 self->ntrev = (int)start;
2643 2644 } else {
2644 2645 self->new_length = start - self->length;
2645 2646 }
2646 2647 done:
2647 2648 Py_CLEAR(self->headrevs);
2648 2649 return ret;
2649 2650 }
2650 2651
2651 2652 /*
2652 2653 * Supported ops:
2653 2654 *
2654 2655 * slice deletion
2655 2656 * string assignment (extend node->rev mapping)
2656 2657 * string deletion (shrink node->rev mapping)
2657 2658 */
2658 2659 static int index_assign_subscript(indexObject *self, PyObject *item,
2659 2660 PyObject *value)
2660 2661 {
2661 2662 char *node;
2662 2663 long rev;
2663 2664
2664 2665 if (PySlice_Check(item) && value == NULL)
2665 2666 return index_slice_del(self, item);
2666 2667
2667 2668 if (node_check(self->nodelen, item, &node) == -1)
2668 2669 return -1;
2669 2670
2670 2671 if (value == NULL)
2671 2672 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2672 2673 : 0;
2673 2674 rev = PyInt_AsLong(value);
2674 2675 if (rev > INT_MAX || rev < 0) {
2675 2676 if (!PyErr_Occurred())
2676 2677 PyErr_SetString(PyExc_ValueError, "rev out of range");
2677 2678 return -1;
2678 2679 }
2679 2680
2680 2681 if (index_init_nt(self) == -1)
2681 2682 return -1;
2682 2683 return nt_insert(&self->nt, node, (int)rev);
2683 2684 }
2684 2685
2685 2686 /*
2686 2687 * Find all RevlogNG entries in an index that has inline data. Update
2687 2688 * the optional "offsets" table with those entries.
2688 2689 */
2689 2690 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2690 2691 {
2691 2692 const char *data = (const char *)self->buf.buf;
2692 2693 Py_ssize_t pos = 0;
2693 2694 Py_ssize_t end = self->buf.len;
2694 2695 long incr = self->hdrsize;
2695 2696 Py_ssize_t len = 0;
2696 2697
2697 2698 while (pos + self->hdrsize <= end && pos >= 0) {
2698 2699 uint32_t comp_len, sidedata_comp_len = 0;
2699 2700 /* 3rd element of header is length of compressed inline data */
2700 2701 comp_len = getbe32(data + pos + 8);
2701 2702 if (self->hdrsize == v2_hdrsize) {
2702 2703 sidedata_comp_len = getbe32(data + pos + 72);
2703 2704 }
2704 2705 incr = self->hdrsize + comp_len + sidedata_comp_len;
2705 2706 if (offsets)
2706 2707 offsets[len] = data + pos;
2707 2708 len++;
2708 2709 pos += incr;
2709 2710 }
2710 2711
2711 2712 if (pos != end) {
2712 2713 if (!PyErr_Occurred())
2713 2714 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2714 2715 return -1;
2715 2716 }
2716 2717
2717 2718 return len;
2718 2719 }
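
/*
 * Editor's sketch (assumption: equivalent to the getbe32() helper from
 * bitmanipulation.h used above): inline index records store multi-byte
 * fields big-endian, so the length of the compressed inline data sits in
 * the four bytes at offset 8 of each header, and for v2 the sidedata
 * length sits at offset 72.
 */
static uint32_t sketch_getbe32(const unsigned char *p)
{
return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
((uint32_t)p[2] << 8) | (uint32_t)p[3];
}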
2719 2720
2720 2721 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2721 2722 {
2722 2723 PyObject *data_obj, *inlined_obj, *revlogv2;
2723 2724 Py_ssize_t size;
2724 2725
2725 2726 static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
2726 2727
2727 2728 /* Initialize before argument-checking to avoid index_dealloc() crash.
2728 2729 */
2729 2730 self->added = NULL;
2730 2731 self->new_length = 0;
2731 2732 self->added_length = 0;
2732 2733 self->data = NULL;
2733 2734 memset(&self->buf, 0, sizeof(self->buf));
2734 2735 self->headrevs = NULL;
2735 2736 self->filteredrevs = Py_None;
2736 2737 Py_INCREF(Py_None);
2737 2738 self->ntinitialized = 0;
2738 2739 self->offsets = NULL;
2739 2740 self->nodelen = 20;
2740 2741 self->nullentry = NULL;
2741 2742
2742 2743 revlogv2 = NULL;
2743 2744 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
2744 2745 &data_obj, &inlined_obj, &revlogv2))
2745 2746 return -1;
2746 2747 if (!PyObject_CheckBuffer(data_obj)) {
2747 2748 PyErr_SetString(PyExc_TypeError,
2748 2749 "data does not support buffer interface");
2749 2750 return -1;
2750 2751 }
2751 2752 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
2752 2753 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
2753 2754 return -1;
2754 2755 }
2755 2756
2756 2757 if (revlogv2 && PyObject_IsTrue(revlogv2)) {
2757 2758 self->hdrsize = v2_hdrsize;
2758 2759 } else {
2759 2760 self->hdrsize = v1_hdrsize;
2760 2761 }
2761 2762
2762 2763 if (self->hdrsize == v1_hdrsize) {
2763 2764 self->nullentry =
2764 2765 Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
2765 2766 -1, -1, -1, nullid, self->nodelen);
2766 2767 } else {
2767 2768 self->nullentry =
2768 2769 Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0,
2769 2770 -1, -1, -1, -1, nullid, self->nodelen, 0, 0);
2770 2771 }
2771 2772
2772 2773 if (!self->nullentry)
2773 2774 return -1;
2774 2775 PyObject_GC_UnTrack(self->nullentry);
2775 2776
2776 2777 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2777 2778 return -1;
2778 2779 size = self->buf.len;
2779 2780
2780 2781 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2781 2782 self->data = data_obj;
2782 2783
2783 2784 self->ntlookups = self->ntmisses = 0;
2784 2785 self->ntrev = -1;
2785 2786 Py_INCREF(self->data);
2786 2787
2787 2788 if (self->inlined) {
2788 2789 Py_ssize_t len = inline_scan(self, NULL);
2789 2790 if (len == -1)
2790 2791 goto bail;
2791 2792 self->length = len;
2792 2793 } else {
2793 2794 if (size % self->hdrsize) {
2794 2795 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2795 2796 goto bail;
2796 2797 }
2797 2798 self->length = size / self->hdrsize;
2798 2799 }
2799 2800
2800 2801 return 0;
2801 2802 bail:
2802 2803 return -1;
2803 2804 }
2804 2805
2805 2806 static PyObject *index_nodemap(indexObject *self)
2806 2807 {
2807 2808 Py_INCREF(self);
2808 2809 return (PyObject *)self;
2809 2810 }
2810 2811
2811 2812 static void _index_clearcaches(indexObject *self)
2812 2813 {
2813 2814 if (self->offsets) {
2814 2815 PyMem_Free((void *)self->offsets);
2815 2816 self->offsets = NULL;
2816 2817 }
2817 2818 if (self->ntinitialized) {
2818 2819 nt_dealloc(&self->nt);
2819 2820 }
2820 2821 self->ntinitialized = 0;
2821 2822 Py_CLEAR(self->headrevs);
2822 2823 }
2823 2824
2824 2825 static PyObject *index_clearcaches(indexObject *self)
2825 2826 {
2826 2827 _index_clearcaches(self);
2827 2828 self->ntrev = -1;
2828 2829 self->ntlookups = self->ntmisses = 0;
2829 2830 Py_RETURN_NONE;
2830 2831 }
2831 2832
2832 2833 static void index_dealloc(indexObject *self)
2833 2834 {
2834 2835 _index_clearcaches(self);
2835 2836 Py_XDECREF(self->filteredrevs);
2836 2837 if (self->buf.buf) {
2837 2838 PyBuffer_Release(&self->buf);
2838 2839 memset(&self->buf, 0, sizeof(self->buf));
2839 2840 }
2840 2841 Py_XDECREF(self->data);
2841 2842 PyMem_Free(self->added);
2842 2843 Py_XDECREF(self->nullentry);
2843 2844 PyObject_Del(self);
2844 2845 }
2845 2846
2846 2847 static PySequenceMethods index_sequence_methods = {
2847 2848 (lenfunc)index_length, /* sq_length */
2848 2849 0, /* sq_concat */
2849 2850 0, /* sq_repeat */
2850 2851 (ssizeargfunc)index_get, /* sq_item */
2851 2852 0, /* sq_slice */
2852 2853 0, /* sq_ass_item */
2853 2854 0, /* sq_ass_slice */
2854 2855 (objobjproc)index_contains, /* sq_contains */
2855 2856 };
2856 2857
2857 2858 static PyMappingMethods index_mapping_methods = {
2858 2859 (lenfunc)index_length, /* mp_length */
2859 2860 (binaryfunc)index_getitem, /* mp_subscript */
2860 2861 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2861 2862 };
2862 2863
2863 2864 static PyMethodDef index_methods[] = {
2864 2865 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2865 2866 "return the gca set of the given revs"},
2866 2867 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2867 2868 METH_VARARGS,
2868 2869 "return the heads of the common ancestors of the given revs"},
2869 2870 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2870 2871 "clear the index caches"},
2871 2872 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2872 2873 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
2873 2874 "return `rev` associated with a node or None"},
2874 2875 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2875 2876 "return True if the node exist in the index"},
2876 2877 {"rev", (PyCFunction)index_m_rev, METH_O,
2877 2878 "return `rev` associated with a node or raise RevlogError"},
2878 2879 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2879 2880 "compute phases"},
2880 2881 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2881 2882 "reachableroots"},
2882 2883 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
2883 2884 METH_VARARGS, "replace an existing index entry with a new value"},
2884 2885 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2885 2886 "get head revisions"}, /* Can do filtering since 3.2 */
2886 2887 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2887 2888 "get filtered head revisions"}, /* Can always do filtering */
2888 2889 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2889 2890 "True if the object is a snapshot"},
2890 2891 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2891 2892 "Gather snapshot data in a cache dict"},
2892 2893 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2893 2894 "determine revisions with deltas to reconstruct fulltext"},
2894 2895 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2895 2896 METH_VARARGS, "slice a set of revisions into chunks that respect a read-density target"},
2896 2897 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2897 2898 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2898 2899 "match a potentially ambiguous node ID"},
2899 2900 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2900 2901 "find length of shortest hex nodeid of a binary ID"},
2901 2902 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2902 2903 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
2903 2904 "return an entry in binary form"},
2904 2905 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
2905 2906 "pack the revlog header information into binary"},
2906 2907 {NULL} /* Sentinel */
2907 2908 };
2908 2909
2909 2910 static PyGetSetDef index_getset[] = {
2910 2911 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2911 2912 {NULL} /* Sentinel */
2912 2913 };
2913 2914
2914 2915 static PyMemberDef index_members[] = {
2915 2916 {"entry_size", T_LONG, offsetof(indexObject, hdrsize), 0,
2916 2917 "size of an index entry"},
2917 2918 {NULL} /* Sentinel */
2918 2919 };
2919 2920
2920 2921 PyTypeObject HgRevlogIndex_Type = {
2921 2922 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2922 2923 "parsers.index", /* tp_name */
2923 2924 sizeof(indexObject), /* tp_basicsize */
2924 2925 0, /* tp_itemsize */
2925 2926 (destructor)index_dealloc, /* tp_dealloc */
2926 2927 0, /* tp_print */
2927 2928 0, /* tp_getattr */
2928 2929 0, /* tp_setattr */
2929 2930 0, /* tp_compare */
2930 2931 0, /* tp_repr */
2931 2932 0, /* tp_as_number */
2932 2933 &index_sequence_methods, /* tp_as_sequence */
2933 2934 &index_mapping_methods, /* tp_as_mapping */
2934 2935 0, /* tp_hash */
2935 2936 0, /* tp_call */
2936 2937 0, /* tp_str */
2937 2938 0, /* tp_getattro */
2938 2939 0, /* tp_setattro */
2939 2940 0, /* tp_as_buffer */
2940 2941 Py_TPFLAGS_DEFAULT, /* tp_flags */
2941 2942 "revlog index", /* tp_doc */
2942 2943 0, /* tp_traverse */
2943 2944 0, /* tp_clear */
2944 2945 0, /* tp_richcompare */
2945 2946 0, /* tp_weaklistoffset */
2946 2947 0, /* tp_iter */
2947 2948 0, /* tp_iternext */
2948 2949 index_methods, /* tp_methods */
2949 2950 index_members, /* tp_members */
2950 2951 index_getset, /* tp_getset */
2951 2952 0, /* tp_base */
2952 2953 0, /* tp_dict */
2953 2954 0, /* tp_descr_get */
2954 2955 0, /* tp_descr_set */
2955 2956 0, /* tp_dictoffset */
2956 2957 (initproc)index_init, /* tp_init */
2957 2958 0, /* tp_alloc */
2958 2959 };
2959 2960
2960 2961 /*
2961 2962 * returns a tuple of the form (index, cache) with elements as
2962 2963 * follows:
2963 2964 *
2964 2965 * index: an index object that lazily parses Revlog (v1 or v2) records
2965 2966 * cache: if data is inlined, a tuple (0, index_file_content), else None
2966 2967 * index_file_content could be a string, or a buffer
2967 2968 *
2968 2969 * the added complications exist for backwards compatibility
2969 2970 */
2970 2971 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
2971 2972 {
2972 2973 PyObject *cache = NULL;
2973 2974 indexObject *idx;
2974 2975 int ret;
2975 2976
2976 2977 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2977 2978 if (idx == NULL)
2978 2979 goto bail;
2979 2980
2980 2981 ret = index_init(idx, args, kwargs);
2981 2982 if (ret == -1)
2982 2983 goto bail;
2983 2984
2984 2985 if (idx->inlined) {
2985 2986 cache = Py_BuildValue("iO", 0, idx->data);
2986 2987 if (cache == NULL)
2987 2988 goto bail;
2988 2989 } else {
2989 2990 cache = Py_None;
2990 2991 Py_INCREF(cache);
2991 2992 }
2992 2993
2993 2994 return Py_BuildValue("NN", idx, cache);
2994 2995
2995 2996 bail:
2996 2997 Py_XDECREF(idx);
2997 2998 Py_XDECREF(cache);
2998 2999 return NULL;
2999 3000 }
3000 3001
3001 3002 static Revlog_CAPI CAPI = {
3002 3003 /* increment the abi_version field upon each change in the Revlog_CAPI
3003 3004 struct or in the ABI of the listed functions */
3004 3005 2,
3005 3006 index_length,
3006 3007 index_node,
3007 3008 HgRevlogIndex_GetParents,
3008 3009 };
3009 3010
3010 3011 void revlog_module_init(PyObject *mod)
3011 3012 {
3012 3013 PyObject *caps = NULL;
3013 3014 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3014 3015 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3015 3016 return;
3016 3017 Py_INCREF(&HgRevlogIndex_Type);
3017 3018 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3018 3019
3019 3020 nodetreeType.tp_new = PyType_GenericNew;
3020 3021 if (PyType_Ready(&nodetreeType) < 0)
3021 3022 return;
3022 3023 Py_INCREF(&nodetreeType);
3023 3024 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3024 3025
3025 3026 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3026 3027 if (caps != NULL)
3027 3028 PyModule_AddObject(mod, "revlog_CAPI", caps);
3028 3029 }
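
/*
 * Editor's sketch, not part of the original file: how another C extension
 * could consume the capsule registered above, assuming it shares the
 * Revlog_CAPI struct definition. Consumers should compare abi_version
 * before calling through the table.
 */
static Revlog_CAPI *sketch_import_revlog_capi(void)
{
return (Revlog_CAPI *)PyCapsule_Import(
"mercurial.cext.parsers.revlog_CAPI", 0);
}
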
@@ -1,2028 +1,2028 b''
1 1 # repository.py - Interfaces and base classes for repositories and peers.
2 2 #
3 3 # Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from ..i18n import _
11 11 from .. import error
12 12 from . import util as interfaceutil
13 13
14 14 # Local repository feature string.
15 15
16 16 # Revlogs are being used for file storage.
17 17 REPO_FEATURE_REVLOG_FILE_STORAGE = b'revlogfilestorage'
18 18 # The storage part of the repository is shared from an external source.
19 19 REPO_FEATURE_SHARED_STORAGE = b'sharedstore'
20 20 # LFS supported for backing file storage.
21 21 REPO_FEATURE_LFS = b'lfs'
22 22 # Repository supports being stream cloned.
23 23 REPO_FEATURE_STREAM_CLONE = b'streamclone'
24 24 # Files storage may lack data for all ancestors.
25 25 REPO_FEATURE_SHALLOW_FILE_STORAGE = b'shallowfilestorage'
26 26
27 27 REVISION_FLAG_CENSORED = 1 << 15
28 28 REVISION_FLAG_ELLIPSIS = 1 << 14
29 29 REVISION_FLAG_EXTSTORED = 1 << 13
30 30 REVISION_FLAG_HASCOPIESINFO = 1 << 12
31 31
32 32 REVISION_FLAGS_KNOWN = (
33 33 REVISION_FLAG_CENSORED
34 34 | REVISION_FLAG_ELLIPSIS
35 35 | REVISION_FLAG_EXTSTORED
36 36 | REVISION_FLAG_HASCOPIESINFO
37 37 )
38 38
39 39 CG_DELTAMODE_STD = b'default'
40 40 CG_DELTAMODE_PREV = b'previous'
41 41 CG_DELTAMODE_FULL = b'fulltext'
42 42 CG_DELTAMODE_P1 = b'p1'
43 43
44 44
45 45 class ipeerconnection(interfaceutil.Interface):
46 46 """Represents a "connection" to a repository.
47 47
48 48 This is the base interface for representing a connection to a repository.
49 49 It holds basic properties and methods applicable to all peer types.
50 50
51 51 This is not a complete interface definition and should not be used
52 52 outside of this module.
53 53 """
54 54
55 55 ui = interfaceutil.Attribute("""ui.ui instance""")
56 56
57 57 def url():
58 58 """Returns a URL string representing this peer.
59 59
60 60 Currently, implementations expose the raw URL used to construct the
61 61 instance. It may contain credentials as part of the URL. The
62 62 expectations of the value aren't well-defined and this could lead to
63 63 data leakage.
64 64
65 65 TODO audit/clean consumers and more clearly define the contents of this
66 66 value.
67 67 """
68 68
69 69 def local():
70 70 """Returns a local repository instance.
71 71
72 72 If the peer represents a local repository, returns an object that
73 73 can be used to interface with it. Otherwise returns ``None``.
74 74 """
75 75
76 76 def peer():
77 77 """Returns an object conforming to this interface.
78 78
79 79 Most implementations will ``return self``.
80 80 """
81 81
82 82 def canpush():
83 83 """Returns a boolean indicating if this peer can be pushed to."""
84 84
85 85 def close():
86 86 """Close the connection to this peer.
87 87
88 88 This is called when the peer will no longer be used. Resources
89 89 associated with the peer should be cleaned up.
90 90 """
91 91
92 92
93 93 class ipeercapabilities(interfaceutil.Interface):
94 94 """Peer sub-interface related to capabilities."""
95 95
96 96 def capable(name):
97 97 """Determine support for a named capability.
98 98
99 99 Returns ``False`` if capability not supported.
100 100
101 101 Returns ``True`` if boolean capability is supported. Returns a string
102 102 if capability support is non-boolean.
103 103
104 104 Capability strings may or may not map to wire protocol capabilities.
105 105 """
106 106
107 107 def requirecap(name, purpose):
108 108 """Require a capability to be present.
109 109
110 110 Raises a ``CapabilityError`` if the capability isn't present.
111 111 """
112 112
113 113
114 114 class ipeercommands(interfaceutil.Interface):
115 115 """Client-side interface for communicating over the wire protocol.
116 116
117 117 This interface is used as a gateway to the Mercurial wire protocol.
118 118 methods commonly call wire protocol commands of the same name.
119 119 """
120 120
121 121 def branchmap():
122 122 """Obtain heads in named branches.
123 123
124 124 Returns a dict mapping branch name to an iterable of nodes that are
125 125 heads on that branch.
126 126 """
127 127
128 128 def capabilities():
129 129 """Obtain capabilities of the peer.
130 130
131 131 Returns a set of string capabilities.
132 132 """
133 133
134 134 def clonebundles():
135 135 """Obtains the clone bundles manifest for the repo.
136 136
137 137 Returns the manifest as unparsed bytes.
138 138 """
139 139
140 140 def debugwireargs(one, two, three=None, four=None, five=None):
141 141 """Used to facilitate debugging of arguments passed over the wire."""
142 142
143 143 def getbundle(source, **kwargs):
144 144 """Obtain remote repository data as a bundle.
145 145
146 146 This command is how the bulk of repository data is transferred from
147 147 the peer to the local repository.
148 148
149 149 Returns a generator of bundle data.
150 150 """
151 151
152 152 def heads():
153 153 """Determine all known head revisions in the peer.
154 154
155 155 Returns an iterable of binary nodes.
156 156 """
157 157
158 158 def known(nodes):
159 159 """Determine whether multiple nodes are known.
160 160
161 161 Accepts an iterable of nodes whose presence to check for.
162 162
163 163 Returns an iterable of booleans indicating whether the corresponding node
164 164 at that index is known to the peer.
165 165 """
166 166
167 167 def listkeys(namespace):
168 168 """Obtain all keys in a pushkey namespace.
169 169
170 170 Returns an iterable of key names.
171 171 """
172 172
173 173 def lookup(key):
174 174 """Resolve a value to a known revision.
175 175
176 176 Returns a binary node of the resolved revision on success.
177 177 """
178 178
179 179 def pushkey(namespace, key, old, new):
180 180 """Set a value using the ``pushkey`` protocol.
181 181
182 182 Arguments correspond to the pushkey namespace and key to operate on and
183 183 the old and new values for that key.
184 184
185 185 Returns a string with the peer result. The value inside varies by the
186 186 namespace.
187 187 """
188 188
189 189 def stream_out():
190 190 """Obtain streaming clone data.
191 191
192 192 Successful result should be a generator of data chunks.
193 193 """
194 194
195 195 def unbundle(bundle, heads, url):
196 196 """Transfer repository data to the peer.
197 197
198 198 This is how the bulk of data during a push is transferred.
199 199
200 200 Returns the integer number of heads added to the peer.
201 201 """
202 202
203 203
204 204 class ipeerlegacycommands(interfaceutil.Interface):
205 205 """Interface for implementing support for legacy wire protocol commands.
206 206
207 207 Wire protocol commands transition to legacy status when they are no longer
208 208 used by modern clients. To facilitate identifying which commands are
209 209 legacy, the interfaces are split.
210 210 """
211 211
212 212 def between(pairs):
213 213 """Obtain nodes between pairs of nodes.
214 214
215 215 ``pairs`` is an iterable of node pairs.
216 216
217 217 Returns an iterable of iterables of nodes corresponding to each
218 218 requested pair.
219 219 """
220 220
221 221 def branches(nodes):
222 222 """Obtain ancestor changesets of specific nodes back to a branch point.
223 223
224 224 For each requested node, the peer finds the first ancestor node that is
225 225 a DAG root or is a merge.
226 226
227 227 Returns an iterable of iterables with the resolved values for each node.
228 228 """
229 229
230 230 def changegroup(nodes, source):
231 231 """Obtain a changegroup with data for descendants of specified nodes."""
232 232
233 233 def changegroupsubset(bases, heads, source):
234 234 pass
235 235
236 236
237 237 class ipeercommandexecutor(interfaceutil.Interface):
238 238 """Represents a mechanism to execute remote commands.
239 239
240 240 This is the primary interface for requesting that wire protocol commands
241 241 be executed. Instances of this interface are active in a context manager
242 242 and have a well-defined lifetime. When the context manager exits, all
243 243 outstanding requests are waited on.
244 244 """
245 245
246 246 def callcommand(name, args):
247 247 """Request that a named command be executed.
248 248
249 249 Receives the command name and a dictionary of command arguments.
250 250
251 251 Returns a ``concurrent.futures.Future`` that will resolve to the
252 252 result of that command request. That exact value is left up to
253 253 the implementation and possibly varies by command.
254 254
255 255 Not all commands can coexist with other commands in an executor
256 256 instance: it depends on the underlying wire protocol transport being
257 257 used and the command itself.
258 258
259 259 Implementations MAY call ``sendcommands()`` automatically if the
260 260 requested command can not coexist with other commands in this executor.
261 261
262 262 Implementations MAY call ``sendcommands()`` automatically when the
263 263 future's ``result()`` is called. So, consumers using multiple
264 264 commands with an executor MUST ensure that ``result()`` is not called
265 265 until all command requests have been issued.
266 266 """
267 267
268 268 def sendcommands():
269 269 """Trigger submission of queued command requests.
270 270
271 271 Not all transports submit commands as soon as they are requested to
272 272 run. When called, this method forces queued command requests to be
273 273 issued. It will no-op if all commands have already been sent.
274 274
275 275 When called, no more new commands may be issued with this executor.
276 276 """
277 277
278 278 def close():
279 279 """Signal that this command request is finished.
280 280
281 281 When called, no more new commands may be issued. All outstanding
282 282 commands that have previously been issued are waited on before
283 283 returning. This not only includes waiting for the futures to resolve,
284 284 but also waiting for all response data to arrive. In other words,
285 285 calling this waits for all on-wire state for issued command requests
286 286 to finish.
287 287
288 288 When used as a context manager, this method is called when exiting the
289 289 context manager.
290 290
291 291 This method may call ``sendcommands()`` if there are buffered commands.
292 292 """
293 293
294 294
295 295 class ipeerrequests(interfaceutil.Interface):
296 296 """Interface for executing commands on a peer."""
297 297
298 298 limitedarguments = interfaceutil.Attribute(
299 299 """True if the peer cannot receive large argument value for commands."""
300 300 )
301 301
302 302 def commandexecutor():
303 303 """A context manager that resolves to an ipeercommandexecutor.
304 304
305 305 The object this resolves to can be used to issue command requests
306 306 to the peer.
307 307
308 308 Callers should call its ``callcommand`` method to issue command
309 309 requests.
310 310
311 311 A new executor should be obtained for each distinct set of commands
312 312 (possibly just a single command) that the consumer wants to execute
313 313 as part of a single operation or round trip. This is because some
314 314 peers are half-duplex and/or don't support persistent connections.
315 315 For example, in the case of HTTP peers, commands sent to an executor represent
316 316 a single HTTP request. While some peers may support multiple command
317 317 sends over the wire per executor, consumers need to code to the least
318 318 capable peer. So it should be assumed that command executors buffer
319 319 called commands until they are told to send them and that each
320 320 command executor could result in a new connection or wire-level request
321 321 being issued.
322 322 """
323 323
324 324
325 325 class ipeerbase(ipeerconnection, ipeercapabilities, ipeerrequests):
326 326 """Unified interface for peer repositories.
327 327
328 328 All peer instances must conform to this interface.
329 329 """
330 330
331 331
332 332 class ipeerv2(ipeerconnection, ipeercapabilities, ipeerrequests):
333 333 """Unified peer interface for wire protocol version 2 peers."""
334 334
335 335 apidescriptor = interfaceutil.Attribute(
336 336 """Data structure holding description of server API."""
337 337 )
338 338
339 339
340 340 @interfaceutil.implementer(ipeerbase)
341 341 class peer(object):
342 342 """Base class for peer repositories."""
343 343
344 344 limitedarguments = False
345 345
346 346 def capable(self, name):
347 347 caps = self.capabilities()
348 348 if name in caps:
349 349 return True
350 350
351 351 name = b'%s=' % name
352 352 for cap in caps:
353 353 if cap.startswith(name):
354 354 return cap[len(name) :]
355 355
356 356 return False
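
# Editor's illustration, not part of the original file: given
# capabilities() == {b'branchmap', b'unbundle=HG10GZ,HG10BZ'}, the
# method above yields:
#
#   peer.capable(b'branchmap')  ->  True
#   peer.capable(b'unbundle')   ->  b'HG10GZ,HG10BZ'
#   peer.capable(b'pushkey')    ->  False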
357 357
358 358 def requirecap(self, name, purpose):
359 359 if self.capable(name):
360 360 return
361 361
362 362 raise error.CapabilityError(
363 363 _(
364 364 b'cannot %s; remote repository does not support the '
365 365 b'\'%s\' capability'
366 366 )
367 367 % (purpose, name)
368 368 )
369 369
370 370
371 371 class iverifyproblem(interfaceutil.Interface):
372 372 """Represents a problem with the integrity of the repository.
373 373
374 374 Instances of this interface are emitted to describe an integrity issue
375 375 with a repository (e.g. corrupt storage, missing data, etc).
376 376
377 377 Instances are essentially messages associated with severity.
378 378 """
379 379
380 380 warning = interfaceutil.Attribute(
381 381 """Message indicating a non-fatal problem."""
382 382 )
383 383
384 384 error = interfaceutil.Attribute("""Message indicating a fatal problem.""")
385 385
386 386 node = interfaceutil.Attribute(
387 387 """Revision encountering the problem.
388 388
389 389 ``None`` means the problem doesn't apply to a single revision.
390 390 """
391 391 )
392 392
393 393
394 394 class irevisiondelta(interfaceutil.Interface):
395 395 """Represents a delta between one revision and another.
396 396
397 397 Instances convey enough information to allow a revision to be exchanged
398 398 with another repository.
399 399
400 400 Instances represent the fulltext revision data or a delta against
401 401 another revision. Therefore the ``revision`` and ``delta`` attributes
402 402 are mutually exclusive.
403 403
404 404 Typically used for changegroup generation.
405 405 """
406 406
407 407 node = interfaceutil.Attribute("""20 byte node of this revision.""")
408 408
409 409 p1node = interfaceutil.Attribute(
410 410 """20 byte node of 1st parent of this revision."""
411 411 )
412 412
413 413 p2node = interfaceutil.Attribute(
414 414 """20 byte node of 2nd parent of this revision."""
415 415 )
416 416
417 417 linknode = interfaceutil.Attribute(
418 418 """20 byte node of the changelog revision this node is linked to."""
419 419 )
420 420
421 421 flags = interfaceutil.Attribute(
422 422 """2 bytes of integer flags that apply to this revision.
423 423
424 424 This is a bitwise composition of the ``REVISION_FLAG_*`` constants.
425 425 """
426 426 )
427 427
428 428 basenode = interfaceutil.Attribute(
429 429 """20 byte node of the revision this data is a delta against.
430 430
431 431 ``nullid`` indicates that the revision is a full revision and not
432 432 a delta.
433 433 """
434 434 )
435 435
436 436 baserevisionsize = interfaceutil.Attribute(
437 437 """Size of base revision this delta is against.
438 438
439 439 May be ``None`` if ``basenode`` is ``nullid``.
440 440 """
441 441 )
442 442
443 443 revision = interfaceutil.Attribute(
444 444 """Raw fulltext of revision data for this node."""
445 445 )
446 446
447 447 delta = interfaceutil.Attribute(
448 448 """Delta between ``basenode`` and ``node``.
449 449
450 450 Stored in the bdiff delta format.
451 451 """
452 452 )
453 453
454 454 sidedata = interfaceutil.Attribute(
455 455 """Raw sidedata bytes for the given revision."""
456 456 )
457 457
458 458 protocol_flags = interfaceutil.Attribute(
459 459 """Single byte of integer flags that can influence the protocol.
460 460
461 461 This is a bitwise composition of the ``storageutil.CG_FLAG*`` constants.
462 462 """
463 463 )
464 464
465 465
466 466 class ifilerevisionssequence(interfaceutil.Interface):
467 467 """Contains index data for all revisions of a file.
468 468
469 469 Types implementing this behave like lists of tuples. The index
470 470 in the list corresponds to the revision number. The values contain
471 471 index metadata.
472 472
473 473 The *null* revision (revision number -1) is always the last item
474 474 in the index.
475 475 """
476 476
477 477 def __len__():
478 478 """The total number of revisions."""
479 479
480 480 def __getitem__(rev):
481 481 """Returns the object having a specific revision number.
482 482
483 483 Returns an 8-tuple with the following fields:
484 484
485 485 offset+flags
486 486 Contains the offset and flags for the revision. 64-bit unsigned
487 487 integer where the first 6 bytes are the offset and the next 2 bytes
488 488 are flags. The offset can be 0 if it is not used by the store.
489 489 compressed size
490 490 Size of the revision data in the store. It can be 0 if it isn't
491 491 needed by the store.
492 492 uncompressed size
493 493 Fulltext size. It can be 0 if it isn't needed by the store.
494 494 base revision
495 495 Revision number of revision the delta for storage is encoded
496 496 against. -1 indicates not encoded against a base revision.
497 497 link revision
498 498 Revision number of changelog revision this entry is related to.
499 499 p1 revision
500 500 Revision number of 1st parent. -1 if no 1st parent.
501 501 p2 revision
502 502 Revision number of 2nd parent. -1 if no 2nd parent.
503 503 node
504 504 Binary node value for this revision number.
505 505
506 506 Negative values should index off the end of the sequence. ``-1``
507 507 should return the null revision. ``-2`` should return the most
508 508 recent revision.
509 509 """
510 510
511 511 def __contains__(rev):
512 512 """Whether a revision number exists."""
513 513
514 514 def insert(self, i, entry):
515 515 """Add an item to the index at specific revision."""
516 516
517 517
518 518 class ifileindex(interfaceutil.Interface):
519 519 """Storage interface for index data of a single file.
520 520
521 521 File storage data is divided into index metadata and data storage.
522 522 This interface defines the index portion of the interface.
523 523
524 524 The index logically consists of:
525 525
526 526 * A mapping between revision numbers and nodes.
527 527 * DAG data (storing and querying the relationship between nodes).
528 528 * Metadata to facilitate storage.
529 529 """
530 530
531 531 nullid = interfaceutil.Attribute(
532 532 """node for the null revision for use as delta base."""
533 533 )
534 534
535 535 def __len__():
536 536 """Obtain the number of revisions stored for this file."""
537 537
538 538 def __iter__():
539 539 """Iterate over revision numbers for this file."""
540 540
541 541 def hasnode(node):
542 542 """Returns a bool indicating if a node is known to this store.
543 543
544 544 Implementations must only return True for full, binary node values:
545 545 hex nodes, revision numbers, and partial node matches must be
546 546 rejected.
547 547
548 548 The null node is never present.
549 549 """
550 550
551 551 def revs(start=0, stop=None):
552 552 """Iterate over revision numbers for this file, with control."""
553 553
554 554 def parents(node):
555 555 """Returns a 2-tuple of parent nodes for a revision.
556 556
557 557 Values will be ``nullid`` if the parent is empty.
558 558 """
559 559
560 560 def parentrevs(rev):
561 561 """Like parents() but operates on revision numbers."""
562 562
563 563 def rev(node):
564 564 """Obtain the revision number given a node.
565 565
566 566 Raises ``error.LookupError`` if the node is not known.
567 567 """
568 568
569 569 def node(rev):
570 570 """Obtain the node value given a revision number.
571 571
572 572 Raises ``IndexError`` if the revision is not known.
573 573 """
574 574
575 575 def lookup(node):
576 576 """Attempt to resolve a value to a node.
577 577
578 578 Value can be a binary node, hex node, revision number, or a string
579 579 that can be converted to an integer.
580 580
581 581 Raises ``error.LookupError`` if a node could not be resolved.
582 582 """
583 583
584 584 def linkrev(rev):
585 585 """Obtain the changeset revision number a revision is linked to."""
586 586
587 587 def iscensored(rev):
588 588 """Return whether a revision's content has been censored."""
589 589
590 590 def commonancestorsheads(node1, node2):
591 591 """Obtain an iterable of nodes containing heads of common ancestors.
592 592
593 593 See ``ancestor.commonancestorsheads()``.
594 594 """
595 595
596 596 def descendants(revs):
597 597 """Obtain descendant revision numbers for a set of revision numbers.
598 598
599 599 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
600 600 """
601 601
602 602 def heads(start=None, stop=None):
603 603 """Obtain a list of nodes that are DAG heads, with control.
604 604
605 605 The set of revisions examined can be limited by specifying
606 606 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
607 607 iterable of nodes. DAG traversal starts at earlier revision
608 608 ``start`` and iterates forward until any node in ``stop`` is
609 609 encountered.
610 610 """
611 611
612 612 def children(node):
613 613 """Obtain nodes that are children of a node.
614 614
615 615 Returns a list of nodes.
616 616 """
617 617
618 618
619 619 class ifiledata(interfaceutil.Interface):
620 620 """Storage interface for data storage of a specific file.
621 621
622 622 This complements ``ifileindex`` and provides an interface for accessing
623 623 data for a tracked file.
624 624 """
625 625
626 626 def size(rev):
627 627 """Obtain the fulltext size of file data.
628 628
629 629 Any metadata is excluded from size measurements.
630 630 """
631 631
632 632 def revision(node, raw=False):
633 633 """Obtain fulltext data for a node.
634 634
635 635 By default, any storage transformations are applied before the data
636 636 is returned. If ``raw`` is True, non-raw storage transformations
637 637 are not applied.
638 638
639 639 The fulltext data may contain a header containing metadata. Most
640 640 consumers should use ``read()`` to obtain the actual file data.
641 641 """
642 642
643 643 def rawdata(node):
644 644 """Obtain raw data for a node."""
645 645
646 646 def read(node):
647 647 """Resolve file fulltext data.
648 648
649 649 This is similar to ``revision()`` except any metadata in the data
650 650 headers is stripped.
651 651 """
652 652
653 653 def renamed(node):
654 654 """Obtain copy metadata for a node.
655 655
656 656 Returns ``False`` if no copy metadata is stored or a 2-tuple of
657 657 (path, node) from which this revision was copied.
658 658 """
659 659
660 660 def cmp(node, fulltext):
661 661 """Compare fulltext to another revision.
662 662
663 663 Returns True if the fulltext is different from what is stored.
664 664
665 665 This takes copy metadata into account.
666 666
667 667 TODO better document the copy metadata and censoring logic.
668 668 """
669 669
670 670 def emitrevisions(
671 671 nodes,
672 672 nodesorder=None,
673 673 revisiondata=False,
674 674 assumehaveparentrevisions=False,
675 675 deltamode=CG_DELTAMODE_STD,
676 676 ):
677 677 """Produce ``irevisiondelta`` for revisions.
678 678
679 679 Given an iterable of nodes, emits objects conforming to the
680 680 ``irevisiondelta`` interface that describe revisions in storage.
681 681
682 682 This method is a generator.
683 683
684 684 The input nodes may be unordered. Implementations must ensure that a
685 685 node's parents are emitted before the node itself. Transitively, this
686 686 means that a node may only be emitted once all its ancestors in
687 687 ``nodes`` have also been emitted.
688 688
689 689 By default, emits "index" data (the ``node``, ``p1node``, and
690 690 ``p2node`` attributes). If ``revisiondata`` is set, revision data
691 691 will also be present on the emitted objects.
692 692
693 693 With default argument values, implementations can choose to emit
694 694 either fulltext revision data or a delta. When emitting deltas,
695 695 implementations must consider whether the delta's base revision
696 696 fulltext is available to the receiver.
697 697
698 698 The base revision fulltext is guaranteed to be available if any of
699 699 the following are met:
700 700
701 701 * Its fulltext revision was emitted by this method call.
702 702 * A delta for that revision was emitted by this method call.
703 703 * ``assumehaveparentrevisions`` is True and the base revision is a
704 704 parent of the node.
705 705
706 706 ``nodesorder`` can be used to control the order that revisions are
707 707 emitted. By default, revisions can be reordered as long as they are
708 708 in DAG topological order (see above). If the value is ``nodes``,
709 709 the iteration order from ``nodes`` should be used. If the value is
710 710 ``storage``, then the native order from the backing storage layer
711 711 is used. (Not all storage layers will have strong ordering, and behavior
712 712 of this mode is storage-dependent.) ``nodes`` ordering can force
713 713 revisions to be emitted before their ancestors, so consumers should
714 714 use it with care.
715 715
716 716 The ``linknode`` attribute on the returned ``irevisiondelta`` may not
717 717 be set and it is the caller's responsibility to resolve it, if needed.
718 718
719 719 If ``deltamode`` is CG_DELTAMODE_PREV and revision data is requested,
720 720 all revision data should be emitted as deltas against the revision
721 721 emitted just prior. The initial revision should be a delta against its
722 722 1st parent.
723 723 """
724 724
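# Hedged consumption sketch for ``emitrevisions`` above; ``fl`` is assumed
# to provide ``ifiledata`` and ``basetexts`` to hold already-resolved
# fulltexts, relying on the availability guarantees in the docstring.
from mercurial import mdiff

for rd in fl.emitrevisions(nodes, revisiondata=True):
    if rd.revision is not None:
        fulltext = rd.revision  # a fulltext was emitted directly
    else:
        # ``rd.delta`` is bdiff data against ``rd.basenode``
        fulltext = mdiff.patch(basetexts[rd.basenode], rd.delta)
    basetexts[rd.node] = fulltext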
725 725
726 726 class ifilemutation(interfaceutil.Interface):
727 727 """Storage interface for mutation events of a tracked file."""
728 728
729 729 def add(filedata, meta, transaction, linkrev, p1, p2):
730 730 """Add a new revision to the store.
731 731
732 732 Takes file data, dictionary of metadata, a transaction, linkrev,
733 733 and parent nodes.
734 734
735 735 Returns the node that was added.
736 736
737 737 May no-op if a revision matching the supplied data is already stored.
738 738 """
739 739
740 740 def addrevision(
741 741 revisiondata,
742 742 transaction,
743 743 linkrev,
744 744 p1,
745 745 p2,
746 746 node=None,
747 747 flags=0,
748 748 cachedelta=None,
749 749 ):
750 750 """Add a new revision to the store and return its number.
751 751
752 752 This is similar to ``add()`` except it operates at a lower level.
753 753
754 754 The data passed in already contains a metadata header, if any.
755 755
756 756 ``node`` and ``flags`` can be used to define the expected node and
757 757 the flags to use with storage. ``flags`` is a bitwise value composed
758 758 of the various ``REVISION_FLAG_*`` constants.
759 759
760 760 ``add()`` is usually called when adding files from e.g. the working
761 761 directory. ``addrevision()`` is often called by ``add()`` and for
762 762 scenarios where revision data has already been computed, such as when
763 763 applying raw data from a peer repo.
764 764 """
765 765
766 766 def addgroup(
767 767 deltas,
768 768 linkmapper,
769 769 transaction,
alwayscache=False,
770 770 addrevisioncb=None,
771 771 duplicaterevisioncb=None,
772 772 maybemissingparents=False,
773 773 ):
774 774 """Process a series of deltas for storage.
775 775
776 776 ``deltas`` is an iterable of 7-tuples of
777 777 (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
778 778 to add.
779 779
780 780 The ``delta`` field contains ``mpatch`` data to apply to a base
781 781 revision, identified by ``deltabase``. The base node can be
782 782 ``nullid``, in which case the header from the delta can be ignored
783 783 and the delta used as the fulltext.
784 784
785 785 ``alwayscache`` instructs the lower layers to cache the content of the
786 786 newly added revision, even if it needs to be explicitly computed.
787 787 This used to be the default when ``addrevisioncb`` was provided up to
788 788 Mercurial 5.8.
789 789
790 790 ``addrevisioncb`` should be called for each new rev as it is committed.
791 791 ``duplicaterevisioncb`` should be called for all revs with a
792 792 pre-existing node.
793 793
794 794 ``maybemissingparents`` is a bool indicating whether the incoming
795 795 data may reference parents/ancestor revisions that aren't present.
796 796 This flag is set when receiving data into a "shallow" store that
797 797 doesn't hold all history.
798 798
799 799 Returns a list of nodes that were processed. A node will be in the list
800 800 even if it existed in the store previously.
801 801 """
802 802
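# Illustrative shape of one ``deltas`` entry for ``addgroup`` above: a
# 7-tuple (node, p1, p2, linknode, deltabase, delta, flags). Passing
# ``nullid`` as ``deltabase`` lets the delta stand in for the fulltext,
# as documented. All names here are assumptions.
deltas = iter([
    (node, p1, p2, linknode, store.nullid, fulltext, 0),
])
store.addgroup(deltas, linkmapper, tr)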
803 803 def censorrevision(tr, node, tombstone=b''):
804 804 """Remove the content of a single revision.
805 805
806 806 The specified ``node`` will have its content purged from storage.
807 807 Future attempts to access the revision data for this node will
808 808 result in failure.
809 809
810 810 A ``tombstone`` message can optionally be stored. This message may be
811 811 displayed to users when they attempt to access the missing revision
812 812 data.
813 813
814 814 Storage backends may have stored deltas against the previous content
815 815 in this revision. As part of censoring a revision, these storage
816 816 backends are expected to rewrite any internally stored deltas such
817 817 that they no longer reference the deleted content.
818 818 """
819 819
820 820 def getstrippoint(minlink):
821 821 """Find the minimum revision that must be stripped to strip a linkrev.
822 822
823 823 Returns a 2-tuple containing the minimum revision number and a set
824 824 of all revisions numbers that would be broken by this strip.
825 825
826 826 TODO this is highly revlog centric and should be abstracted into
827 827 a higher-level deletion API. ``repair.strip()`` relies on this.
828 828 """
829 829
830 830 def strip(minlink, transaction):
831 831 """Remove storage of items starting at a linkrev.
832 832
833 833 This uses ``getstrippoint()`` to determine the first node to remove.
834 834 Then it effectively truncates storage for all revisions after that.
835 835
836 836 TODO this is highly revlog centric and should be abstracted into a
837 837 higher-level deletion API.
838 838 """
839 839
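# Combined usage sketch for the two methods above: preview what a strip
# at ``minlink`` would remove, then perform it inside a transaction.
strippoint, broken = fl.getstrippoint(minlink)
if strippoint < len(fl):
    fl.strip(minlink, tr)  # truncates storage from ``strippoint`` on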
840 840
841 841 class ifilestorage(ifileindex, ifiledata, ifilemutation):
842 842 """Complete storage interface for a single tracked file."""
843 843
844 844 def files():
845 845 """Obtain paths that are backing storage for this file.
846 846
847 847 TODO this is used heavily by verify code and there should probably
848 848 be a better API for that.
849 849 """
850 850
851 851 def storageinfo(
852 852 exclusivefiles=False,
853 853 sharedfiles=False,
854 854 revisionscount=False,
855 855 trackedsize=False,
856 856 storedsize=False,
857 857 ):
858 858 """Obtain information about storage for this file's data.
859 859
860 860 Returns a dict describing storage for this tracked path. The keys
861 861 in the dict map to arguments of the same name. The arguments are bools
862 862 indicating whether to calculate and obtain that data.
863 863
864 864 exclusivefiles
865 865 Iterable of (vfs, path) describing files that are exclusively
866 866 used to back storage for this tracked path.
867 867
868 868 sharedfiles
869 869 Iterable of (vfs, path) describing files that are used to back
870 870 storage for this tracked path. Those files may also provide storage
871 871 for other stored entities.
872 872
873 873 revisionscount
874 874 Number of revisions available for retrieval.
875 875
876 876 trackedsize
877 877 Total size in bytes of all tracked revisions. This is a sum of the
878 878 length of the fulltext of all revisions.
879 879
880 880 storedsize
881 881 Total size in bytes used to store data for all tracked revisions.
882 882 This is commonly less than ``trackedsize`` due to internal usage
883 883 of deltas rather than fulltext revisions.
884 884
885 885 Not all storage backends may support all queries or have a reasonable
886 886 value to use. In that case, the value should be set to ``None`` and
887 887 callers are expected to handle this special value.
888 888 """
889 889
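# Hedged example call for ``storageinfo`` above: request only the data
# actually needed; per the docstring, unsupported queries come back as
# ``None`` and must be handled by the caller. Bytes keys are assumed.
info = fl.storageinfo(revisionscount=True, storedsize=True)
if info[b'storedsize'] is not None:
    total_bytes += info[b'storedsize']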
890 890 def verifyintegrity(state):
891 891 """Verifies the integrity of file storage.
892 892
893 893 ``state`` is a dict holding state of the verifier process. It can be
894 894 used to communicate data between invocations of multiple storage
895 895 primitives.
896 896
897 897 If individual revisions cannot have their revision content resolved,
898 898 the method is expected to set the ``skipread`` key to a set of nodes
899 899 that encountered problems. If set, the method can also add the node(s)
900 900 to ``safe_renamed`` in order to indicate nodes whose rename checks
901 901 can still be performed with the currently accessible data.
902 902
903 903 The method yields objects conforming to the ``iverifyproblem``
904 904 interface.
905 905 """
906 906
907 907
908 908 class idirs(interfaceutil.Interface):
909 909 """Interface representing a collection of directories from paths.
910 910
911 911 This interface is essentially a derived data structure representing
912 912 directories from a collection of paths.
913 913 """
914 914
915 915 def addpath(path):
916 916 """Add a path to the collection.
917 917
918 918 All directories in the path will be added to the collection.
919 919 """
920 920
921 921 def delpath(path):
922 922 """Remove a path from the collection.
923 923
924 924 If the removal was the last path in a particular directory, the
925 925 directory is removed from the collection.
926 926 """
927 927
928 928 def __iter__():
929 929 """Iterate over the directories in this collection of paths."""
930 930
931 931 def __contains__(path):
932 932 """Whether a specific directory is in this collection."""
933 933
934 934
935 935 class imanifestdict(interfaceutil.Interface):
936 936 """Interface representing a manifest data structure.
937 937
938 938 A manifest is effectively a dict mapping paths to entries. Each entry
939 939 consists of a binary node and extra flags affecting that entry.
940 940 """
941 941
942 942 def __getitem__(path):
943 943 """Returns the binary node value for a path in the manifest.
944 944
945 945 Raises ``KeyError`` if the path does not exist in the manifest.
946 946
947 947 Equivalent to ``self.find(path)[0]``.
948 948 """
949 949
950 950 def find(path):
951 951 """Returns the entry for a path in the manifest.
952 952
953 953 Returns a 2-tuple of (node, flags).
954 954
955 955 Raises ``KeyError`` if the path does not exist in the manifest.
956 956 """
957 957
958 958 def __len__():
959 959 """Return the number of entries in the manifest."""
960 960
961 961 def __nonzero__():
962 962 """Returns True if the manifest has entries, False otherwise."""
963 963
964 964 __bool__ = __nonzero__
965 965
966 966 def __setitem__(path, node):
967 967 """Define the node value for a path in the manifest.
968 968
969 969 If the path is already in the manifest, its flags will be copied to
970 970 the new entry.
971 971 """
972 972
973 973 def __contains__(path):
974 974 """Whether a path exists in the manifest."""
975 975
976 976 def __delitem__(path):
977 977 """Remove a path from the manifest.
978 978
979 979 Raises ``KeyError`` if the path is not in the manifest.
980 980 """
981 981
982 982 def __iter__():
983 983 """Iterate over paths in the manifest."""
984 984
985 985 def iterkeys():
986 986 """Iterate over paths in the manifest."""
987 987
988 988 def keys():
989 989 """Obtain a list of paths in the manifest."""
990 990
991 991 def filesnotin(other, match=None):
992 992 """Obtain the set of paths in this manifest but not in another.
993 993
994 994 ``match`` is an optional matcher function to be applied to both
995 995 manifests.
996 996
997 997 Returns a set of paths.
998 998 """
999 999
1000 1000 def dirs():
1001 1001 """Returns an object implementing the ``idirs`` interface."""
1002 1002
1003 1003 def hasdir(dir):
1004 1004 """Returns a bool indicating if a directory is in this manifest."""
1005 1005
1006 1006 def walk(match):
1007 1007 """Generator of paths in manifest satisfying a matcher.
1008 1008
1009 1009 If the matcher has explicit files listed and they don't exist in
1010 1010 the manifest, ``match.bad()`` is called for each missing file.
1011 1011 """
1012 1012
1013 1013 def diff(other, match=None, clean=False):
1014 1014 """Find differences between this manifest and another.
1015 1015
1016 1016 This manifest is compared to ``other``.
1017 1017
1018 1018 If ``match`` is provided, the two manifests are filtered against this
1019 1019 matcher and only entries satisfying the matcher are compared.
1020 1020
1021 1021 If ``clean`` is True, unchanged files are included in the returned
1022 1022 object.
1023 1023
1024 1024 Returns a dict with paths as keys and values of 2-tuples of 2-tuples of
1025 1025 the form ``((node1, flag1), (node2, flag2))`` where ``(node1, flag1)``
1026 1026 represents the node and flags for this manifest and ``(node2, flag2)``
1027 1027 are the same for the other manifest.
1028 1028 """
1029 1029
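# Sketch of walking ``diff()`` output in the shape documented above; the
# empty-entry convention (``None`` node) for one-sided paths is an
# assumption of this sketch.
for path, ((n1, fl1), (n2, fl2)) in m1.diff(m2).items():
    if n1 is None:
        pass  # path present only in the other manifest
    elif n2 is None:
        pass  # path present only in this manifest
    elif n1 != n2 or fl1 != fl2:
        pass  # entry changed between the two manifests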
1030 1030 def setflag(path, flag):
1031 1031 """Set the flag value for a given path.
1032 1032
1033 1033 Raises ``KeyError`` if the path is not already in the manifest.
1034 1034 """
1035 1035
1036 1036 def get(path, default=None):
1037 1037 """Obtain the node value for a path or a default value if missing."""
1038 1038
1039 1039 def flags(path):
1040 1040 """Return the flags value for a path (default: empty bytestring)."""
1041 1041
1042 1042 def copy():
1043 1043 """Return a copy of this manifest."""
1044 1044
1045 1045 def items():
1046 1046 """Returns an iterable of (path, node) for items in this manifest."""
1047 1047
1048 1048 def iteritems():
1049 1049 """Identical to items()."""
1050 1050
1051 1051 def iterentries():
1052 1052 """Returns an iterable of (path, node, flags) for this manifest.
1053 1053
1054 1054 Similar to ``iteritems()`` except items are a 3-tuple and include
1055 1055 flags.
1056 1056 """
1057 1057
1058 1058 def text():
1059 1059 """Obtain the raw data representation for this manifest.
1060 1060
1061 1061 Result is used to create a manifest revision.
1062 1062 """
1063 1063
1064 1064 def fastdelta(base, changes):
1065 1065 """Obtain a delta between this manifest and another given changes.
1066 1066
1067 1067 ``base`` is the raw data representation of another manifest.
1068 1068
1069 1069 ``changes`` is an iterable of ``(path, to_delete)``.
1070 1070
1071 1071 Returns a 2-tuple containing ``bytearray(self.text())`` and the
1072 1072 delta between ``base`` and this manifest.
1073 1073
1074 1074 If this manifest implementation can't support ``fastdelta()``,
1075 1075 raise ``mercurial.manifest.FastdeltaUnavailable``.
1076 1076 """
1077 1077
1078 1078
1079 1079 class imanifestrevisionbase(interfaceutil.Interface):
1080 1080 """Base interface representing a single revision of a manifest.
1081 1081
1082 1082 Should not be used as a primary interface: should always be inherited
1083 1083 as part of a larger interface.
1084 1084 """
1085 1085
1086 1086 def copy():
1087 1087 """Obtain a copy of this manifest instance.
1088 1088
1089 1089 Returns an object conforming to the ``imanifestrevisionwritable``
1090 1090 interface. The instance will be associated with the same
1091 1091 ``imanifestlog`` collection as this instance.
1092 1092 """
1093 1093
1094 1094 def read():
1095 1095 """Obtain the parsed manifest data structure.
1096 1096
1097 1097 The returned object conforms to the ``imanifestdict`` interface.
1098 1098 """
1099 1099
1100 1100
1101 1101 class imanifestrevisionstored(imanifestrevisionbase):
1102 1102 """Interface representing a manifest revision committed to storage."""
1103 1103
1104 1104 def node():
1105 1105 """The binary node for this manifest."""
1106 1106
1107 1107 parents = interfaceutil.Attribute(
1108 1108 """List of binary nodes that are parents for this manifest revision."""
1109 1109 )
1110 1110
1111 1111 def readdelta(shallow=False):
1112 1112 """Obtain the manifest data structure representing changes from parent.
1113 1113
1114 1114 This manifest is compared to its 1st parent. A new manifest representing
1115 1115 those differences is constructed.
1116 1116
1117 1117 The returned object conforms to the ``imanifestdict`` interface.
1118 1118 """
1119 1119
1120 1120 def readfast(shallow=False):
1121 1121 """Calls either ``read()`` or ``readdelta()``.
1122 1122
1123 1123 The faster of the two options is called.
1124 1124 """
1125 1125
1126 1126 def find(key):
1127 1127 """Calls self.read().find(key)``.
1128 1128
1129 1129 Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
1130 1130 """
1131 1131
1132 1132
1133 1133 class imanifestrevisionwritable(imanifestrevisionbase):
1134 1134 """Interface representing a manifest revision that can be committed."""
1135 1135
1136 1136 def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
1137 1137 """Add this revision to storage.
1138 1138
1139 1139 Takes a transaction object, the changeset revision number it will
1140 1140 be associated with, its parent nodes, and lists of added and
1141 1141 removed paths.
1142 1142
1143 1143 If match is provided, storage can choose not to inspect or write out
1144 1144 items that do not match. Storage is still required to be able to provide
1145 1145 the full manifest in the future for any directories written (these
1146 1146 manifests should not be "narrowed on disk").
1147 1147
1148 1148 Returns the binary node of the created revision.
1149 1149 """
1150 1150
1151 1151
1152 1152 class imanifeststorage(interfaceutil.Interface):
1153 1153 """Storage interface for manifest data."""
1154 1154
1155 1155 nodeconstants = interfaceutil.Attribute(
1156 1156 """nodeconstants used by the current repository."""
1157 1157 )
1158 1158
1159 1159 tree = interfaceutil.Attribute(
1160 1160 """The path to the directory this manifest tracks.
1161 1161
1162 1162 The empty bytestring represents the root manifest.
1163 1163 """
1164 1164 )
1165 1165
1166 1166 index = interfaceutil.Attribute(
1167 1167 """An ``ifilerevisionssequence`` instance."""
1168 1168 )
1169 1169
1170 1170 indexfile = interfaceutil.Attribute(
1171 1171 """Path of revlog index file.
1172 1172
1173 1173 TODO this is revlog specific and should not be exposed.
1174 1174 """
1175 1175 )
1176 1176
1177 1177 opener = interfaceutil.Attribute(
1178 1178 """VFS opener to use to access underlying files used for storage.
1179 1179
1180 1180 TODO this is revlog specific and should not be exposed.
1181 1181 """
1182 1182 )
1183 1183
1184 1184 version = interfaceutil.Attribute(
1185 1185 """Revlog version number.
1186 1186
1187 1187 TODO this is revlog specific and should not be exposed.
1188 1188 """
1189 1189 )
1190 1190
1191 1191 _generaldelta = interfaceutil.Attribute(
1192 1192 """Whether generaldelta storage is being used.
1193 1193
1194 1194 TODO this is revlog specific and should not be exposed.
1195 1195 """
1196 1196 )
1197 1197
1198 1198 fulltextcache = interfaceutil.Attribute(
1199 1199 """Dict with cache of fulltexts.
1200 1200
1201 1201 TODO this doesn't feel appropriate for the storage interface.
1202 1202 """
1203 1203 )
1204 1204
1205 1205 def __len__():
1206 1206 """Obtain the number of revisions stored for this manifest."""
1207 1207
1208 1208 def __iter__():
1209 1209 """Iterate over revision numbers for this manifest."""
1210 1210
1211 1211 def rev(node):
1212 1212 """Obtain the revision number given a binary node.
1213 1213
1214 1214 Raises ``error.LookupError`` if the node is not known.
1215 1215 """
1216 1216
1217 1217 def node(rev):
1218 1218 """Obtain the node value given a revision number.
1219 1219
1220 1220 Raises ``error.LookupError`` if the revision is not known.
1221 1221 """
1222 1222
1223 1223 def lookup(value):
1224 1224 """Attempt to resolve a value to a node.
1225 1225
1226 1226 Value can be a binary node, hex node, revision number, or a bytes
1227 1227 that can be converted to an integer.
1228 1228
1229 1229 Raises ``error.LookupError`` if a node could not be resolved.
1230 1230 """
1231 1231
1232 1232 def parents(node):
1233 1233 """Returns a 2-tuple of parent nodes for a node.
1234 1234
1235 1235 Values will be ``nullid`` if the parent is empty.
1236 1236 """
1237 1237
1238 1238 def parentrevs(rev):
1239 1239 """Like parents() but operates on revision numbers."""
1240 1240
1241 1241 def linkrev(rev):
1242 1242 """Obtain the changeset revision number a revision is linked to."""
1243 1243
1244 1244 def revision(node, _df=None, raw=False):
1245 1245 """Obtain fulltext data for a node."""
1246 1246
1247 1247 def rawdata(node, _df=None):
1248 1248 """Obtain raw data for a node."""
1249 1249
1250 1250 def revdiff(rev1, rev2):
1251 1251 """Obtain a delta between two revision numbers.
1252 1252
1253 1253 The returned data is the result of ``bdiff.bdiff()`` on the raw
1254 1254 revision data.
1255 1255 """
1256 1256
1257 1257 def cmp(node, fulltext):
1258 1258 """Compare fulltext to another revision.
1259 1259
1260 1260 Returns True if the fulltext is different from what is stored.
1261 1261 """
1262 1262
1263 1263 def emitrevisions(
1264 1264 nodes,
1265 1265 nodesorder=None,
1266 1266 revisiondata=False,
1267 1267 assumehaveparentrevisions=False,
1268 1268 ):
1269 1269 """Produce ``irevisiondelta`` describing revisions.
1270 1270
1271 1271 See the documentation for ``ifiledata`` for more.
1272 1272 """
1273 1273
1274 1274 def addgroup(
1275 1275 deltas,
1276 1276 linkmapper,
1277 1277 transaction,
1278 1278 addrevisioncb=None,
1279 1279 duplicaterevisioncb=None,
1280 1280 ):
1281 1281 """Process a series of deltas for storage.
1282 1282
1283 1283 See the documentation in ``ifilemutation`` for more.
1284 1284 """
1285 1285
1286 1286 def rawsize(rev):
1287 1287 """Obtain the size of tracked data.
1288 1288
1289 1289 Is equivalent to ``len(m.rawdata(node))``.
1290 1290
1291 1291 TODO this method is only used by upgrade code and may be removed.
1292 1292 """
1293 1293
1294 1294 def getstrippoint(minlink):
1295 1295 """Find minimum revision that must be stripped to strip a linkrev.
1296 1296
1297 1297 See the documentation in ``ifilemutation`` for more.
1298 1298 """
1299 1299
1300 1300 def strip(minlink, transaction):
1301 1301 """Remove storage of items starting at a linkrev.
1302 1302
1303 1303 See the documentation in ``ifilemutation`` for more.
1304 1304 """
1305 1305
1306 1306 def checksize():
1307 1307 """Obtain the expected sizes of backing files.
1308 1308
1309 1309 TODO this is used by verify and it should not be part of the interface.
1310 1310 """
1311 1311
1312 1312 def files():
1313 1313 """Obtain paths that are backing storage for this manifest.
1314 1314
1315 1315 TODO this is used by verify and there should probably be a better API
1316 1316 for this functionality.
1317 1317 """
1318 1318
1319 1319 def deltaparent(rev):
1320 1320 """Obtain the revision that a revision is delta'd against.
1321 1321
1322 1322 TODO delta encoding is an implementation detail of storage and should
1323 1323 not be exposed to the storage interface.
1324 1324 """
1325 1325
1326 1326 def clone(tr, dest, **kwargs):
1327 1327 """Clone this instance to another."""
1328 1328
1329 1329 def clearcaches(clear_persisted_data=False):
1330 1330 """Clear any caches associated with this instance."""
1331 1331
1332 1332 def dirlog(d):
1333 1333 """Obtain a manifest storage instance for a tree."""
1334 1334
1335 1335 def add(
1336 1336 m, transaction, link, p1, p2, added, removed, readtree=None, match=None
1337 1337 ):
1338 1338 """Add a revision to storage.
1339 1339
1340 1340 ``m`` is an object conforming to ``imanifestdict``.
1341 1341
1342 1342 ``link`` is the linkrev revision number.
1343 1343
1344 1344 ``p1`` and ``p2`` are the parent revision numbers.
1345 1345
1346 1346 ``added`` and ``removed`` are iterables of added and removed paths,
1347 1347 respectively.
1348 1348
1349 1349 ``readtree`` is a function that can be used to read the child tree(s)
1350 1350 when recursively writing the full tree structure when using
1351 1351 treemanifests.
1352 1352
1353 1353 ``match`` is a matcher that can be used to hint to storage that not all
1354 1354 paths must be inspected; this is an optimization and can be safely
1355 1355 ignored. Note that the storage must still be able to reproduce a full
1356 1356 manifest including files that did not match.
1357 1357 """
1358 1358
1359 1359 def storageinfo(
1360 1360 exclusivefiles=False,
1361 1361 sharedfiles=False,
1362 1362 revisionscount=False,
1363 1363 trackedsize=False,
1364 1364 storedsize=False,
1365 1365 ):
1366 1366 """Obtain information about storage for this manifest's data.
1367 1367
1368 1368 See ``ifilestorage.storageinfo()`` for a description of this method.
1369 1369 This one behaves the same way, except for manifest data.
1370 1370 """
1371 1371
1372 1372
1373 1373 class imanifestlog(interfaceutil.Interface):
1374 1374 """Interface representing a collection of manifest snapshots.
1375 1375
1376 1376 Represents the root manifest in a repository.
1377 1377
1378 1378 Also serves as a means to access nested tree manifests and to cache
1379 1379 tree manifests.
1380 1380 """
1381 1381
1382 1382 nodeconstants = interfaceutil.Attribute(
1383 1383 """nodeconstants used by the current repository."""
1384 1384 )
1385 1385
1386 1386 def __getitem__(node):
1387 1387 """Obtain a manifest instance for a given binary node.
1388 1388
1389 1389 Equivalent to calling ``self.get('', node)``.
1390 1390
1391 1391 The returned object conforms to the ``imanifestrevisionstored``
1392 1392 interface.
1393 1393 """
1394 1394
1395 1395 def get(tree, node, verify=True):
1396 1396 """Retrieve the manifest instance for a given directory and binary node.
1397 1397
1398 1398 ``node`` always refers to the node of the root manifest (which will be
1399 1399 the only manifest if flat manifests are being used).
1400 1400
1401 1401 If ``tree`` is the empty string, the root manifest is returned.
1402 1402 Otherwise the manifest for the specified directory will be returned
1403 1403 (requires tree manifests).
1404 1404
1405 1405 If ``verify`` is True, ``LookupError`` is raised if the node is not
1406 1406 known.
1407 1407
1408 1408 The returned object conforms to the ``imanifestrevisionstored``
1409 1409 interface.
1410 1410 """
1411 1411
1412 1412 def getstorage(tree):
1413 1413 """Retrieve an interface to storage for a particular tree.
1414 1414
1415 1415 If ``tree`` is the empty bytestring, storage for the root manifest will
1416 1416 be returned. Otherwise storage for a tree manifest is returned.
1417 1417
1418 1418 TODO formalize interface for returned object.
1419 1419 """
1420 1420
1421 1421 def clearcaches():
1422 1422 """Clear caches associated with this collection."""
1423 1423
1424 1424 def rev(node):
1425 1425 """Obtain the revision number for a binary node.
1426 1426
1427 1427 Raises ``error.LookupError`` if the node is not known.
1428 1428 """
1429 1429
1430 1430 def update_caches(transaction):
1431 1431 """update whatever cache are relevant for the used storage."""
1432 1432
1433 1433
1434 1434 class ilocalrepositoryfilestorage(interfaceutil.Interface):
1435 1435 """Local repository sub-interface providing access to tracked file storage.
1436 1436
1437 1437 This interface defines how a repository accesses storage for a single
1438 1438 tracked file path.
1439 1439 """
1440 1440
1441 1441 def file(f):
1442 1442 """Obtain a filelog for a tracked path.
1443 1443
1444 1444 The returned type conforms to the ``ifilestorage`` interface.
1445 1445 """
1446 1446
1447 1447
1448 1448 class ilocalrepositorymain(interfaceutil.Interface):
1449 1449 """Main interface for local repositories.
1450 1450
1451 1451 This currently captures the reality of things - not how things should be.
1452 1452 """
1453 1453
1454 1454 nodeconstants = interfaceutil.Attribute(
1455 1455 """Constant nodes matching the hash function used by the repository."""
1456 1456 )
1457 1457 nullid = interfaceutil.Attribute(
1458 1458 """null revision for the hash function used by the repository."""
1459 1459 )
1460 1460
1461 1461 supportedformats = interfaceutil.Attribute(
1462 1462 """Set of requirements that apply to stream clone.
1463 1463
1464 1464 This is actually a class attribute and is shared among all instances.
1465 1465 """
1466 1466 )
1467 1467
1468 1468 supported = interfaceutil.Attribute(
1469 1469 """Set of requirements that this repo is capable of opening."""
1470 1470 )
1471 1471
1472 1472 requirements = interfaceutil.Attribute(
1473 1473 """Set of requirements this repo uses."""
1474 1474 )
1475 1475
1476 1476 features = interfaceutil.Attribute(
1477 1477 """Set of "features" this repository supports.
1478 1478
1479 1479 A "feature" is a loosely-defined term. It can refer to a feature
1480 1480 in the classical sense or can describe an implementation detail
1481 1481 of the repository. For example, a ``readonly`` feature may denote
1482 1482 the repository as read-only. Or a ``revlogfilestore`` feature may
1483 1483 denote that the repository is using revlogs for file storage.
1484 1484
1485 1485 The intent of features is to provide a machine-queryable mechanism
1486 1486 for repo consumers to test for various repository characteristics.
1487 1487
1488 1488 Features are similar to ``requirements``. The main difference is that
1489 1489 requirements are stored on-disk and represent requirements to open the
1490 1490 repository. Features are more run-time capabilities of the repository
1491 1491 and more granular capabilities (which may be derived from requirements).
1492 1492 """
1493 1493 )
1494 1494
1495 1495 filtername = interfaceutil.Attribute(
1496 1496 """Name of the repoview that is active on this repo."""
1497 1497 )
1498 1498
1499 1499 wvfs = interfaceutil.Attribute(
1500 1500 """VFS used to access the working directory."""
1501 1501 )
1502 1502
1503 1503 vfs = interfaceutil.Attribute(
1504 1504 """VFS rooted at the .hg directory.
1505 1505
1506 1506 Used to access repository data not in the store.
1507 1507 """
1508 1508 )
1509 1509
1510 1510 svfs = interfaceutil.Attribute(
1511 1511 """VFS rooted at the store.
1512 1512
1513 1513 Used to access repository data in the store. Typically .hg/store.
1514 1514 But can point elsewhere if the store is shared.
1515 1515 """
1516 1516 )
1517 1517
1518 1518 root = interfaceutil.Attribute(
1519 1519 """Path to the root of the working directory."""
1520 1520 )
1521 1521
1522 1522 path = interfaceutil.Attribute("""Path to the .hg directory.""")
1523 1523
1524 1524 origroot = interfaceutil.Attribute(
1525 1525 """The filesystem path that was used to construct the repo."""
1526 1526 )
1527 1527
1528 1528 auditor = interfaceutil.Attribute(
1529 1529 """A pathauditor for the working directory.
1530 1530
1531 1531 This checks if a path refers to a nested repository.
1532 1532
1533 1533 Operates on the filesystem.
1534 1534 """
1535 1535 )
1536 1536
1537 1537 nofsauditor = interfaceutil.Attribute(
1538 1538 """A pathauditor for the working directory.
1539 1539
1540 1540 This is like ``auditor`` except it doesn't do filesystem checks.
1541 1541 """
1542 1542 )
1543 1543
1544 1544 baseui = interfaceutil.Attribute(
1545 1545 """Original ui instance passed into constructor."""
1546 1546 )
1547 1547
1548 1548 ui = interfaceutil.Attribute("""Main ui instance for this instance.""")
1549 1549
1550 1550 sharedpath = interfaceutil.Attribute(
1551 1551 """Path to the .hg directory of the repo this repo was shared from."""
1552 1552 )
1553 1553
1554 1554 store = interfaceutil.Attribute("""A store instance.""")
1555 1555
1556 1556 spath = interfaceutil.Attribute("""Path to the store.""")
1557 1557
1558 1558 sjoin = interfaceutil.Attribute("""Alias to self.store.join.""")
1559 1559
1560 1560 cachevfs = interfaceutil.Attribute(
1561 1561 """A VFS used to access the cache directory.
1562 1562
1563 1563 Typically .hg/cache.
1564 1564 """
1565 1565 )
1566 1566
1567 1567 wcachevfs = interfaceutil.Attribute(
1568 1568 """A VFS used to access the cache directory dedicated to working copy
1569 1569
1570 1570 Typically .hg/wcache.
1571 1571 """
1572 1572 )
1573 1573
1574 1574 filteredrevcache = interfaceutil.Attribute(
1575 1575 """Holds sets of revisions to be filtered."""
1576 1576 )
1577 1577
1578 1578 names = interfaceutil.Attribute("""A ``namespaces`` instance.""")
1579 1579
1580 1580 filecopiesmode = interfaceutil.Attribute(
1581 1581 """The way files copies should be dealt with in this repo."""
1582 1582 )
1583 1583
1584 1584 def close():
1585 1585 """Close the handle on this repository."""
1586 1586
1587 1587 def peer():
1588 1588 """Obtain an object conforming to the ``peer`` interface."""
1589 1589
1590 1590 def unfiltered():
1591 1591 """Obtain an unfiltered/raw view of this repo."""
1592 1592
1593 1593 def filtered(name, visibilityexceptions=None):
1594 1594 """Obtain a named view of this repository."""
1595 1595
1596 1596 obsstore = interfaceutil.Attribute("""A store of obsolescence data.""")
1597 1597
1598 1598 changelog = interfaceutil.Attribute("""A handle on the changelog revlog.""")
1599 1599
1600 1600 manifestlog = interfaceutil.Attribute(
1601 1601 """An instance conforming to the ``imanifestlog`` interface.
1602 1602
1603 1603 Provides access to manifests for the repository.
1604 1604 """
1605 1605 )
1606 1606
1607 1607 dirstate = interfaceutil.Attribute("""Working directory state.""")
1608 1608
1609 1609 narrowpats = interfaceutil.Attribute(
1610 1610 """Matcher patterns for this repository's narrowspec."""
1611 1611 )
1612 1612
1613 1613 def narrowmatch(match=None, includeexact=False):
1614 1614 """Obtain a matcher for the narrowspec."""
1615 1615
1616 1616 def setnarrowpats(newincludes, newexcludes):
1617 1617 """Define the narrowspec for this repository."""
1618 1618
1619 1619 def __getitem__(changeid):
1620 1620 """Try to resolve a changectx."""
1621 1621
1622 1622 def __contains__(changeid):
1623 1623 """Whether a changeset exists."""
1624 1624
1625 1625 def __nonzero__():
1626 1626 """Always returns True."""
1627 1627 return True
1628 1628
1629 1629 __bool__ = __nonzero__
1630 1630
1631 1631 def __len__():
1632 1632 """Returns the number of changesets in the repo."""
1633 1633
1634 1634 def __iter__():
1635 1635 """Iterate over revisions in the changelog."""
1636 1636
1637 1637 def revs(expr, *args):
1638 1638 """Evaluate a revset.
1639 1639
1640 1640 Emits revisions.
1641 1641 """
1642 1642
1643 1643 def set(expr, *args):
1644 1644 """Evaluate a revset.
1645 1645
1646 1646 Emits changectx instances.
1647 1647 """
1648 1648
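# Hedged usage sketch for the two revset entry points above.
for rev in repo.revs(b'heads(all())'):
    node = repo.changelog.node(rev)   # revs() emits revision numbers
for ctx in repo.set(b'parents(%d)', rev):
    node = ctx.node()                 # set() emits changectx instances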
1649 1649 def anyrevs(specs, user=False, localalias=None):
1650 1650 """Find revisions matching one of the given revsets."""
1651 1651
1652 1652 def url():
1653 1653 """Returns a string representing the location of this repo."""
1654 1654
1655 1655 def hook(name, throw=False, **args):
1656 1656 """Call a hook."""
1657 1657
1658 1658 def tags():
1659 1659 """Return a mapping of tag to node."""
1660 1660
1661 1661 def tagtype(tagname):
1662 1662 """Return the type of a given tag."""
1663 1663
1664 1664 def tagslist():
1665 1665 """Return a list of tags ordered by revision."""
1666 1666
1667 1667 def nodetags(node):
1668 1668 """Return the tags associated with a node."""
1669 1669
1670 1670 def nodebookmarks(node):
1671 1671 """Return the list of bookmarks pointing to the specified node."""
1672 1672
1673 1673 def branchmap():
1674 1674 """Return a mapping of branch to heads in that branch."""
1675 1675
1676 1676 def revbranchcache():
1677 1677 pass
1678 1678
1679 1679 def register_changeset(rev, changelogrevision):
1680 1680 """Extension point for caches for new nodes.
1681 1681
1682 1682 Multiple consumers are expected to need parts of the changelogrevision,
1683 1683 so it is provided as an optimization to avoid duplicate lookups. A simple
1684 1684 cache would be fragile when other revisions are accessed, too."""
1685 1685 pass
1686 1686
1687 1687 def branchtip(branchtip, ignoremissing=False):
1688 1688 """Return the tip node for a given branch."""
1689 1689
1690 1690 def lookup(key):
1691 1691 """Resolve the node for a revision."""
1692 1692
1693 1693 def lookupbranch(key):
1694 1694 """Look up the branch name of the given revision or branch name."""
1695 1695
1696 1696 def known(nodes):
1697 1697 """Determine whether a series of nodes is known.
1698 1698
1699 1699 Returns a list of bools.
1700 1700 """
1701 1701
1702 1702 def local():
1703 1703 """Whether the repository is local."""
1704 1704 return True
1705 1705
1706 1706 def publishing():
1707 1707 """Whether the repository is a publishing repository."""
1708 1708
1709 1709 def cancopy():
1710 1710 pass
1711 1711
1712 1712 def shared():
1713 1713 """The type of shared repository or None."""
1714 1714
1715 1715 def wjoin(f, *insidef):
1716 1716 """Calls self.vfs.reljoin(self.root, f, *insidef)"""
1717 1717
1718 1718 def setparents(p1, p2):
1719 1719 """Set the parent nodes of the working directory."""
1720 1720
1721 1721 def filectx(path, changeid=None, fileid=None):
1722 1722 """Obtain a filectx for the given file revision."""
1723 1723
1724 1724 def getcwd():
1725 1725 """Obtain the current working directory from the dirstate."""
1726 1726
1727 1727 def pathto(f, cwd=None):
1728 1728 """Obtain the relative path to a file."""
1729 1729
1730 1730 def adddatafilter(name, fltr):
1731 1731 pass
1732 1732
1733 1733 def wread(filename):
1734 1734 """Read a file from wvfs, using data filters."""
1735 1735
1736 1736 def wwrite(filename, data, flags, backgroundclose=False, **kwargs):
1737 1737 """Write data to a file in the wvfs, using data filters."""
1738 1738
1739 1739 def wwritedata(filename, data):
1740 1740 """Resolve data for writing to the wvfs, using data filters."""
1741 1741
1742 1742 def currenttransaction():
1743 1743 """Obtain the current transaction instance or None."""
1744 1744
1745 1745 def transaction(desc, report=None):
1746 1746 """Open a new transaction to write to the repository."""
1747 1747
1748 1748 def undofiles():
1749 1749 """Returns a list of (vfs, path) for files to undo transactions."""
1750 1750
1751 1751 def recover():
1752 1752 """Roll back an interrupted transaction."""
1753 1753
1754 1754 def rollback(dryrun=False, force=False):
1755 1755 """Undo the last transaction.
1756 1756
1757 1757 DANGEROUS.
1758 1758 """
1759 1759
1760 1760 def updatecaches(tr=None, full=False):
1761 1761 """Warm repo caches."""
1762 1762
1763 1763 def invalidatecaches():
1764 1764 """Invalidate cached data due to the repository mutating."""
1765 1765
1766 1766 def invalidatevolatilesets():
1767 1767 pass
1768 1768
1769 1769 def invalidatedirstate():
1770 1770 """Invalidate the dirstate."""
1771 1771
1772 1772 def invalidate(clearfilecache=False):
1773 1773 pass
1774 1774
1775 1775 def invalidateall():
1776 1776 pass
1777 1777
1778 1778 def lock(wait=True):
1779 1779 """Lock the repository store and return a lock instance."""
1780 1780
1781 1781 def wlock(wait=True):
1782 1782 """Lock the non-store parts of the repository."""
1783 1783
1784 1784 def currentwlock():
1785 1785 """Return the wlock if it's held or None."""
1786 1786
1787 1787 def checkcommitpatterns(wctx, match, status, fail):
1788 1788 pass
1789 1789
1790 1790 def commit(
1791 1791 text=b'',
1792 1792 user=None,
1793 1793 date=None,
1794 1794 match=None,
1795 1795 force=False,
1796 1796 editor=False,
1797 1797 extra=None,
1798 1798 ):
1799 1799 """Add a new revision to the repository."""
1800 1800
1801 1801 def commitctx(ctx, error=False, origctx=None):
1802 1802 """Commit a commitctx instance to the repository."""
1803 1803
1804 1804 def destroying():
1805 1805 """Inform the repository that nodes are about to be destroyed."""
1806 1806
1807 1807 def destroyed():
1808 1808 """Inform the repository that nodes have been destroyed."""
1809 1809
1810 1810 def status(
1811 1811 node1=b'.',
1812 1812 node2=None,
1813 1813 match=None,
1814 1814 ignored=False,
1815 1815 clean=False,
1816 1816 unknown=False,
1817 1817 listsubrepos=False,
1818 1818 ):
1819 1819 """Convenience method to call repo[x].status()."""
1820 1820
1821 1821 def addpostdsstatus(ps):
1822 1822 pass
1823 1823
1824 1824 def postdsstatus():
1825 1825 pass
1826 1826
1827 1827 def clearpostdsstatus():
1828 1828 pass
1829 1829
1830 1830 def heads(start=None):
1831 1831 """Obtain list of nodes that are DAG heads."""
1832 1832
1833 1833 def branchheads(branch=None, start=None, closed=False):
1834 1834 pass
1835 1835
1836 1836 def branches(nodes):
1837 1837 pass
1838 1838
1839 1839 def between(pairs):
1840 1840 pass
1841 1841
1842 1842 def checkpush(pushop):
1843 1843 pass
1844 1844
1845 1845 prepushoutgoinghooks = interfaceutil.Attribute("""util.hooks instance.""")
1846 1846
1847 1847 def pushkey(namespace, key, old, new):
1848 1848 pass
1849 1849
1850 1850 def listkeys(namespace):
1851 1851 pass
1852 1852
1853 1853 def debugwireargs(one, two, three=None, four=None, five=None):
1854 1854 pass
1855 1855
1856 1856 def savecommitmessage(text):
1857 1857 pass
1858 1858
1859 def register_sidedata_computer(kind, category, keys, computer):
1859 def register_sidedata_computer(kind, category, keys, computer, flags):
1860 1860 pass
1861 1861
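# Hedged sketch of the extended registration in the diff above (the new
# trailing ``flags`` argument). The computer contract shown here is an
# assumption: it receives (repo, store, rev, existing sidedata) and
# returns the new sidedata plus the revision flag bits to add/remove.
def example_computer(repo, store, rev, sidedata):
    return sidedata, (0, 0)  # no flag bits added, none removed

repo.register_sidedata_computer(
    b'changelog',         # kind of store the computer applies to (assumed)
    b'example-category',  # sidedata category (illustrative)
    (b'example-key',),    # keys this computer produces (illustrative)
    example_computer,
    0,                    # new argument: flags this computer may affect
)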
1862 1862 def register_wanted_sidedata(category):
1863 1863 pass
1864 1864
1865 1865
1866 1866 class completelocalrepository(
1867 1867 ilocalrepositorymain, ilocalrepositoryfilestorage
1868 1868 ):
1869 1869 """Complete interface for a local repository."""
1870 1870
1871 1871
1872 1872 class iwireprotocolcommandcacher(interfaceutil.Interface):
1873 1873 """Represents a caching backend for wire protocol commands.
1874 1874
1875 1875 Wire protocol version 2 supports transparent caching of many commands.
1876 1876 To leverage this caching, servers can activate objects that cache
1877 1877 command responses. Objects handle both cache writing and reading.
1878 1878 This interface defines how that response caching mechanism works.
1879 1879
1880 1880 Wire protocol version 2 commands emit a series of objects that are
1881 1881 serialized and sent to the client. The caching layer exists between
1882 1882 the invocation of the command function and the sending of its output
1883 1883 objects to an output layer.
1884 1884
1885 1885 Instances of this interface represent a binding to a cache that
1886 1886 can serve a response (in place of calling a command function) and/or
1887 1887 write responses to a cache for subsequent use.
1888 1888
1889 1889 When a command request arrives, the following happens with regards
1890 1890 to this interface:
1891 1891
1892 1892 1. The server determines whether the command request is cacheable.
1893 1893 2. If it is, an instance of this interface is spawned.
1894 1894 3. The cacher is activated in a context manager (``__enter__`` is called).
1895 1895 4. A cache *key* for that request is derived. This will call the
1896 1896 instance's ``adjustcachekeystate()`` method so the derivation
1897 1897 can be influenced.
1898 1898 5. The cacher is informed of the derived cache key via a call to
1899 1899 ``setcachekey()``.
1900 1900 6. The cacher's ``lookup()`` method is called to test for presence of
1901 1901 the derived key in the cache.
1902 1902 7. If ``lookup()`` returns a hit, that cached result is used in place
1903 1903 of invoking the command function. ``__exit__`` is called and the instance
1904 1904 is discarded.
1905 1905 8. The command function is invoked.
1906 1906 9. ``onobject()`` is called for each object emitted by the command
1907 1907 function.
1908 1908 10. After the final object is seen, ``onfinished()`` is called.
1909 1909 11. ``__exit__`` is called to signal the end of use of the instance.
1910 1910
1911 1911 Cache *key* derivation can be influenced by the instance.
1912 1912
1913 1913 Cache keys are initially derived by a deterministic representation of
1914 1914 the command request. This includes the command name, arguments, protocol
1915 1915 version, etc. This initial key derivation is performed by CBOR-encoding a
1916 1916 data structure and feeding that output into a hasher.
1917 1917
1918 1918 Instances of this interface can influence this initial key derivation
1919 1919 via ``adjustcachekeystate()``.
1920 1920
1921 1921 The instance is informed of the derived cache key via a call to
1922 1922 ``setcachekey()``. The instance must store the key locally so it can
1923 1923 be consulted on subsequent operations that may require it.
1924 1924
1925 1925 When constructed, the instance has access to a callable that can be used
1926 1926 for encoding response objects. This callable receives as its single
1927 1927 argument an object emitted by a command function. It returns an iterable
1928 1928 of bytes chunks representing the encoded object. Unless the cacher is
1929 1929 caching native Python objects in memory or has a way of reconstructing
1930 1930 the original Python objects, implementations typically call this function
1931 1931 to produce bytes from the output objects and then store those bytes in
1932 1932 the cache. When it comes time to re-emit those bytes, they are wrapped
1933 1933 in a ``wireprototypes.encodedresponse`` instance to tell the output
1934 1934 layer that they are pre-encoded.
1935 1935
1936 1936 When receiving the objects emitted by the command function, instances
1937 1937 can choose what to do with those objects. The simplest thing to do is
1938 1938 re-emit the original objects. They will be forwarded to the output
1939 1939 layer and will be processed as if the cacher did not exist.
1940 1940
1941 1941 Implementations could also choose to not emit objects - instead locally
1942 1942 buffering objects or their encoded representation. They could then emit
1943 1943 a single "coalesced" object when ``onfinished()`` is called. In
1944 1944 this way, the implementation would function as a filtering layer of
1945 1945 sorts.
1946 1946
1947 1947 When caching objects, typically the encoded form of the object will
1948 1948 be stored. Keep in mind that if the original object is forwarded to
1949 1949 the output layer, it will need to be encoded there as well. For large
1950 1950 output, this redundant encoding could add overhead. Implementations
1951 1951 could wrap the encoded object data in ``wireprototypes.encodedresponse``
1952 1952 instances to avoid this overhead.
1953 1953 """
1954 1954
1955 1955 def __enter__():
1956 1956 """Marks the instance as active.
1957 1957
1958 1958 Should return self.
1959 1959 """
1960 1960
1961 1961 def __exit__(exctype, excvalue, exctb):
1962 1962 """Called when cacher is no longer used.
1963 1963
1964 1964 This can be used by implementations to perform cleanup actions (e.g.
1965 1965 disconnecting network sockets, aborting a partially cached response.
1966 1966 """
1967 1967
1968 1968 def adjustcachekeystate(state):
1969 1969 """Influences cache key derivation by adjusting state to derive key.
1970 1970
1971 1971 A dict defining the state used to derive the cache key is passed.
1972 1972
1973 1973 Implementations can modify this dict to record additional state that
1974 1974 is wanted to influence key derivation.
1975 1975
1976 1976 Implementations are *highly* encouraged to not modify or delete
1977 1977 existing keys.
1978 1978 """
1979 1979
1980 1980 def setcachekey(key):
1981 1981 """Record the derived cache key for this request.
1982 1982
1983 1983 Instances may mutate the key for internal usage, as desired. e.g.
1984 1984 instances may wish to prepend the repo name, introduce path
1985 1985 components for filesystem or URL addressing, etc. Behavior is up to
1986 1986 the cache.
1987 1987
1988 1988 Returns a bool indicating if the request is cacheable by this
1989 1989 instance.
1990 1990 """
1991 1991
1992 1992 def lookup():
1993 1993 """Attempt to resolve an entry in the cache.
1994 1994
1995 1995 The instance is instructed to look for the cache key that it was
1996 1996 informed about via the call to ``setcachekey()``.
1997 1997
1998 1998 If there's no cache hit or the cacher doesn't wish to use the cached
1999 1999 entry, ``None`` should be returned.
2000 2000
2001 2001 Else, a dict defining the cached result should be returned. The
2002 2002 dict may have the following keys:
2003 2003
2004 2004 objs
2005 2005 An iterable of objects that should be sent to the client. That
2006 2006 iterable of objects is expected to be what the command function
2007 2007 would return if invoked or an equivalent representation thereof.
2008 2008 """
2009 2009
2010 2010 def onobject(obj):
2011 2011 """Called when a new object is emitted from the command function.
2012 2012
2013 2013 Receives as its argument the object that was emitted from the
2014 2014 command function.
2015 2015
2016 2016 This method returns an iterator of objects to forward to the output
2017 2017 layer. The easiest implementation is a generator that just
2018 2018 ``yield obj``.
2019 2019 """
2020 2020
2021 2021 def onfinished():
2022 2022 """Called after all objects have been emitted from the command function.
2023 2023
2024 2024 Implementations should return an iterator of objects to forward to
2025 2025 the output layer.
2026 2026
2027 2027 This method can be a generator.
2028 2028 """
@@ -1,3763 +1,3763 b''
1 1 # localrepo.py - read/write repository class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import functools
12 12 import os
13 13 import random
14 14 import sys
15 15 import time
16 16 import weakref
17 17
18 18 from .i18n import _
19 19 from .node import (
20 20 bin,
21 21 hex,
22 22 nullrev,
23 23 sha1nodeconstants,
24 24 short,
25 25 )
26 26 from .pycompat import (
27 27 delattr,
28 28 getattr,
29 29 )
30 30 from . import (
31 31 bookmarks,
32 32 branchmap,
33 33 bundle2,
34 34 bundlecaches,
35 35 changegroup,
36 36 color,
37 37 commit,
38 38 context,
39 39 dirstate,
40 40 dirstateguard,
41 41 discovery,
42 42 encoding,
43 43 error,
44 44 exchange,
45 45 extensions,
46 46 filelog,
47 47 hook,
48 48 lock as lockmod,
49 49 match as matchmod,
50 50 mergestate as mergestatemod,
51 51 mergeutil,
52 52 metadata as metadatamod,
53 53 namespaces,
54 54 narrowspec,
55 55 obsolete,
56 56 pathutil,
57 57 phases,
58 58 pushkey,
59 59 pycompat,
60 60 rcutil,
61 61 repoview,
62 62 requirements as requirementsmod,
63 63 revlog,
64 64 revset,
65 65 revsetlang,
66 66 scmutil,
67 67 sparse,
68 68 store as storemod,
69 69 subrepoutil,
70 70 tags as tagsmod,
71 71 transaction,
72 72 txnutil,
73 73 util,
74 74 vfs as vfsmod,
75 75 wireprototypes,
76 76 )
77 77
78 78 from .interfaces import (
79 79 repository,
80 80 util as interfaceutil,
81 81 )
82 82
83 83 from .utils import (
84 84 hashutil,
85 85 procutil,
86 86 stringutil,
87 87 urlutil,
88 88 )
89 89
90 90 from .revlogutils import (
91 91 concurrency_checker as revlogchecker,
92 92 constants as revlogconst,
93 93 )
94 94
95 95 release = lockmod.release
96 96 urlerr = util.urlerr
97 97 urlreq = util.urlreq
98 98
99 99 # set of (path, vfs-location) tuples. vfs-location is:
100 100 # - 'plain' for vfs relative paths
101 101 # - '' for svfs relative paths
102 102 _cachedfiles = set()
103 103
104 104
105 105 class _basefilecache(scmutil.filecache):
106 106 """All filecache usage on repo are done for logic that should be unfiltered"""
107 107
108 108 def __get__(self, repo, type=None):
109 109 if repo is None:
110 110 return self
111 111 # proxy to unfiltered __dict__ since filtered repo has no entry
112 112 unfi = repo.unfiltered()
113 113 try:
114 114 return unfi.__dict__[self.sname]
115 115 except KeyError:
116 116 pass
117 117 return super(_basefilecache, self).__get__(unfi, type)
118 118
119 119 def set(self, repo, value):
120 120 return super(_basefilecache, self).set(repo.unfiltered(), value)
121 121
122 122
123 123 class repofilecache(_basefilecache):
124 124 """filecache for files in .hg but outside of .hg/store"""
125 125
126 126 def __init__(self, *paths):
127 127 super(repofilecache, self).__init__(*paths)
128 128 for path in paths:
129 129 _cachedfiles.add((path, b'plain'))
130 130
131 131 def join(self, obj, fname):
132 132 return obj.vfs.join(fname)
133 133
134 134
135 135 class storecache(_basefilecache):
136 136 """filecache for files in the store"""
137 137
138 138 def __init__(self, *paths):
139 139 super(storecache, self).__init__(*paths)
140 140 for path in paths:
141 141 _cachedfiles.add((path, b''))
142 142
143 143 def join(self, obj, fname):
144 144 return obj.sjoin(fname)
145 145
146 146
147 147 class mixedrepostorecache(_basefilecache):
148 148 """filecache for a mix files in .hg/store and outside"""
149 149
150 150 def __init__(self, *pathsandlocations):
151 151 # scmutil.filecache only uses the path for passing back into our
152 152 # join(), so we can safely pass a list of paths and locations
153 153 super(mixedrepostorecache, self).__init__(*pathsandlocations)
154 154 _cachedfiles.update(pathsandlocations)
155 155
156 156 def join(self, obj, fnameandlocation):
157 157 fname, location = fnameandlocation
158 158 if location == b'plain':
159 159 return obj.vfs.join(fname)
160 160 else:
161 161 if location != b'':
162 162 raise error.ProgrammingError(
163 163 b'unexpected location: %s' % location
164 164 )
165 165 return obj.sjoin(fname)
166 166
167 167
168 168 def isfilecached(repo, name):
169 169 """check if a repo has already cached "name" filecache-ed property
170 170
171 171 This returns (cachedobj-or-None, iscached) tuple.
172 172 """
173 173 cacheentry = repo.unfiltered()._filecache.get(name, None)
174 174 if not cacheentry:
175 175 return None, False
176 176 return cacheentry.obj, True
177 177
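# Illustrative usage only; the property name below is an example and the
# exact key spelling is an assumption of this sketch:
#
#     obj, cached = isfilecached(repo, 'dirstate')
#     if cached:
#         pass  # ``obj`` can be reused without re-reading from disk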
178 178
179 179 class unfilteredpropertycache(util.propertycache):
180 180 """propertycache that apply to unfiltered repo only"""
181 181
182 182 def __get__(self, repo, type=None):
183 183 unfi = repo.unfiltered()
184 184 if unfi is repo:
185 185 return super(unfilteredpropertycache, self).__get__(unfi)
186 186 return getattr(unfi, self.name)
187 187
188 188
189 189 class filteredpropertycache(util.propertycache):
190 190 """propertycache that must take filtering in account"""
191 191
192 192 def cachevalue(self, obj, value):
193 193 object.__setattr__(obj, self.name, value)
194 194
195 195
196 196 def hasunfilteredcache(repo, name):
197 197 """check if a repo has an unfilteredpropertycache value for <name>"""
198 198 return name in vars(repo.unfiltered())
199 199
200 200
201 201 def unfilteredmethod(orig):
202 202 """decorate method that always need to be run on unfiltered version"""
203 203
204 204 @functools.wraps(orig)
205 205 def wrapper(repo, *args, **kwargs):
206 206 return orig(repo.unfiltered(), *args, **kwargs)
207 207
208 208 return wrapper
209 209
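# A hedged sketch of using the decorator above; the subclass and method
# are hypothetical:
#
#     class myrepo(localrepository):
#         @unfilteredmethod
#         def rebuildcaches(self):
#             pass  # ``self`` is guaranteed to be the unfiltered repo here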
210 210
211 211 moderncaps = {
212 212 b'lookup',
213 213 b'branchmap',
214 214 b'pushkey',
215 215 b'known',
216 216 b'getbundle',
217 217 b'unbundle',
218 218 }
219 219 legacycaps = moderncaps.union({b'changegroupsubset'})
220 220
221 221
222 222 @interfaceutil.implementer(repository.ipeercommandexecutor)
223 223 class localcommandexecutor(object):
224 224 def __init__(self, peer):
225 225 self._peer = peer
226 226 self._sent = False
227 227 self._closed = False
228 228
229 229 def __enter__(self):
230 230 return self
231 231
232 232 def __exit__(self, exctype, excvalue, exctb):
233 233 self.close()
234 234
235 235 def callcommand(self, command, args):
236 236 if self._sent:
237 237 raise error.ProgrammingError(
238 238 b'callcommand() cannot be used after sendcommands()'
239 239 )
240 240
241 241 if self._closed:
242 242 raise error.ProgrammingError(
243 243 b'callcommand() cannot be used after close()'
244 244 )
245 245
246 246 # We don't need to support anything fancy. Just call the named
247 247 # method on the peer and return a resolved future.
248 248 fn = getattr(self._peer, pycompat.sysstr(command))
249 249
250 250 f = pycompat.futures.Future()
251 251
252 252 try:
253 253 result = fn(**pycompat.strkwargs(args))
254 254 except Exception:
255 255 pycompat.future_set_exception_info(f, sys.exc_info()[1:])
256 256 else:
257 257 f.set_result(result)
258 258
259 259 return f
260 260
261 261 def sendcommands(self):
262 262 self._sent = True
263 263
264 264 def close(self):
265 265 self._closed = True
266 266
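# Illustrative usage, following the ``ipeercommandexecutor`` contract
# (``heads`` is one of the peer commands defined below):
#
#     with localcommandexecutor(peer) as executor:
#         f = executor.callcommand(b'heads', {})
#         executor.sendcommands()
#     heads = f.result()  # the future is already resolved in the local case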
267 267
268 268 @interfaceutil.implementer(repository.ipeercommands)
269 269 class localpeer(repository.peer):
270 270 '''peer for a local repo; reflects only the most recent API'''
271 271
272 272 def __init__(self, repo, caps=None):
273 273 super(localpeer, self).__init__()
274 274
275 275 if caps is None:
276 276 caps = moderncaps.copy()
277 277 self._repo = repo.filtered(b'served')
278 278 self.ui = repo.ui
279 279
280 280 if repo._wanted_sidedata:
281 281 formatted = bundle2.format_remote_wanted_sidedata(repo)
282 282 caps.add(b'exp-wanted-sidedata=' + formatted)
283 283
284 284 self._caps = repo._restrictcapabilities(caps)
285 285
286 286 # Begin of _basepeer interface.
287 287
288 288 def url(self):
289 289 return self._repo.url()
290 290
291 291 def local(self):
292 292 return self._repo
293 293
294 294 def peer(self):
295 295 return self
296 296
297 297 def canpush(self):
298 298 return True
299 299
300 300 def close(self):
301 301 self._repo.close()
302 302
303 303 # End of _basepeer interface.
304 304
305 305 # Begin of _basewirecommands interface.
306 306
307 307 def branchmap(self):
308 308 return self._repo.branchmap()
309 309
310 310 def capabilities(self):
311 311 return self._caps
312 312
313 313 def clonebundles(self):
314 314 return self._repo.tryread(bundlecaches.CB_MANIFEST_FILE)
315 315
316 316 def debugwireargs(self, one, two, three=None, four=None, five=None):
317 317 """Used to test argument passing over the wire"""
318 318 return b"%s %s %s %s %s" % (
319 319 one,
320 320 two,
321 321 pycompat.bytestr(three),
322 322 pycompat.bytestr(four),
323 323 pycompat.bytestr(five),
324 324 )
325 325
326 326 def getbundle(
327 327 self,
328 328 source,
329 329 heads=None,
330 330 common=None,
331 331 bundlecaps=None,
332 332 remote_sidedata=None,
333 333 **kwargs
334 334 ):
335 335 chunks = exchange.getbundlechunks(
336 336 self._repo,
337 337 source,
338 338 heads=heads,
339 339 common=common,
340 340 bundlecaps=bundlecaps,
341 341 remote_sidedata=remote_sidedata,
342 342 **kwargs
343 343 )[1]
344 344 cb = util.chunkbuffer(chunks)
345 345
346 346 if exchange.bundle2requested(bundlecaps):
347 347 # When requesting a bundle2, getbundle returns a stream to make the
348 348 # wire level function happier. We need to build a proper object
349 349 # from it in local peer.
350 350 return bundle2.getunbundler(self.ui, cb)
351 351 else:
352 352 return changegroup.getunbundler(b'01', cb, None)
353 353
354 354 def heads(self):
355 355 return self._repo.heads()
356 356
357 357 def known(self, nodes):
358 358 return self._repo.known(nodes)
359 359
360 360 def listkeys(self, namespace):
361 361 return self._repo.listkeys(namespace)
362 362
363 363 def lookup(self, key):
364 364 return self._repo.lookup(key)
365 365
366 366 def pushkey(self, namespace, key, old, new):
367 367 return self._repo.pushkey(namespace, key, old, new)
368 368
369 369 def stream_out(self):
370 370 raise error.Abort(_(b'cannot perform stream clone against local peer'))
371 371
372 372 def unbundle(self, bundle, heads, url):
373 373 """apply a bundle on a repo
374 374
375 375 This function handles the repo locking itself."""
376 376 try:
377 377 try:
378 378 bundle = exchange.readbundle(self.ui, bundle, None)
379 379 ret = exchange.unbundle(self._repo, bundle, heads, b'push', url)
380 380 if util.safehasattr(ret, b'getchunks'):
381 381 # This is a bundle20 object, turn it into an unbundler.
382 382 # This little dance should be dropped eventually when the
383 383 # API is finally improved.
384 384 stream = util.chunkbuffer(ret.getchunks())
385 385 ret = bundle2.getunbundler(self.ui, stream)
386 386 return ret
387 387 except Exception as exc:
388 388 # If the exception contains output salvaged from a bundle2
389 389 # reply, we need to make sure it is printed before continuing
390 390 # to fail. So we build a bundle2 with such output and consume
391 391 # it directly.
392 392 #
393 393 # This is not very elegant but allows a "simple" solution for
394 394 # issue4594
395 395 output = getattr(exc, '_bundle2salvagedoutput', ())
396 396 if output:
397 397 bundler = bundle2.bundle20(self._repo.ui)
398 398 for out in output:
399 399 bundler.addpart(out)
400 400 stream = util.chunkbuffer(bundler.getchunks())
401 401 b = bundle2.getunbundler(self.ui, stream)
402 402 bundle2.processbundle(self._repo, b)
403 403 raise
404 404 except error.PushRaced as exc:
405 405 raise error.ResponseError(
406 406 _(b'push failed:'), stringutil.forcebytestr(exc)
407 407 )
408 408
409 409 # End of _basewirecommands interface.
410 410
411 411 # Begin of peer interface.
412 412
413 413 def commandexecutor(self):
414 414 return localcommandexecutor(self)
415 415
416 416 # End of peer interface.
417 417
418 418
419 419 @interfaceutil.implementer(repository.ipeerlegacycommands)
420 420 class locallegacypeer(localpeer):
421 421 """peer extension which implements legacy methods too; used for tests with
422 422 restricted capabilities"""
423 423
424 424 def __init__(self, repo):
425 425 super(locallegacypeer, self).__init__(repo, caps=legacycaps)
426 426
427 427 # Begin of baselegacywirecommands interface.
428 428
429 429 def between(self, pairs):
430 430 return self._repo.between(pairs)
431 431
432 432 def branches(self, nodes):
433 433 return self._repo.branches(nodes)
434 434
435 435 def changegroup(self, nodes, source):
436 436 outgoing = discovery.outgoing(
437 437 self._repo, missingroots=nodes, ancestorsof=self._repo.heads()
438 438 )
439 439 return changegroup.makechangegroup(self._repo, outgoing, b'01', source)
440 440
441 441 def changegroupsubset(self, bases, heads, source):
442 442 outgoing = discovery.outgoing(
443 443 self._repo, missingroots=bases, ancestorsof=heads
444 444 )
445 445 return changegroup.makechangegroup(self._repo, outgoing, b'01', source)
446 446
447 447 # End of baselegacywirecommands interface.
448 448
449 449
450 450 # Functions receiving (ui, features) that extensions can register to impact
451 451 # the ability to load repositories with custom requirements. Only
452 452 # functions defined in loaded extensions are called.
453 453 #
454 454 # The function receives a set of requirement strings that the repository
455 455 # is capable of opening. Functions will typically add elements to the
456 456 # set to reflect that the extension knows how to handle those requirements.
457 457 featuresetupfuncs = set()
458 458
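# A hedged sketch of registering such a function from an extension module
# (the feature name ``exp-myfeature`` is hypothetical); only functions
# defined in loaded extensions are actually invoked:
#
#     from mercurial import localrepo
#
#     def featuresetup(ui, supported):
#         supported.add(b'exp-myfeature')
#
#     def uisetup(ui):
#         localrepo.featuresetupfuncs.add(featuresetup)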
459 459
460 460 def _getsharedvfs(hgvfs, requirements):
461 461 """returns the vfs object pointing to root of shared source
462 462 repo for a shared repository
463 463
464 464 hgvfs is vfs pointing at .hg/ of current repo (shared one)
465 465 requirements is a set of requirements of current repo (shared one)
466 466 """
467 467 # The ``shared`` or ``relshared`` requirements indicate the
468 468 # store lives in the path contained in the ``.hg/sharedpath`` file.
469 469 # This is an absolute path for ``shared`` and relative to
470 470 # ``.hg/`` for ``relshared``.
471 471 sharedpath = hgvfs.read(b'sharedpath').rstrip(b'\n')
472 472 if requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements:
473 473 sharedpath = util.normpath(hgvfs.join(sharedpath))
474 474
475 475 sharedvfs = vfsmod.vfs(sharedpath, realpath=True)
476 476
477 477 if not sharedvfs.exists():
478 478 raise error.RepoError(
479 479 _(b'.hg/sharedpath points to nonexistent directory %s')
480 480 % sharedvfs.base
481 481 )
482 482 return sharedvfs
483 483
484 484
485 485 def _readrequires(vfs, allowmissing):
486 486 """reads the require file present at root of this vfs
487 487 and return a set of requirements
488 488
489 489 If allowmissing is True, we suppress ENOENT if raised"""
490 490 # requires file contains a newline-delimited list of
491 491 # features/capabilities the opener (us) must have in order to use
492 492 # the repository. This file was introduced in Mercurial 0.9.2,
493 493 # which means very old repositories may not have one. We assume
494 494 # a missing file translates to no requirements.
495 495 try:
496 496 requirements = set(vfs.read(b'requires').splitlines())
497 497 except IOError as e:
498 498 if not (allowmissing and e.errno == errno.ENOENT):
499 499 raise
500 500 requirements = set()
501 501 return requirements
502 502
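# For reference, a ``.hg/requires`` file is just such a newline-delimited
# list of feature names, for example (actual entries vary by repository
# format; this list is illustrative):
#
#     dotencode
#     fncache
#     generaldelta
#     revlogv1
#     sparserevlog
#     store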
503 503
504 504 def makelocalrepository(baseui, path, intents=None):
505 505 """Create a local repository object.
506 506
507 507 Given arguments needed to construct a local repository, this function
508 508 performs various early repository loading functionality (such as
509 509 reading the ``.hg/requires`` and ``.hg/hgrc`` files), validates that
510 510 the repository can be opened, derives a type suitable for representing
511 511 that repository, and returns an instance of it.
512 512
513 513 The returned object conforms to the ``repository.completelocalrepository``
514 514 interface.
515 515
516 516 The repository type is derived by calling a series of factory functions
517 517 for each aspect/interface of the final repository. These are defined by
518 518 ``REPO_INTERFACES``.
519 519
520 520 Each factory function is called to produce a type implementing a specific
521 521 interface. The cumulative list of returned types will be combined into a
522 522 new type and that type will be instantiated to represent the local
523 523 repository.
524 524
525 525 The factory functions each receive various state that may be consulted
526 526 as part of deriving a type.
527 527
528 528 Extensions should wrap these factory functions to customize repository type
529 529 creation. Note that an extension's wrapped function may be called even if
530 530 that extension is not loaded for the repo being constructed. Extensions
531 531 should check if their ``__name__`` appears in the
532 532 ``extensionmodulenames`` set passed to the factory function and no-op if
533 533 not.
534 534 """
535 535 ui = baseui.copy()
536 536 # Prevent copying repo configuration.
537 537 ui.copy = baseui.copy
538 538
539 539 # Working directory VFS rooted at repository root.
540 540 wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)
541 541
542 542 # Main VFS for .hg/ directory.
543 543 hgpath = wdirvfs.join(b'.hg')
544 544 hgvfs = vfsmod.vfs(hgpath, cacheaudited=True)
545 545 # Whether this repository is a shared one or not
546 546 shared = False
547 547 # If this repository is shared, vfs pointing to shared repo
548 548 sharedvfs = None
549 549
550 550 # The .hg/ path should exist and should be a directory. All other
551 551 # cases are errors.
552 552 if not hgvfs.isdir():
553 553 try:
554 554 hgvfs.stat()
555 555 except OSError as e:
556 556 if e.errno != errno.ENOENT:
557 557 raise
558 558 except ValueError as e:
559 559 # Can be raised on Python 3.8 when path is invalid.
560 560 raise error.Abort(
561 561 _(b'invalid path %s: %s') % (path, stringutil.forcebytestr(e))
562 562 )
563 563
564 564 raise error.RepoError(_(b'repository %s not found') % path)
565 565
566 566 requirements = _readrequires(hgvfs, True)
567 567 shared = (
568 568 requirementsmod.SHARED_REQUIREMENT in requirements
569 569 or requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements
570 570 )
571 571 storevfs = None
572 572 if shared:
573 573 # This is a shared repo
574 574 sharedvfs = _getsharedvfs(hgvfs, requirements)
575 575 storevfs = vfsmod.vfs(sharedvfs.join(b'store'))
576 576 else:
577 577 storevfs = vfsmod.vfs(hgvfs.join(b'store'))
578 578
579 579 # if .hg/requires contains the sharesafe requirement, it means
580 580 # there exists a `.hg/store/requires` too and we should read it
581 581 # NOTE: presence of SHARESAFE_REQUIREMENT implies that the store requirement
582 582 # is present. We never write SHARESAFE_REQUIREMENT for a repo if the store
583 583 # is not present; refer to checkrequirementscompat() for that
584 584 #
585 585 # However, if SHARESAFE_REQUIREMENT is not present, it means that the
586 586 # repository was shared the old way. We check the share source .hg/requires
587 587 # for SHARESAFE_REQUIREMENT to detect whether the current repository needs
588 588 # to be reshared
589 589 hint = _(b"see `hg help config.format.use-share-safe` for more information")
590 590 if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
591 591
592 592 if (
593 593 shared
594 594 and requirementsmod.SHARESAFE_REQUIREMENT
595 595 not in _readrequires(sharedvfs, True)
596 596 ):
597 597 mismatch_warn = ui.configbool(
598 598 b'share', b'safe-mismatch.source-not-safe.warn'
599 599 )
600 600 mismatch_config = ui.config(
601 601 b'share', b'safe-mismatch.source-not-safe'
602 602 )
603 603 if mismatch_config in (
604 604 b'downgrade-allow',
605 605 b'allow',
606 606 b'downgrade-abort',
607 607 ):
608 608 # prevent cyclic import localrepo -> upgrade -> localrepo
609 609 from . import upgrade
610 610
611 611 upgrade.downgrade_share_to_non_safe(
612 612 ui,
613 613 hgvfs,
614 614 sharedvfs,
615 615 requirements,
616 616 mismatch_config,
617 617 mismatch_warn,
618 618 )
619 619 elif mismatch_config == b'abort':
620 620 raise error.Abort(
621 621 _(b"share source does not support share-safe requirement"),
622 622 hint=hint,
623 623 )
624 624 else:
625 625 raise error.Abort(
626 626 _(
627 627 b"share-safe mismatch with source.\nUnrecognized"
628 628 b" value '%s' of `share.safe-mismatch.source-not-safe`"
629 629 b" set."
630 630 )
631 631 % mismatch_config,
632 632 hint=hint,
633 633 )
634 634 else:
635 635 requirements |= _readrequires(storevfs, False)
636 636 elif shared:
637 637 sourcerequires = _readrequires(sharedvfs, False)
638 638 if requirementsmod.SHARESAFE_REQUIREMENT in sourcerequires:
639 639 mismatch_config = ui.config(b'share', b'safe-mismatch.source-safe')
640 640 mismatch_warn = ui.configbool(
641 641 b'share', b'safe-mismatch.source-safe.warn'
642 642 )
643 643 if mismatch_config in (
644 644 b'upgrade-allow',
645 645 b'allow',
646 646 b'upgrade-abort',
647 647 ):
648 648 # prevent cyclic import localrepo -> upgrade -> localrepo
649 649 from . import upgrade
650 650
651 651 upgrade.upgrade_share_to_safe(
652 652 ui,
653 653 hgvfs,
654 654 storevfs,
655 655 requirements,
656 656 mismatch_config,
657 657 mismatch_warn,
658 658 )
659 659 elif mismatch_config == b'abort':
660 660 raise error.Abort(
661 661 _(
662 662 b'version mismatch: source uses share-safe'
663 663 b' functionality while the current share does not'
664 664 ),
665 665 hint=hint,
666 666 )
667 667 else:
668 668 raise error.Abort(
669 669 _(
670 670 b"share-safe mismatch with source.\nUnrecognized"
671 671 b" value '%s' of `share.safe-mismatch.source-safe` set."
672 672 )
673 673 % mismatch_config,
674 674 hint=hint,
675 675 )
676 676
677 677 # The .hg/hgrc file may load extensions or contain config options
678 678 # that influence repository construction. Attempt to load it and
679 679 # process any new extensions that it may have pulled in.
680 680 if loadhgrc(ui, wdirvfs, hgvfs, requirements, sharedvfs):
681 681 afterhgrcload(ui, wdirvfs, hgvfs, requirements)
682 682 extensions.loadall(ui)
683 683 extensions.populateui(ui)
684 684
685 685 # Set of module names of extensions loaded for this repository.
686 686 extensionmodulenames = {m.__name__ for n, m in extensions.extensions(ui)}
687 687
688 688 supportedrequirements = gathersupportedrequirements(ui)
689 689
690 690 # We first validate the requirements are known.
691 691 ensurerequirementsrecognized(requirements, supportedrequirements)
692 692
693 693 # Then we validate that the known set is reasonable to use together.
694 694 ensurerequirementscompatible(ui, requirements)
695 695
696 696 # TODO there are unhandled edge cases related to opening repositories with
697 697 # shared storage. If storage is shared, we should also test for requirements
698 698 # compatibility in the pointed-to repo. This entails loading the .hg/hgrc in
699 699 # that repo, as that repo may load extensions needed to open it. This is a
700 700 # bit complicated because we don't want the other hgrc to overwrite settings
701 701 # in this hgrc.
702 702 #
703 703 # This bug is somewhat mitigated by the fact that we copy the .hg/requires
704 704 # file when sharing repos. But if a requirement is added after the share is
705 705 # performed, thereby introducing a new requirement for the opener, we may
706 706 # not see that and could encounter a run-time error interacting with
707 707 # that shared store since it has an unknown-to-us requirement.
708 708
709 709 # At this point, we know we should be capable of opening the repository.
710 710 # Now get on with doing that.
711 711
712 712 features = set()
713 713
714 714 # The "store" part of the repository holds versioned data. How it is
715 715 # accessed is determined by various requirements. If `shared` or
716 716 # `relshared` requirements are present, this indicates current repository
717 717 # is a share and store exists in path mentioned in `.hg/sharedpath`
718 718 if shared:
719 719 storebasepath = sharedvfs.base
720 720 cachepath = sharedvfs.join(b'cache')
721 721 features.add(repository.REPO_FEATURE_SHARED_STORAGE)
722 722 else:
723 723 storebasepath = hgvfs.base
724 724 cachepath = hgvfs.join(b'cache')
725 725 wcachepath = hgvfs.join(b'wcache')
726 726
727 727 # The store has changed over time and the exact layout is dictated by
728 728 # requirements. The store interface abstracts differences across all
729 729 # of them.
730 730 store = makestore(
731 731 requirements,
732 732 storebasepath,
733 733 lambda base: vfsmod.vfs(base, cacheaudited=True),
734 734 )
735 735 hgvfs.createmode = store.createmode
736 736
737 737 storevfs = store.vfs
738 738 storevfs.options = resolvestorevfsoptions(ui, requirements, features)
739 739
740 740 # The cache vfs is used to manage cache files.
741 741 cachevfs = vfsmod.vfs(cachepath, cacheaudited=True)
742 742 cachevfs.createmode = store.createmode
743 743 # The cache vfs is used to manage cache files related to the working copy
744 744 wcachevfs = vfsmod.vfs(wcachepath, cacheaudited=True)
745 745 wcachevfs.createmode = store.createmode
746 746
747 747 # Now resolve the type for the repository object. We do this by repeatedly
748 748 # calling a factory function to produce types for specific aspects of the
749 749 # repo's operation. The aggregate returned types are used as base classes
750 750 # for a dynamically-derived type, which will represent our new repository.
751 751
752 752 bases = []
753 753 extrastate = {}
754 754
755 755 for iface, fn in REPO_INTERFACES:
756 756 # We pass all potentially useful state to give extensions tons of
757 757 # flexibility.
758 758 typ = fn()(
759 759 ui=ui,
760 760 intents=intents,
761 761 requirements=requirements,
762 762 features=features,
763 763 wdirvfs=wdirvfs,
764 764 hgvfs=hgvfs,
765 765 store=store,
766 766 storevfs=storevfs,
767 767 storeoptions=storevfs.options,
768 768 cachevfs=cachevfs,
769 769 wcachevfs=wcachevfs,
770 770 extensionmodulenames=extensionmodulenames,
771 771 extrastate=extrastate,
772 772 baseclasses=bases,
773 773 )
774 774
775 775 if not isinstance(typ, type):
776 776 raise error.ProgrammingError(
777 777 b'unable to construct type for %s' % iface
778 778 )
779 779
780 780 bases.append(typ)
781 781
782 782 # type() allows you to use characters in type names that wouldn't be
783 783 # recognized as Python symbols in source code. We abuse that to add
784 784 # rich information about our constructed repo.
785 785 name = pycompat.sysstr(
786 786 b'derivedrepo:%s<%s>' % (wdirvfs.base, b','.join(sorted(requirements)))
787 787 )
788 788
789 789 cls = type(name, tuple(bases), {})
790 790
791 791 return cls(
792 792 baseui=baseui,
793 793 ui=ui,
794 794 origroot=path,
795 795 wdirvfs=wdirvfs,
796 796 hgvfs=hgvfs,
797 797 requirements=requirements,
798 798 supportedrequirements=supportedrequirements,
799 799 sharedpath=storebasepath,
800 800 store=store,
801 801 cachevfs=cachevfs,
802 802 wcachevfs=wcachevfs,
803 803 features=features,
804 804 intents=intents,
805 805 )
806 806
807 807
808 808 def loadhgrc(ui, wdirvfs, hgvfs, requirements, sharedvfs=None):
809 809 """Load hgrc files/content into a ui instance.
810 810
811 811 This is called during repository opening to load any additional
812 812 config files or settings relevant to the current repository.
813 813
814 814 Returns a bool indicating whether any additional configs were loaded.
815 815
816 816 Extensions should monkeypatch this function to modify how per-repo
817 817 configs are loaded. For example, an extension may wish to pull in
818 818 configs from alternate files or sources.
819 819
820 820 sharedvfs is a vfs object pointing to the source repo if the current
821 821 one is a shared one
822 822 """
823 823 if not rcutil.use_repo_hgrc():
824 824 return False
825 825
826 826 ret = False
827 827 # first load config from the shared source if we have to
828 828 if requirementsmod.SHARESAFE_REQUIREMENT in requirements and sharedvfs:
829 829 try:
830 830 ui.readconfig(sharedvfs.join(b'hgrc'), root=sharedvfs.base)
831 831 ret = True
832 832 except IOError:
833 833 pass
834 834
835 835 try:
836 836 ui.readconfig(hgvfs.join(b'hgrc'), root=wdirvfs.base)
837 837 ret = True
838 838 except IOError:
839 839 pass
840 840
841 841 try:
842 842 ui.readconfig(hgvfs.join(b'hgrc-not-shared'), root=wdirvfs.base)
843 843 ret = True
844 844 except IOError:
845 845 pass
846 846
847 847 return ret
848 848
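# A hedged sketch of the monkeypatching the docstring above suggests,
# written from an extension's point of view (the extra file name
# ``hgrc-extra`` is hypothetical):
#
#     from mercurial import extensions, localrepo
#
#     def extsetup(ui):
#         def wrapped(orig, ui, wdirvfs, hgvfs, requirements, sharedvfs=None):
#             ret = orig(ui, wdirvfs, hgvfs, requirements, sharedvfs)
#             try:
#                 ui.readconfig(hgvfs.join(b'hgrc-extra'), root=wdirvfs.base)
#                 ret = True
#             except IOError:
#                 pass
#             return ret
#
#         extensions.wrapfunction(localrepo, 'loadhgrc', wrapped)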
849 849
850 850 def afterhgrcload(ui, wdirvfs, hgvfs, requirements):
851 851 """Perform additional actions after .hg/hgrc is loaded.
852 852
853 853 This function is called during repository loading immediately after
854 854 the .hg/hgrc file is loaded and before per-repo extensions are loaded.
855 855
856 856 The function can be used to validate configs, automatically add
857 857 options (including extensions) based on requirements, etc.
858 858 """
859 859
860 860 # Map of requirements to list of extensions to load automatically when
861 861 # the requirement is present.
862 862 autoextensions = {
863 863 b'git': [b'git'],
864 864 b'largefiles': [b'largefiles'],
865 865 b'lfs': [b'lfs'],
866 866 }
867 867
868 868 for requirement, names in sorted(autoextensions.items()):
869 869 if requirement not in requirements:
870 870 continue
871 871
872 872 for name in names:
873 873 if not ui.hasconfig(b'extensions', name):
874 874 ui.setconfig(b'extensions', name, b'', source=b'autoload')
875 875
876 876
877 877 def gathersupportedrequirements(ui):
878 878 """Determine the complete set of recognized requirements."""
879 879 # Start with all requirements supported by this file.
880 880 supported = set(localrepository._basesupported)
881 881
882 882 # Execute ``featuresetupfuncs`` entries if they belong to an extension
883 883 # relevant to this ui instance.
884 884 modules = {m.__name__ for n, m in extensions.extensions(ui)}
885 885
886 886 for fn in featuresetupfuncs:
887 887 if fn.__module__ in modules:
888 888 fn(ui, supported)
889 889
890 890 # Add derived requirements from registered compression engines.
891 891 for name in util.compengines:
892 892 engine = util.compengines[name]
893 893 if engine.available() and engine.revlogheader():
894 894 supported.add(b'exp-compression-%s' % name)
895 895 if engine.name() == b'zstd':
896 896 supported.add(b'revlog-compression-zstd')
897 897
898 898 return supported
899 899
900 900
901 901 def ensurerequirementsrecognized(requirements, supported):
902 902 """Validate that a set of local requirements is recognized.
903 903
904 904 Receives a set of requirements. Raises an ``error.RepoError`` if there
905 905 exists any requirement in that set that currently loaded code doesn't
906 906 recognize.
907 907
908 908 Returns a set of supported requirements.
909 909 """
910 910 missing = set()
911 911
912 912 for requirement in requirements:
913 913 if requirement in supported:
914 914 continue
915 915
916 916 if not requirement or not requirement[0:1].isalnum():
917 917 raise error.RequirementError(_(b'.hg/requires file is corrupt'))
918 918
919 919 missing.add(requirement)
920 920
921 921 if missing:
922 922 raise error.RequirementError(
923 923 _(b'repository requires features unknown to this Mercurial: %s')
924 924 % b' '.join(sorted(missing)),
925 925 hint=_(
926 926 b'see https://mercurial-scm.org/wiki/MissingRequirement '
927 927 b'for more information'
928 928 ),
929 929 )
930 930
931 931
932 932 def ensurerequirementscompatible(ui, requirements):
933 933 """Validates that a set of recognized requirements is mutually compatible.
934 934
935 935 Some requirements may not be compatible with others or require
936 936 config options that aren't enabled. This function is called during
937 937 repository opening to ensure that the set of requirements needed
938 938 to open a repository is sane and compatible with config options.
939 939
940 940 Extensions can monkeypatch this function to perform additional
941 941 checking.
942 942
943 943 ``error.RepoError`` should be raised on failure.
944 944 """
945 945 if (
946 946 requirementsmod.SPARSE_REQUIREMENT in requirements
947 947 and not sparse.enabled
948 948 ):
949 949 raise error.RepoError(
950 950 _(
951 951 b'repository is using sparse feature but '
952 952 b'sparse is not enabled; enable the '
953 953 b'"sparse" extensions to access'
954 954 )
955 955 )
956 956
957 957
958 958 def makestore(requirements, path, vfstype):
959 959 """Construct a storage object for a repository."""
960 960 if requirementsmod.STORE_REQUIREMENT in requirements:
961 961 if requirementsmod.FNCACHE_REQUIREMENT in requirements:
962 962 dotencode = requirementsmod.DOTENCODE_REQUIREMENT in requirements
963 963 return storemod.fncachestore(path, vfstype, dotencode)
964 964
965 965 return storemod.encodedstore(path, vfstype)
966 966
967 967 return storemod.basicstore(path, vfstype)
968 968
969 969
970 970 def resolvestorevfsoptions(ui, requirements, features):
971 971 """Resolve the options to pass to the store vfs opener.
972 972
973 973 The returned dict is used to influence behavior of the storage layer.
974 974 """
975 975 options = {}
976 976
977 977 if requirementsmod.TREEMANIFEST_REQUIREMENT in requirements:
978 978 options[b'treemanifest'] = True
979 979
980 980 # experimental config: format.manifestcachesize
981 981 manifestcachesize = ui.configint(b'format', b'manifestcachesize')
982 982 if manifestcachesize is not None:
983 983 options[b'manifestcachesize'] = manifestcachesize
984 984
985 985 # In the absence of another requirement superseding a revlog-related
986 986 # requirement, we have to assume the repo is using revlog version 0.
987 987 # This revlog format is super old and we don't bother trying to parse
988 988 # opener options for it because those options wouldn't do anything
989 989 # meaningful on such old repos.
990 990 if (
991 991 requirementsmod.REVLOGV1_REQUIREMENT in requirements
992 992 or requirementsmod.REVLOGV2_REQUIREMENT in requirements
993 993 ):
994 994 options.update(resolverevlogstorevfsoptions(ui, requirements, features))
995 995 else: # explicitly mark repo as using revlogv0
996 996 options[b'revlogv0'] = True
997 997
998 998 if requirementsmod.COPIESSDC_REQUIREMENT in requirements:
999 999 options[b'copies-storage'] = b'changeset-sidedata'
1000 1000 else:
1001 1001 writecopiesto = ui.config(b'experimental', b'copies.write-to')
1002 1002 copiesextramode = (b'changeset-only', b'compatibility')
1003 1003 if writecopiesto in copiesextramode:
1004 1004 options[b'copies-storage'] = b'extra'
1005 1005
1006 1006 return options
1007 1007
1008 1008
1009 1009 def resolverevlogstorevfsoptions(ui, requirements, features):
1010 1010 """Resolve opener options specific to revlogs."""
1011 1011
1012 1012 options = {}
1013 1013 options[b'flagprocessors'] = {}
1014 1014
1015 1015 if requirementsmod.REVLOGV1_REQUIREMENT in requirements:
1016 1016 options[b'revlogv1'] = True
1017 1017 if requirementsmod.REVLOGV2_REQUIREMENT in requirements:
1018 1018 options[b'revlogv2'] = True
1019 1019
1020 1020 if requirementsmod.GENERALDELTA_REQUIREMENT in requirements:
1021 1021 options[b'generaldelta'] = True
1022 1022
1023 1023 # experimental config: format.chunkcachesize
1024 1024 chunkcachesize = ui.configint(b'format', b'chunkcachesize')
1025 1025 if chunkcachesize is not None:
1026 1026 options[b'chunkcachesize'] = chunkcachesize
1027 1027
1028 1028 deltabothparents = ui.configbool(
1029 1029 b'storage', b'revlog.optimize-delta-parent-choice'
1030 1030 )
1031 1031 options[b'deltabothparents'] = deltabothparents
1032 1032
1033 1033 lazydelta = ui.configbool(b'storage', b'revlog.reuse-external-delta')
1034 1034 lazydeltabase = False
1035 1035 if lazydelta:
1036 1036 lazydeltabase = ui.configbool(
1037 1037 b'storage', b'revlog.reuse-external-delta-parent'
1038 1038 )
1039 1039 if lazydeltabase is None:
1040 1040 lazydeltabase = not scmutil.gddeltaconfig(ui)
1041 1041 options[b'lazydelta'] = lazydelta
1042 1042 options[b'lazydeltabase'] = lazydeltabase
1043 1043
1044 1044 chainspan = ui.configbytes(b'experimental', b'maxdeltachainspan')
1045 1045 if 0 <= chainspan:
1046 1046 options[b'maxdeltachainspan'] = chainspan
1047 1047
1048 1048 mmapindexthreshold = ui.configbytes(b'experimental', b'mmapindexthreshold')
1049 1049 if mmapindexthreshold is not None:
1050 1050 options[b'mmapindexthreshold'] = mmapindexthreshold
1051 1051
1052 1052 withsparseread = ui.configbool(b'experimental', b'sparse-read')
1053 1053 srdensitythres = float(
1054 1054 ui.config(b'experimental', b'sparse-read.density-threshold')
1055 1055 )
1056 1056 srmingapsize = ui.configbytes(b'experimental', b'sparse-read.min-gap-size')
1057 1057 options[b'with-sparse-read'] = withsparseread
1058 1058 options[b'sparse-read-density-threshold'] = srdensitythres
1059 1059 options[b'sparse-read-min-gap-size'] = srmingapsize
1060 1060
1061 1061 sparserevlog = requirementsmod.SPARSEREVLOG_REQUIREMENT in requirements
1062 1062 options[b'sparse-revlog'] = sparserevlog
1063 1063 if sparserevlog:
1064 1064 options[b'generaldelta'] = True
1065 1065
1066 1066 sidedata = requirementsmod.SIDEDATA_REQUIREMENT in requirements
1067 1067 options[b'side-data'] = sidedata
1068 1068
1069 1069 maxchainlen = None
1070 1070 if sparserevlog:
1071 1071 maxchainlen = revlogconst.SPARSE_REVLOG_MAX_CHAIN_LENGTH
1072 1072 # experimental config: format.maxchainlen
1073 1073 maxchainlen = ui.configint(b'format', b'maxchainlen', maxchainlen)
1074 1074 if maxchainlen is not None:
1075 1075 options[b'maxchainlen'] = maxchainlen
1076 1076
1077 1077 for r in requirements:
1078 1078 # we allow multiple compression engine requirements to co-exist because,
1079 1079 # strictly speaking, revlog seems to support mixed compression styles.
1080 1080 #
1081 1081 # The compression used for new entries will be "the last one"
1082 1082 prefix = r.startswith
1083 1083 if prefix(b'revlog-compression-') or prefix(b'exp-compression-'):
1084 1084 options[b'compengine'] = r.split(b'-', 2)[2]
1085 1085
1086 1086 options[b'zlib.level'] = ui.configint(b'storage', b'revlog.zlib.level')
1087 1087 if options[b'zlib.level'] is not None:
1088 1088 if not (0 <= options[b'zlib.level'] <= 9):
1089 1089 msg = _(b'invalid value for `storage.revlog.zlib.level` config: %d')
1090 1090 raise error.Abort(msg % options[b'zlib.level'])
1091 1091 options[b'zstd.level'] = ui.configint(b'storage', b'revlog.zstd.level')
1092 1092 if options[b'zstd.level'] is not None:
1093 1093 if not (0 <= options[b'zstd.level'] <= 22):
1094 1094 msg = _(b'invalid value for `storage.revlog.zstd.level` config: %d')
1095 1095 raise error.Abort(msg % options[b'zstd.level'])
1096 1096
1097 1097 if requirementsmod.NARROW_REQUIREMENT in requirements:
1098 1098 options[b'enableellipsis'] = True
1099 1099
1100 1100 if ui.configbool(b'experimental', b'rust.index'):
1101 1101 options[b'rust.index'] = True
1102 1102 if requirementsmod.NODEMAP_REQUIREMENT in requirements:
1103 1103 slow_path = ui.config(
1104 1104 b'storage', b'revlog.persistent-nodemap.slow-path'
1105 1105 )
1106 1106 if slow_path not in (b'allow', b'warn', b'abort'):
1107 1107 default = ui.config_default(
1108 1108 b'storage', b'revlog.persistent-nodemap.slow-path'
1109 1109 )
1110 1110 msg = _(
1111 1111 b'unknown value for config '
1112 1112 b'"storage.revlog.persistent-nodemap.slow-path": "%s"\n'
1113 1113 )
1114 1114 ui.warn(msg % slow_path)
1115 1115 if not ui.quiet:
1116 1116 ui.warn(_(b'falling back to default value: %s\n') % default)
1117 1117 slow_path = default
1118 1118
1119 1119 msg = _(
1120 1120 b"accessing `persistent-nodemap` repository without associated "
1121 1121 b"fast implementation."
1122 1122 )
1123 1123 hint = _(
1124 1124 b"check `hg help config.format.use-persistent-nodemap` "
1125 1125 b"for details"
1126 1126 )
1127 1127 if not revlog.HAS_FAST_PERSISTENT_NODEMAP:
1128 1128 if slow_path == b'warn':
1129 1129 msg = b"warning: " + msg + b'\n'
1130 1130 ui.warn(msg)
1131 1131 if not ui.quiet:
1132 1132 hint = b'(' + hint + b')\n'
1133 1133 ui.warn(hint)
1134 1134 if slow_path == b'abort':
1135 1135 raise error.Abort(msg, hint=hint)
1136 1136 options[b'persistent-nodemap'] = True
1137 1137 if ui.configbool(b'storage', b'revlog.persistent-nodemap.mmap'):
1138 1138 options[b'persistent-nodemap.mmap'] = True
1139 1139 if ui.configbool(b'devel', b'persistent-nodemap'):
1140 1140 options[b'devel-force-nodemap'] = True
1141 1141
1142 1142 return options
1143 1143
1144 1144
1145 1145 def makemain(**kwargs):
1146 1146 """Produce a type conforming to ``ilocalrepositorymain``."""
1147 1147 return localrepository
1148 1148
1149 1149
1150 1150 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1151 1151 class revlogfilestorage(object):
1152 1152 """File storage when using revlogs."""
1153 1153
1154 1154 def file(self, path):
1155 1155 if path.startswith(b'/'):
1156 1156 path = path[1:]
1157 1157
1158 1158 return filelog.filelog(self.svfs, path)
1159 1159
1160 1160
1161 1161 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1162 1162 class revlognarrowfilestorage(object):
1163 1163 """File storage when using revlogs and narrow files."""
1164 1164
1165 1165 def file(self, path):
1166 1166 if path.startswith(b'/'):
1167 1167 path = path[1:]
1168 1168
1169 1169 return filelog.narrowfilelog(self.svfs, path, self._storenarrowmatch)
1170 1170
1171 1171
1172 1172 def makefilestorage(requirements, features, **kwargs):
1173 1173 """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
1174 1174 features.add(repository.REPO_FEATURE_REVLOG_FILE_STORAGE)
1175 1175 features.add(repository.REPO_FEATURE_STREAM_CLONE)
1176 1176
1177 1177 if requirementsmod.NARROW_REQUIREMENT in requirements:
1178 1178 return revlognarrowfilestorage
1179 1179 else:
1180 1180 return revlogfilestorage
1181 1181
1182 1182
1183 1183 # List of repository interfaces and factory functions for them. Each
1184 1184 # will be called in order during ``makelocalrepository()`` to iteratively
1185 1185 # derive the final type for a local repository instance. We capture the
1186 1186 # function as a lambda so we don't hold a reference and the module-level
1187 1187 # functions can be wrapped.
1188 1188 REPO_INTERFACES = [
1189 1189 (repository.ilocalrepositorymain, lambda: makemain),
1190 1190 (repository.ilocalrepositoryfilestorage, lambda: makefilestorage),
1191 1191 ]
1192 1192
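# A hedged sketch of wrapping one of these factories from an extension;
# because they are captured lazily (via the lambdas above), wrapping the
# module-level name is enough:
#
#     from mercurial import extensions, localrepo
#
#     def extsetup(ui):
#         def wrapped(orig, requirements, features, **kwargs):
#             typ = orig(requirements, features, **kwargs)
#             return typ  # an extension could augment ``typ`` here
#
#         extensions.wrapfunction(localrepo, 'makefilestorage', wrapped)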
1193 1193
1194 1194 @interfaceutil.implementer(repository.ilocalrepositorymain)
1195 1195 class localrepository(object):
1196 1196 """Main class for representing local repositories.
1197 1197
1198 1198 All local repositories are instances of this class.
1199 1199
1200 1200 Constructed on its own, instances of this class are not usable as
1201 1201 repository objects. To obtain a usable repository object, call
1202 1202 ``hg.repository()``, ``localrepo.instance()``, or
1203 1203 ``localrepo.makelocalrepository()``. The latter is the lowest-level.
1204 1204 ``instance()`` adds support for creating new repositories.
1205 1205 ``hg.repository()`` adds more extension integration, including calling
1206 1206 ``reposetup()``. Generally speaking, ``hg.repository()`` should be
1207 1207 used.
1208 1208 """
1209 1209
1210 1210 # obsolete experimental requirements:
1211 1211 # - manifestv2: An experimental new manifest format that allowed
1212 1212 # for stem compression of long paths. The experiment ended up not
1213 1213 # being successful (repository sizes went up due to worse delta
1214 1214 # chains), and the code was deleted in 4.6.
1215 1215 supportedformats = {
1216 1216 requirementsmod.REVLOGV1_REQUIREMENT,
1217 1217 requirementsmod.GENERALDELTA_REQUIREMENT,
1218 1218 requirementsmod.TREEMANIFEST_REQUIREMENT,
1219 1219 requirementsmod.COPIESSDC_REQUIREMENT,
1220 1220 requirementsmod.REVLOGV2_REQUIREMENT,
1221 1221 requirementsmod.SIDEDATA_REQUIREMENT,
1222 1222 requirementsmod.SPARSEREVLOG_REQUIREMENT,
1223 1223 requirementsmod.NODEMAP_REQUIREMENT,
1224 1224 bookmarks.BOOKMARKS_IN_STORE_REQUIREMENT,
1225 1225 requirementsmod.SHARESAFE_REQUIREMENT,
1226 1226 }
1227 1227 _basesupported = supportedformats | {
1228 1228 requirementsmod.STORE_REQUIREMENT,
1229 1229 requirementsmod.FNCACHE_REQUIREMENT,
1230 1230 requirementsmod.SHARED_REQUIREMENT,
1231 1231 requirementsmod.RELATIVE_SHARED_REQUIREMENT,
1232 1232 requirementsmod.DOTENCODE_REQUIREMENT,
1233 1233 requirementsmod.SPARSE_REQUIREMENT,
1234 1234 requirementsmod.INTERNAL_PHASE_REQUIREMENT,
1235 1235 }
1236 1236
1237 1237 # list of prefixes for files which can be written without 'wlock'
1238 1238 # Extensions should extend this list when needed
1239 1239 _wlockfreeprefix = {
1240 1240 # We might consider requiring 'wlock' for the next
1241 1241 # two, but pretty much all the existing code assumes
1242 1242 # wlock is not needed so we keep them excluded for
1243 1243 # now.
1244 1244 b'hgrc',
1245 1245 b'requires',
1246 1246 # XXX cache is a complicated business; someone
1247 1247 # should investigate this in depth at some point
1248 1248 b'cache/',
1249 1249 # XXX shouldn't dirstate be covered by the wlock?
1250 1250 b'dirstate',
1251 1251 # XXX bisect was still a bit too messy at the time
1252 1252 # this changeset was introduced. Someone should fix
1253 1253 # the remaining bit and drop this line
1254 1254 b'bisect.state',
1255 1255 }
1256 1256
1257 1257 def __init__(
1258 1258 self,
1259 1259 baseui,
1260 1260 ui,
1261 1261 origroot,
1262 1262 wdirvfs,
1263 1263 hgvfs,
1264 1264 requirements,
1265 1265 supportedrequirements,
1266 1266 sharedpath,
1267 1267 store,
1268 1268 cachevfs,
1269 1269 wcachevfs,
1270 1270 features,
1271 1271 intents=None,
1272 1272 ):
1273 1273 """Create a new local repository instance.
1274 1274
1275 1275 Most callers should use ``hg.repository()``, ``localrepo.instance()``,
1276 1276 or ``localrepo.makelocalrepository()`` for obtaining a new repository
1277 1277 object.
1278 1278
1279 1279 Arguments:
1280 1280
1281 1281 baseui
1282 1282 ``ui.ui`` instance that ``ui`` argument was based off of.
1283 1283
1284 1284 ui
1285 1285 ``ui.ui`` instance for use by the repository.
1286 1286
1287 1287 origroot
1288 1288 ``bytes`` path to working directory root of this repository.
1289 1289
1290 1290 wdirvfs
1291 1291 ``vfs.vfs`` rooted at the working directory.
1292 1292
1293 1293 hgvfs
1294 1294 ``vfs.vfs`` rooted at .hg/
1295 1295
1296 1296 requirements
1297 1297 ``set`` of bytestrings representing repository opening requirements.
1298 1298
1299 1299 supportedrequirements
1300 1300 ``set`` of bytestrings representing repository requirements that we
1301 1301 know how to open. May be a superset of ``requirements``.
1302 1302
1303 1303 sharedpath
1304 1304 ``bytes`` defining the path to the storage base directory. Points to a
1305 1305 ``.hg/`` directory somewhere.
1306 1306
1307 1307 store
1308 1308 ``store.basicstore`` (or derived) instance providing access to
1309 1309 versioned storage.
1310 1310
1311 1311 cachevfs
1312 1312 ``vfs.vfs`` used for cache files.
1313 1313
1314 1314 wcachevfs
1315 1315 ``vfs.vfs`` used for cache files related to the working copy.
1316 1316
1317 1317 features
1318 1318 ``set`` of bytestrings defining features/capabilities of this
1319 1319 instance.
1320 1320
1321 1321 intents
1322 1322 ``set`` of system strings indicating what this repo will be used
1323 1323 for.
1324 1324 """
1325 1325 self.baseui = baseui
1326 1326 self.ui = ui
1327 1327 self.origroot = origroot
1328 1328 # vfs rooted at working directory.
1329 1329 self.wvfs = wdirvfs
1330 1330 self.root = wdirvfs.base
1331 1331 # vfs rooted at .hg/. Used to access most non-store paths.
1332 1332 self.vfs = hgvfs
1333 1333 self.path = hgvfs.base
1334 1334 self.requirements = requirements
1335 1335 self.nodeconstants = sha1nodeconstants
1336 1336 self.nullid = self.nodeconstants.nullid
1337 1337 self.supported = supportedrequirements
1338 1338 self.sharedpath = sharedpath
1339 1339 self.store = store
1340 1340 self.cachevfs = cachevfs
1341 1341 self.wcachevfs = wcachevfs
1342 1342 self.features = features
1343 1343
1344 1344 self.filtername = None
1345 1345
1346 1346 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
1347 1347 b'devel', b'check-locks'
1348 1348 ):
1349 1349 self.vfs.audit = self._getvfsward(self.vfs.audit)
1350 1350 # A list of callbacks to shape the phase if no data were found.
1351 1351 # Callbacks are in the form: func(repo, roots) --> processed root.
1352 1352 # This list is to be filled by extensions during repo setup
1353 1353 self._phasedefaults = []
1354 1354
1355 1355 color.setup(self.ui)
1356 1356
1357 1357 self.spath = self.store.path
1358 1358 self.svfs = self.store.vfs
1359 1359 self.sjoin = self.store.join
1360 1360 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
1361 1361 b'devel', b'check-locks'
1362 1362 ):
1363 1363 if util.safehasattr(self.svfs, b'vfs'): # this is filtervfs
1364 1364 self.svfs.vfs.audit = self._getsvfsward(self.svfs.vfs.audit)
1365 1365 else: # standard vfs
1366 1366 self.svfs.audit = self._getsvfsward(self.svfs.audit)
1367 1367
1368 1368 self._dirstatevalidatewarned = False
1369 1369
1370 1370 self._branchcaches = branchmap.BranchMapCache()
1371 1371 self._revbranchcache = None
1372 1372 self._filterpats = {}
1373 1373 self._datafilters = {}
1374 1374 self._transref = self._lockref = self._wlockref = None
1375 1375
1376 1376 # A cache for various files under .hg/ that tracks file changes,
1377 1377 # (used by the filecache decorator)
1378 1378 #
1379 1379 # Maps a property name to its util.filecacheentry
1380 1380 self._filecache = {}
1381 1381
1382 1382 # hold sets of revision to be filtered
1383 1383 # should be cleared when something might have changed the filter value:
1384 1384 # - new changesets,
1385 1385 # - phase change,
1386 1386 # - new obsolescence marker,
1387 1387 # - working directory parent change,
1388 1388 # - bookmark changes
1389 1389 self.filteredrevcache = {}
1390 1390
1391 1391 # post-dirstate-status hooks
1392 1392 self._postdsstatus = []
1393 1393
1394 1394 # generic mapping between names and nodes
1395 1395 self.names = namespaces.namespaces()
1396 1396
1397 1397 # Key to signature value.
1398 1398 self._sparsesignaturecache = {}
1399 1399 # Signature to cached matcher instance.
1400 1400 self._sparsematchercache = {}
1401 1401
1402 1402 self._extrafilterid = repoview.extrafilter(ui)
1403 1403
1404 1404 self.filecopiesmode = None
1405 1405 if requirementsmod.COPIESSDC_REQUIREMENT in self.requirements:
1406 1406 self.filecopiesmode = b'changeset-sidedata'
1407 1407
1408 1408 self._wanted_sidedata = set()
1409 1409 self._sidedata_computers = {}
1410 1410 metadatamod.set_sidedata_spec_for_repo(self)
1411 1411
1412 1412 def _getvfsward(self, origfunc):
1413 1413 """build a ward for self.vfs"""
1414 1414 rref = weakref.ref(self)
1415 1415
1416 1416 def checkvfs(path, mode=None):
1417 1417 ret = origfunc(path, mode=mode)
1418 1418 repo = rref()
1419 1419 if (
1420 1420 repo is None
1421 1421 or not util.safehasattr(repo, b'_wlockref')
1422 1422 or not util.safehasattr(repo, b'_lockref')
1423 1423 ):
1424 1424 return
1425 1425 if mode in (None, b'r', b'rb'):
1426 1426 return
1427 1427 if path.startswith(repo.path):
1428 1428 # truncate name relative to the repository (.hg)
1429 1429 path = path[len(repo.path) + 1 :]
1430 1430 if path.startswith(b'cache/'):
1431 1431 msg = b'accessing cache with vfs instead of cachevfs: "%s"'
1432 1432 repo.ui.develwarn(msg % path, stacklevel=3, config=b"cache-vfs")
1433 1433 # path prefixes covered by 'lock'
1434 1434 vfs_path_prefixes = (
1435 1435 b'journal.',
1436 1436 b'undo.',
1437 1437 b'strip-backup/',
1438 1438 b'cache/',
1439 1439 )
1440 1440 if any(path.startswith(prefix) for prefix in vfs_path_prefixes):
1441 1441 if repo._currentlock(repo._lockref) is None:
1442 1442 repo.ui.develwarn(
1443 1443 b'write with no lock: "%s"' % path,
1444 1444 stacklevel=3,
1445 1445 config=b'check-locks',
1446 1446 )
1447 1447 elif repo._currentlock(repo._wlockref) is None:
1448 1448 # rest of vfs files are covered by 'wlock'
1449 1449 #
1450 1450 # exclude special files
1451 1451 for prefix in self._wlockfreeprefix:
1452 1452 if path.startswith(prefix):
1453 1453 return
1454 1454 repo.ui.develwarn(
1455 1455 b'write with no wlock: "%s"' % path,
1456 1456 stacklevel=3,
1457 1457 config=b'check-locks',
1458 1458 )
1459 1459 return ret
1460 1460
1461 1461 return checkvfs
1462 1462
1463 1463 def _getsvfsward(self, origfunc):
1464 1464 """build a ward for self.svfs"""
1465 1465 rref = weakref.ref(self)
1466 1466
1467 1467 def checksvfs(path, mode=None):
1468 1468 ret = origfunc(path, mode=mode)
1469 1469 repo = rref()
1470 1470 if repo is None or not util.safehasattr(repo, b'_lockref'):
1471 1471 return
1472 1472 if mode in (None, b'r', b'rb'):
1473 1473 return
1474 1474 if path.startswith(repo.sharedpath):
1475 1475 # truncate name relative to the repository (.hg)
1476 1476 path = path[len(repo.sharedpath) + 1 :]
1477 1477 if repo._currentlock(repo._lockref) is None:
1478 1478 repo.ui.develwarn(
1479 1479 b'write with no lock: "%s"' % path, stacklevel=4
1480 1480 )
1481 1481 return ret
1482 1482
1483 1483 return checksvfs
1484 1484
1485 1485 def close(self):
1486 1486 self._writecaches()
1487 1487
1488 1488 def _writecaches(self):
1489 1489 if self._revbranchcache:
1490 1490 self._revbranchcache.write()
1491 1491
1492 1492 def _restrictcapabilities(self, caps):
1493 1493 if self.ui.configbool(b'experimental', b'bundle2-advertise'):
1494 1494 caps = set(caps)
1495 1495 capsblob = bundle2.encodecaps(
1496 1496 bundle2.getrepocaps(self, role=b'client')
1497 1497 )
1498 1498 caps.add(b'bundle2=' + urlreq.quote(capsblob))
1499 1499 if self.ui.configbool(b'experimental', b'narrow'):
1500 1500 caps.add(wireprototypes.NARROWCAP)
1501 1501 return caps
1502 1502
1503 1503 # Don't cache auditor/nofsauditor, or you'll end up with reference cycle:
1504 1504 # self -> auditor -> self._checknested -> self
1505 1505
1506 1506 @property
1507 1507 def auditor(self):
1508 1508 # This is only used by context.workingctx.match in order to
1509 1509 # detect files in subrepos.
1510 1510 return pathutil.pathauditor(self.root, callback=self._checknested)
1511 1511
1512 1512 @property
1513 1513 def nofsauditor(self):
1514 1514 # This is only used by context.basectx.match in order to detect
1515 1515 # files in subrepos.
1516 1516 return pathutil.pathauditor(
1517 1517 self.root, callback=self._checknested, realfs=False, cached=True
1518 1518 )
1519 1519
1520 1520 def _checknested(self, path):
1521 1521 """Determine if path is a legal nested repository."""
1522 1522 if not path.startswith(self.root):
1523 1523 return False
1524 1524 subpath = path[len(self.root) + 1 :]
1525 1525 normsubpath = util.pconvert(subpath)
1526 1526
1527 1527 # XXX: Checking against the current working copy is wrong in
1528 1528 # the sense that it can reject things like
1529 1529 #
1530 1530 # $ hg cat -r 10 sub/x.txt
1531 1531 #
1532 1532 # if sub/ is no longer a subrepository in the working copy
1533 1533 # parent revision.
1534 1534 #
1535 1535 # However, it can of course also allow things that would have
1536 1536 # been rejected before, such as the above cat command if sub/
1537 1537 # is a subrepository now, but was a normal directory before.
1538 1538 # The old path auditor would have rejected by mistake since it
1539 1539 # panics when it sees sub/.hg/.
1540 1540 #
1541 1541 # All in all, checking against the working copy seems sensible
1542 1542 # since we want to prevent access to nested repositories on
1543 1543 # the filesystem *now*.
1544 1544 ctx = self[None]
1545 1545 parts = util.splitpath(subpath)
1546 1546 while parts:
1547 1547 prefix = b'/'.join(parts)
1548 1548 if prefix in ctx.substate:
1549 1549 if prefix == normsubpath:
1550 1550 return True
1551 1551 else:
1552 1552 sub = ctx.sub(prefix)
1553 1553 return sub.checknested(subpath[len(prefix) + 1 :])
1554 1554 else:
1555 1555 parts.pop()
1556 1556 return False
1557 1557
1558 1558 def peer(self):
1559 1559 return localpeer(self) # not cached to avoid reference cycle
1560 1560
1561 1561 def unfiltered(self):
1562 1562 """Return unfiltered version of the repository
1563 1563
1564 1564 Intended to be overwritten by filtered repo."""
1565 1565 return self
1566 1566
1567 1567 def filtered(self, name, visibilityexceptions=None):
1568 1568 """Return a filtered version of a repository
1569 1569
1570 1570 The `name` parameter is the identifier of the requested view. This
1571 1571 will return a repoview object set "exactly" to the specified view.
1572 1572
1573 1573 This function does not apply recursive filtering to a repository. For
1574 1574 example, calling `repo.filtered("served")` will return a repoview using
1575 1575 the "served" view, regardless of the initial view used by `repo`.
1576 1576
1577 1577 In other words, there is always only one level of `repoview` "filtering".
1578 1578 """
1579 1579 if self._extrafilterid is not None and b'%' not in name:
1580 1580 name = name + b'%' + self._extrafilterid
1581 1581
1582 1582 cls = repoview.newtype(self.unfiltered().__class__)
1583 1583 return cls(self, name, visibilityexceptions)
1584 1584
1585 1585 @mixedrepostorecache(
1586 1586 (b'bookmarks', b'plain'),
1587 1587 (b'bookmarks.current', b'plain'),
1588 1588 (b'bookmarks', b''),
1589 1589 (b'00changelog.i', b''),
1590 1590 )
1591 1591 def _bookmarks(self):
1592 1592 # Since the multiple files involved in the transaction cannot be
1593 1593 # written atomically (with the current repository format), there is a race
1594 1594 # condition here.
1595 1595 #
1596 1596 # 1) changelog content A is read
1597 1597 # 2) outside transaction update changelog to content B
1598 1598 # 3) outside transaction update bookmark file referring to content B
1599 1599 # 4) bookmarks file content is read and filtered against changelog-A
1600 1600 #
1601 1601 # When this happens, bookmarks against nodes missing from A are dropped.
1602 1602 #
1603 1603 # Having this happen during a read is not great, but it becomes worse
1604 1604 # when it happens during a write, because the bookmarks to the "unknown"
1605 1605 # nodes will be dropped for good. However, writes happen within locks.
1606 1606 # This locking makes it possible to have a race-free consistent read.
1607 1607 # For this purpose, data read from disk before locking is
1608 1608 # "invalidated" right after the locks are taken. These invalidations are
1609 1609 # "light": the `filecache` mechanism keeps the data in memory and will
1610 1610 # reuse it if the underlying files did not change. Not parsing the
1611 1611 # same data multiple times helps performance.
1612 1612 #
1613 1613 # Unfortunately, in the case described above, the files tracked by the
1614 1614 # bookmarks file cache might not have changed, but the in-memory
1615 1615 # content is still "wrong" because we used an older changelog content
1616 1616 # to process the on-disk data. So after locking, the changelog would be
1617 1617 # refreshed but `_bookmarks` would be preserved.
1618 1618 # Adding `00changelog.i` to the list of tracked file is not
1619 1619 # enough, because at the time we build the content for `_bookmarks` in
1620 1620 # (4), the changelog file has already diverged from the content used
1621 1621 # for loading `changelog` in (1)
1622 1622 #
1623 1623 # To prevent the issue, we force the changelog to be explicitly
1624 1624 # reloaded while computing `_bookmarks`. The data race can still happen
1625 1625 # without the lock (with a narrower window), but it would no longer go
1626 1626 # undetected during the lock time refresh.
1627 1627 #
1628 1628 # The new schedule is as follows:
1629 1629 #
1630 1630 # 1) the filecache logic detects that `_bookmarks` needs to be computed
1631 1631 # 2) cachestats for `bookmarks` and `changelog` are captured (for book)
1632 1632 # 3) we force the `changelog` filecache to be tested
1633 1633 # 4) a cachestat for `changelog` is captured (for changelog)
1634 1634 # 5) `_bookmarks` is computed and cached
1635 1635 #
1636 1636 # The step in (3) ensures we have a changelog at least as recent as the
1637 1637 # cache stat computed in (1). As a result, at locking time:
1638 1638 # * if the changelog did not change since (1) -> we can reuse the data
1639 1639 # * otherwise -> the bookmarks get refreshed.
1640 1640 self._refreshchangelog()
1641 1641 return bookmarks.bmstore(self)
1642 1642
1643 1643 def _refreshchangelog(self):
1644 1644 """make sure the in-memory changelog matches the on-disk one"""
1645 1645 if 'changelog' in vars(self) and self.currenttransaction() is None:
1646 1646 del self.changelog
1647 1647
1648 1648 @property
1649 1649 def _activebookmark(self):
1650 1650 return self._bookmarks.active
1651 1651
1652 1652 # _phasesets depend on changelog. what we need is to call
1653 1653 # _phasecache.invalidate() if '00changelog.i' was changed, but it
1654 1654 # can't be easily expressed in filecache mechanism.
1655 1655 @storecache(b'phaseroots', b'00changelog.i')
1656 1656 def _phasecache(self):
1657 1657 return phases.phasecache(self, self._phasedefaults)
1658 1658
1659 1659 @storecache(b'obsstore')
1660 1660 def obsstore(self):
1661 1661 return obsolete.makestore(self.ui, self)
1662 1662
1663 1663 @storecache(b'00changelog.i')
1664 1664 def changelog(self):
1665 1665 # load dirstate before changelog to avoid a race (see issue6303)
1666 1666 self.dirstate.prefetch_parents()
1667 1667 return self.store.changelog(
1668 1668 txnutil.mayhavepending(self.root),
1669 1669 concurrencychecker=revlogchecker.get_checker(self.ui, b'changelog'),
1670 1670 )
1671 1671
1672 1672 @storecache(b'00manifest.i')
1673 1673 def manifestlog(self):
1674 1674 return self.store.manifestlog(self, self._storenarrowmatch)
1675 1675
1676 1676 @repofilecache(b'dirstate')
1677 1677 def dirstate(self):
1678 1678 return self._makedirstate()
1679 1679
1680 1680 def _makedirstate(self):
1681 1681 """Extension point for wrapping the dirstate per-repo."""
1682 1682 sparsematchfn = lambda: sparse.matcher(self)
1683 1683
1684 1684 return dirstate.dirstate(
1685 1685 self.vfs,
1686 1686 self.ui,
1687 1687 self.root,
1688 1688 self._dirstatevalidate,
1689 1689 sparsematchfn,
1690 1690 self.nodeconstants,
1691 1691 )
1692 1692
1693 1693 def _dirstatevalidate(self, node):
1694 1694 try:
1695 1695 self.changelog.rev(node)
1696 1696 return node
1697 1697 except error.LookupError:
1698 1698 if not self._dirstatevalidatewarned:
1699 1699 self._dirstatevalidatewarned = True
1700 1700 self.ui.warn(
1701 1701 _(b"warning: ignoring unknown working parent %s!\n")
1702 1702 % short(node)
1703 1703 )
1704 1704 return self.nullid
1705 1705
1706 1706 @storecache(narrowspec.FILENAME)
1707 1707 def narrowpats(self):
1708 1708 """matcher patterns for this repository's narrowspec
1709 1709
1710 1710 A tuple of (includes, excludes).
1711 1711 """
1712 1712 return narrowspec.load(self)
1713 1713
1714 1714 @storecache(narrowspec.FILENAME)
1715 1715 def _storenarrowmatch(self):
1716 1716 if requirementsmod.NARROW_REQUIREMENT not in self.requirements:
1717 1717 return matchmod.always()
1718 1718 include, exclude = self.narrowpats
1719 1719 return narrowspec.match(self.root, include=include, exclude=exclude)
1720 1720
1721 1721 @storecache(narrowspec.FILENAME)
1722 1722 def _narrowmatch(self):
1723 1723 if requirementsmod.NARROW_REQUIREMENT not in self.requirements:
1724 1724 return matchmod.always()
1725 1725 narrowspec.checkworkingcopynarrowspec(self)
1726 1726 include, exclude = self.narrowpats
1727 1727 return narrowspec.match(self.root, include=include, exclude=exclude)
1728 1728
1729 1729 def narrowmatch(self, match=None, includeexact=False):
1730 1730 """matcher corresponding to the repo's narrowspec
1731 1731
1732 1732 If `match` is given, then that will be intersected with the narrow
1733 1733 matcher.
1734 1734
1735 1735 If `includeexact` is True, then any exact matches from `match` will
1736 1736 be included even if they're outside the narrowspec.
1737 1737 """
1738 1738 if match:
1739 1739 if includeexact and not self._narrowmatch.always():
1740 1740 # do not exclude explicitly-specified paths, so that warnings
1741 1741 # can be issued for them later on
1742 1742 em = matchmod.exact(match.files())
1743 1743 nm = matchmod.unionmatcher([self._narrowmatch, em])
1744 1744 return matchmod.intersectmatchers(match, nm)
1745 1745 return matchmod.intersectmatchers(match, self._narrowmatch)
1746 1746 return self._narrowmatch
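    # A usage sketch (hypothetical caller): intersect a user-supplied
    # matcher with the narrowspec so walks stay inside the narrow clone
    # while exact paths the user named are still surfaced for warnings:
    #
    #   m = matchmod.match(repo.root, repo.getcwd(), [b'glob:src/**'])
    #   m = repo.narrowmatch(m, includeexact=True)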
1747 1747
1748 1748 def setnarrowpats(self, newincludes, newexcludes):
1749 1749 narrowspec.save(self, newincludes, newexcludes)
1750 1750 self.invalidate(clearfilecache=True)
1751 1751
1752 1752 @unfilteredpropertycache
1753 1753 def _quick_access_changeid_null(self):
1754 1754 return {
1755 1755 b'null': (nullrev, self.nodeconstants.nullid),
1756 1756 nullrev: (nullrev, self.nodeconstants.nullid),
1757 1757 self.nullid: (nullrev, self.nullid),
1758 1758 }
1759 1759
1760 1760 @unfilteredpropertycache
1761 1761 def _quick_access_changeid_wc(self):
1762 1762 # also fast path access to the working copy parents
1763 1763 # however, only do it for filters that ensure the wc is visible.
1764 1764 quick = self._quick_access_changeid_null.copy()
1765 1765 cl = self.unfiltered().changelog
1766 1766 for node in self.dirstate.parents():
1767 1767 if node == self.nullid:
1768 1768 continue
1769 1769 rev = cl.index.get_rev(node)
1770 1770 if rev is None:
1771 1771 # unknown working copy parent case:
1772 1772 #
1773 1773 # skip the fast path and let higher code deal with it
1774 1774 continue
1775 1775 pair = (rev, node)
1776 1776 quick[rev] = pair
1777 1777 quick[node] = pair
1778 1778 # also add the parents of the parents
1779 1779 for r in cl.parentrevs(rev):
1780 1780 if r == nullrev:
1781 1781 continue
1782 1782 n = cl.node(r)
1783 1783 pair = (r, n)
1784 1784 quick[r] = pair
1785 1785 quick[n] = pair
1786 1786 p1node = self.dirstate.p1()
1787 1787 if p1node != self.nullid:
1788 1788 quick[b'.'] = quick[p1node]
1789 1789 return quick
1790 1790
1791 1791 @unfilteredmethod
1792 1792 def _quick_access_changeid_invalidate(self):
1793 1793 if '_quick_access_changeid_wc' in vars(self):
1794 1794 del self.__dict__['_quick_access_changeid_wc']
1795 1795
1796 1796 @property
1797 1797 def _quick_access_changeid(self):
1798 1798 """a helper dictionary for __getitem__ calls
1799 1799
1800 1800 This contains the symbols we can recognise right away without
1801 1801 further processing.
1802 1802 """
1803 1803 if self.filtername in repoview.filter_has_wc:
1804 1804 return self._quick_access_changeid_wc
1805 1805 return self._quick_access_changeid_null
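    # A lookup sketch: for views listed in repoview.filter_has_wc, the
    # mapping resolves b'.', b'null', raw nodes and revs in O(1) without
    # touching filtering or obsolescence logic:
    #
    #   pair = repo._quick_access_changeid.get(b'.')
    #   if pair is not None:
    #       rev, node = pair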
1806 1806
1807 1807 def __getitem__(self, changeid):
1808 1808 # dealing with special cases
1809 1809 if changeid is None:
1810 1810 return context.workingctx(self)
1811 1811 if isinstance(changeid, context.basectx):
1812 1812 return changeid
1813 1813
1814 1814 # dealing with multiple revisions
1815 1815 if isinstance(changeid, slice):
1816 1816 # wdirrev isn't contiguous so the slice shouldn't include it
1817 1817 return [
1818 1818 self[i]
1819 1819 for i in pycompat.xrange(*changeid.indices(len(self)))
1820 1820 if i not in self.changelog.filteredrevs
1821 1821 ]
1822 1822
1823 1823 # dealing with some special values
1824 1824 quick_access = self._quick_access_changeid.get(changeid)
1825 1825 if quick_access is not None:
1826 1826 rev, node = quick_access
1827 1827 return context.changectx(self, rev, node, maybe_filtered=False)
1828 1828 if changeid == b'tip':
1829 1829 node = self.changelog.tip()
1830 1830 rev = self.changelog.rev(node)
1831 1831 return context.changectx(self, rev, node)
1832 1832
1833 1833 # dealing with arbitrary values
1834 1834 try:
1835 1835 if isinstance(changeid, int):
1836 1836 node = self.changelog.node(changeid)
1837 1837 rev = changeid
1838 1838 elif changeid == b'.':
1839 1839 # this is a hack to delay/avoid loading obsmarkers
1840 1840 # when we know that '.' won't be hidden
1841 1841 node = self.dirstate.p1()
1842 1842 rev = self.unfiltered().changelog.rev(node)
1843 1843 elif len(changeid) == self.nodeconstants.nodelen:
1844 1844 try:
1845 1845 node = changeid
1846 1846 rev = self.changelog.rev(changeid)
1847 1847 except error.FilteredLookupError:
1848 1848 changeid = hex(changeid) # for the error message
1849 1849 raise
1850 1850 except LookupError:
1851 1851 # check if it might have come from damaged dirstate
1852 1852 #
1853 1853 # XXX we could avoid the unfiltered if we had a recognizable
1854 1854 # exception for filtered changeset access
1855 1855 if (
1856 1856 self.local()
1857 1857 and changeid in self.unfiltered().dirstate.parents()
1858 1858 ):
1859 1859 msg = _(b"working directory has unknown parent '%s'!")
1860 1860 raise error.Abort(msg % short(changeid))
1861 1861 changeid = hex(changeid) # for the error message
1862 1862 raise
1863 1863
1864 1864 elif len(changeid) == 2 * self.nodeconstants.nodelen:
1865 1865 node = bin(changeid)
1866 1866 rev = self.changelog.rev(node)
1867 1867 else:
1868 1868 raise error.ProgrammingError(
1869 1869 b"unsupported changeid '%s' of type %s"
1870 1870 % (changeid, pycompat.bytestr(type(changeid)))
1871 1871 )
1872 1872
1873 1873 return context.changectx(self, rev, node)
1874 1874
1875 1875 except (error.FilteredIndexError, error.FilteredLookupError):
1876 1876 raise error.FilteredRepoLookupError(
1877 1877 _(b"filtered revision '%s'") % pycompat.bytestr(changeid)
1878 1878 )
1879 1879 except (IndexError, LookupError):
1880 1880 raise error.RepoLookupError(
1881 1881 _(b"unknown revision '%s'") % pycompat.bytestr(changeid)
1882 1882 )
1883 1883 except error.WdirUnsupported:
1884 1884 return context.workingctx(self)
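    # The changeid forms accepted above, as a sketch:
    #
    #   repo[None]     -> workingctx
    #   repo[5]        -> changectx for rev 5
    #   repo[node]     -> binary node of nodelen bytes
    #   repo[hexnode]  -> hex node of 2 * nodelen bytes
    #   repo[b'.'], repo[b'tip'] -> fast-path symbols
    #   repo[0:3]      -> list of changectx, skipping filtered revs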
1885 1885
1886 1886 def __contains__(self, changeid):
1887 1887 """True if the given changeid exists"""
1888 1888 try:
1889 1889 self[changeid]
1890 1890 return True
1891 1891 except error.RepoLookupError:
1892 1892 return False
1893 1893
1894 1894 def __nonzero__(self):
1895 1895 return True
1896 1896
1897 1897 __bool__ = __nonzero__
1898 1898
1899 1899 def __len__(self):
1900 1900 # no need to pay the cost of repoview.changelog
1901 1901 unfi = self.unfiltered()
1902 1902 return len(unfi.changelog)
1903 1903
1904 1904 def __iter__(self):
1905 1905 return iter(self.changelog)
1906 1906
1907 1907 def revs(self, expr, *args):
1908 1908 """Find revisions matching a revset.
1909 1909
1910 1910 The revset is specified as a string ``expr`` that may contain
1911 1911 %-formatting to escape certain types. See ``revsetlang.formatspec``.
1912 1912
1913 1913 Revset aliases from the configuration are not expanded. To expand
1914 1914 user aliases, consider calling ``scmutil.revrange()`` or
1915 1915 ``repo.anyrevs([expr], user=True)``.
1916 1916
1917 1917 Returns a smartset.abstractsmartset, which is a list-like interface
1918 1918 that contains integer revisions.
1919 1919 """
1920 1920 tree = revsetlang.spectree(expr, *args)
1921 1921 return revset.makematcher(tree)(self)
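    # A usage sketch (`revs` is assumed to be a list of integer
    # revisions; %ld is the list form per revsetlang.formatspec):
    #
    #   for r in repo.revs(b'ancestors(%ld) and not public()', revs):
    #       ...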
1922 1922
1923 1923 def set(self, expr, *args):
1924 1924 """Find revisions matching a revset and emit changectx instances.
1925 1925
1926 1926 This is a convenience wrapper around ``revs()`` that iterates the
1927 1927 result and is a generator of changectx instances.
1928 1928
1929 1929 Revset aliases from the configuration are not expanded. To expand
1930 1930 user aliases, consider calling ``scmutil.revrange()``.
1931 1931 """
1932 1932 for r in self.revs(expr, *args):
1933 1933 yield self[r]
1934 1934
1935 1935 def anyrevs(self, specs, user=False, localalias=None):
1936 1936 """Find revisions matching one of the given revsets.
1937 1937
1938 1938 Revset aliases from the configuration are not expanded by default. To
1939 1939 expand user aliases, specify ``user=True``. To provide some local
1940 1940 definitions overriding user aliases, set ``localalias`` to
1941 1941 ``{name: definitionstring}``.
1942 1942 """
1943 1943 if specs == [b'null']:
1944 1944 return revset.baseset([nullrev])
1945 1945 if specs == [b'.']:
1946 1946 quick_data = self._quick_access_changeid.get(b'.')
1947 1947 if quick_data is not None:
1948 1948 return revset.baseset([quick_data[0]])
1949 1949 if user:
1950 1950 m = revset.matchany(
1951 1951 self.ui,
1952 1952 specs,
1953 1953 lookup=revset.lookupfn(self),
1954 1954 localalias=localalias,
1955 1955 )
1956 1956 else:
1957 1957 m = revset.matchany(None, specs, localalias=localalias)
1958 1958 return m(self)
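    # A sketch with a local alias shadowing user aliases (the alias name
    # and definition are hypothetical):
    #
    #   s = repo.anyrevs(
    #       [b'mine'], user=True, localalias={b'mine': b'author(alice)'}
    #   )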
1959 1959
1960 1960 def url(self):
1961 1961 return b'file:' + self.root
1962 1962
1963 1963 def hook(self, name, throw=False, **args):
1964 1964 """Call a hook, passing this repo instance.
1965 1965
1966 1966 This is a convenience method to aid invoking hooks. Extensions likely
1967 1967 won't call this unless they have registered a custom hook or are
1968 1968 replacing code that is expected to call a hook.
1969 1969 """
1970 1970 return hook.hook(self.ui, self, name, throw, **args)
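    # A usage sketch (the hook name and argument are hypothetical); for
    # external shell hooks, keyword arguments surface as HG_* environment
    # variables:
    #
    #   repo.hook(b'myext-sync', throw=False, source=b'pull')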
1971 1971
1972 1972 @filteredpropertycache
1973 1973 def _tagscache(self):
1974 1974 """Returns a tagscache object that contains various tags related
1975 1975 caches."""
1976 1976
1977 1977 # This simplifies its cache management by having one decorated
1978 1978 # function (this one) and the rest simply fetch things from it.
1979 1979 class tagscache(object):
1980 1980 def __init__(self):
1981 1981 # These two define the set of tags for this repository. tags
1982 1982 # maps tag name to node; tagtypes maps tag name to 'global' or
1983 1983 # 'local'. (Global tags are defined by .hgtags across all
1984 1984 # heads, and local tags are defined in .hg/localtags.)
1985 1985 # They constitute the in-memory cache of tags.
1986 1986 self.tags = self.tagtypes = None
1987 1987
1988 1988 self.nodetagscache = self.tagslist = None
1989 1989
1990 1990 cache = tagscache()
1991 1991 cache.tags, cache.tagtypes = self._findtags()
1992 1992
1993 1993 return cache
1994 1994
1995 1995 def tags(self):
1996 1996 '''return a mapping of tag to node'''
1997 1997 t = {}
1998 1998 if self.changelog.filteredrevs:
1999 1999 tags, tt = self._findtags()
2000 2000 else:
2001 2001 tags = self._tagscache.tags
2002 2002 rev = self.changelog.rev
2003 2003 for k, v in pycompat.iteritems(tags):
2004 2004 try:
2005 2005 # ignore tags to unknown nodes
2006 2006 rev(v)
2007 2007 t[k] = v
2008 2008 except (error.LookupError, ValueError):
2009 2009 pass
2010 2010 return t
2011 2011
2012 2012 def _findtags(self):
2013 2013 """Do the hard work of finding tags. Return a pair of dicts
2014 2014 (tags, tagtypes) where tags maps tag name to node, and tagtypes
2015 2015 maps tag name to a string like \'global\' or \'local\'.
2016 2016 Subclasses or extensions are free to add their own tags, but
2017 2017 should be aware that the returned dicts will be retained for the
2018 2018 duration of the localrepo object."""
2019 2019
2020 2020 # XXX what tagtype should subclasses/extensions use? Currently
2021 2021 # mq and bookmarks add tags, but do not set the tagtype at all.
2022 2022 # Should each extension invent its own tag type? Should there
2023 2023 # be one tagtype for all such "virtual" tags? Or is the status
2024 2024 # quo fine?
2025 2025
2026 2026 # map tag name to (node, hist)
2027 2027 alltags = tagsmod.findglobaltags(self.ui, self)
2028 2028 # map tag name to tag type
2029 2029 tagtypes = {tag: b'global' for tag in alltags}
2030 2030
2031 2031 tagsmod.readlocaltags(self.ui, self, alltags, tagtypes)
2032 2032
2033 2033 # Build the return dicts. Have to re-encode tag names because
2034 2034 # the tags module always uses UTF-8 (in order not to lose info
2035 2035 # writing to the cache), but the rest of Mercurial wants them in
2036 2036 # local encoding.
2037 2037 tags = {}
2038 2038 for (name, (node, hist)) in pycompat.iteritems(alltags):
2039 2039 if node != self.nullid:
2040 2040 tags[encoding.tolocal(name)] = node
2041 2041 tags[b'tip'] = self.changelog.tip()
2042 2042 tagtypes = {
2043 2043 encoding.tolocal(name): value
2044 2044 for (name, value) in pycompat.iteritems(tagtypes)
2045 2045 }
2046 2046 return (tags, tagtypes)
2047 2047
2048 2048 def tagtype(self, tagname):
2049 2049 """
2050 2050 return the type of the given tag. result can be:
2051 2051
2052 2052 'local' : a local tag
2053 2053 'global' : a global tag
2054 2054 None : tag does not exist
2055 2055 """
2056 2056
2057 2057 return self._tagscache.tagtypes.get(tagname)
2058 2058
2059 2059 def tagslist(self):
2060 2060 '''return a list of tags ordered by revision'''
2061 2061 if not self._tagscache.tagslist:
2062 2062 l = []
2063 2063 for t, n in pycompat.iteritems(self.tags()):
2064 2064 l.append((self.changelog.rev(n), t, n))
2065 2065 self._tagscache.tagslist = [(t, n) for r, t, n in sorted(l)]
2066 2066
2067 2067 return self._tagscache.tagslist
2068 2068
2069 2069 def nodetags(self, node):
2070 2070 '''return the tags associated with a node'''
2071 2071 if not self._tagscache.nodetagscache:
2072 2072 nodetagscache = {}
2073 2073 for t, n in pycompat.iteritems(self._tagscache.tags):
2074 2074 nodetagscache.setdefault(n, []).append(t)
2075 2075 for tags in pycompat.itervalues(nodetagscache):
2076 2076 tags.sort()
2077 2077 self._tagscache.nodetagscache = nodetagscache
2078 2078 return self._tagscache.nodetagscache.get(node, [])
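    # A sketch tying the tag lookups together (the tag name is
    # hypothetical):
    #
    #   node = repo.tags()[b'v1.0']   # tag name -> node
    #   kind = repo.tagtype(b'v1.0')  # b'global', b'local' or None
    #   names = repo.nodetags(node)   # node -> sorted tag names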
2079 2079
2080 2080 def nodebookmarks(self, node):
2081 2081 """return the list of bookmarks pointing to the specified node"""
2082 2082 return self._bookmarks.names(node)
2083 2083
2084 2084 def branchmap(self):
2085 2085 """returns a dictionary {branch: [branchheads]} with branchheads
2086 2086 ordered by increasing revision number"""
2087 2087 return self._branchcaches[self]
2088 2088
2089 2089 @unfilteredmethod
2090 2090 def revbranchcache(self):
2091 2091 if not self._revbranchcache:
2092 2092 self._revbranchcache = branchmap.revbranchcache(self.unfiltered())
2093 2093 return self._revbranchcache
2094 2094
2095 2095 def register_changeset(self, rev, changelogrevision):
2096 2096 self.revbranchcache().setdata(rev, changelogrevision)
2097 2097
2098 2098 def branchtip(self, branch, ignoremissing=False):
2099 2099 """return the tip node for a given branch
2100 2100
2101 2101 If ignoremissing is True, then this method will not raise an error.
2102 2102 This is helpful for callers that only expect None for a missing branch
2103 2103 (e.g. namespace).
2104 2104
2105 2105 """
2106 2106 try:
2107 2107 return self.branchmap().branchtip(branch)
2108 2108 except KeyError:
2109 2109 if not ignoremissing:
2110 2110 raise error.RepoLookupError(_(b"unknown branch '%s'") % branch)
2111 2111 else:
2112 2112 pass
2113 2113
2114 2114 def lookup(self, key):
2115 2115 node = scmutil.revsymbol(self, key).node()
2116 2116 if node is None:
2117 2117 raise error.RepoLookupError(_(b"unknown revision '%s'") % key)
2118 2118 return node
2119 2119
2120 2120 def lookupbranch(self, key):
2121 2121 if self.branchmap().hasbranch(key):
2122 2122 return key
2123 2123
2124 2124 return scmutil.revsymbol(self, key).branch()
2125 2125
2126 2126 def known(self, nodes):
2127 2127 cl = self.changelog
2128 2128 get_rev = cl.index.get_rev
2129 2129 filtered = cl.filteredrevs
2130 2130 result = []
2131 2131 for n in nodes:
2132 2132 r = get_rev(n)
2133 2133 resp = not (r is None or r in filtered)
2134 2134 result.append(resp)
2135 2135 return result
2136 2136
2137 2137 def local(self):
2138 2138 return self
2139 2139
2140 2140 def publishing(self):
2141 2141 # it's safe (and desirable) to trust the publish flag unconditionally
2142 2142 # so that we don't finalize changes shared between users via ssh or nfs
2143 2143 return self.ui.configbool(b'phases', b'publish', untrusted=True)
2144 2144
2145 2145 def cancopy(self):
2146 2146 # so statichttprepo's override of local() works
2147 2147 if not self.local():
2148 2148 return False
2149 2149 if not self.publishing():
2150 2150 return True
2151 2151 # if publishing we can't copy if there is filtered content
2152 2152 return not self.filtered(b'visible').changelog.filteredrevs
2153 2153
2154 2154 def shared(self):
2155 2155 '''the type of shared repository (None if not shared)'''
2156 2156 if self.sharedpath != self.path:
2157 2157 return b'store'
2158 2158 return None
2159 2159
2160 2160 def wjoin(self, f, *insidef):
2161 2161 return self.vfs.reljoin(self.root, f, *insidef)
2162 2162
2163 2163 def setparents(self, p1, p2=None):
2164 2164 if p2 is None:
2165 2165 p2 = self.nullid
2166 2166 self[None].setparents(p1, p2)
2167 2167 self._quick_access_changeid_invalidate()
2168 2168
2169 2169 def filectx(self, path, changeid=None, fileid=None, changectx=None):
2170 2170 """changeid must be a changeset revision, if specified.
2171 2171 fileid can be a file revision or node."""
2172 2172 return context.filectx(
2173 2173 self, path, changeid, fileid, changectx=changectx
2174 2174 )
2175 2175
2176 2176 def getcwd(self):
2177 2177 return self.dirstate.getcwd()
2178 2178
2179 2179 def pathto(self, f, cwd=None):
2180 2180 return self.dirstate.pathto(f, cwd)
2181 2181
2182 2182 def _loadfilter(self, filter):
2183 2183 if filter not in self._filterpats:
2184 2184 l = []
2185 2185 for pat, cmd in self.ui.configitems(filter):
2186 2186 if cmd == b'!':
2187 2187 continue
2188 2188 mf = matchmod.match(self.root, b'', [pat])
2189 2189 fn = None
2190 2190 params = cmd
2191 2191 for name, filterfn in pycompat.iteritems(self._datafilters):
2192 2192 if cmd.startswith(name):
2193 2193 fn = filterfn
2194 2194 params = cmd[len(name) :].lstrip()
2195 2195 break
2196 2196 if not fn:
2197 2197 fn = lambda s, c, **kwargs: procutil.filter(s, c)
2198 2198 fn.__name__ = 'commandfilter'
2199 2199 # Wrap old filters not supporting keyword arguments
2200 2200 if not pycompat.getargspec(fn)[2]:
2201 2201 oldfn = fn
2202 2202 fn = lambda s, c, oldfn=oldfn, **kwargs: oldfn(s, c)
2203 2203 fn.__name__ = 'compat-' + oldfn.__name__
2204 2204 l.append((mf, fn, params))
2205 2205 self._filterpats[filter] = l
2206 2206 return self._filterpats[filter]
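    # The patterns come from the [encode] and [decode] hgrc sections; a
    # sketch in the spirit of the hgrc documentation (commands without a
    # registered data-filter prefix are run as shell filters):
    #
    #   [encode]
    #   *.gz = pipe: gunzip
    #   [decode]
    #   *.gz = gzip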
2207 2207
2208 2208 def _filter(self, filterpats, filename, data):
2209 2209 for mf, fn, cmd in filterpats:
2210 2210 if mf(filename):
2211 2211 self.ui.debug(
2212 2212 b"filtering %s through %s\n"
2213 2213 % (filename, cmd or pycompat.sysbytes(fn.__name__))
2214 2214 )
2215 2215 data = fn(data, cmd, ui=self.ui, repo=self, filename=filename)
2216 2216 break
2217 2217
2218 2218 return data
2219 2219
2220 2220 @unfilteredpropertycache
2221 2221 def _encodefilterpats(self):
2222 2222 return self._loadfilter(b'encode')
2223 2223
2224 2224 @unfilteredpropertycache
2225 2225 def _decodefilterpats(self):
2226 2226 return self._loadfilter(b'decode')
2227 2227
2228 2228 def adddatafilter(self, name, filter):
2229 2229 self._datafilters[name] = filter
2230 2230
2231 2231 def wread(self, filename):
2232 2232 if self.wvfs.islink(filename):
2233 2233 data = self.wvfs.readlink(filename)
2234 2234 else:
2235 2235 data = self.wvfs.read(filename)
2236 2236 return self._filter(self._encodefilterpats, filename, data)
2237 2237
2238 2238 def wwrite(self, filename, data, flags, backgroundclose=False, **kwargs):
2239 2239 """write ``data`` into ``filename`` in the working directory
2240 2240
2241 2241 This returns length of written (maybe decoded) data.
2242 2242 """
2243 2243 data = self._filter(self._decodefilterpats, filename, data)
2244 2244 if b'l' in flags:
2245 2245 self.wvfs.symlink(data, filename)
2246 2246 else:
2247 2247 self.wvfs.write(
2248 2248 filename, data, backgroundclose=backgroundclose, **kwargs
2249 2249 )
2250 2250 if b'x' in flags:
2251 2251 self.wvfs.setflags(filename, False, True)
2252 2252 else:
2253 2253 self.wvfs.setflags(filename, False, False)
2254 2254 return len(data)
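    # Flag handling sketch: b'l' writes a symlink, b'x' sets the
    # executable bit, b'' writes a plain file (the path is hypothetical):
    #
    #   repo.wwrite(b'bin/run.sh', b'#!/bin/sh\n', b'x')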
2255 2255
2256 2256 def wwritedata(self, filename, data):
2257 2257 return self._filter(self._decodefilterpats, filename, data)
2258 2258
2259 2259 def currenttransaction(self):
2260 2260 """return the current transaction or None if none exists"""
2261 2261 if self._transref:
2262 2262 tr = self._transref()
2263 2263 else:
2264 2264 tr = None
2265 2265
2266 2266 if tr and tr.running():
2267 2267 return tr
2268 2268 return None
2269 2269
2270 2270 def transaction(self, desc, report=None):
2271 2271 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
2272 2272 b'devel', b'check-locks'
2273 2273 ):
2274 2274 if self._currentlock(self._lockref) is None:
2275 2275 raise error.ProgrammingError(b'transaction requires locking')
2276 2276 tr = self.currenttransaction()
2277 2277 if tr is not None:
2278 2278 return tr.nest(name=desc)
2279 2279
2280 2280 # abort here if the journal already exists
2281 2281 if self.svfs.exists(b"journal"):
2282 2282 raise error.RepoError(
2283 2283 _(b"abandoned transaction found"),
2284 2284 hint=_(b"run 'hg recover' to clean up transaction"),
2285 2285 )
2286 2286
2287 2287 idbase = b"%.40f#%f" % (random.random(), time.time())
2288 2288 ha = hex(hashutil.sha1(idbase).digest())
2289 2289 txnid = b'TXN:' + ha
2290 2290 self.hook(b'pretxnopen', throw=True, txnname=desc, txnid=txnid)
2291 2291
2292 2292 self._writejournal(desc)
2293 2293 renames = [(vfs, x, undoname(x)) for vfs, x in self._journalfiles()]
2294 2294 if report:
2295 2295 rp = report
2296 2296 else:
2297 2297 rp = self.ui.warn
2298 2298 vfsmap = {b'plain': self.vfs, b'store': self.svfs} # root of .hg/
2299 2299 # we must avoid cyclic reference between repo and transaction.
2300 2300 reporef = weakref.ref(self)
2301 2301 # Code to track tag movement
2302 2302 #
2303 2303 # Since tags are all handled as file content, it is actually quite hard
2304 2304 # to track these movements from a code perspective. So we fall back to
2305 2305 # tracking at the repository level. One could envision tracking changes
2306 2306 # to the '.hgtags' file through changegroup application, but that fails
2307 2307 # to cope with cases where a transaction exposes new heads without a
2308 2308 # changegroup being involved (eg: phase movement).
2309 2309 #
2310 2310 # For now, we gate the feature behind a flag since it likely comes
2311 2311 # with performance impacts. The current code runs more often than needed
2312 2312 # and does not use caches as much as it could. The current focus is on
2313 2313 # the behavior of the feature, so we disable it by default. The flag
2314 2314 # will be removed when we are happy with the performance impact.
2315 2315 #
2316 2316 # Once this feature is no longer experimental move the following
2317 2317 # documentation to the appropriate help section:
2318 2318 #
2319 2319 # The ``HG_TAG_MOVED`` variable will be set if the transaction touched
2320 2320 # tags (new or changed or deleted tags). In addition the details of
2321 2321 # these changes are made available in a file at:
2322 2322 # ``REPOROOT/.hg/changes/tags.changes``.
2323 2323 # Make sure you check for HG_TAG_MOVED before reading that file as it
2324 2324 # might exist from a previous transaction even if no tags were touched
2325 2325 # in this one. Changes are recorded in a line-based format::
2326 2326 #
2327 2327 # <action> <hex-node> <tag-name>\n
2328 2328 #
2329 2329 # Actions are defined as follows:
2330 2330 # "-R": tag is removed,
2331 2331 # "+A": tag is added,
2332 2332 # "-M": tag is moved (old value),
2333 2333 # "+M": tag is moved (new value),
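        # A minimal parsing sketch for that file (a hypothetical helper,
        # not part of this module):
        #
        #   def read_tag_changes(repo):
        #       with repo.vfs(b'changes/tags.changes', b'rb') as fp:
        #           for line in fp:
        #               action, node, name = line.rstrip(b'\n').split(b' ', 2)
        #               yield action, bin(node), name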
2334 2334 tracktags = lambda x: None
2335 2335 # experimental config: experimental.hook-track-tags
2336 2336 shouldtracktags = self.ui.configbool(
2337 2337 b'experimental', b'hook-track-tags'
2338 2338 )
2339 2339 if desc != b'strip' and shouldtracktags:
2340 2340 oldheads = self.changelog.headrevs()
2341 2341
2342 2342 def tracktags(tr2):
2343 2343 repo = reporef()
2344 2344 assert repo is not None # help pytype
2345 2345 oldfnodes = tagsmod.fnoderevs(repo.ui, repo, oldheads)
2346 2346 newheads = repo.changelog.headrevs()
2347 2347 newfnodes = tagsmod.fnoderevs(repo.ui, repo, newheads)
2348 2348 # note: we compare lists here;
2349 2349 # as we do it only once, building a set would not be cheaper
2350 2350 changes = tagsmod.difftags(repo.ui, repo, oldfnodes, newfnodes)
2351 2351 if changes:
2352 2352 tr2.hookargs[b'tag_moved'] = b'1'
2353 2353 with repo.vfs(
2354 2354 b'changes/tags.changes', b'w', atomictemp=True
2355 2355 ) as changesfile:
2356 2356 # note: we do not register the file with the transaction
2357 2357 # because we need it to still exist when the transaction
2358 2358 # is closed (for txnclose hooks)
2359 2359 tagsmod.writediff(changesfile, changes)
2360 2360
2361 2361 def validate(tr2):
2362 2362 """will run pre-closing hooks"""
2363 2363 # XXX the transaction API is a bit lacking here so we take a hacky
2364 2364 # path for now
2365 2365 #
2366 2366 # We cannot add this as a "pending" hook since the 'tr.hookargs'
2367 2367 # dict is copied before these run. In addition we need the data
2368 2368 # available to in-memory hooks too.
2369 2369 #
2370 2370 # Moreover, we also need to make sure this runs before txnclose
2371 2371 # hooks and there is no "pending" mechanism that would execute
2372 2372 # logic only if hooks are about to run.
2373 2373 #
2374 2374 # Fixing this limitation of the transaction is also needed to track
2375 2375 # other families of changes (bookmarks, phases, obsolescence).
2376 2376 #
2377 2377 # This will have to be fixed before we remove the experimental
2378 2378 # gating.
2379 2379 tracktags(tr2)
2380 2380 repo = reporef()
2381 2381 assert repo is not None # help pytype
2382 2382
2383 2383 singleheadopt = (b'experimental', b'single-head-per-branch')
2384 2384 singlehead = repo.ui.configbool(*singleheadopt)
2385 2385 if singlehead:
2386 2386 singleheadsub = repo.ui.configsuboptions(*singleheadopt)[1]
2387 2387 accountclosed = singleheadsub.get(
2388 2388 b"account-closed-heads", False
2389 2389 )
2390 2390 if singleheadsub.get(b"public-changes-only", False):
2391 2391 filtername = b"immutable"
2392 2392 else:
2393 2393 filtername = b"visible"
2394 2394 scmutil.enforcesinglehead(
2395 2395 repo, tr2, desc, accountclosed, filtername
2396 2396 )
2397 2397 if hook.hashook(repo.ui, b'pretxnclose-bookmark'):
2398 2398 for name, (old, new) in sorted(
2399 2399 tr.changes[b'bookmarks'].items()
2400 2400 ):
2401 2401 args = tr.hookargs.copy()
2402 2402 args.update(bookmarks.preparehookargs(name, old, new))
2403 2403 repo.hook(
2404 2404 b'pretxnclose-bookmark',
2405 2405 throw=True,
2406 2406 **pycompat.strkwargs(args)
2407 2407 )
2408 2408 if hook.hashook(repo.ui, b'pretxnclose-phase'):
2409 2409 cl = repo.unfiltered().changelog
2410 2410 for revs, (old, new) in tr.changes[b'phases']:
2411 2411 for rev in revs:
2412 2412 args = tr.hookargs.copy()
2413 2413 node = hex(cl.node(rev))
2414 2414 args.update(phases.preparehookargs(node, old, new))
2415 2415 repo.hook(
2416 2416 b'pretxnclose-phase',
2417 2417 throw=True,
2418 2418 **pycompat.strkwargs(args)
2419 2419 )
2420 2420
2421 2421 repo.hook(
2422 2422 b'pretxnclose', throw=True, **pycompat.strkwargs(tr.hookargs)
2423 2423 )
2424 2424
2425 2425 def releasefn(tr, success):
2426 2426 repo = reporef()
2427 2427 if repo is None:
2428 2428 # If the repo has been GC'd (and this release function is being
2429 2429 # called from transaction.__del__), there's not much we can do,
2430 2430 # so just leave the unfinished transaction there and let the
2431 2431 # user run `hg recover`.
2432 2432 return
2433 2433 if success:
2434 2434 # this should be explicitly invoked here, because
2435 2435 # in-memory changes aren't written out at closing
2436 2436 # transaction, if tr.addfilegenerator (via
2437 2437 # dirstate.write or so) isn't invoked while
2438 2438 # transaction running
2439 2439 repo.dirstate.write(None)
2440 2440 else:
2441 2441 # discard all changes (including ones already written
2442 2442 # out) in this transaction
2443 2443 narrowspec.restorebackup(self, b'journal.narrowspec')
2444 2444 narrowspec.restorewcbackup(self, b'journal.narrowspec.dirstate')
2445 2445 repo.dirstate.restorebackup(None, b'journal.dirstate')
2446 2446
2447 2447 repo.invalidate(clearfilecache=True)
2448 2448
2449 2449 tr = transaction.transaction(
2450 2450 rp,
2451 2451 self.svfs,
2452 2452 vfsmap,
2453 2453 b"journal",
2454 2454 b"undo",
2455 2455 aftertrans(renames),
2456 2456 self.store.createmode,
2457 2457 validator=validate,
2458 2458 releasefn=releasefn,
2459 2459 checkambigfiles=_cachedfiles,
2460 2460 name=desc,
2461 2461 )
2462 2462 tr.changes[b'origrepolen'] = len(self)
2463 2463 tr.changes[b'obsmarkers'] = set()
2464 2464 tr.changes[b'phases'] = []
2465 2465 tr.changes[b'bookmarks'] = {}
2466 2466
2467 2467 tr.hookargs[b'txnid'] = txnid
2468 2468 tr.hookargs[b'txnname'] = desc
2469 2469 tr.hookargs[b'changes'] = tr.changes
2470 2470 # note: writing the fncache only during finalize means that the file is
2471 2471 # outdated when running hooks. As fncache is used for streaming clone,
2472 2472 # this is not expected to break anything that happens during the hooks.
2473 2473 tr.addfinalize(b'flush-fncache', self.store.write)
2474 2474
2475 2475 def txnclosehook(tr2):
2476 2476 """To be run if the transaction is successful; will schedule a hook run"""
2477 2477 # Don't reference tr2 in hook() so we don't hold a reference.
2478 2478 # This reduces memory consumption when there are multiple
2479 2479 # transactions per lock. This can likely go away if issue5045
2480 2480 # fixes the function accumulation.
2481 2481 hookargs = tr2.hookargs
2482 2482
2483 2483 def hookfunc(unused_success):
2484 2484 repo = reporef()
2485 2485 assert repo is not None # help pytype
2486 2486
2487 2487 if hook.hashook(repo.ui, b'txnclose-bookmark'):
2488 2488 bmchanges = sorted(tr.changes[b'bookmarks'].items())
2489 2489 for name, (old, new) in bmchanges:
2490 2490 args = tr.hookargs.copy()
2491 2491 args.update(bookmarks.preparehookargs(name, old, new))
2492 2492 repo.hook(
2493 2493 b'txnclose-bookmark',
2494 2494 throw=False,
2495 2495 **pycompat.strkwargs(args)
2496 2496 )
2497 2497
2498 2498 if hook.hashook(repo.ui, b'txnclose-phase'):
2499 2499 cl = repo.unfiltered().changelog
2500 2500 phasemv = sorted(
2501 2501 tr.changes[b'phases'], key=lambda r: r[0][0]
2502 2502 )
2503 2503 for revs, (old, new) in phasemv:
2504 2504 for rev in revs:
2505 2505 args = tr.hookargs.copy()
2506 2506 node = hex(cl.node(rev))
2507 2507 args.update(phases.preparehookargs(node, old, new))
2508 2508 repo.hook(
2509 2509 b'txnclose-phase',
2510 2510 throw=False,
2511 2511 **pycompat.strkwargs(args)
2512 2512 )
2513 2513
2514 2514 repo.hook(
2515 2515 b'txnclose', throw=False, **pycompat.strkwargs(hookargs)
2516 2516 )
2517 2517
2518 2518 repo = reporef()
2519 2519 assert repo is not None # help pytype
2520 2520 repo._afterlock(hookfunc)
2521 2521
2522 2522 tr.addfinalize(b'txnclose-hook', txnclosehook)
2523 2523 # Include a leading "-" to make it happen before the transaction summary
2524 2524 # reports registered via scmutil.registersummarycallback() whose names
2525 2525 # are 00-txnreport etc. That way, the caches will be warm when the
2526 2526 # callbacks run.
2527 2527 tr.addpostclose(b'-warm-cache', self._buildcacheupdater(tr))
2528 2528
2529 2529 def txnaborthook(tr2):
2530 2530 """To be run if transaction is aborted"""
2531 2531 repo = reporef()
2532 2532 assert repo is not None # help pytype
2533 2533 repo.hook(
2534 2534 b'txnabort', throw=False, **pycompat.strkwargs(tr2.hookargs)
2535 2535 )
2536 2536
2537 2537 tr.addabort(b'txnabort-hook', txnaborthook)
2538 2538 # avoid eager cache invalidation. in-memory data should be identical
2539 2539 # to stored data if transaction has no error.
2540 2540 tr.addpostclose(b'refresh-filecachestats', self._refreshfilecachestats)
2541 2541 self._transref = weakref.ref(tr)
2542 2542 scmutil.registersummarycallback(self, tr, desc)
2543 2543 return tr
2544 2544
2545 2545 def _journalfiles(self):
2546 2546 return (
2547 2547 (self.svfs, b'journal'),
2548 2548 (self.svfs, b'journal.narrowspec'),
2549 2549 (self.vfs, b'journal.narrowspec.dirstate'),
2550 2550 (self.vfs, b'journal.dirstate'),
2551 2551 (self.vfs, b'journal.branch'),
2552 2552 (self.vfs, b'journal.desc'),
2553 2553 (bookmarks.bookmarksvfs(self), b'journal.bookmarks'),
2554 2554 (self.svfs, b'journal.phaseroots'),
2555 2555 )
2556 2556
2557 2557 def undofiles(self):
2558 2558 return [(vfs, undoname(x)) for vfs, x in self._journalfiles()]
2559 2559
2560 2560 @unfilteredmethod
2561 2561 def _writejournal(self, desc):
2562 2562 self.dirstate.savebackup(None, b'journal.dirstate')
2563 2563 narrowspec.savewcbackup(self, b'journal.narrowspec.dirstate')
2564 2564 narrowspec.savebackup(self, b'journal.narrowspec')
2565 2565 self.vfs.write(
2566 2566 b"journal.branch", encoding.fromlocal(self.dirstate.branch())
2567 2567 )
2568 2568 self.vfs.write(b"journal.desc", b"%d\n%s\n" % (len(self), desc))
2569 2569 bookmarksvfs = bookmarks.bookmarksvfs(self)
2570 2570 bookmarksvfs.write(
2571 2571 b"journal.bookmarks", bookmarksvfs.tryread(b"bookmarks")
2572 2572 )
2573 2573 self.svfs.write(b"journal.phaseroots", self.svfs.tryread(b"phaseroots"))
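    # The journal.desc file written above holds the pre-transaction repo
    # length and the transaction description; e.g. for a 42-revision repo:
    #
    #   b"42\ncommit\n"
    #
    # _rollback() later parses the renamed undo.desc with splitlines().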
2574 2574
2575 2575 def recover(self):
2576 2576 with self.lock():
2577 2577 if self.svfs.exists(b"journal"):
2578 2578 self.ui.status(_(b"rolling back interrupted transaction\n"))
2579 2579 vfsmap = {
2580 2580 b'': self.svfs,
2581 2581 b'plain': self.vfs,
2582 2582 }
2583 2583 transaction.rollback(
2584 2584 self.svfs,
2585 2585 vfsmap,
2586 2586 b"journal",
2587 2587 self.ui.warn,
2588 2588 checkambigfiles=_cachedfiles,
2589 2589 )
2590 2590 self.invalidate()
2591 2591 return True
2592 2592 else:
2593 2593 self.ui.warn(_(b"no interrupted transaction available\n"))
2594 2594 return False
2595 2595
2596 2596 def rollback(self, dryrun=False, force=False):
2597 2597 wlock = lock = dsguard = None
2598 2598 try:
2599 2599 wlock = self.wlock()
2600 2600 lock = self.lock()
2601 2601 if self.svfs.exists(b"undo"):
2602 2602 dsguard = dirstateguard.dirstateguard(self, b'rollback')
2603 2603
2604 2604 return self._rollback(dryrun, force, dsguard)
2605 2605 else:
2606 2606 self.ui.warn(_(b"no rollback information available\n"))
2607 2607 return 1
2608 2608 finally:
2609 2609 release(dsguard, lock, wlock)
2610 2610
2611 2611 @unfilteredmethod # Until we get smarter cache management
2612 2612 def _rollback(self, dryrun, force, dsguard):
2613 2613 ui = self.ui
2614 2614 try:
2615 2615 args = self.vfs.read(b'undo.desc').splitlines()
2616 2616 (oldlen, desc, detail) = (int(args[0]), args[1], None)
2617 2617 if len(args) >= 3:
2618 2618 detail = args[2]
2619 2619 oldtip = oldlen - 1
2620 2620
2621 2621 if detail and ui.verbose:
2622 2622 msg = _(
2623 2623 b'repository tip rolled back to revision %d'
2624 2624 b' (undo %s: %s)\n'
2625 2625 ) % (oldtip, desc, detail)
2626 2626 else:
2627 2627 msg = _(
2628 2628 b'repository tip rolled back to revision %d (undo %s)\n'
2629 2629 ) % (oldtip, desc)
2630 2630 except IOError:
2631 2631 msg = _(b'rolling back unknown transaction\n')
2632 2632 desc = None
2633 2633
2634 2634 if not force and self[b'.'] != self[b'tip'] and desc == b'commit':
2635 2635 raise error.Abort(
2636 2636 _(
2637 2637 b'rollback of last commit while not checked out '
2638 2638 b'may lose data'
2639 2639 ),
2640 2640 hint=_(b'use -f to force'),
2641 2641 )
2642 2642
2643 2643 ui.status(msg)
2644 2644 if dryrun:
2645 2645 return 0
2646 2646
2647 2647 parents = self.dirstate.parents()
2648 2648 self.destroying()
2649 2649 vfsmap = {b'plain': self.vfs, b'': self.svfs}
2650 2650 transaction.rollback(
2651 2651 self.svfs, vfsmap, b'undo', ui.warn, checkambigfiles=_cachedfiles
2652 2652 )
2653 2653 bookmarksvfs = bookmarks.bookmarksvfs(self)
2654 2654 if bookmarksvfs.exists(b'undo.bookmarks'):
2655 2655 bookmarksvfs.rename(
2656 2656 b'undo.bookmarks', b'bookmarks', checkambig=True
2657 2657 )
2658 2658 if self.svfs.exists(b'undo.phaseroots'):
2659 2659 self.svfs.rename(b'undo.phaseroots', b'phaseroots', checkambig=True)
2660 2660 self.invalidate()
2661 2661
2662 2662 has_node = self.changelog.index.has_node
2663 2663 parentgone = any(not has_node(p) for p in parents)
2664 2664 if parentgone:
2665 2665 # prevent dirstateguard from overwriting already restored one
2666 2666 dsguard.close()
2667 2667
2668 2668 narrowspec.restorebackup(self, b'undo.narrowspec')
2669 2669 narrowspec.restorewcbackup(self, b'undo.narrowspec.dirstate')
2670 2670 self.dirstate.restorebackup(None, b'undo.dirstate')
2671 2671 try:
2672 2672 branch = self.vfs.read(b'undo.branch')
2673 2673 self.dirstate.setbranch(encoding.tolocal(branch))
2674 2674 except IOError:
2675 2675 ui.warn(
2676 2676 _(
2677 2677 b'named branch could not be reset: '
2678 2678 b'current branch is still \'%s\'\n'
2679 2679 )
2680 2680 % self.dirstate.branch()
2681 2681 )
2682 2682
2683 2683 parents = tuple([p.rev() for p in self[None].parents()])
2684 2684 if len(parents) > 1:
2685 2685 ui.status(
2686 2686 _(
2687 2687 b'working directory now based on '
2688 2688 b'revisions %d and %d\n'
2689 2689 )
2690 2690 % parents
2691 2691 )
2692 2692 else:
2693 2693 ui.status(
2694 2694 _(b'working directory now based on revision %d\n') % parents
2695 2695 )
2696 2696 mergestatemod.mergestate.clean(self)
2697 2697
2698 2698 # TODO: if we know which new heads may result from this rollback, pass
2699 2699 # them to destroy(), which will prevent the branchhead cache from being
2700 2700 # invalidated.
2701 2701 self.destroyed()
2702 2702 return 0
2703 2703
2704 2704 def _buildcacheupdater(self, newtransaction):
2705 2705 """called during a transaction to build the callback updating caches
2706 2706
2707 2707 Lives on the repository to help extensions that might want to augment
2708 2708 this logic. For this purpose, the created transaction is passed to the
2709 2709 method.
2710 2710 """
2711 2711 # we must avoid cyclic reference between repo and transaction.
2712 2712 reporef = weakref.ref(self)
2713 2713
2714 2714 def updater(tr):
2715 2715 repo = reporef()
2716 2716 assert repo is not None # help pytype
2717 2717 repo.updatecaches(tr)
2718 2718
2719 2719 return updater
2720 2720
2721 2721 @unfilteredmethod
2722 2722 def updatecaches(self, tr=None, full=False):
2723 2723 """warm appropriate caches
2724 2724
2725 2725 If this function is called after a transaction closed, the transaction
2726 2726 will be available in the 'tr' argument. This can be used to selectively
2727 2727 update caches relevant to the changes in that transaction.
2728 2728
2729 2729 If 'full' is set, make sure all caches the function knows about have
2730 2730 up-to-date data, even the ones usually loaded more lazily.
2731 2731 """
2732 2732 if tr is not None and tr.hookargs.get(b'source') == b'strip':
2733 2733 # During strip, many caches are invalid but
2734 2734 # later call to `destroyed` will refresh them.
2735 2735 return
2736 2736
2737 2737 if tr is None or tr.changes[b'origrepolen'] < len(self):
2738 2738 # accessing the 'served' branchmap should refresh all the others.
2739 2739 self.ui.debug(b'updating the branch cache\n')
2740 2740 self.filtered(b'served').branchmap()
2741 2741 self.filtered(b'served.hidden').branchmap()
2742 2742
2743 2743 if full:
2744 2744 unfi = self.unfiltered()
2745 2745
2746 2746 self.changelog.update_caches(transaction=tr)
2747 2747 self.manifestlog.update_caches(transaction=tr)
2748 2748
2749 2749 rbc = unfi.revbranchcache()
2750 2750 for r in unfi.changelog:
2751 2751 rbc.branchinfo(r)
2752 2752 rbc.write()
2753 2753
2754 2754 # ensure the working copy parents are in the manifestfulltextcache
2755 2755 for ctx in self[b'.'].parents():
2756 2756 ctx.manifest() # accessing the manifest is enough
2757 2757
2758 2758 # accessing fnode cache warms the cache
2759 2759 tagsmod.fnoderevs(self.ui, unfi, unfi.changelog.revs())
2760 2760 # accessing tags warm the cache
2761 2761 self.tags()
2762 2762 self.filtered(b'served').tags()
2763 2763
2764 2764 # The `full` arg is documented as updating even the lazily-loaded
2765 2765 # caches immediately, so we're forcing a write to cause these caches
2766 2766 # to be warmed up even if they haven't explicitly been requested
2767 2767 # yet (if they've never been used by hg, they won't ever have been
2768 2768 # written, even if they're a subset of another kind of cache that
2769 2769 # *has* been used).
2770 2770 for filt in repoview.filtertable.keys():
2771 2771 filtered = self.filtered(filt)
2772 2772 filtered.branchmap().write(filtered)
2773 2773
2774 2774 def invalidatecaches(self):
2775 2775
2776 2776 if '_tagscache' in vars(self):
2777 2777 # can't use delattr on proxy
2778 2778 del self.__dict__['_tagscache']
2779 2779
2780 2780 self._branchcaches.clear()
2781 2781 self.invalidatevolatilesets()
2782 2782 self._sparsesignaturecache.clear()
2783 2783
2784 2784 def invalidatevolatilesets(self):
2785 2785 self.filteredrevcache.clear()
2786 2786 obsolete.clearobscaches(self)
2787 2787 self._quick_access_changeid_invalidate()
2788 2788
2789 2789 def invalidatedirstate(self):
2790 2790 """Invalidates the dirstate, causing the next call to dirstate
2791 2791 to check if it was modified since the last time it was read,
2792 2792 rereading it if it has.
2793 2793
2794 2794 This is different from dirstate.invalidate() in that it doesn't
2795 2795 always reread the dirstate. Use dirstate.invalidate() if you want to
2796 2796 explicitly read the dirstate again (i.e. restoring it to a previous
2797 2797 known good state)."""
2798 2798 if hasunfilteredcache(self, 'dirstate'):
2799 2799 for k in self.dirstate._filecache:
2800 2800 try:
2801 2801 delattr(self.dirstate, k)
2802 2802 except AttributeError:
2803 2803 pass
2804 2804 delattr(self.unfiltered(), 'dirstate')
2805 2805
2806 2806 def invalidate(self, clearfilecache=False):
2807 2807 """Invalidates both store and non-store parts other than dirstate
2808 2808
2809 2809 If a transaction is running, invalidation of store is omitted,
2810 2810 because discarding in-memory changes might cause inconsistency
2811 2811 (e.g. incomplete fncache causes unintentional failure, but
2812 2812 redundant one doesn't).
2813 2813 """
2814 2814 unfiltered = self.unfiltered() # all file caches are stored unfiltered
2815 2815 for k in list(self._filecache.keys()):
2816 2816 # dirstate is invalidated separately in invalidatedirstate()
2817 2817 if k == b'dirstate':
2818 2818 continue
2819 2819 if (
2820 2820 k == b'changelog'
2821 2821 and self.currenttransaction()
2822 2822 and self.changelog._delayed
2823 2823 ):
2824 2824 # The changelog object may store unwritten revisions. We don't
2825 2825 # want to lose them.
2826 2826 # TODO: Solve the problem instead of working around it.
2827 2827 continue
2828 2828
2829 2829 if clearfilecache:
2830 2830 del self._filecache[k]
2831 2831 try:
2832 2832 delattr(unfiltered, k)
2833 2833 except AttributeError:
2834 2834 pass
2835 2835 self.invalidatecaches()
2836 2836 if not self.currenttransaction():
2837 2837 # TODO: Changing contents of store outside transaction
2838 2838 # causes inconsistency. We should make in-memory store
2839 2839 # changes detectable, and abort if changed.
2840 2840 self.store.invalidatecaches()
2841 2841
2842 2842 def invalidateall(self):
2843 2843 """Fully invalidates both store and non-store parts, causing the
2844 2844 subsequent operation to reread any outside changes."""
2845 2845 # extension should hook this to invalidate its caches
2846 2846 self.invalidate()
2847 2847 self.invalidatedirstate()
2848 2848
2849 2849 @unfilteredmethod
2850 2850 def _refreshfilecachestats(self, tr):
2851 2851 """Reload stats of cached files so that they are flagged as valid"""
2852 2852 for k, ce in self._filecache.items():
2853 2853 k = pycompat.sysstr(k)
2854 2854 if k == 'dirstate' or k not in self.__dict__:
2855 2855 continue
2856 2856 ce.refresh()
2857 2857
2858 2858 def _lock(
2859 2859 self,
2860 2860 vfs,
2861 2861 lockname,
2862 2862 wait,
2863 2863 releasefn,
2864 2864 acquirefn,
2865 2865 desc,
2866 2866 ):
2867 2867 timeout = 0
2868 2868 warntimeout = 0
2869 2869 if wait:
2870 2870 timeout = self.ui.configint(b"ui", b"timeout")
2871 2871 warntimeout = self.ui.configint(b"ui", b"timeout.warn")
2872 2872 # internal config: ui.signal-safe-lock
2873 2873 signalsafe = self.ui.configbool(b'ui', b'signal-safe-lock')
2874 2874
2875 2875 l = lockmod.trylock(
2876 2876 self.ui,
2877 2877 vfs,
2878 2878 lockname,
2879 2879 timeout,
2880 2880 warntimeout,
2881 2881 releasefn=releasefn,
2882 2882 acquirefn=acquirefn,
2883 2883 desc=desc,
2884 2884 signalsafe=signalsafe,
2885 2885 )
2886 2886 return l
2887 2887
2888 2888 def _afterlock(self, callback):
2889 2889 """add a callback to be run when the repository is fully unlocked
2890 2890
2891 2891 The callback will be executed when the outermost lock is released
2892 2892 (with wlock being higher level than 'lock')."""
2893 2893 for ref in (self._wlockref, self._lockref):
2894 2894 l = ref and ref()
2895 2895 if l and l.held:
2896 2896 l.postrelease.append(callback)
2897 2897 break
2898 2898 else: # no lock has been found.
2899 2899 callback(True)
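    # A usage sketch (the callback is hypothetical): schedule work for
    # when the outermost lock is released; with no lock held, it runs
    # immediately with success=True:
    #
    #   def notify(success):
    #       if success:
    #           repo.ui.note(b'locks released\n')
    #
    #   repo._afterlock(notify)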
2900 2900
2901 2901 def lock(self, wait=True):
2902 2902 """Lock the repository store (.hg/store) and return a weak reference
2903 2903 to the lock. Use this before modifying the store (e.g. committing or
2904 2904 stripping). If you are opening a transaction, get a lock as well.
2905 2905
2906 2906 If both 'lock' and 'wlock' must be acquired, ensure you always acquire
2907 2907 'wlock' first to avoid a deadlock hazard."""
2908 2908 l = self._currentlock(self._lockref)
2909 2909 if l is not None:
2910 2910 l.lock()
2911 2911 return l
2912 2912
2913 2913 l = self._lock(
2914 2914 vfs=self.svfs,
2915 2915 lockname=b"lock",
2916 2916 wait=wait,
2917 2917 releasefn=None,
2918 2918 acquirefn=self.invalidate,
2919 2919 desc=_(b'repository %s') % self.origroot,
2920 2920 )
2921 2921 self._lockref = weakref.ref(l)
2922 2922 return l
2923 2923
2924 2924 def wlock(self, wait=True):
2925 2925 """Lock the non-store parts of the repository (everything under
2926 2926 .hg except .hg/store) and return a weak reference to the lock.
2927 2927
2928 2928 Use this before modifying files in .hg.
2929 2929
2930 2930 If both 'lock' and 'wlock' must be acquired, ensure you always acquire
2931 2931 'wlock' first to avoid a deadlock hazard."""
2932 2932 l = self._wlockref() if self._wlockref else None
2933 2933 if l is not None and l.held:
2934 2934 l.lock()
2935 2935 return l
2936 2936
2937 2937 # We do not need to check for non-waiting lock acquisition. Such
2938 2938 # an acquisition would not cause a deadlock as it would just fail.
2939 2939 if wait and (
2940 2940 self.ui.configbool(b'devel', b'all-warnings')
2941 2941 or self.ui.configbool(b'devel', b'check-locks')
2942 2942 ):
2943 2943 if self._currentlock(self._lockref) is not None:
2944 2944 self.ui.develwarn(b'"wlock" acquired after "lock"')
2945 2945
2946 2946 def unlock():
2947 2947 if self.dirstate.pendingparentchange():
2948 2948 self.dirstate.invalidate()
2949 2949 else:
2950 2950 self.dirstate.write(None)
2951 2951
2952 2952 self._filecache[b'dirstate'].refresh()
2953 2953
2954 2954 l = self._lock(
2955 2955 self.vfs,
2956 2956 b"wlock",
2957 2957 wait,
2958 2958 unlock,
2959 2959 self.invalidatedirstate,
2960 2960 _(b'working directory of %s') % self.origroot,
2961 2961 )
2962 2962 self._wlockref = weakref.ref(l)
2963 2963 return l
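    # Lock-ordering sketch, per the docstrings above ('wlock' before
    # 'lock', and a transaction only while locked):
    #
    #   with repo.wlock(), repo.lock():
    #       with repo.transaction(b'my-change'):
    #           ...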
2964 2964
2965 2965 def _currentlock(self, lockref):
2966 2966 """Returns the lock if it's held, or None if it's not."""
2967 2967 if lockref is None:
2968 2968 return None
2969 2969 l = lockref()
2970 2970 if l is None or not l.held:
2971 2971 return None
2972 2972 return l
2973 2973
2974 2974 def currentwlock(self):
2975 2975 """Returns the wlock if it's held, or None if it's not."""
2976 2976 return self._currentlock(self._wlockref)
2977 2977
2978 2978 def checkcommitpatterns(self, wctx, match, status, fail):
2979 2979 """check for commit arguments that aren't committable"""
2980 2980 if match.isexact() or match.prefix():
2981 2981 matched = set(status.modified + status.added + status.removed)
2982 2982
2983 2983 for f in match.files():
2984 2984 f = self.dirstate.normalize(f)
2985 2985 if f == b'.' or f in matched or f in wctx.substate:
2986 2986 continue
2987 2987 if f in status.deleted:
2988 2988 fail(f, _(b'file not found!'))
2989 2989 # Is it a directory that exists or used to exist?
2990 2990 if self.wvfs.isdir(f) or wctx.p1().hasdir(f):
2991 2991 d = f + b'/'
2992 2992 for mf in matched:
2993 2993 if mf.startswith(d):
2994 2994 break
2995 2995 else:
2996 2996 fail(f, _(b"no match under directory!"))
2997 2997 elif f not in self.dirstate:
2998 2998 fail(f, _(b"file not tracked!"))
2999 2999
3000 3000 @unfilteredmethod
3001 3001 def commit(
3002 3002 self,
3003 3003 text=b"",
3004 3004 user=None,
3005 3005 date=None,
3006 3006 match=None,
3007 3007 force=False,
3008 3008 editor=None,
3009 3009 extra=None,
3010 3010 ):
3011 3011 """Add a new revision to current repository.
3012 3012
3013 3013 Revision information is gathered from the working directory,
3014 3014 match can be used to filter the committed files. If editor is
3015 3015 supplied, it is called to get a commit message.
3016 3016 """
3017 3017 if extra is None:
3018 3018 extra = {}
3019 3019
3020 3020 def fail(f, msg):
3021 3021 raise error.InputError(b'%s: %s' % (f, msg))
3022 3022
3023 3023 if not match:
3024 3024 match = matchmod.always()
3025 3025
3026 3026 if not force:
3027 3027 match.bad = fail
3028 3028
3029 3029 # lock() for recent changelog (see issue4368)
3030 3030 with self.wlock(), self.lock():
3031 3031 wctx = self[None]
3032 3032 merge = len(wctx.parents()) > 1
3033 3033
3034 3034 if not force and merge and not match.always():
3035 3035 raise error.Abort(
3036 3036 _(
3037 3037 b'cannot partially commit a merge '
3038 3038 b'(do not specify files or patterns)'
3039 3039 )
3040 3040 )
3041 3041
3042 3042 status = self.status(match=match, clean=force)
3043 3043 if force:
3044 3044 status.modified.extend(
3045 3045 status.clean
3046 3046 ) # mq may commit clean files
3047 3047
3048 3048 # check subrepos
3049 3049 subs, commitsubs, newstate = subrepoutil.precommit(
3050 3050 self.ui, wctx, status, match, force=force
3051 3051 )
3052 3052
3053 3053 # make sure all explicit patterns are matched
3054 3054 if not force:
3055 3055 self.checkcommitpatterns(wctx, match, status, fail)
3056 3056
3057 3057 cctx = context.workingcommitctx(
3058 3058 self, status, text, user, date, extra
3059 3059 )
3060 3060
3061 3061 ms = mergestatemod.mergestate.read(self)
3062 3062 mergeutil.checkunresolved(ms)
3063 3063
3064 3064 # internal config: ui.allowemptycommit
3065 3065 if cctx.isempty() and not self.ui.configbool(
3066 3066 b'ui', b'allowemptycommit'
3067 3067 ):
3068 3068 self.ui.debug(b'nothing to commit, clearing merge state\n')
3069 3069 ms.reset()
3070 3070 return None
3071 3071
3072 3072 if merge and cctx.deleted():
3073 3073 raise error.Abort(_(b"cannot commit merge with missing files"))
3074 3074
3075 3075 if editor:
3076 3076 cctx._text = editor(self, cctx, subs)
3077 3077 edited = text != cctx._text
3078 3078
3079 3079 # Save commit message in case this transaction gets rolled back
3080 3080 # (e.g. by a pretxncommit hook). Leave the content alone on
3081 3081 # the assumption that the user will use the same editor again.
3082 3082 msgfn = self.savecommitmessage(cctx._text)
3083 3083
3084 3084 # commit subs and write new state
3085 3085 if subs:
3086 3086 uipathfn = scmutil.getuipathfn(self)
3087 3087 for s in sorted(commitsubs):
3088 3088 sub = wctx.sub(s)
3089 3089 self.ui.status(
3090 3090 _(b'committing subrepository %s\n')
3091 3091 % uipathfn(subrepoutil.subrelpath(sub))
3092 3092 )
3093 3093 sr = sub.commit(cctx._text, user, date)
3094 3094 newstate[s] = (newstate[s][0], sr)
3095 3095 subrepoutil.writestate(self, newstate)
3096 3096
3097 3097 p1, p2 = self.dirstate.parents()
3098 3098 hookp1, hookp2 = hex(p1), (p2 != self.nullid and hex(p2) or b'')
3099 3099 try:
3100 3100 self.hook(
3101 3101 b"precommit", throw=True, parent1=hookp1, parent2=hookp2
3102 3102 )
3103 3103 with self.transaction(b'commit'):
3104 3104 ret = self.commitctx(cctx, True)
3105 3105 # update bookmarks, dirstate and mergestate
3106 3106 bookmarks.update(self, [p1, p2], ret)
3107 3107 cctx.markcommitted(ret)
3108 3108 ms.reset()
3109 3109 except: # re-raises
3110 3110 if edited:
3111 3111 self.ui.write(
3112 3112 _(b'note: commit message saved in %s\n') % msgfn
3113 3113 )
3114 3114 self.ui.write(
3115 3115 _(
3116 3116 b"note: use 'hg commit --logfile "
3117 3117 b".hg/last-message.txt --edit' to reuse it\n"
3118 3118 )
3119 3119 )
3120 3120 raise
3121 3121
3122 3122 def commithook(unused_success):
3123 3123 # hack for commands that use a temporary commit (e.g. histedit):
3124 3124 # the temporary commit may already be stripped before the hook runs
3125 3125 if self.changelog.hasnode(ret):
3126 3126 self.hook(
3127 3127 b"commit", node=hex(ret), parent1=hookp1, parent2=hookp2
3128 3128 )
3129 3129
3130 3130 self._afterlock(commithook)
3131 3131 return ret
3132 3132
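    # Illustrative usage (a sketch, not part of this change): a caller
    # commits a subset of files with an editor callback.  The matcher and
    # editor below are assumptions for the example, not values from this
    # diff:
    #
    #     match = scmutil.match(repo[None], [b'path:foo.txt'])
    #     node = repo.commit(
    #         text=b'', user=b'alice', match=match,
    #         editor=cmdutil.getcommiteditor(edit=True),
    #     )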
3133 3133 @unfilteredmethod
3134 3134 def commitctx(self, ctx, error=False, origctx=None):
3135 3135 return commit.commitctx(self, ctx, error=error, origctx=origctx)
3136 3136
3137 3137 @unfilteredmethod
3138 3138 def destroying(self):
3139 3139 """Inform the repository that nodes are about to be destroyed.
3140 3140 Intended for use by strip and rollback, so there's a common
3141 3141 place for anything that has to be done before destroying history.
3142 3142
3143 3143 This is mostly useful for saving state that is in memory and waiting
3144 3144 to be flushed when the current lock is released. Because a call to
3145 3145 destroyed is imminent, the repo will be invalidated causing those
3146 3146 changes to stay in memory (waiting for the next unlock), or vanish
3147 3147 completely.
3148 3148 """
3149 3149 # When using the same lock to commit and strip, the phasecache is left
3150 3150 # dirty after committing. Then when we strip, the repo is invalidated,
3151 3151 # causing those changes to disappear.
3152 3152 if '_phasecache' in vars(self):
3153 3153 self._phasecache.write()
3154 3154
3155 3155 @unfilteredmethod
3156 3156 def destroyed(self):
3157 3157 """Inform the repository that nodes have been destroyed.
3158 3158 Intended for use by strip and rollback, so there's a common
3159 3159 place for anything that has to be done after destroying history.
3160 3160 """
3161 3161 # When one tries to:
3162 3162 # 1) destroy nodes thus calling this method (e.g. strip)
3163 3163 # 2) use phasecache somewhere (e.g. commit)
3164 3164 #
3165 3165 # then 2) will fail because the phasecache contains nodes that were
3166 3166 # removed. We can either remove phasecache from the filecache,
3167 3167 # causing it to reload next time it is accessed, or simply filter
3168 3168 # the removed nodes now and write the updated cache.
3169 3169 self._phasecache.filterunknown(self)
3170 3170 self._phasecache.write()
3171 3171
3172 3172 # refresh all repository caches
3173 3173 self.updatecaches()
3174 3174
3175 3175 # Ensure the persistent tag cache is updated. Doing it now
3176 3176 # means that the tag cache only has to worry about destroyed
3177 3177 # heads immediately after a strip/rollback. That in turn
3178 3178 # guarantees that "cachetip == currenttip" (comparing both rev
3179 3179 # and node) always means no nodes have been added or destroyed.
3180 3180
3181 3181 # XXX this is suboptimal when qrefresh'ing: we strip the current
3182 3182 # head, refresh the tag cache, then immediately add a new head.
3183 3183 # But I think doing it this way is necessary for the "instant
3184 3184 # tag cache retrieval" case to work.
3185 3185 self.invalidate()
3186 3186
3187 3187 def status(
3188 3188 self,
3189 3189 node1=b'.',
3190 3190 node2=None,
3191 3191 match=None,
3192 3192 ignored=False,
3193 3193 clean=False,
3194 3194 unknown=False,
3195 3195 listsubrepos=False,
3196 3196 ):
3197 3197 '''a convenience method that calls node1.status(node2)'''
3198 3198 return self[node1].status(
3199 3199 node2, match, ignored, clean, unknown, listsubrepos
3200 3200 )
3201 3201
3202 3202 def addpostdsstatus(self, ps):
3203 3203 """Add a callback to run within the wlock, at the point at which status
3204 3204 fixups happen.
3205 3205
3206 3206 On status completion, callback(wctx, status) will be called with the
3207 3207 wlock held, unless the dirstate has changed from underneath or the wlock
3208 3208 couldn't be grabbed.
3209 3209
3210 3210 Callbacks should not capture and use a cached copy of the dirstate --
3211 3211 it might change in the meantime. Instead, they should access the
3212 3212 dirstate via wctx.repo().dirstate.
3213 3213
3214 3214 This list is emptied out after each status run -- extensions should
3215 3215 make sure they add to this list each time dirstate.status is called.
3216 3216 Extensions should also make sure they don't call this for statuses
3217 3217 that don't involve the dirstate.
3218 3218 """
3219 3219
3220 3220 # The list is located here for uniqueness reasons -- it is actually
3221 3221 # managed by the workingctx, but that isn't unique per-repo.
3222 3222 self._postdsstatus.append(ps)
3223 3223
3224 3224 def postdsstatus(self):
3225 3225 """Used by workingctx to get the list of post-dirstate-status hooks."""
3226 3226 return self._postdsstatus
3227 3227
3228 3228 def clearpostdsstatus(self):
3229 3229 """Used by workingctx to clear post-dirstate-status hooks."""
3230 3230 del self._postdsstatus[:]
3231 3231
3232 3232 def heads(self, start=None):
3233 3233 if start is None:
3234 3234 cl = self.changelog
3235 3235 headrevs = reversed(cl.headrevs())
3236 3236 return [cl.node(rev) for rev in headrevs]
3237 3237
3238 3238 heads = self.changelog.heads(start)
3239 3239 # sort the output in rev descending order
3240 3240 return sorted(heads, key=self.changelog.rev, reverse=True)
3241 3241
3242 3242 def branchheads(self, branch=None, start=None, closed=False):
3243 3243 """return a (possibly filtered) list of heads for the given branch
3244 3244
3245 3245 Heads are returned in topological order, from newest to oldest.
3246 3246 If branch is None, use the dirstate branch.
3247 3247 If start is not None, return only heads reachable from start.
3248 3248 If closed is True, return heads that are marked as closed as well.
3249 3249 """
3250 3250 if branch is None:
3251 3251 branch = self[None].branch()
3252 3252 branches = self.branchmap()
3253 3253 if not branches.hasbranch(branch):
3254 3254 return []
3255 3255 # the cache returns heads ordered lowest to highest
3256 3256 bheads = list(reversed(branches.branchheads(branch, closed=closed)))
3257 3257 if start is not None:
3258 3258 # filter out the heads that cannot be reached from startrev
3259 3259 fbheads = set(self.changelog.nodesbetween([start], bheads)[2])
3260 3260 bheads = [h for h in bheads if h in fbheads]
3261 3261 return bheads
3262 3262
3263 3263 def branches(self, nodes):
3264 3264 if not nodes:
3265 3265 nodes = [self.changelog.tip()]
3266 3266 b = []
3267 3267 for n in nodes:
3268 3268 t = n
3269 3269 while True:
3270 3270 p = self.changelog.parents(n)
3271 3271 if p[1] != self.nullid or p[0] == self.nullid:
3272 3272 b.append((t, n, p[0], p[1]))
3273 3273 break
3274 3274 n = p[0]
3275 3275 return b
3276 3276
3277 3277 def between(self, pairs):
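        """for each (top, bottom) pair, sample nodes on the first-parent chain

        Walks from top towards bottom following first parents and collects
        the nodes whose distance from top is a power of two (1, 2, 4, 8,
        ...), stopping once bottom or the null node is reached.
        """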
3278 3278 r = []
3279 3279
3280 3280 for top, bottom in pairs:
3281 3281 n, l, i = top, [], 0
3282 3282 f = 1
3283 3283
3284 3284 while n != bottom and n != self.nullid:
3285 3285 p = self.changelog.parents(n)[0]
3286 3286 if i == f:
3287 3287 l.append(n)
3288 3288 f = f * 2
3289 3289 n = p
3290 3290 i += 1
3291 3291
3292 3292 r.append(l)
3293 3293
3294 3294 return r
3295 3295
3296 3296 def checkpush(self, pushop):
3297 3297 """Extensions can override this function if additional checks have
3298 3298 to be performed before pushing, or call it if they override push
3299 3299 command.
3300 3300 """
3301 3301
3302 3302 @unfilteredpropertycache
3303 3303 def prepushoutgoinghooks(self):
3304 3304 """Return util.hooks consists of a pushop with repo, remote, outgoing
3305 3305 methods, which are called before pushing changesets.
3306 3306 """
3307 3307 return util.hooks()
3308 3308
3309 3309 def pushkey(self, namespace, key, old, new):
3310 3310 try:
3311 3311 tr = self.currenttransaction()
3312 3312 hookargs = {}
3313 3313 if tr is not None:
3314 3314 hookargs.update(tr.hookargs)
3315 3315 hookargs = pycompat.strkwargs(hookargs)
3316 3316 hookargs['namespace'] = namespace
3317 3317 hookargs['key'] = key
3318 3318 hookargs['old'] = old
3319 3319 hookargs['new'] = new
3320 3320 self.hook(b'prepushkey', throw=True, **hookargs)
3321 3321 except error.HookAbort as exc:
3322 3322 self.ui.write_err(_(b"pushkey-abort: %s\n") % exc)
3323 3323 if exc.hint:
3324 3324 self.ui.write_err(_(b"(%s)\n") % exc.hint)
3325 3325 return False
3326 3326 self.ui.debug(b'pushing key for "%s:%s"\n' % (namespace, key))
3327 3327 ret = pushkey.push(self, namespace, key, old, new)
3328 3328
3329 3329 def runhook(unused_success):
3330 3330 self.hook(
3331 3331 b'pushkey',
3332 3332 namespace=namespace,
3333 3333 key=key,
3334 3334 old=old,
3335 3335 new=new,
3336 3336 ret=ret,
3337 3337 )
3338 3338
3339 3339 self._afterlock(runhook)
3340 3340 return ret
3341 3341
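    # Illustrative sketch (hypothetical hgrc, not from this diff): a shell
    # prepushkey hook sees the arguments passed above as HG_* environment
    # variables (HG_NAMESPACE, HG_KEY, HG_OLD, HG_NEW) and can veto the
    # push by exiting non-zero, which raises the HookAbort handled above:
    #
    #     [hooks]
    #     prepushkey.require-review = /usr/local/bin/check-pushkey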
3342 3342 def listkeys(self, namespace):
3343 3343 self.hook(b'prelistkeys', throw=True, namespace=namespace)
3344 3344 self.ui.debug(b'listing keys for "%s"\n' % namespace)
3345 3345 values = pushkey.list(self, namespace)
3346 3346 self.hook(b'listkeys', namespace=namespace, values=values)
3347 3347 return values
3348 3348
3349 3349 def debugwireargs(self, one, two, three=None, four=None, five=None):
3350 3350 '''used to test argument passing over the wire'''
3351 3351 return b"%s %s %s %s %s" % (
3352 3352 one,
3353 3353 two,
3354 3354 pycompat.bytestr(three),
3355 3355 pycompat.bytestr(four),
3356 3356 pycompat.bytestr(five),
3357 3357 )
3358 3358
3359 3359 def savecommitmessage(self, text):
3360 3360 fp = self.vfs(b'last-message.txt', b'wb')
3361 3361 try:
3362 3362 fp.write(text)
3363 3363 finally:
3364 3364 fp.close()
3365 3365 return self.pathto(fp.name[len(self.root) + 1 :])
3366 3366
3367 3367 def register_wanted_sidedata(self, category):
3368 3368 if requirementsmod.REVLOGV2_REQUIREMENT not in self.requirements:
3369 3369 # Only revlogv2 repos can want sidedata.
3370 3370 return
3371 3371 self._wanted_sidedata.add(pycompat.bytestr(category))
3372 3372
3373 def register_sidedata_computer(self, kind, category, keys, computer):
3373 def register_sidedata_computer(self, kind, category, keys, computer, flags):
3374 3374 if kind not in revlogconst.ALL_KINDS:
3375 msg = _(b"unexpected revlog kind %r.")
3375 msg = _(b"unexpected revlog kind '%s'.")
3376 3376 raise error.ProgrammingError(msg % kind)
3377 3377 category = pycompat.bytestr(category)
3378 3378 if category in self._sidedata_computers.get(kind, []):
3379 3379 msg = _(
3380 3380 b"cannot register a sidedata computer twice for category '%s'."
3381 3381 )
3382 3382 raise error.ProgrammingError(msg % category)
3383 3383 self._sidedata_computers.setdefault(kind, {})
3384 self._sidedata_computers[kind][category] = (keys, computer)
3384 self._sidedata_computers[kind][category] = (keys, computer, flags)
3385 3385
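    # Illustrative registration (a sketch; names starting with "my_"/"MY_"
    # are hypothetical, not from this diff).  With this change a computer
    # is stored together with the revision flags it may set, and must
    # return its flag changes alongside the sidedata:
    #
    #     def my_computer(repo, revlog, rev, existing_sidedata):
    #         return {MY_CATEGORY: b'...'}, (0, 0)  # (flags_to_add, flags_to_remove)
    #
    #     repo.register_sidedata_computer(
    #         revlogconst.KIND_CHANGELOG, MY_CATEGORY, (MY_CATEGORY,),
    #         my_computer, MY_FLAG,
    #     )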
3386 3386
3387 3387 # used to avoid circular references so destructors work
3388 3388 def aftertrans(files):
3389 3389 renamefiles = [tuple(t) for t in files]
3390 3390
3391 3391 def a():
3392 3392 for vfs, src, dest in renamefiles:
3393 3393 # if src and dest refer to a same file, vfs.rename is a no-op,
3394 3394 # leaving both src and dest on disk. delete dest to make sure
3395 3395 # the rename couldn't be such a no-op.
3396 3396 vfs.tryunlink(dest)
3397 3397 try:
3398 3398 vfs.rename(src, dest)
3399 3399 except OSError: # journal file does not yet exist
3400 3400 pass
3401 3401
3402 3402 return a
3403 3403
3404 3404
3405 3405 def undoname(fn):
3406 3406 base, name = os.path.split(fn)
3407 3407 assert name.startswith(b'journal')
3408 3408 return os.path.join(base, name.replace(b'journal', b'undo', 1))
3409 3409
3410 3410
3411 3411 def instance(ui, path, create, intents=None, createopts=None):
3412 3412 localpath = urlutil.urllocalpath(path)
3413 3413 if create:
3414 3414 createrepository(ui, localpath, createopts=createopts)
3415 3415
3416 3416 return makelocalrepository(ui, localpath, intents=intents)
3417 3417
3418 3418
3419 3419 def islocal(path):
3420 3420 return True
3421 3421
3422 3422
3423 3423 def defaultcreateopts(ui, createopts=None):
3424 3424 """Populate the default creation options for a repository.
3425 3425
3426 3426 A dictionary of explicitly requested creation options can be passed
3427 3427 in. Missing keys will be populated.
3428 3428 """
3429 3429 createopts = dict(createopts or {})
3430 3430
3431 3431 if b'backend' not in createopts:
3432 3432 # experimental config: storage.new-repo-backend
3433 3433 createopts[b'backend'] = ui.config(b'storage', b'new-repo-backend')
3434 3434
3435 3435 return createopts
3436 3436
3437 3437
3438 3438 def newreporequirements(ui, createopts):
3439 3439 """Determine the set of requirements for a new local repository.
3440 3440
3441 3441 Extensions can wrap this function to specify custom requirements for
3442 3442 new repositories.
3443 3443 """
3444 3444 # If the repo is being created from a shared repository, we copy
3445 3445 # its requirements.
3446 3446 if b'sharedrepo' in createopts:
3447 3447 requirements = set(createopts[b'sharedrepo'].requirements)
3448 3448 if createopts.get(b'sharedrelative'):
3449 3449 requirements.add(requirementsmod.RELATIVE_SHARED_REQUIREMENT)
3450 3450 else:
3451 3451 requirements.add(requirementsmod.SHARED_REQUIREMENT)
3452 3452
3453 3453 return requirements
3454 3454
3455 3455 if b'backend' not in createopts:
3456 3456 raise error.ProgrammingError(
3457 3457 b'backend key not present in createopts; '
3458 3458 b'was defaultcreateopts() called?'
3459 3459 )
3460 3460
3461 3461 if createopts[b'backend'] != b'revlogv1':
3462 3462 raise error.Abort(
3463 3463 _(
3464 3464 b'unable to determine repository requirements for '
3465 3465 b'storage backend: %s'
3466 3466 )
3467 3467 % createopts[b'backend']
3468 3468 )
3469 3469
3470 3470 requirements = {requirementsmod.REVLOGV1_REQUIREMENT}
3471 3471 if ui.configbool(b'format', b'usestore'):
3472 3472 requirements.add(requirementsmod.STORE_REQUIREMENT)
3473 3473 if ui.configbool(b'format', b'usefncache'):
3474 3474 requirements.add(requirementsmod.FNCACHE_REQUIREMENT)
3475 3475 if ui.configbool(b'format', b'dotencode'):
3476 3476 requirements.add(requirementsmod.DOTENCODE_REQUIREMENT)
3477 3477
3478 3478 compengines = ui.configlist(b'format', b'revlog-compression')
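    # for/else: the else branch runs only if the loop completes without
    # hitting ``break``, i.e. none of the configured engines is both
    # available and usable for revlogs, in which case we abort below.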
3479 3479 for compengine in compengines:
3480 3480 if compengine in util.compengines:
3481 3481 engine = util.compengines[compengine]
3482 3482 if engine.available() and engine.revlogheader():
3483 3483 break
3484 3484 else:
3485 3485 raise error.Abort(
3486 3486 _(
3487 3487 b'compression engines %s defined by '
3488 3488 b'format.revlog-compression not available'
3489 3489 )
3490 3490 % b', '.join(b'"%s"' % e for e in compengines),
3491 3491 hint=_(
3492 3492 b'run "hg debuginstall" to list available '
3493 3493 b'compression engines'
3494 3494 ),
3495 3495 )
3496 3496
3497 3497 # zlib is the historical default and doesn't need an explicit requirement.
3498 3498 if compengine == b'zstd':
3499 3499 requirements.add(b'revlog-compression-zstd')
3500 3500 elif compengine != b'zlib':
3501 3501 requirements.add(b'exp-compression-%s' % compengine)
3502 3502
3503 3503 if scmutil.gdinitconfig(ui):
3504 3504 requirements.add(requirementsmod.GENERALDELTA_REQUIREMENT)
3505 3505 if ui.configbool(b'format', b'sparse-revlog'):
3506 3506 requirements.add(requirementsmod.SPARSEREVLOG_REQUIREMENT)
3507 3507
3508 3508 # experimental config: format.exp-use-side-data
3509 3509 if ui.configbool(b'format', b'exp-use-side-data'):
3510 3510 requirements.discard(requirementsmod.REVLOGV1_REQUIREMENT)
3511 3511 requirements.add(requirementsmod.REVLOGV2_REQUIREMENT)
3512 3512 requirements.add(requirementsmod.SIDEDATA_REQUIREMENT)
3513 3513 # experimental config: format.exp-use-copies-side-data-changeset
3514 3514 if ui.configbool(b'format', b'exp-use-copies-side-data-changeset'):
3515 3515 requirements.discard(requirementsmod.REVLOGV1_REQUIREMENT)
3516 3516 requirements.add(requirementsmod.REVLOGV2_REQUIREMENT)
3517 3517 requirements.add(requirementsmod.SIDEDATA_REQUIREMENT)
3518 3518 requirements.add(requirementsmod.COPIESSDC_REQUIREMENT)
3519 3519 if ui.configbool(b'experimental', b'treemanifest'):
3520 3520 requirements.add(requirementsmod.TREEMANIFEST_REQUIREMENT)
3521 3521
3522 3522 revlogv2 = ui.config(b'experimental', b'revlogv2')
3523 3523 if revlogv2 == b'enable-unstable-format-and-corrupt-my-data':
3524 3524 requirements.discard(requirementsmod.REVLOGV1_REQUIREMENT)
3525 3525 # generaldelta is implied by revlogv2.
3526 3526 requirements.discard(requirementsmod.GENERALDELTA_REQUIREMENT)
3527 3527 requirements.add(requirementsmod.REVLOGV2_REQUIREMENT)
3528 3528 # experimental config: format.internal-phase
3529 3529 if ui.configbool(b'format', b'internal-phase'):
3530 3530 requirements.add(requirementsmod.INTERNAL_PHASE_REQUIREMENT)
3531 3531
3532 3532 if createopts.get(b'narrowfiles'):
3533 3533 requirements.add(requirementsmod.NARROW_REQUIREMENT)
3534 3534
3535 3535 if createopts.get(b'lfs'):
3536 3536 requirements.add(b'lfs')
3537 3537
3538 3538 if ui.configbool(b'format', b'bookmarks-in-store'):
3539 3539 requirements.add(bookmarks.BOOKMARKS_IN_STORE_REQUIREMENT)
3540 3540
3541 3541 if ui.configbool(b'format', b'use-persistent-nodemap'):
3542 3542 requirements.add(requirementsmod.NODEMAP_REQUIREMENT)
3543 3543
3544 3544 # if share-safe is enabled, let's create the new repository with the new
3545 3545 # requirement
3546 3546 if ui.configbool(b'format', b'use-share-safe'):
3547 3547 requirements.add(requirementsmod.SHARESAFE_REQUIREMENT)
3548 3548
3549 3549 return requirements
3550 3550
3551 3551
3552 3552 def checkrequirementscompat(ui, requirements):
3553 3553 """Checks compatibility of repository requirements enabled and disabled.
3554 3554
3555 3555 Returns a set of requirements which need to be dropped because dependent
3556 3556 requirements are not enabled. Also warns users about it."""
3557 3557
3558 3558 dropped = set()
3559 3559
3560 3560 if requirementsmod.STORE_REQUIREMENT not in requirements:
3561 3561 if bookmarks.BOOKMARKS_IN_STORE_REQUIREMENT in requirements:
3562 3562 ui.warn(
3563 3563 _(
3564 3564 b'ignoring enabled \'format.bookmarks-in-store\' config '
3565 3565 b'because it is incompatible with disabled '
3566 3566 b'\'format.usestore\' config\n'
3567 3567 )
3568 3568 )
3569 3569 dropped.add(bookmarks.BOOKMARKS_IN_STORE_REQUIREMENT)
3570 3570
3571 3571 if (
3572 3572 requirementsmod.SHARED_REQUIREMENT in requirements
3573 3573 or requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements
3574 3574 ):
3575 3575 raise error.Abort(
3576 3576 _(
3577 3577 b"cannot create shared repository as source was created"
3578 3578 b" with 'format.usestore' config disabled"
3579 3579 )
3580 3580 )
3581 3581
3582 3582 if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
3583 3583 ui.warn(
3584 3584 _(
3585 3585 b"ignoring enabled 'format.use-share-safe' config because "
3586 3586 b"it is incompatible with disabled 'format.usestore'"
3587 3587 b" config\n"
3588 3588 )
3589 3589 )
3590 3590 dropped.add(requirementsmod.SHARESAFE_REQUIREMENT)
3591 3591
3592 3592 return dropped
3593 3593
3594 3594
3595 3595 def filterknowncreateopts(ui, createopts):
3596 3596 """Filters a dict of repo creation options against options that are known.
3597 3597
3598 3598 Receives a dict of repo creation options and returns a dict of those
3599 3599 options that we don't know how to handle.
3600 3600
3601 3601 This function is called as part of repository creation. If the
3602 3602 returned dict contains any items, repository creation will not
3603 3603 be allowed, as it means there was a request to create a repository
3604 3604 with options not recognized by loaded code.
3605 3605
3606 3606 Extensions can wrap this function to filter out creation options
3607 3607 they know how to handle.
3608 3608 """
3609 3609 known = {
3610 3610 b'backend',
3611 3611 b'lfs',
3612 3612 b'narrowfiles',
3613 3613 b'sharedrepo',
3614 3614 b'sharedrelative',
3615 3615 b'shareditems',
3616 3616 b'shallowfilestore',
3617 3617 }
3618 3618
3619 3619 return {k: v for k, v in createopts.items() if k not in known}
3620 3620
3621 3621
3622 3622 def createrepository(ui, path, createopts=None):
3623 3623 """Create a new repository in a vfs.
3624 3624
3625 3625 ``path`` path to the new repo's working directory.
3626 3626 ``createopts`` options for the new repository.
3627 3627
3628 3628 The following keys for ``createopts`` are recognized:
3629 3629
3630 3630 backend
3631 3631 The storage backend to use.
3632 3632 lfs
3633 3633 Repository will be created with ``lfs`` requirement. The lfs extension
3634 3634 will automatically be loaded when the repository is accessed.
3635 3635 narrowfiles
3636 3636 Set up repository to support narrow file storage.
3637 3637 sharedrepo
3638 3638 Repository object from which storage should be shared.
3639 3639 sharedrelative
3640 3640 Boolean indicating if the path to the shared repo should be
3641 3641 stored as relative. By default, the pointer to the "parent" repo
3642 3642 is stored as an absolute path.
3643 3643 shareditems
3644 3644 Set of items to share to the new repository (in addition to storage).
3645 3645 shallowfilestore
3646 3646 Indicates that storage for files should be shallow (not all ancestor
3647 3647 revisions are known).
3648 3648 """
3649 3649 createopts = defaultcreateopts(ui, createopts=createopts)
3650 3650
3651 3651 unknownopts = filterknowncreateopts(ui, createopts)
3652 3652
3653 3653 if not isinstance(unknownopts, dict):
3654 3654 raise error.ProgrammingError(
3655 3655 b'filterknowncreateopts() did not return a dict'
3656 3656 )
3657 3657
3658 3658 if unknownopts:
3659 3659 raise error.Abort(
3660 3660 _(
3661 3661 b'unable to create repository because of unknown '
3662 3662 b'creation option: %s'
3663 3663 )
3664 3664 % b', '.join(sorted(unknownopts)),
3665 3665 hint=_(b'is a required extension not loaded?'),
3666 3666 )
3667 3667
3668 3668 requirements = newreporequirements(ui, createopts=createopts)
3669 3669 requirements -= checkrequirementscompat(ui, requirements)
3670 3670
3671 3671 wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)
3672 3672
3673 3673 hgvfs = vfsmod.vfs(wdirvfs.join(b'.hg'))
3674 3674 if hgvfs.exists():
3675 3675 raise error.RepoError(_(b'repository %s already exists') % path)
3676 3676
3677 3677 if b'sharedrepo' in createopts:
3678 3678 sharedpath = createopts[b'sharedrepo'].sharedpath
3679 3679
3680 3680 if createopts.get(b'sharedrelative'):
3681 3681 try:
3682 3682 sharedpath = os.path.relpath(sharedpath, hgvfs.base)
3683 3683 sharedpath = util.pconvert(sharedpath)
3684 3684 except (IOError, ValueError) as e:
3685 3685 # ValueError is raised on Windows if the drive letters differ
3686 3686 # on each path.
3687 3687 raise error.Abort(
3688 3688 _(b'cannot calculate relative path'),
3689 3689 hint=stringutil.forcebytestr(e),
3690 3690 )
3691 3691
3692 3692 if not wdirvfs.exists():
3693 3693 wdirvfs.makedirs()
3694 3694
3695 3695 hgvfs.makedir(notindexed=True)
3696 3696 if b'sharedrepo' not in createopts:
3697 3697 hgvfs.mkdir(b'cache')
3698 3698 hgvfs.mkdir(b'wcache')
3699 3699
3700 3700 has_store = requirementsmod.STORE_REQUIREMENT in requirements
3701 3701 if has_store and b'sharedrepo' not in createopts:
3702 3702 hgvfs.mkdir(b'store')
3703 3703
3704 3704 # We create an invalid changelog outside the store so very old
3705 3705 # Mercurial versions (which didn't know about the requirements
3706 3706 # file) encounter an error on reading the changelog. This
3707 3707 # effectively locks out old clients and prevents them from
3708 3708 # mucking with a repo in an unknown format.
3709 3709 #
3710 3710 # The revlog header has version 65535, which won't be recognized by
3711 3711 # such old clients.
3712 3712 hgvfs.append(
3713 3713 b'00changelog.i',
3714 3714 b'\0\0\xFF\xFF dummy changelog to prevent using the old repo '
3715 3715 b'layout',
3716 3716 )
3717 3717
3718 3718 # Filter the requirements into working copy and store ones
3719 3719 wcreq, storereq = scmutil.filterrequirements(requirements)
3720 3720 # write working copy ones
3721 3721 scmutil.writerequires(hgvfs, wcreq)
3722 3722 # If there are store requirements and the current repository
3723 3723 # is not a shared one, write stored requirements
3724 3724 # For new shared repository, we don't need to write the store
3725 3725 # requirements as they are already present in store requires
3726 3726 if storereq and b'sharedrepo' not in createopts:
3727 3727 storevfs = vfsmod.vfs(hgvfs.join(b'store'), cacheaudited=True)
3728 3728 scmutil.writerequires(storevfs, storereq)
3729 3729
3730 3730 # Write out file telling readers where to find the shared store.
3731 3731 if b'sharedrepo' in createopts:
3732 3732 hgvfs.write(b'sharedpath', sharedpath)
3733 3733
3734 3734 if createopts.get(b'shareditems'):
3735 3735 shared = b'\n'.join(sorted(createopts[b'shareditems'])) + b'\n'
3736 3736 hgvfs.write(b'shared', shared)
3737 3737
3738 3738
3739 3739 def poisonrepository(repo):
3740 3740 """Poison a repository instance so it can no longer be used."""
3741 3741 # Perform any cleanup on the instance.
3742 3742 repo.close()
3743 3743
3744 3744 # Our strategy is to replace the type of the object with one that
3745 3745 # has all attribute lookups result in error.
3746 3746 #
3747 3747 # But we have to allow the close() method because some constructors
3748 3748 # of repos call close() on repo references.
3749 3749 class poisonedrepository(object):
3750 3750 def __getattribute__(self, item):
3751 3751 if item == 'close':
3752 3752 return object.__getattribute__(self, item)
3753 3753
3754 3754 raise error.ProgrammingError(
3755 3755 b'repo instances should not be used after unshare'
3756 3756 )
3757 3757
3758 3758 def close(self):
3759 3759 pass
3760 3760
3761 3761 # We may have a repoview, which intercepts __setattr__. So be sure
3762 3762 # we operate at the lowest level possible.
3763 3763 object.__setattr__(repo, '__class__', poisonedrepository)
@@ -1,964 +1,967 b''
1 1 # coding: utf-8
2 2 # metadata.py -- code related to various metadata computation and access.
3 3 #
4 4 # Copyright 2019 Google, Inc <martinvonz@google.com>
5 5 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9 from __future__ import absolute_import, print_function
10 10
11 11 import multiprocessing
12 12 import struct
13 13
14 14 from .node import nullrev
15 15 from . import (
16 16 error,
17 17 pycompat,
18 18 requirements as requirementsmod,
19 19 util,
20 20 )
21 21
22 22 from .revlogutils import (
23 23 constants as revlogconst,
24 24 flagutil as sidedataflag,
25 25 sidedata as sidedatamod,
26 26 )
27 27
28 28
29 29 class ChangingFiles(object):
30 30 """A class recording the changes made to files by a changeset
31 31
32 32 Actions performed on files are gathered into 5 sets:
33 33
34 34 - added: files actively added in the changeset.
35 35 - merged: files whose history got merged
36 36 - removed: files removed in the revision
37 37 - salvaged: files that might have been deleted by a merge but were not
38 38 - touched: files affected by the merge
39 39
40 40 and copy information is held by 2 mappings:
41 41
42 42 - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
43 43 - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies
44 44
45 45 See their inline help for details.
46 46 """
47 47
48 48 def __init__(
49 49 self,
50 50 touched=None,
51 51 added=None,
52 52 removed=None,
53 53 merged=None,
54 54 salvaged=None,
55 55 p1_copies=None,
56 56 p2_copies=None,
57 57 ):
58 58 self._added = set(() if added is None else added)
59 59 self._merged = set(() if merged is None else merged)
60 60 self._removed = set(() if removed is None else removed)
61 61 self._touched = set(() if touched is None else touched)
62 62 self._salvaged = set(() if salvaged is None else salvaged)
63 63 self._touched.update(self._added)
64 64 self._touched.update(self._merged)
65 65 self._touched.update(self._removed)
66 66 self._p1_copies = dict(() if p1_copies is None else p1_copies)
67 67 self._p2_copies = dict(() if p2_copies is None else p2_copies)
68 68
69 69 def __eq__(self, other):
70 70 return (
71 71 self.added == other.added
72 72 and self.merged == other.merged
73 73 and self.removed == other.removed
74 74 and self.salvaged == other.salvaged
75 75 and self.touched == other.touched
76 76 and self.copied_from_p1 == other.copied_from_p1
77 77 and self.copied_from_p2 == other.copied_from_p2
78 78 )
79 79
80 80 @property
81 81 def has_copies_info(self):
82 82 return bool(
83 83 self.removed
84 84 or self.merged
85 85 or self.salvaged
86 86 or self.copied_from_p1
87 87 or self.copied_from_p2
88 88 )
89 89
90 90 @util.propertycache
91 91 def added(self):
92 92 """files actively added in the changeset
93 93
94 94 Any file present in that revision that was absent in all the changeset's
95 95 parents.
96 96
97 97 In case of merge, this means a file absent in one of the parents but
98 98 existing in the other will *not* be contained in this set. (They were
99 99 added by an ancestor)
100 100 """
101 101 return frozenset(self._added)
102 102
103 103 def mark_added(self, filename):
104 104 if 'added' in vars(self):
105 105 del self.added
106 106 self._added.add(filename)
107 107 self.mark_touched(filename)
108 108
109 109 def update_added(self, filenames):
110 110 for f in filenames:
111 111 self.mark_added(f)
112 112
113 113 @util.propertycache
114 114 def merged(self):
115 115 """files actively merged during a merge
116 116
117 117 Any modified file which had modifications on both sides that needed merging.
118 118
119 119 In this case a new filenode was created and it has two parents.
120 120 """
121 121 return frozenset(self._merged)
122 122
123 123 def mark_merged(self, filename):
124 124 if 'merged' in vars(self):
125 125 del self.merged
126 126 self._merged.add(filename)
127 127 self.mark_touched(filename)
128 128
129 129 def update_merged(self, filenames):
130 130 for f in filenames:
131 131 self.mark_merged(f)
132 132
133 133 @util.propertycache
134 134 def removed(self):
135 135 """files actively removed by the changeset
136 136
137 137 In case of merge this will only contain the set of files removing "new"
138 138 content. For any file absent in the current changeset:
139 139
140 140 a) If the file exists in both parents, it is clearly "actively" removed
141 141 by this changeset.
142 142
143 143 b) If a file exists in only one parent and in none of the common
144 144 ancestors, then the file was newly added in one of the merged branches
145 145 and then got "actively" removed.
146 146
147 147 c) If a file exists in only one parent and at least one of the common
148 148 ancestors using the same filenode, then the file was unchanged on one
149 149 side and deleted on the other side. The merge "passively" propagated
150 150 that deletion, but didn't "actively" remove the file. In this case the
151 151 file is *not* included in the `removed` set.
152 152
153 153 d) If a file exists in only one parent and at least one of the common
154 154 ancestors using a different filenode, then the file was changed on one
155 155 side and removed on the other side. The merge process "actively"
156 156 decided to drop the new change and delete the file. Unlike in the
157 157 previous case, (c), the file is included in the `removed` set.
158 158
159 159 Summary table for merge:
160 160
161 161 case | exists in parents | exists in gca || removed
162 162 (a) | both | * || yes
163 163 (b) | one | none || yes
164 164 (c) | one | same filenode || no
165 165 (d) | one | new filenode || yes
166 166 """
167 167 return frozenset(self._removed)
168 168
169 169 def mark_removed(self, filename):
170 170 if 'removed' in vars(self):
171 171 del self.removed
172 172 self._removed.add(filename)
173 173 self.mark_touched(filename)
174 174
175 175 def update_removed(self, filenames):
176 176 for f in filenames:
177 177 self.mark_removed(f)
178 178
179 179 @util.propertycache
180 180 def salvaged(self):
181 181 """files that might have been deleted by a merge, but still exists.
182 182
183 183 During a merge, the manifest merging might select some files for
184 184 removal, or for a removed/changed conflict. If at commit time the file
185 185 still exists, its removal was "reverted" and the file is "salvaged"
186 186 """
187 187 return frozenset(self._salvaged)
188 188
189 189 def mark_salvaged(self, filename):
190 190 if "salvaged" in vars(self):
191 191 del self.salvaged
192 192 self._salvaged.add(filename)
193 193 self.mark_touched(filename)
194 194
195 195 def update_salvaged(self, filenames):
196 196 for f in filenames:
197 197 self.mark_salvaged(f)
198 198
199 199 @util.propertycache
200 200 def touched(self):
201 201 """files either actively modified, added or removed"""
202 202 return frozenset(self._touched)
203 203
204 204 def mark_touched(self, filename):
205 205 if 'touched' in vars(self):
206 206 del self.touched
207 207 self._touched.add(filename)
208 208
209 209 def update_touched(self, filenames):
210 210 for f in filenames:
211 211 self.mark_touched(f)
212 212
213 213 @util.propertycache
214 214 def copied_from_p1(self):
215 215 return self._p1_copies.copy()
216 216
217 217 def mark_copied_from_p1(self, source, dest):
218 218 if 'copied_from_p1' in vars(self):
219 219 del self.copied_from_p1
220 220 self._p1_copies[dest] = source
221 221
222 222 def update_copies_from_p1(self, copies):
223 223 for dest, source in copies.items():
224 224 self.mark_copied_from_p1(source, dest)
225 225
226 226 @util.propertycache
227 227 def copied_from_p2(self):
228 228 return self._p2_copies.copy()
229 229
230 230 def mark_copied_from_p2(self, source, dest):
231 231 if 'copied_from_p2' in vars(self):
232 232 del self.copied_from_p2
233 233 self._p2_copies[dest] = source
234 234
235 235 def update_copies_from_p2(self, copies):
236 236 for dest, source in copies.items():
237 237 self.mark_copied_from_p2(source, dest)
238 238
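# Illustrative sketch (not from this diff): marking a file as added,
# merged, removed or salvaged also records it as touched; copy marks are
# read back through the frozen properties:
#
#     cf = ChangingFiles()
#     cf.mark_added(b'a.txt')
#     cf.mark_copied_from_p1(b'old.txt', b'a.txt')
#     assert b'a.txt' in cf.added and b'a.txt' in cf.touched
#     assert cf.copied_from_p1 == {b'a.txt': b'old.txt'}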
239 239
240 240 def compute_all_files_changes(ctx):
241 241 """compute the files changed by a revision"""
242 242 p1 = ctx.p1()
243 243 p2 = ctx.p2()
244 244 if p1.rev() == nullrev and p2.rev() == nullrev:
245 245 return _process_root(ctx)
246 246 elif p1.rev() != nullrev and p2.rev() == nullrev:
247 247 return _process_linear(p1, ctx)
248 248 elif p1.rev() == nullrev and p2.rev() != nullrev:
249 249 # In the wild, one can encounter changesets where p1 is null but p2 is not
250 250 return _process_linear(p1, ctx, parent=2)
251 251 elif p1.rev() == p2.rev():
252 252 # In the wild, one can encounter such a "non-merge" (both parents equal)
253 253 return _process_linear(p1, ctx)
254 254 else:
255 255 return _process_merge(p1, p2, ctx)
256 256
257 257
258 258 def _process_root(ctx):
259 259 """compute the appropriate changed files for a changeset with no parents"""
260 260 # Simple, there was nothing before it, so everything is added.
261 261 md = ChangingFiles()
262 262 manifest = ctx.manifest()
263 263 for filename in manifest:
264 264 md.mark_added(filename)
265 265 return md
266 266
267 267
268 268 def _process_linear(parent_ctx, children_ctx, parent=1):
269 269 """compute the appropriate changed files for a changeset with a single parent"""
270 270 md = ChangingFiles()
271 271 parent_manifest = parent_ctx.manifest()
272 272 children_manifest = children_ctx.manifest()
273 273
274 274 copies_candidate = []
275 275
276 276 for filename, d in parent_manifest.diff(children_manifest).items():
277 277 if d[1][0] is None:
278 278 # no filenode for the "new" value, file is absent
279 279 md.mark_removed(filename)
280 280 else:
281 281 copies_candidate.append(filename)
282 282 if d[0][0] is None:
283 283 # no filenode for the "old" value, file was absent
284 284 md.mark_added(filename)
285 285 else:
286 286 # filenode for both "old" and "new"
287 287 md.mark_touched(filename)
288 288
289 289 if parent == 1:
290 290 copied = md.mark_copied_from_p1
291 291 elif parent == 2:
292 292 copied = md.mark_copied_from_p2
293 293 else:
294 294 assert False, "bad parent value %d" % parent
295 295
296 296 for filename in copies_candidate:
297 297 copy_info = children_ctx[filename].renamed()
298 298 if copy_info:
299 299 source, srcnode = copy_info
300 300 copied(source, filename)
301 301
302 302 return md
303 303
304 304
305 305 def _process_merge(p1_ctx, p2_ctx, ctx):
306 306 """compute the appropriate changed files for a changeset with two parents
307 307
308 308 This is a more advanced case. The information we need to record is summarised
309 309 in the following table:
310 310
311 311 ┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐
312 312 │ diff ╲ diff │ ø │ (Some, None) │ (None, Some) │ (Some, Some) │
313 313 │ p2 ╲ p1 │ │ │ │ │
314 314 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
315 315 │ │ │🄱 No Changes │🄳 No Changes │ │
316 316 │ ø │🄰 No Changes │ OR │ OR │🄵 No Changes │
317 317 │ │ │🄲 Deleted[1] │🄴 Salvaged[2]│ [3] │
318 318 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
319 319 │ │🄶 No Changes │ │ │ │
320 320 │ (Some, None) │ OR │🄻 Deleted │ ø │ ø │
321 321 │ │🄷 Deleted[1] │ │ │ │
322 322 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
323 323 │ │🄸 No Changes │ │ │ 🄽 Touched │
324 324 │ (None, Some) │ OR │ ø │🄼 Added │OR 🅀 Salvaged │
325 325 │ │🄹 Salvaged[2]│ │ (copied?) │ (copied?) │
326 326 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
327 327 │ │ │ │ 🄾 Touched │ 🄿 Merged │
328 328 │ (Some, Some) │🄺 No Changes │ ø │OR 🅁 Salvaged │OR 🅂 Touched │
329 329 │ │ [3] │ │ (copied?) │ (copied?) │
330 330 └──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘
331 331
332 332 Special case [1]:
333 333
334 334 The situation is:
335 335 - parent-A: file exists,
336 336 - parent-B: no file,
337 337 - working-copy: no file.
338 338
339 339 Detecting a "deletion" will depend on the presence of actual change on
340 340 the "parent-A" branch:
341 341
342 342 Subcase 🄱 or 🄶 : if the state of the file in "parent-A" is unchanged
343 343 compared to the merge ancestors, then parent-A branch left the file
344 344 untouched while parent-B deleted it. We simply apply the change from
345 345 "parent-B" branch the file was automatically dropped.
346 346 The result is:
347 347 - file is not recorded as touched by the merge.
348 348
349 349 Subcase 🄲 or 🄷 : otherwise, the changes from the parent-A branch were explicitly dropped and
350 350 the file was "deleted again". From a user perspective, the message
351 351 about "locally changed" while "remotely deleted" (or the other way
352 352 around) was issued and the user chose to delete the file.
353 353 The result:
354 354 - file is recorded as touched by the merge.
355 355
356 356
357 357 Special case [2]:
358 358
359 359 The situation is:
360 360 - parent-A: no file,
361 361 - parent-B: file,
362 362 - working-copy: file (same content as parent-B).
363 363
364 364 There are three subcases depending on the ancestors contents:
365 365
366 366 - A) the file is missing in all ancestors,
367 367 - B) at least one ancestor has the file, with a filenode different from parent-B's,
368 368 - C) all ancestors use the same filenode as parent-B,
369 369
370 370 Subcase (A) is the simplest: nothing happened on the parent-A side while
371 371 parent-B added it.
372 372
373 373 The result:
374 374 - the file is not marked as touched by the merge.
375 375
376 376 Subcase (B) is the counterpart of "Special case [1]": the file was
377 377 modified on parent-B side, while parent-A side deleted it. However this
378 378 time, the conflict was solved by keeping the file (and its
379 379 modification). We consider the file as "salvaged".
380 380
381 381 The result:
382 382 - the file is marked as "salvaged" by the merge.
383 383
384 384 Subcase (C) is a subtle variation of the case above. In this case, the
385 385 file is unchanged on the parent-B side and actively removed on the
386 386 parent-A side. So the merge machinery correctly decides it should be
387 387 removed. However, the file was explicitly restored to its parent-B
388 388 content before the merge was committed. The file is marked
389 389 as salvaged too. From the merge result perspective, this is similar to
390 390 Subcase (B); however, from the merge resolution perspective they differ,
391 391 since in (C) the merge hit a conflict with no obvious solution
392 392 (which then got reversed).
393 393
394 394 Special case [3]:
395 395
396 396 The situation is:
397 397 - parent-A: file,
398 398 - parent-B: file (different filenode as parent-A),
399 399 - working-copy: file (same filenode as parent-B).
400 400
401 401 This case is in theory much simpler: for this to happen, the
402 402 filenode in parent-B must purely replace the one in parent-A (either a
403 403 descendant, or a full new file history, see changeset). So the merge
404 404 introduces no changes, and the file is not affected by the merge...
405 405
406 406 However, in the wild it is possible to find commits where the above is
407 407 not true. For example, some repositories have commits where the *new* node is an
408 408 ancestor of the node in parent-A, or where parent-A and parent-B are two
409 409 branches of the same file history, yet no merge-filenode was created
410 410 (while the "merge" should have led to a "modification").
411 411
412 412 Detecting such cases (and not recording the file as modified) would be a
413 413 nice bonus. However, we do not do any of this yet.
414 414 """
415 415
416 416 repo = ctx.repo()
417 417 md = ChangingFiles()
418 418
419 419 m = ctx.manifest()
420 420 p1m = p1_ctx.manifest()
421 421 p2m = p2_ctx.manifest()
422 422 diff_p1 = p1m.diff(m)
423 423 diff_p2 = p2m.diff(m)
424 424
425 425 cahs = ctx.repo().changelog.commonancestorsheads(
426 426 p1_ctx.node(), p2_ctx.node()
427 427 )
428 428 if not cahs:
429 429 cahs = [nullrev]
430 430 mas = [ctx.repo()[r].manifest() for r in cahs]
431 431
432 432 copy_candidates = []
433 433
434 434 # Dealing with case 🄰 happens automatically. Since there are no entries in
435 435 # d1 nor d2, we will never iterate over it.
436 436
437 437 # Iteration over d1 content will deal with all cases but the ones in the
438 438 # first column of the table.
439 439 for filename, d1 in diff_p1.items():
440 440
441 441 d2 = diff_p2.pop(filename, None)
442 442
443 443 if d2 is None:
444 444 # this deals with the first row of the table.
445 445 _process_other_unchanged(md, mas, filename, d1)
446 446 else:
447 447
448 448 if d1[0][0] is None and d2[0][0] is None:
449 449 # case 🄼 — both added the file.
450 450 md.mark_added(filename)
451 451 copy_candidates.append(filename)
452 452 elif d1[1][0] is None and d2[1][0] is None:
453 453 # case 🄻 — both deleted the file.
454 454 md.mark_removed(filename)
455 455 elif d1[1][0] is not None and d2[1][0] is not None:
456 456 if d1[0][0] is None or d2[0][0] is None:
457 457 if any(_find(ma, filename) is not None for ma in mas):
458 458 # case 🅀 or 🅁
459 459 md.mark_salvaged(filename)
460 460 else:
461 461 # case 🄽 🄾 : touched
462 462 md.mark_touched(filename)
463 463 else:
464 464 fctx = repo.filectx(filename, fileid=d1[1][0])
465 465 if fctx.p2().rev() == nullrev:
466 466 # case 🅂
467 467 # let's assume we can trust the file history. If the
468 468 # filenode is not a merge, the file was not merged.
469 469 md.mark_touched(filename)
470 470 else:
471 471 # case 🄿
472 472 md.mark_merged(filename)
473 473 copy_candidates.append(filename)
474 474 else:
475 475 # Impossible case, the post-merge file status cannot be None on
476 476 # one side and something on the other side.
477 477 assert False, "unreachable"
478 478
479 479 # Iteration over remaining d2 content deals with the first column of the
480 480 # table.
481 481 for filename, d2 in diff_p2.items():
482 482 _process_other_unchanged(md, mas, filename, d2)
483 483
484 484 for filename in copy_candidates:
485 485 copy_info = ctx[filename].renamed()
486 486 if copy_info:
487 487 source, srcnode = copy_info
488 488 if source in p1_ctx and p1_ctx[source].filenode() == srcnode:
489 489 md.mark_copied_from_p1(source, filename)
490 490 elif source in p2_ctx and p2_ctx[source].filenode() == srcnode:
491 491 md.mark_copied_from_p2(source, filename)
492 492 return md
493 493
494 494
495 495 def _find(manifest, filename):
496 496 """return the associate filenode or None"""
497 497 if filename not in manifest:
498 498 return None
499 499 return manifest.find(filename)[0]
500 500
501 501
502 502 def _process_other_unchanged(md, mas, filename, diff):
503 503 source_node = diff[0][0]
504 504 target_node = diff[1][0]
505 505
506 506 if source_node is not None and target_node is None:
507 507 if any(_find(ma, filename) != source_node for ma in mas):
508 508 # case 🄲 or 🄷
509 509 md.mark_removed(filename)
510 510 # else, we have case 🄱 or 🄶 : no change needs to be recorded
511 511 elif source_node is None and target_node is not None:
512 512 if any(_find(ma, filename) is not None for ma in mas):
513 513 # case 🄴 or 🄹
514 514 md.mark_salvaged(filename)
515 515 # else, we have case 🄳 or 🄸 : simple merge without intervention
516 516 elif source_node is not None and target_node is not None:
517 517 # case 🄵 or 🄺 : simple merge without intervention
518 518 #
519 519 # In the buggy case where source_node is not an ancestor of target_node,
520 520 # there should have been a new filenode created, recording this as
521 521 # "modified". We do not deal with these yet.
522 522 pass
523 523 else:
524 524 # An impossible case: the diff algorithm should not return an entry if the
525 525 # file is missing on both sides.
526 526 assert False, "unreachable"
527 527
528 528
529 529 def _missing_from_all_ancestors(mas, filename):
530 530 return all(_find(ma, filename) is None for ma in mas)
531 531
532 532
533 533 def computechangesetfilesadded(ctx):
534 534 """return the list of files added in a changeset"""
535 535 added = []
536 536 for f in ctx.files():
537 537 if not any(f in p for p in ctx.parents()):
538 538 added.append(f)
539 539 return added
540 540
541 541
542 542 def get_removal_filter(ctx, x=None):
543 543 """return a function to detect files "wrongly" detected as `removed`
544 544
545 545 When a file is removed relative to p1 in a merge, this
546 546 function determines whether the absence is due to a
547 547 deletion from a parent, or whether the merge commit
548 548 itself deletes the file. We decide this by doing a
549 549 simplified three way merge of the manifest entry for
550 550 the file. There are two ways we decide the merge
551 551 itself didn't delete a file:
552 552 - neither parent (nor the merge) contain the file
553 553 - exactly one parent contains the file, and that
554 554 parent has the same filelog entry as the merge
555 555 ancestor (or all of them if there two). In other
556 556 words, that parent left the file unchanged while the
557 557 other one deleted it.
558 558 One way to think about this is that deleting a file is
559 559 similar to emptying it, so the list of changed files
560 560 should be similar either way. The computation
561 561 described above is not done directly in _filecommit
562 562 when creating the list of changed files, however
563 563 it does something very similar by comparing filelog
564 564 nodes.
565 565 """
566 566
567 567 if x is not None:
568 568 p1, p2, m1, m2 = x
569 569 else:
570 570 p1 = ctx.p1()
571 571 p2 = ctx.p2()
572 572 m1 = p1.manifest()
573 573 m2 = p2.manifest()
574 574
575 575 @util.cachefunc
576 576 def mas():
577 577 p1n = p1.node()
578 578 p2n = p2.node()
579 579 cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
580 580 if not cahs:
581 581 cahs = [nullrev]
582 582 return [ctx.repo()[r].manifest() for r in cahs]
583 583
584 584 def deletionfromparent(f):
585 585 if f in m1:
586 586 return f not in m2 and all(
587 587 f in ma and ma.find(f) == m1.find(f) for ma in mas()
588 588 )
589 589 elif f in m2:
590 590 return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
591 591 else:
592 592 return True
593 593
594 594 return deletionfromparent
595 595
596 596
597 597 def computechangesetfilesremoved(ctx):
598 598 """return the list of files removed in a changeset"""
599 599 removed = []
600 600 for f in ctx.files():
601 601 if f not in ctx:
602 602 removed.append(f)
603 603 if removed:
604 604 rf = get_removal_filter(ctx)
605 605 removed = [r for r in removed if not rf(r)]
606 606 return removed
607 607
608 608
609 609 def computechangesetfilesmerged(ctx):
610 610 """return the list of files merged in a changeset"""
611 611 merged = []
612 612 if len(ctx.parents()) < 2:
613 613 return merged
614 614 for f in ctx.files():
615 615 if f in ctx:
616 616 fctx = ctx[f]
617 617 parents = fctx._filelog.parents(fctx._filenode)
618 618 if parents[1] != ctx.repo().nullid:
619 619 merged.append(f)
620 620 return merged
621 621
622 622
623 623 def computechangesetcopies(ctx):
624 624 """return the copies data for a changeset
625 625
626 626 The copies data are returned as a pair of dictionaries (p1copies, p2copies).
627 627
628 628 Each dictionary is in the form: `{newname: oldname}`
629 629 """
630 630 p1copies = {}
631 631 p2copies = {}
632 632 p1 = ctx.p1()
633 633 p2 = ctx.p2()
634 634 narrowmatch = ctx._repo.narrowmatch()
635 635 for dst in ctx.files():
636 636 if not narrowmatch(dst) or dst not in ctx:
637 637 continue
638 638 copied = ctx[dst].renamed()
639 639 if not copied:
640 640 continue
641 641 src, srcnode = copied
642 642 if src in p1 and p1[src].filenode() == srcnode:
643 643 p1copies[dst] = src
644 644 elif src in p2 and p2[src].filenode() == srcnode:
645 645 p2copies[dst] = src
646 646 return p1copies, p2copies
647 647
648 648
649 649 def encodecopies(files, copies):
650 650 items = []
651 651 for i, dst in enumerate(files):
652 652 if dst in copies:
653 653 items.append(b'%d\0%s' % (i, copies[dst]))
654 654 if len(items) != len(copies):
655 655 raise error.ProgrammingError(
656 656 b'some copy targets missing from file list'
657 657 )
658 658 return b"\n".join(items)
659 659
660 660
661 661 def decodecopies(files, data):
662 662 try:
663 663 copies = {}
664 664 if not data:
665 665 return copies
666 666 for l in data.split(b'\n'):
667 667 strindex, src = l.split(b'\0')
668 668 i = int(strindex)
669 669 dst = files[i]
670 670 copies[dst] = src
671 671 return copies
672 672 except (ValueError, IndexError):
673 673 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
674 674 # used different syntax for the value.
675 675 return None
676 676
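# Illustrative round trip (a sketch, not from this diff): copies are
# encoded as "<index>\0<source>" lines against the given files list.
#
#     files = [b'a.txt', b'b.txt']
#     data = encodecopies(files, {b'b.txt': b'a.txt'})   # -> b'1\x00a.txt'
#     assert decodecopies(files, data) == {b'b.txt': b'a.txt'}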
677 677
678 678 def encodefileindices(files, subset):
679 679 subset = set(subset)
680 680 indices = []
681 681 for i, f in enumerate(files):
682 682 if f in subset:
683 683 indices.append(b'%d' % i)
684 684 return b'\n'.join(indices)
685 685
686 686
687 687 def decodefileindices(files, data):
688 688 try:
689 689 subset = []
690 690 if not data:
691 691 return subset
692 692 for strindex in data.split(b'\n'):
693 693 i = int(strindex)
694 694 if i < 0 or i >= len(files):
695 695 return None
696 696 subset.append(files[i])
697 697 return subset
698 698 except (ValueError, IndexError):
699 699 # Perhaps someone had chosen the same key name (e.g. "added") and
700 700 # used different syntax for the value.
701 701 return None
702 702
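# Illustrative round trip (a sketch): a subset of ``files`` is stored as
# newline-separated decimal indices into the list.
#
#     files = [b'a', b'b', b'c']
#     assert encodefileindices(files, {b'a', b'c'}) == b'0\n2'
#     assert decodefileindices(files, b'0\n2') == [b'a', b'c']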
703 703
704 704 # see mercurial/helptext/internals/revlogs.txt for details about the format
705 705
706 706 ACTION_MASK = int("111" "00", 2)
707 707 # note: an untouched file used as a copy source will show as `000` for this mask.
708 708 ADDED_FLAG = int("001" "00", 2)
709 709 MERGED_FLAG = int("010" "00", 2)
710 710 REMOVED_FLAG = int("011" "00", 2)
711 711 SALVAGED_FLAG = int("100" "00", 2)
712 712 TOUCHED_FLAG = int("101" "00", 2)
713 713
714 714 COPIED_MASK = int("11", 2)
715 715 COPIED_FROM_P1_FLAG = int("10", 2)
716 716 COPIED_FROM_P2_FLAG = int("11", 2)
717 717
718 718 # structure is <flag><filename-end><copy-source>
719 719 INDEX_HEADER = struct.Struct(">L")
720 720 INDEX_ENTRY = struct.Struct(">bLL")
721 721
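# Worked example (illustrative): a flag byte combines one action with one
# copy marker, and each part is recovered by masking.  A file added by the
# changeset and copied from p1:
#
#     flag = ADDED_FLAG | COPIED_FROM_P1_FLAG   # 0b00100 | 0b00010 == 0b00110
#     assert flag & ACTION_MASK == ADDED_FLAG
#     assert flag & COPIED_MASK == COPIED_FROM_P1_FLAG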
722 722
723 723 def encode_files_sidedata(files):
724 724 all_files = set(files.touched)
725 725 all_files.update(files.copied_from_p1.values())
726 726 all_files.update(files.copied_from_p2.values())
727 727 all_files = sorted(all_files)
728 728 file_idx = {f: i for (i, f) in enumerate(all_files)}
729 729 file_idx[None] = 0
730 730
731 731 chunks = [INDEX_HEADER.pack(len(all_files))]
732 732
733 733 filename_length = 0
734 734 for f in all_files:
735 735 filename_size = len(f)
736 736 filename_length += filename_size
737 737 flag = 0
738 738 if f in files.added:
739 739 flag |= ADDED_FLAG
740 740 elif f in files.merged:
741 741 flag |= MERGED_FLAG
742 742 elif f in files.removed:
743 743 flag |= REMOVED_FLAG
744 744 elif f in files.salvaged:
745 745 flag |= SALVAGED_FLAG
746 746 elif f in files.touched:
747 747 flag |= TOUCHED_FLAG
748 748
749 749 copy = None
750 750 if f in files.copied_from_p1:
751 751 flag |= COPIED_FROM_P1_FLAG
752 752 copy = files.copied_from_p1.get(f)
753 753 elif f in files.copied_from_p2:
754 754 copy = files.copied_from_p2.get(f)
755 755 flag |= COPIED_FROM_P2_FLAG
756 756 copy_idx = file_idx[copy]
757 757 chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx))
758 758 chunks.extend(all_files)
759 759 return {sidedatamod.SD_FILES: b''.join(chunks)}
760 760
761 761
762 762 def decode_files_sidedata(sidedata):
763 763 md = ChangingFiles()
764 764 raw = sidedata.get(sidedatamod.SD_FILES)
765 765
766 766 if raw is None:
767 767 return md
768 768
769 769 copies = []
770 770 all_files = []
771 771
772 772 assert len(raw) >= INDEX_HEADER.size
773 773 total_files = INDEX_HEADER.unpack_from(raw, 0)[0]
774 774
775 775 offset = INDEX_HEADER.size
776 776 file_offset_base = offset + (INDEX_ENTRY.size * total_files)
777 777 file_offset_last = file_offset_base
778 778
779 779 assert len(raw) >= file_offset_base
780 780
781 781 for idx in range(total_files):
782 782 flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset)
783 783 file_end += file_offset_base
784 784 filename = raw[file_offset_last:file_end]
785 785 filesize = file_end - file_offset_last
786 786 assert len(filename) == filesize
787 787 offset += INDEX_ENTRY.size
788 788 file_offset_last = file_end
789 789 all_files.append(filename)
790 790 if flag & ACTION_MASK == ADDED_FLAG:
791 791 md.mark_added(filename)
792 792 elif flag & ACTION_MASK == MERGED_FLAG:
793 793 md.mark_merged(filename)
794 794 elif flag & ACTION_MASK == REMOVED_FLAG:
795 795 md.mark_removed(filename)
796 796 elif flag & ACTION_MASK == SALVAGED_FLAG:
797 797 md.mark_salvaged(filename)
798 798 elif flag & ACTION_MASK == TOUCHED_FLAG:
799 799 md.mark_touched(filename)
800 800
801 801 copied = None
802 802 if flag & COPIED_MASK == COPIED_FROM_P1_FLAG:
803 803 copied = md.mark_copied_from_p1
804 804 elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG:
805 805 copied = md.mark_copied_from_p2
806 806
807 807 if copied is not None:
808 808 copies.append((copied, filename, copy_idx))
809 809
810 810 for copied, filename, copy_idx in copies:
811 811 copied(all_files[copy_idx], filename)
812 812
813 813 return md
814 814
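# Illustrative round trip (a sketch, not from this diff): encoding then
# decoding a ChangingFiles instance is lossless, including an untouched
# copy source that travels only as a filename entry:
#
#     files = ChangingFiles(added={b'new.txt'}, removed={b'gone.txt'},
#                           p1_copies={b'new.txt': b'old.txt'})
#     blob = encode_files_sidedata(files)
#     assert decode_files_sidedata(blob) == files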
815 815
816 816 def _getsidedata(srcrepo, rev):
817 817 ctx = srcrepo[rev]
818 818 files = compute_all_files_changes(ctx)
819 819 return encode_files_sidedata(files), files.has_copies_info
820 820
821 821
822 822 def copies_sidedata_computer(repo, revlog, rev, existing_sidedata):
823 return _getsidedata(repo, rev)[0]
823 sidedata, has_copies_info = _getsidedata(repo, rev)
824 flags_to_add = sidedataflag.REVIDX_HASCOPIESINFO if has_copies_info else 0
825 return sidedata, (flags_to_add, 0)
824 826
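# With this change the computer contract is to return a pair of
# (sidedata_dict, (flags_to_add, flags_to_remove)).  For this computer the
# removal word is always 0: REVIDX_HASCOPIESINFO is only ever added, and
# only when copies information is present.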
825 827
826 828 def set_sidedata_spec_for_repo(repo):
827 829 if requirementsmod.COPIESSDC_REQUIREMENT in repo.requirements:
828 830 repo.register_wanted_sidedata(sidedatamod.SD_FILES)
829 831 repo.register_sidedata_computer(
830 832 revlogconst.KIND_CHANGELOG,
831 833 sidedatamod.SD_FILES,
832 834 (sidedatamod.SD_FILES,),
833 835 copies_sidedata_computer,
836 sidedataflag.REVIDX_HASCOPIESINFO,
834 837 )
835 838
836 839
837 840 def getsidedataadder(srcrepo, destrepo):
838 841 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
839 842 if pycompat.iswindows or not use_w:
840 843 return _get_simple_sidedata_adder(srcrepo, destrepo)
841 844 else:
842 845 return _get_worker_sidedata_adder(srcrepo, destrepo)
843 846
844 847
845 848 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
846 849 """The function used by worker precomputing sidedata
847 850
848 851 It read an input queue containing revision numbers
849 852 It write in an output queue containing (rev, <sidedata-map>)
850 853
851 854 The `None` input value is used as a stop signal.
852 855
853 856 The `tokens` semaphore is user to avoid having too many unprocessed
854 857 entries. The workers needs to acquire one token before fetching a task.
855 858 They will be released by the consumer of the produced data.
856 859 """
857 860 tokens.acquire()
858 861 rev = revs_queue.get()
859 862 while rev is not None:
860 863 data = _getsidedata(srcrepo, rev)
861 864 sidedata_queue.put((rev, data))
862 865 tokens.acquire()
863 866 rev = revs_queue.get()
864 867 # processing of `None` is completed, release the token.
865 868 tokens.release()
866 869
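The token dance is easier to see in isolation. A minimal, self-contained sketch of the same bounded-semaphore pattern (names and the squaring "computation" are placeholders):

    import multiprocessing

    def _worker(in_q, out_q, tokens):
        tokens.acquire()                    # wait for room downstream
        task = in_q.get()
        while task is not None:
            out_q.put((task, task * task))  # placeholder computation
            tokens.acquire()
            task = in_q.get()
        tokens.release()                    # the None marker also cost a token

    if __name__ == '__main__':
        tokens = multiprocessing.BoundedSemaphore(4)  # <= 4 results in flight
        in_q, out_q = multiprocessing.Queue(), multiprocessing.Queue()
        for t in (1, 2, 3, None):           # tasks, then the stop marker
            in_q.put(t)
        p = multiprocessing.Process(target=_worker, args=(in_q, out_q, tokens))
        p.start()
        for _ in range(3):
            print(out_q.get())
            tokens.release()                # consuming a result frees a slot
        p.join()
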
867 870
868 871 BUFF_PER_WORKER = 50
869 872
870 873
871 874 def _get_worker_sidedata_adder(srcrepo, destrepo):
872 875 """The parallel version of the sidedata computation
873 876
874 877 This code spawns a pool of workers that precompute a buffer of sidedata
875 878 entries before we actually need them."""
876 879 # avoid circular import copies -> scmutil -> worker -> copies
877 880 from . import worker
878 881
879 882 nbworkers = worker._numworkers(srcrepo.ui)
880 883
881 884 tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
882 885 revsq = multiprocessing.Queue()
883 886 sidedataq = multiprocessing.Queue()
884 887
885 888 assert srcrepo.filtername is None
886 889 # queue all tasks beforehand; revision numbers are small and it makes
887 890 # synchronisation simpler
888 891 #
889 892 # Since the computation for each node can be quite expensive, the overhead
890 893 # of using a single queue is not relevant. In practice, most computations
891 894 # are fast but some are very expensive and dominate all the other, smaller
892 895 # costs.
893 896 for r in srcrepo.changelog.revs():
894 897 revsq.put(r)
895 898 # queue the "no more tasks" markers
896 899 for i in range(nbworkers):
897 900 revsq.put(None)
898 901
899 902 allworkers = []
900 903 for i in range(nbworkers):
901 904 args = (srcrepo, revsq, sidedataq, tokens)
902 905 w = multiprocessing.Process(target=_sidedata_worker, args=args)
903 906 allworkers.append(w)
904 907 w.start()
905 908
906 909 # dictionary to store results for revisions higher than the one we are
907 910 # looking for. For example, if we need the sidedata map for 42 and 43 is
908 911 # received, we shelve 43 for later use.
909 912 staging = {}
910 913
911 914 def sidedata_companion(revlog, rev):
912 915 data = {}, False
913 916 if util.safehasattr(revlog, b'filteredrevs'): # this is a changelog
914 917 # Was the data previously shelved?
915 918 data = staging.pop(rev, None)
916 919 if data is None:
917 920 # look at the queued results until we find the one we are looking
918 921 # for (shelving the other ones)
919 922 r, data = sidedataq.get()
920 923 while r != rev:
921 924 staging[r] = data
922 925 r, data = sidedataq.get()
923 926 tokens.release()
924 927 sidedata, has_copies_info = data
925 928 new_flag = 0
926 929 if has_copies_info:
927 930 new_flag = sidedataflag.REVIDX_HASCOPIESINFO
928 931 return False, (), sidedata, new_flag, 0
929 932
930 933 return sidedata_companion
931 934
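The shelving logic in sidedata_companion is a small reorder buffer over an unordered result queue. Extracted as a standalone helper (a sketch, not code from the change):

    def next_result(rev, queue, staging):
        # Return the result for `rev` from an unordered queue of
        # (rev, data) pairs, shelving any other results in `staging`
        # (a dict reused across calls).
        data = staging.pop(rev, None)
        if data is None:
            r, data = queue.get()
            while r != rev:
                staging[r] = data  # not the one we want yet; keep it
                r, data = queue.get()
        return data
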
932 935
933 936 def _get_simple_sidedata_adder(srcrepo, destrepo):
934 937 """The simple version of the sidedata computation
935 938
936 939 It just computes the sidedata in the same thread, on request"""
937 940
938 941 def sidedatacompanion(revlog, rev):
939 942 sidedata, has_copies_info = {}, False
940 943 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
941 944 sidedata, has_copies_info = _getsidedata(srcrepo, rev)
942 945 new_flag = 0
943 946 if has_copies_info:
944 947 new_flag = sidedataflag.REVIDX_HASCOPIESINFO
945 948
946 949 return False, (), sidedata, new_flag, 0
947 950
948 951 return sidedatacompanion
949 952
950 953
951 954 def getsidedataremover(srcrepo, destrepo):
952 955 def sidedatacompanion(revlog, rev):
953 956 f = ()
954 957 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
955 958 if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
956 959 f = (
957 960 sidedatamod.SD_P1COPIES,
958 961 sidedatamod.SD_P2COPIES,
959 962 sidedatamod.SD_FILESADDED,
960 963 sidedatamod.SD_FILESREMOVED,
961 964 )
962 965 return False, f, {}, 0, sidedataflag.REVIDX_HASCOPIESINFO
963 966
964 967 return sidedatacompanion
@@ -1,381 +1,384 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import (
14 14 nullrev,
15 15 sha1nodeconstants,
16 16 )
17 17 from .. import (
18 18 pycompat,
19 19 util,
20 20 )
21 21
22 22 from ..revlogutils import nodemap as nodemaputil
23 23 from ..revlogutils import constants as revlog_constants
24 24
25 25 stringio = pycompat.bytesio
26 26
27 27
28 28 _pack = struct.pack
29 29 _unpack = struct.unpack
30 30 _compress = zlib.compress
31 31 _decompress = zlib.decompress
32 32
33 33 # Some code below makes tuples directly because it's more convenient. However,
34 34 # code outside this module should always use dirstatetuple.
35 35 def dirstatetuple(*x):
36 36 # x is a tuple
37 37 return x
38 38
39 39
40 40 def gettype(q):
41 41 return int(q & 0xFFFF)
42 42
43 43
44 44 def offset_type(offset, type):
45 45 return int(int(offset) << 16 | type)
46 46
47 47
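These two helpers multiplex an index entry's first field: the byte offset lives in the high bits and the 16 flag bits in the low ones. A quick worked example using the definitions above:

    v = offset_type(1024, 0x0002)  # 1024-byte offset, flag bits 0x0002
    assert v == (1024 << 16) | 0x0002
    assert gettype(v) == 0x0002    # gettype() keeps the low 16 flag bits
    assert v >> 16 == 1024         # shifting right recovers the offset
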
48 48 class BaseIndexObject(object):
49 49 # Format of an index entry according to Python's `struct` language
50 50 index_format = revlog_constants.INDEX_ENTRY_V1
51 51 # Size of a C unsigned long long int, platform independent
52 52 big_int_size = struct.calcsize(b'>Q')
53 53 # Size of a C long int, platform independent
54 54 int_size = struct.calcsize(b'>i')
55 55 # An empty index entry, used as a default value to be overridden, or nullrev
56 56 null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
57 57
58 58 @util.propertycache
59 59 def entry_size(self):
60 60 return self.index_format.size
61 61
62 62 @property
63 63 def nodemap(self):
64 64 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
65 65 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
66 66 return self._nodemap
67 67
68 68 @util.propertycache
69 69 def _nodemap(self):
70 70 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
71 71 for r in range(0, len(self)):
72 72 n = self[r][7]
73 73 nodemap[n] = r
74 74 return nodemap
75 75
76 76 def has_node(self, node):
77 77 """return True if the node exist in the index"""
78 78 return node in self._nodemap
79 79
80 80 def rev(self, node):
81 81 """return a revision for a node
82 82
83 83 If the node is unknown, raise a RevlogError"""
84 84 return self._nodemap[node]
85 85
86 86 def get_rev(self, node):
87 87 """return a revision for a node
88 88
89 89 If the node is unknown, return None"""
90 90 return self._nodemap.get(node)
91 91
92 92 def _stripnodes(self, start):
93 93 if '_nodemap' in vars(self):
94 94 for r in range(start, len(self)):
95 95 n = self[r][7]
96 96 del self._nodemap[n]
97 97
98 98 def clearcaches(self):
99 99 self.__dict__.pop('_nodemap', None)
100 100
101 101 def __len__(self):
102 102 return self._lgt + len(self._extra)
103 103
104 104 def append(self, tup):
105 105 if '_nodemap' in vars(self):
106 106 self._nodemap[tup[7]] = len(self)
107 107 data = self.index_format.pack(*tup)
108 108 self._extra.append(data)
109 109
110 110 def _check_index(self, i):
111 111 if not isinstance(i, int):
112 112 raise TypeError(b"expecting int indexes")
113 113 if i < 0 or i >= len(self):
114 114 raise IndexError
115 115
116 116 def __getitem__(self, i):
117 117 if i == -1:
118 118 return self.null_item
119 119 self._check_index(i)
120 120 if i >= self._lgt:
121 121 data = self._extra[i - self._lgt]
122 122 else:
123 123 index = self._calculate_index(i)
124 124 data = self._data[index : index + self.entry_size]
125 125 r = self.index_format.unpack(data)
126 126 if self._lgt and i == 0:
127 127 r = (offset_type(0, gettype(r[0])),) + r[1:]
128 128 return r
129 129
130 130 def pack_header(self, header):
131 131 """pack header information as binary"""
132 132 v_fmt = revlog_constants.INDEX_HEADER
133 133 return v_fmt.pack(header)
134 134
135 135 def entry_binary(self, rev):
136 136 """return the raw binary string representing a revision"""
137 137 entry = self[rev]
138 138 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry)
139 139 if rev == 0:
140 140 p = p[revlog_constants.INDEX_HEADER.size :]
141 141 return p
142 142
143 143
144 144 class IndexObject(BaseIndexObject):
145 145 def __init__(self, data):
146 146 assert len(data) % self.entry_size == 0, (
147 147 len(data),
148 148 self.entry_size,
149 149 len(data) % self.entry_size,
150 150 )
151 151 self._data = data
152 152 self._lgt = len(data) // self.entry_size
153 153 self._extra = []
154 154
155 155 def _calculate_index(self, i):
156 156 return i * self.entry_size
157 157
158 158 def __delitem__(self, i):
159 159 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
160 160 raise ValueError(b"deleting slices only supports a:-1 with step 1")
161 161 i = i.start
162 162 self._check_index(i)
163 163 self._stripnodes(i)
164 164 if i < self._lgt:
165 165 self._data = self._data[: i * self.entry_size]
166 166 self._lgt = i
167 167 self._extra = []
168 168 else:
169 169 self._extra = self._extra[: i - self._lgt]
170 170
171 171
172 172 class PersistentNodeMapIndexObject(IndexObject):
173 173 """a Debug oriented class to test persistent nodemap
174 174
175 175 We need a simple python object to test API and higher level behavior. See
176 176 the Rust implementation for more serious usage. This should be used only
177 177 through the dedicated `devel.persistent-nodemap` config.
178 178 """
179 179
180 180 def nodemap_data_all(self):
181 181 """Return bytes containing a full serialization of a nodemap
182 182
183 183 The nodemap should be valid for the full set of revisions in the
184 184 index."""
185 185 return nodemaputil.persistent_data(self)
186 186
187 187 def nodemap_data_incremental(self):
188 188 """Return bytes containing a incremental update to persistent nodemap
189 189
190 190 This containst the data for an append-only update of the data provided
191 191 in the last call to `update_nodemap_data`.
192 192 """
193 193 if self._nm_root is None:
194 194 return None
195 195 docket = self._nm_docket
196 196 changed, data = nodemaputil.update_persistent_data(
197 197 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
198 198 )
199 199
200 200 self._nm_root = self._nm_max_idx = self._nm_docket = None
201 201 return docket, changed, data
202 202
203 203 def update_nodemap_data(self, docket, nm_data):
204 204 """provide full block of persisted binary data for a nodemap
205 205
206 206 The data is expected to come from disk. See `nodemap_data_all` for a
207 207 producer of such data."""
208 208 if nm_data is not None:
209 209 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
210 210 if self._nm_root:
211 211 self._nm_docket = docket
212 212 else:
213 213 self._nm_root = self._nm_max_idx = self._nm_docket = None
214 214
215 215
216 216 class InlinedIndexObject(BaseIndexObject):
217 217 def __init__(self, data, inline=0):
218 218 self._data = data
219 219 self._lgt = self._inline_scan(None)
220 220 self._inline_scan(self._lgt)
221 221 self._extra = []
222 222
223 223 def _inline_scan(self, lgt):
224 224 off = 0
225 225 if lgt is not None:
226 226 self._offsets = [0] * lgt
227 227 count = 0
228 228 while off <= len(self._data) - self.entry_size:
229 229 start = off + self.big_int_size
230 230 (s,) = struct.unpack(
231 231 b'>i',
232 232 self._data[start : start + self.int_size],
233 233 )
234 234 if lgt is not None:
235 235 self._offsets[count] = off
236 236 count += 1
237 237 off += self.entry_size + s
238 238 if off != len(self._data):
239 239 raise ValueError(b"corrupted data")
240 240 return count
241 241
242 242 def __delitem__(self, i):
243 243 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
244 244 raise ValueError(b"deleting slices only supports a:-1 with step 1")
245 245 i = i.start
246 246 self._check_index(i)
247 247 self._stripnodes(i)
248 248 if i < self._lgt:
249 249 self._offsets = self._offsets[:i]
250 250 self._lgt = i
251 251 self._extra = []
252 252 else:
253 253 self._extra = self._extra[: i - self._lgt]
254 254
255 255 def _calculate_index(self, i):
256 256 return self._offsets[i]
257 257
258 258
259 259 def parse_index2(data, inline, revlogv2=False):
260 260 if not inline:
261 261 cls = IndexObject2 if revlogv2 else IndexObject
262 262 return cls(data), None
263 263 cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
264 264 return cls(data, inline), (0, data)
265 265
266 266
267 267 class Index2Mixin(object):
268 268 index_format = revlog_constants.INDEX_ENTRY_V2
269 269 null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
270 270
271 def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
271 def replace_sidedata_info(
272 self, i, sidedata_offset, sidedata_length, offset_flags
273 ):
272 274 """
273 275 Replace an existing index entry's sidedata offset and length with new
274 276 ones.
275 277 This cannot be used outside of the context of sidedata rewriting,
276 278 inside the transaction that creates the revision `i`.
277 279 """
278 280 if i < 0:
279 281 raise KeyError
280 282 self._check_index(i)
281 283 sidedata_format = b">Qi"
282 284 packed_size = struct.calcsize(sidedata_format)
283 285 if i >= self._lgt:
284 286 packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
285 287 old = self._extra[i - self._lgt]
286 new = old[:64] + packed + old[64 + packed_size :]
288 offset_flags = struct.pack(b">Q", offset_flags)
289 new = offset_flags + old[8:64] + packed + old[64 + packed_size :]
287 290 self._extra[i - self._lgt] = new
288 291 else:
289 292 msg = b"cannot rewrite entries outside of this transaction"
290 293 raise KeyError(msg)
291 294
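The patching above relies on the packed v2 entry layout: the first 8 bytes hold the offset/flags word that `offset_flags` replaces, and the big-endian ">Qi" pair starting at byte 64 holds the sidedata offset and length; only entries created in the current transaction (stored in self._extra) may be rewritten. A minimal sketch of the byte surgery, mirroring the method above:

    import struct

    def patch_entry(old, sidedata_offset, sidedata_length, offset_flags):
        # `old` is one packed v2 index entry. Rebuild it with a new
        # offset/flags word (bytes 0..8) and new sidedata offset/length
        # (the ">Qi" pair at byte 64), leaving everything else untouched.
        packed = struct.pack(b">Qi", sidedata_offset, sidedata_length)
        head = struct.pack(b">Q", offset_flags)
        return head + old[8:64] + packed + old[64 + len(packed):]
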
292 295 def entry_binary(self, rev):
293 296 """return the raw binary string representing a revision"""
294 297 entry = self[rev]
295 298 p = revlog_constants.INDEX_ENTRY_V2.pack(*entry)
296 299 if rev == 0:
297 300 p = p[revlog_constants.INDEX_HEADER.size :]
298 301 return p
299 302
300 303
301 304 class IndexObject2(Index2Mixin, IndexObject):
302 305 pass
303 306
304 307
305 308 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
306 309 def _inline_scan(self, lgt):
307 310 sidedata_length_pos = 72
308 311 off = 0
309 312 if lgt is not None:
310 313 self._offsets = [0] * lgt
311 314 count = 0
312 315 while off <= len(self._data) - self.entry_size:
313 316 start = off + self.big_int_size
314 317 (data_size,) = struct.unpack(
315 318 b'>i',
316 319 self._data[start : start + self.int_size],
317 320 )
318 321 start = off + sidedata_length_pos
319 322 (side_data_size,) = struct.unpack(
320 323 b'>i', self._data[start : start + self.int_size]
321 324 )
322 325 if lgt is not None:
323 326 self._offsets[count] = off
324 327 count += 1
325 328 off += self.entry_size + data_size + side_data_size
326 329 if off != len(self._data):
327 330 raise ValueError(b"corrupted data")
328 331 return count
329 332
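Each inline record is entry_size bytes of index entry followed by its compressed revision data and, in v2, its sidedata; the scan hops from record to record using the two length fields. A standalone sketch of the same walk (offsets 8 and 72 match big_int_size and sidedata_length_pos above):

    import struct

    def scan_inline_v2(data, entry_size):
        # Collect the starting offset of every inline record. The
        # compressed-data length is a big-endian int at byte 8 of each
        # entry, the sidedata length at byte 72.
        off, offsets = 0, []
        while off <= len(data) - entry_size:
            offsets.append(off)
            (data_size,) = struct.unpack(b'>i', data[off + 8 : off + 12])
            (side_size,) = struct.unpack(b'>i', data[off + 72 : off + 76])
            off += entry_size + data_size + side_size
        if off != len(data):
            raise ValueError(b"corrupted data")
        return offsets
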
330 333
331 334 def parse_index_devel_nodemap(data, inline):
332 335 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
333 336 return PersistentNodeMapIndexObject(data), None
334 337
335 338
336 339 def parse_dirstate(dmap, copymap, st):
337 340 parents = [st[:20], st[20:40]]
338 341 # dereference fields so they will be local in loop
339 342 format = b">cllll"
340 343 e_size = struct.calcsize(format)
341 344 pos1 = 40
342 345 l = len(st)
343 346
344 347 # the inner loop
345 348 while pos1 < l:
346 349 pos2 = pos1 + e_size
347 350 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
348 351 pos1 = pos2 + e[4]
349 352 f = st[pos2:pos1]
350 353 if b'\0' in f:
351 354 f, c = f.split(b'\0')
352 355 copymap[f] = c
353 356 dmap[f] = e[:4]
354 357 return parents
355 358
356 359
357 360 def pack_dirstate(dmap, copymap, pl, now):
358 361 now = int(now)
359 362 cs = stringio()
360 363 write = cs.write
361 364 write(b"".join(pl))
362 365 for f, e in pycompat.iteritems(dmap):
363 366 if e[0] == b'n' and e[3] == now:
364 367 # The file was last modified "simultaneously" with the current
365 368 # write to dirstate (i.e. within the same second for file-
366 369 # systems with a granularity of 1 sec). This commonly happens
367 370 # for at least a couple of files on 'update'.
368 371 # The user could change the file without changing its size
369 372 # within the same second. Invalidate the file's mtime in
370 373 # dirstate, forcing future 'status' calls to compare the
371 374 # contents of the file if the size is the same. This prevents
372 375 # mistakenly treating such files as clean.
373 376 e = dirstatetuple(e[0], e[1], e[2], -1)
374 377 dmap[f] = e
375 378
376 379 if f in copymap:
377 380 f = b"%s\0%s" % (f, copymap[f])
378 381 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
379 382 write(e)
380 383 write(f)
381 384 return cs.getvalue()
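A concrete trace of the mtime invalidation above (values are illustrative):

    now = 100
    e = (b'n', 0o644, 10, 100)         # state, mode, size, mtime
    if e[0] == b'n' and e[3] == now:   # written within the current second
        e = e[:3] + (-1,)              # invalidate the stored mtime
    assert e == (b'n', 0o644, 10, -1)  # later 'status' must compare contents
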
@@ -1,3141 +1,3145 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 INDEX_HEADER,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_HASCOPIESINFO,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 )
59 59 from .thirdparty import attr
60 60 from . import (
61 61 ancestor,
62 62 dagop,
63 63 error,
64 64 mdiff,
65 65 policy,
66 66 pycompat,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .interfaces import (
71 71 repository,
72 72 util as interfaceutil,
73 73 )
74 74 from .revlogutils import (
75 75 deltas as deltautil,
76 76 flagutil,
77 77 nodemap as nodemaputil,
78 78 revlogv0,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanket usage of all the names to prevent pyflakes complaints
87 87 # We need these names available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_HASCOPIESINFO
101 101 REVIDX_EXTSTORED
102 102 REVIDX_DEFAULT_FLAGS
103 103 REVIDX_FLAGS_ORDER
104 104 REVIDX_RAWTEXT_CHANGING_FLAGS
105 105
106 106 parsers = policy.importmod('parsers')
107 107 rustancestor = policy.importrust('ancestor')
108 108 rustdagop = policy.importrust('dagop')
109 109 rustrevlog = policy.importrust('revlog')
110 110
111 111 # Aliased for performance.
112 112 _zlibdecompress = zlib.decompress
113 113
114 114 # max size of revlog with inline data
115 115 _maxinline = 131072
116 116 _chunksize = 1048576
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False
121 121
122 122
123 123 def ellipsiswriteprocessor(rl, text):
124 124 return text, False
125 125
126 126
127 127 def ellipsisrawprocessor(rl, text):
128 128 return False
129 129
130 130
131 131 ellipsisprocessor = (
132 132 ellipsisreadprocessor,
133 133 ellipsiswriteprocessor,
134 134 ellipsisrawprocessor,
135 135 )
136 136
137 137
138 138 def offset_type(offset, type):
139 139 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
140 140 raise ValueError(b'unknown revlog index flags')
141 141 return int(int(offset) << 16 | type)
142 142
143 143
144 144 def _verify_revision(rl, skipflags, state, node):
145 145 """Verify the integrity of the given revlog ``node`` while providing a hook
146 146 point for extensions to influence the operation."""
147 147 if skipflags:
148 148 state[b'skipread'].add(node)
149 149 else:
150 150 # Side-effect: read content and verify hash.
151 151 rl.revision(node)
152 152
153 153
154 154 # True if a fast implementation for persistent-nodemap is available
155 155 #
156 156 # We also consider that we have a "fast" implementation in "pure" python,
157 157 # because people using pure don't really have performance considerations
158 158 # (and a wheelbarrow of other slowness sources)
159 159 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
160 160 parsers, 'BaseIndexObject'
161 161 )
162 162
163 163
164 164 @attr.s(slots=True, frozen=True)
165 165 class _revisioninfo(object):
166 166 """Information about a revision that allows building its fulltext
167 167 node: expected hash of the revision
168 168 p1, p2: parent revs of the revision
169 169 btext: built text cache consisting of a one-element list
170 170 cachedelta: (baserev, uncompressed_delta) or None
171 171 flags: flags associated to the revision storage
172 172
173 173 One of btext[0] or cachedelta must be set.
174 174 """
175 175
176 176 node = attr.ib()
177 177 p1 = attr.ib()
178 178 p2 = attr.ib()
179 179 btext = attr.ib()
180 180 textlen = attr.ib()
181 181 cachedelta = attr.ib()
182 182 flags = attr.ib()
183 183
184 184
185 185 @interfaceutil.implementer(repository.irevisiondelta)
186 186 @attr.s(slots=True)
187 187 class revlogrevisiondelta(object):
188 188 node = attr.ib()
189 189 p1node = attr.ib()
190 190 p2node = attr.ib()
191 191 basenode = attr.ib()
192 192 flags = attr.ib()
193 193 baserevisionsize = attr.ib()
194 194 revision = attr.ib()
195 195 delta = attr.ib()
196 196 sidedata = attr.ib()
197 197 protocol_flags = attr.ib()
198 198 linknode = attr.ib(default=None)
199 199
200 200
201 201 @interfaceutil.implementer(repository.iverifyproblem)
202 202 @attr.s(frozen=True)
203 203 class revlogproblem(object):
204 204 warning = attr.ib(default=None)
205 205 error = attr.ib(default=None)
206 206 node = attr.ib(default=None)
207 207
208 208
209 209 def parse_index_v1(data, inline):
210 210 # call the C implementation to parse the index data
211 211 index, cache = parsers.parse_index2(data, inline)
212 212 return index, cache
213 213
214 214
215 215 def parse_index_v2(data, inline):
216 216 # call the C implementation to parse the index data
217 217 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
218 218 return index, cache
219 219
220 220
221 221 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
222 222
223 223 def parse_index_v1_nodemap(data, inline):
224 224 index, cache = parsers.parse_index_devel_nodemap(data, inline)
225 225 return index, cache
226 226
227 227
228 228 else:
229 229 parse_index_v1_nodemap = None
230 230
231 231
232 232 def parse_index_v1_mixed(data, inline):
233 233 index, cache = parse_index_v1(data, inline)
234 234 return rustrevlog.MixedIndex(index), cache
235 235
236 236
237 237 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
238 238 # signed integer)
239 239 _maxentrysize = 0x7FFFFFFF
240 240
241 241
242 242 class revlog(object):
243 243 """
244 244 the underlying revision storage object
245 245
246 246 A revlog consists of two parts, an index and the revision data.
247 247
248 248 The index is a file with a fixed record size containing
249 249 information on each revision, including its nodeid (hash), the
250 250 nodeids of its parents, the position and offset of its data within
251 251 the data file, and the revision it's based on. Finally, each entry
252 252 contains a linkrev entry that can serve as a pointer to external
253 253 data.
254 254
255 255 The revision data itself is a linear collection of data chunks.
256 256 Each chunk represents a revision and is usually represented as a
257 257 delta against the previous chunk. To bound lookup time, runs of
258 258 deltas are limited to about 2 times the length of the original
259 259 version data. This makes retrieval of a version proportional to
260 260 its size, or O(1) relative to the number of revisions.
261 261
262 262 Both pieces of the revlog are written to in an append-only
263 263 fashion, which means we never need to rewrite a file to insert or
264 264 remove data, and can use some simple techniques to avoid the need
265 265 for locking while reading.
266 266
267 267 If checkambig, indexfile is opened with checkambig=True at
268 268 writing, to avoid file stat ambiguity.
269 269
270 270 If mmaplargeindex is True, and an mmapindexthreshold is set, the
271 271 index will be mmapped rather than read if it is larger than the
272 272 configured threshold.
273 273
274 274 If censorable is True, the revlog can have censored revisions.
275 275
276 276 If `upperboundcomp` is not None, this is the expected maximal gain from
277 277 compression for the data content.
278 278
279 279 `concurrencychecker` is an optional function that receives 3 arguments: a
280 280 file handle, a filename, and an expected position. It should check whether
281 281 the current position in the file handle is valid, and log/warn/fail (by
282 282 raising).
283 283 """
284 284
285 285 _flagserrorclass = error.RevlogError
286 286
287 287 def __init__(
288 288 self,
289 289 opener,
290 290 target,
291 291 indexfile=None,
292 292 datafile=None,
293 293 checkambig=False,
294 294 mmaplargeindex=False,
295 295 censorable=False,
296 296 upperboundcomp=None,
297 297 persistentnodemap=False,
298 298 concurrencychecker=None,
299 299 ):
300 300 """
301 301 create a revlog object
302 302
303 303 opener is a function that abstracts the file opening operation
304 304 and can be used to implement COW semantics or the like.
305 305
306 306 `target`: a (KIND, ID) tuple that identifies the content stored in
307 307 this revlog. It helps the rest of the code understand what the revlog
308 308 is about without having to resort to heuristics and index filename
309 309 analysis. Note that this must reliably be set by normal code, but
310 310 test, debug, or performance measurement code might not set it to an
311 311 accurate value.
312 312 """
313 313 self.upperboundcomp = upperboundcomp
314 314 self.indexfile = indexfile
315 315 self.datafile = datafile or (indexfile[:-2] + b".d")
316 316 self.nodemap_file = None
317 317 if persistentnodemap:
318 318 self.nodemap_file = nodemaputil.get_nodemap_file(
319 319 opener, self.indexfile
320 320 )
321 321
322 322 self.opener = opener
323 323 assert target[0] in ALL_KINDS
324 324 assert len(target) == 2
325 325 self.target = target
326 326 # When True, indexfile is opened with checkambig=True at writing, to
327 327 # avoid file stat ambiguity.
328 328 self._checkambig = checkambig
329 329 self._mmaplargeindex = mmaplargeindex
330 330 self._censorable = censorable
331 331 # 3-tuple of (node, rev, text) for a raw revision.
332 332 self._revisioncache = None
333 333 # Maps rev to chain base rev.
334 334 self._chainbasecache = util.lrucachedict(100)
335 335 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
336 336 self._chunkcache = (0, b'')
337 337 # How much data to read and cache into the raw revlog data cache.
338 338 self._chunkcachesize = 65536
339 339 self._maxchainlen = None
340 340 self._deltabothparents = True
341 341 self.index = None
342 342 self._nodemap_docket = None
343 343 # Mapping of partial identifiers to full nodes.
344 344 self._pcache = {}
345 345 # Mapping of revision integer to full node.
346 346 self._compengine = b'zlib'
347 347 self._compengineopts = {}
348 348 self._maxdeltachainspan = -1
349 349 self._withsparseread = False
350 350 self._sparserevlog = False
351 351 self._srdensitythreshold = 0.50
352 352 self._srmingapsize = 262144
353 353
354 354 # Make copy of flag processors so each revlog instance can support
355 355 # custom flags.
356 356 self._flagprocessors = dict(flagutil.flagprocessors)
357 357
358 358 # 2-tuple of file handles being used for active writing.
359 359 self._writinghandles = None
360 360
361 361 self._loadindex()
362 362
363 363 self._concurrencychecker = concurrencychecker
364 364
365 365 def _loadindex(self):
366 366 mmapindexthreshold = None
367 367 opts = self.opener.options
368 368
369 369 if b'revlogv2' in opts:
370 370 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
371 371 elif b'revlogv1' in opts:
372 372 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
373 373 if b'generaldelta' in opts:
374 374 newversionflags |= FLAG_GENERALDELTA
375 375 elif b'revlogv0' in self.opener.options:
376 376 newversionflags = REVLOGV0
377 377 else:
378 378 newversionflags = REVLOG_DEFAULT_VERSION
379 379
380 380 if b'chunkcachesize' in opts:
381 381 self._chunkcachesize = opts[b'chunkcachesize']
382 382 if b'maxchainlen' in opts:
383 383 self._maxchainlen = opts[b'maxchainlen']
384 384 if b'deltabothparents' in opts:
385 385 self._deltabothparents = opts[b'deltabothparents']
386 386 self._lazydelta = bool(opts.get(b'lazydelta', True))
387 387 self._lazydeltabase = False
388 388 if self._lazydelta:
389 389 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
390 390 if b'compengine' in opts:
391 391 self._compengine = opts[b'compengine']
392 392 if b'zlib.level' in opts:
393 393 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
394 394 if b'zstd.level' in opts:
395 395 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
396 396 if b'maxdeltachainspan' in opts:
397 397 self._maxdeltachainspan = opts[b'maxdeltachainspan']
398 398 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
399 399 mmapindexthreshold = opts[b'mmapindexthreshold']
400 400 self.hassidedata = bool(opts.get(b'side-data', False))
401 401 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
402 402 withsparseread = bool(opts.get(b'with-sparse-read', False))
403 403 # sparse-revlog forces sparse-read
404 404 self._withsparseread = self._sparserevlog or withsparseread
405 405 if b'sparse-read-density-threshold' in opts:
406 406 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
407 407 if b'sparse-read-min-gap-size' in opts:
408 408 self._srmingapsize = opts[b'sparse-read-min-gap-size']
409 409 if opts.get(b'enableellipsis'):
410 410 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
411 411
412 412 # revlog v0 doesn't have flag processors
413 413 for flag, processor in pycompat.iteritems(
414 414 opts.get(b'flagprocessors', {})
415 415 ):
416 416 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
417 417
418 418 if self._chunkcachesize <= 0:
419 419 raise error.RevlogError(
420 420 _(b'revlog chunk cache size %r is not greater than 0')
421 421 % self._chunkcachesize
422 422 )
423 423 elif self._chunkcachesize & (self._chunkcachesize - 1):
424 424 raise error.RevlogError(
425 425 _(b'revlog chunk cache size %r is not a power of 2')
426 426 % self._chunkcachesize
427 427 )
428 428
429 429 indexdata = b''
430 430 self._initempty = True
431 431 try:
432 432 with self._indexfp() as f:
433 433 if (
434 434 mmapindexthreshold is not None
435 435 and self.opener.fstat(f).st_size >= mmapindexthreshold
436 436 ):
437 437 # TODO: should .close() to release resources without
438 438 # relying on Python GC
439 439 indexdata = util.buffer(util.mmapread(f))
440 440 else:
441 441 indexdata = f.read()
442 442 if len(indexdata) > 0:
443 443 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
444 444 self._initempty = False
445 445 else:
446 446 versionflags = newversionflags
447 447 except IOError as inst:
448 448 if inst.errno != errno.ENOENT:
449 449 raise
450 450
451 451 versionflags = newversionflags
452 452
453 453 self.version = versionflags
454 454
455 455 flags = versionflags & ~0xFFFF
456 456 fmt = versionflags & 0xFFFF
457 457
458 458 if fmt == REVLOGV0:
459 459 if flags:
460 460 raise error.RevlogError(
461 461 _(b'unknown flags (%#04x) in version %d revlog %s')
462 462 % (flags >> 16, fmt, self.indexfile)
463 463 )
464 464
465 465 self._inline = False
466 466 self._generaldelta = False
467 467
468 468 elif fmt == REVLOGV1:
469 469 if flags & ~REVLOGV1_FLAGS:
470 470 raise error.RevlogError(
471 471 _(b'unknown flags (%#04x) in version %d revlog %s')
472 472 % (flags >> 16, fmt, self.indexfile)
473 473 )
474 474
475 475 self._inline = versionflags & FLAG_INLINE_DATA
476 476 self._generaldelta = versionflags & FLAG_GENERALDELTA
477 477
478 478 elif fmt == REVLOGV2:
479 479 if flags & ~REVLOGV2_FLAGS:
480 480 raise error.RevlogError(
481 481 _(b'unknown flags (%#04x) in version %d revlog %s')
482 482 % (flags >> 16, fmt, self.indexfile)
483 483 )
484 484
485 485 # There is a bug in the transaction handling when going from an
486 486 # inline revlog to a separate index and data file. Turn it off until
487 487 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
488 488 # See issue6485
489 489 self._inline = False
490 490 # generaldelta implied by version 2 revlogs.
491 491 self._generaldelta = True
492 492
493 493 else:
494 494 raise error.RevlogError(
495 495 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
496 496 )
497 497
498 498 self.nodeconstants = sha1nodeconstants
499 499 self.nullid = self.nodeconstants.nullid
500 500
501 501 # sparse-revlog can't be on without general-delta (issue6056)
502 502 if not self._generaldelta:
503 503 self._sparserevlog = False
504 504
505 505 self._storedeltachains = True
506 506
507 507 devel_nodemap = (
508 508 self.nodemap_file
509 509 and opts.get(b'devel-force-nodemap', False)
510 510 and parse_index_v1_nodemap is not None
511 511 )
512 512
513 513 use_rust_index = False
514 514 if rustrevlog is not None:
515 515 if self.nodemap_file is not None:
516 516 use_rust_index = True
517 517 else:
518 518 use_rust_index = self.opener.options.get(b'rust.index')
519 519
520 520 self._parse_index = parse_index_v1
521 521 if self.version == REVLOGV0:
522 522 self._parse_index = revlogv0.parse_index_v0
523 523 elif fmt == REVLOGV2:
524 524 self._parse_index = parse_index_v2
525 525 elif devel_nodemap:
526 526 self._parse_index = parse_index_v1_nodemap
527 527 elif use_rust_index:
528 528 self._parse_index = parse_index_v1_mixed
529 529 try:
530 530 d = self._parse_index(indexdata, self._inline)
531 531 index, _chunkcache = d
532 532 use_nodemap = (
533 533 not self._inline
534 534 and self.nodemap_file is not None
535 535 and util.safehasattr(index, 'update_nodemap_data')
536 536 )
537 537 if use_nodemap:
538 538 nodemap_data = nodemaputil.persisted_data(self)
539 539 if nodemap_data is not None:
540 540 docket = nodemap_data[0]
541 541 if (
542 542 len(d[0]) > docket.tip_rev
543 543 and d[0][docket.tip_rev][7] == docket.tip_node
544 544 ):
545 545 # no changelog tampering
546 546 self._nodemap_docket = docket
547 547 index.update_nodemap_data(*nodemap_data)
548 548 except (ValueError, IndexError):
549 549 raise error.RevlogError(
550 550 _(b"index %s is corrupted") % self.indexfile
551 551 )
552 552 self.index, self._chunkcache = d
553 553 if not self._chunkcache:
554 554 self._chunkclear()
555 555 # revnum -> (chain-length, sum-delta-length)
556 556 self._chaininfocache = util.lrucachedict(500)
557 557 # revlog header -> revlog compressor
558 558 self._decompressors = {}
559 559
560 560 @util.propertycache
561 561 def revlog_kind(self):
562 562 return self.target[0]
563 563
564 564 @util.propertycache
565 565 def _compressor(self):
566 566 engine = util.compengines[self._compengine]
567 567 return engine.revlogcompressor(self._compengineopts)
568 568
569 569 def _indexfp(self, mode=b'r'):
570 570 """file object for the revlog's index file"""
571 571 args = {'mode': mode}
572 572 if mode != b'r':
573 573 args['checkambig'] = self._checkambig
574 574 if mode == b'w':
575 575 args['atomictemp'] = True
576 576 return self.opener(self.indexfile, **args)
577 577
578 578 def _datafp(self, mode=b'r'):
579 579 """file object for the revlog's data file"""
580 580 return self.opener(self.datafile, mode=mode)
581 581
582 582 @contextlib.contextmanager
583 583 def _datareadfp(self, existingfp=None):
584 584 """file object suitable to read data"""
585 585 # Use explicit file handle, if given.
586 586 if existingfp is not None:
587 587 yield existingfp
588 588
589 589 # Use a file handle being actively used for writes, if available.
590 590 # There is some danger to doing this because reads will seek the
591 591 # file. However, _writeentry() performs a SEEK_END before all writes,
592 592 # so we should be safe.
593 593 elif self._writinghandles:
594 594 if self._inline:
595 595 yield self._writinghandles[0]
596 596 else:
597 597 yield self._writinghandles[1]
598 598
599 599 # Otherwise open a new file handle.
600 600 else:
601 601 if self._inline:
602 602 func = self._indexfp
603 603 else:
604 604 func = self._datafp
605 605 with func() as fp:
606 606 yield fp
607 607
608 608 def tiprev(self):
609 609 return len(self.index) - 1
610 610
611 611 def tip(self):
612 612 return self.node(self.tiprev())
613 613
614 614 def __contains__(self, rev):
615 615 return 0 <= rev < len(self)
616 616
617 617 def __len__(self):
618 618 return len(self.index)
619 619
620 620 def __iter__(self):
621 621 return iter(pycompat.xrange(len(self)))
622 622
623 623 def revs(self, start=0, stop=None):
624 624 """iterate over all rev in this revlog (from start to stop)"""
625 625 return storageutil.iterrevs(len(self), start=start, stop=stop)
626 626
627 627 @property
628 628 def nodemap(self):
629 629 msg = (
630 630 b"revlog.nodemap is deprecated, "
631 631 b"use revlog.index.[has_node|rev|get_rev]"
632 632 )
633 633 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
634 634 return self.index.nodemap
635 635
636 636 @property
637 637 def _nodecache(self):
638 638 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
639 639 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
640 640 return self.index.nodemap
641 641
642 642 def hasnode(self, node):
643 643 try:
644 644 self.rev(node)
645 645 return True
646 646 except KeyError:
647 647 return False
648 648
649 649 def candelta(self, baserev, rev):
650 650 """whether two revisions (baserev, rev) can be delta-ed or not"""
651 651 # Disable delta if either rev requires a content-changing flag
652 652 # processor (ex. LFS). This is because such flag processor can alter
653 653 # the rawtext content that the delta will be based on, and two clients
654 654 # could have a same revlog node with different flags (i.e. different
655 655 # rawtext contents) and the delta could be incompatible.
656 656 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
657 657 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
658 658 ):
659 659 return False
660 660 return True
661 661
662 662 def update_caches(self, transaction):
663 663 if self.nodemap_file is not None:
664 664 if transaction is None:
665 665 nodemaputil.update_persistent_nodemap(self)
666 666 else:
667 667 nodemaputil.setup_persistent_nodemap(transaction, self)
668 668
669 669 def clearcaches(self):
670 670 self._revisioncache = None
671 671 self._chainbasecache.clear()
672 672 self._chunkcache = (0, b'')
673 673 self._pcache = {}
674 674 self._nodemap_docket = None
675 675 self.index.clearcaches()
676 676 # The python code is the one responsible for validating the docket, so we
677 677 # end up having to refresh it here.
678 678 use_nodemap = (
679 679 not self._inline
680 680 and self.nodemap_file is not None
681 681 and util.safehasattr(self.index, 'update_nodemap_data')
682 682 )
683 683 if use_nodemap:
684 684 nodemap_data = nodemaputil.persisted_data(self)
685 685 if nodemap_data is not None:
686 686 self._nodemap_docket = nodemap_data[0]
687 687 self.index.update_nodemap_data(*nodemap_data)
688 688
689 689 def rev(self, node):
690 690 try:
691 691 return self.index.rev(node)
692 692 except TypeError:
693 693 raise
694 694 except error.RevlogError:
695 695 # parsers.c radix tree lookup failed
696 696 if (
697 697 node == self.nodeconstants.wdirid
698 698 or node in self.nodeconstants.wdirfilenodeids
699 699 ):
700 700 raise error.WdirUnsupported
701 701 raise error.LookupError(node, self.indexfile, _(b'no node'))
702 702
703 703 # Accessors for index entries.
704 704
705 705 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
706 706 # are flags.
707 707 def start(self, rev):
708 708 return int(self.index[rev][0] >> 16)
709 709
710 710 def flags(self, rev):
711 711 return self.index[rev][0] & 0xFFFF
712 712
713 713 def length(self, rev):
714 714 return self.index[rev][1]
715 715
716 716 def sidedata_length(self, rev):
717 717 if self.version & 0xFFFF != REVLOGV2:
718 718 return 0
719 719 return self.index[rev][9]
720 720
721 721 def rawsize(self, rev):
722 722 """return the length of the uncompressed text for a given revision"""
723 723 l = self.index[rev][2]
724 724 if l >= 0:
725 725 return l
726 726
727 727 t = self.rawdata(rev)
728 728 return len(t)
729 729
730 730 def size(self, rev):
731 731 """length of non-raw text (processed by a "read" flag processor)"""
732 732 # fast path: if no "read" flag processor could change the content,
733 733 # size is rawsize. note: ELLIPSIS is known to not change the content.
734 734 flags = self.flags(rev)
735 735 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
736 736 return self.rawsize(rev)
737 737
738 738 return len(self.revision(rev, raw=False))
739 739
740 740 def chainbase(self, rev):
741 741 base = self._chainbasecache.get(rev)
742 742 if base is not None:
743 743 return base
744 744
745 745 index = self.index
746 746 iterrev = rev
747 747 base = index[iterrev][3]
748 748 while base != iterrev:
749 749 iterrev = base
750 750 base = index[iterrev][3]
751 751
752 752 self._chainbasecache[rev] = base
753 753 return base
754 754
755 755 def linkrev(self, rev):
756 756 return self.index[rev][4]
757 757
758 758 def parentrevs(self, rev):
759 759 try:
760 760 entry = self.index[rev]
761 761 except IndexError:
762 762 if rev == wdirrev:
763 763 raise error.WdirUnsupported
764 764 raise
765 765 if entry[5] == nullrev:
766 766 return entry[6], entry[5]
767 767 else:
768 768 return entry[5], entry[6]
769 769
770 770 # fast parentrevs(rev) where rev isn't filtered
771 771 _uncheckedparentrevs = parentrevs
772 772
773 773 def node(self, rev):
774 774 try:
775 775 return self.index[rev][7]
776 776 except IndexError:
777 777 if rev == wdirrev:
778 778 raise error.WdirUnsupported
779 779 raise
780 780
781 781 # Derived from index values.
782 782
783 783 def end(self, rev):
784 784 return self.start(rev) + self.length(rev)
785 785
786 786 def parents(self, node):
787 787 i = self.index
788 788 d = i[self.rev(node)]
789 789 # inline node() to avoid function call overhead
790 790 if d[5] == self.nullid:
791 791 return i[d[6]][7], i[d[5]][7]
792 792 else:
793 793 return i[d[5]][7], i[d[6]][7]
794 794
795 795 def chainlen(self, rev):
796 796 return self._chaininfo(rev)[0]
797 797
798 798 def _chaininfo(self, rev):
799 799 chaininfocache = self._chaininfocache
800 800 if rev in chaininfocache:
801 801 return chaininfocache[rev]
802 802 index = self.index
803 803 generaldelta = self._generaldelta
804 804 iterrev = rev
805 805 e = index[iterrev]
806 806 clen = 0
807 807 compresseddeltalen = 0
808 808 while iterrev != e[3]:
809 809 clen += 1
810 810 compresseddeltalen += e[1]
811 811 if generaldelta:
812 812 iterrev = e[3]
813 813 else:
814 814 iterrev -= 1
815 815 if iterrev in chaininfocache:
816 816 t = chaininfocache[iterrev]
817 817 clen += t[0]
818 818 compresseddeltalen += t[1]
819 819 break
820 820 e = index[iterrev]
821 821 else:
822 822 # Add text length of base since decompressing that also takes
823 823 # work. For cache hits the length is already included.
824 824 compresseddeltalen += e[1]
825 825 r = (clen, compresseddeltalen)
826 826 chaininfocache[rev] = r
827 827 return r
828 828
829 829 def _deltachain(self, rev, stoprev=None):
830 830 """Obtain the delta chain for a revision.
831 831
832 832 ``stoprev`` specifies a revision to stop at. If not specified, we
833 833 stop at the base of the chain.
834 834
835 835 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
836 836 revs in ascending order and ``stopped`` is a bool indicating whether
837 837 ``stoprev`` was hit.
838 838 """
839 839 # Try C implementation.
840 840 try:
841 841 return self.index.deltachain(rev, stoprev, self._generaldelta)
842 842 except AttributeError:
843 843 pass
844 844
845 845 chain = []
846 846
847 847 # Alias to prevent attribute lookup in tight loop.
848 848 index = self.index
849 849 generaldelta = self._generaldelta
850 850
851 851 iterrev = rev
852 852 e = index[iterrev]
853 853 while iterrev != e[3] and iterrev != stoprev:
854 854 chain.append(iterrev)
855 855 if generaldelta:
856 856 iterrev = e[3]
857 857 else:
858 858 iterrev -= 1
859 859 e = index[iterrev]
860 860
861 861 if iterrev == stoprev:
862 862 stopped = True
863 863 else:
864 864 chain.append(iterrev)
865 865 stopped = False
866 866
867 867 chain.reverse()
868 868 return chain, stopped
869 869
870 870 def ancestors(self, revs, stoprev=0, inclusive=False):
871 871 """Generate the ancestors of 'revs' in reverse revision order.
872 872 Does not generate revs lower than stoprev.
873 873
874 874 See the documentation for ancestor.lazyancestors for more details."""
875 875
876 876 # first, make sure start revisions aren't filtered
877 877 revs = list(revs)
878 878 checkrev = self.node
879 879 for r in revs:
880 880 checkrev(r)
881 881 # and we're sure ancestors aren't filtered as well
882 882
883 883 if rustancestor is not None:
884 884 lazyancestors = rustancestor.LazyAncestors
885 885 arg = self.index
886 886 else:
887 887 lazyancestors = ancestor.lazyancestors
888 888 arg = self._uncheckedparentrevs
889 889 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
890 890
891 891 def descendants(self, revs):
892 892 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
893 893
894 894 def findcommonmissing(self, common=None, heads=None):
895 895 """Return a tuple of the ancestors of common and the ancestors of heads
896 896 that are not ancestors of common. In revset terminology, we return the
897 897 tuple:
898 898
899 899 ::common, (::heads) - (::common)
900 900
901 901 The list is sorted by revision number, meaning it is
902 902 topologically sorted.
903 903
904 904 'heads' and 'common' are both lists of node IDs. If heads is
905 905 not supplied, uses all of the revlog's heads. If common is not
906 906 supplied, uses nullid."""
907 907 if common is None:
908 908 common = [self.nullid]
909 909 if heads is None:
910 910 heads = self.heads()
911 911
912 912 common = [self.rev(n) for n in common]
913 913 heads = [self.rev(n) for n in heads]
914 914
915 915 # we want the ancestors, but inclusive
916 916 class lazyset(object):
917 917 def __init__(self, lazyvalues):
918 918 self.addedvalues = set()
919 919 self.lazyvalues = lazyvalues
920 920
921 921 def __contains__(self, value):
922 922 return value in self.addedvalues or value in self.lazyvalues
923 923
924 924 def __iter__(self):
925 925 added = self.addedvalues
926 926 for r in added:
927 927 yield r
928 928 for r in self.lazyvalues:
929 929 if not r in added:
930 930 yield r
931 931
932 932 def add(self, value):
933 933 self.addedvalues.add(value)
934 934
935 935 def update(self, values):
936 936 self.addedvalues.update(values)
937 937
938 938 has = lazyset(self.ancestors(common))
939 939 has.add(nullrev)
940 940 has.update(common)
941 941
942 942 # take all ancestors from heads that aren't in has
943 943 missing = set()
944 944 visit = collections.deque(r for r in heads if r not in has)
945 945 while visit:
946 946 r = visit.popleft()
947 947 if r in missing:
948 948 continue
949 949 else:
950 950 missing.add(r)
951 951 for p in self.parentrevs(r):
952 952 if p not in has:
953 953 visit.append(p)
954 954 missing = list(missing)
955 955 missing.sort()
956 956 return has, [self.node(miss) for miss in missing]
957 957
958 958 def incrementalmissingrevs(self, common=None):
959 959 """Return an object that can be used to incrementally compute the
960 960 revision numbers of the ancestors of arbitrary sets that are not
961 961 ancestors of common. This is an ancestor.incrementalmissingancestors
962 962 object.
963 963
964 964 'common' is a list of revision numbers. If common is not supplied, uses
965 965 nullrev.
966 966 """
967 967 if common is None:
968 968 common = [nullrev]
969 969
970 970 if rustancestor is not None:
971 971 return rustancestor.MissingAncestors(self.index, common)
972 972 return ancestor.incrementalmissingancestors(self.parentrevs, common)
973 973
974 974 def findmissingrevs(self, common=None, heads=None):
975 975 """Return the revision numbers of the ancestors of heads that
976 976 are not ancestors of common.
977 977
978 978 More specifically, return a list of revision numbers corresponding to
979 979 nodes N such that every N satisfies the following constraints:
980 980
981 981 1. N is an ancestor of some node in 'heads'
982 982 2. N is not an ancestor of any node in 'common'
983 983
984 984 The list is sorted by revision number, meaning it is
985 985 topologically sorted.
986 986
987 987 'heads' and 'common' are both lists of revision numbers. If heads is
988 988 not supplied, uses all of the revlog's heads. If common is not
989 989 supplied, uses nullid."""
990 990 if common is None:
991 991 common = [nullrev]
992 992 if heads is None:
993 993 heads = self.headrevs()
994 994
995 995 inc = self.incrementalmissingrevs(common=common)
996 996 return inc.missingancestors(heads)
997 997
998 998 def findmissing(self, common=None, heads=None):
999 999 """Return the ancestors of heads that are not ancestors of common.
1000 1000
1001 1001 More specifically, return a list of nodes N such that every N
1002 1002 satisfies the following constraints:
1003 1003
1004 1004 1. N is an ancestor of some node in 'heads'
1005 1005 2. N is not an ancestor of any node in 'common'
1006 1006
1007 1007 The list is sorted by revision number, meaning it is
1008 1008 topologically sorted.
1009 1009
1010 1010 'heads' and 'common' are both lists of node IDs. If heads is
1011 1011 not supplied, uses all of the revlog's heads. If common is not
1012 1012 supplied, uses nullid."""
1013 1013 if common is None:
1014 1014 common = [self.nullid]
1015 1015 if heads is None:
1016 1016 heads = self.heads()
1017 1017
1018 1018 common = [self.rev(n) for n in common]
1019 1019 heads = [self.rev(n) for n in heads]
1020 1020
1021 1021 inc = self.incrementalmissingrevs(common=common)
1022 1022 return [self.node(r) for r in inc.missingancestors(heads)]
1023 1023
1024 1024 def nodesbetween(self, roots=None, heads=None):
1025 1025 """Return a topological path from 'roots' to 'heads'.
1026 1026
1027 1027 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1028 1028 topologically sorted list of all nodes N that satisfy both of
1029 1029 these constraints:
1030 1030
1031 1031 1. N is a descendant of some node in 'roots'
1032 1032 2. N is an ancestor of some node in 'heads'
1033 1033
1034 1034 Every node is considered to be both a descendant and an ancestor
1035 1035 of itself, so every reachable node in 'roots' and 'heads' will be
1036 1036 included in 'nodes'.
1037 1037
1038 1038 'outroots' is the list of reachable nodes in 'roots', i.e., the
1039 1039 subset of 'roots' that is returned in 'nodes'. Likewise,
1040 1040 'outheads' is the subset of 'heads' that is also in 'nodes'.
1041 1041
1042 1042 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1043 1043 unspecified, uses nullid as the only root. If 'heads' is
1044 1044 unspecified, uses list of all of the revlog's heads."""
1045 1045 nonodes = ([], [], [])
1046 1046 if roots is not None:
1047 1047 roots = list(roots)
1048 1048 if not roots:
1049 1049 return nonodes
1050 1050 lowestrev = min([self.rev(n) for n in roots])
1051 1051 else:
1052 1052 roots = [self.nullid] # Everybody's a descendant of nullid
1053 1053 lowestrev = nullrev
1054 1054 if (lowestrev == nullrev) and (heads is None):
1055 1055 # We want _all_ the nodes!
1056 1056 return (
1057 1057 [self.node(r) for r in self],
1058 1058 [self.nullid],
1059 1059 list(self.heads()),
1060 1060 )
1061 1061 if heads is None:
1062 1062 # All nodes are ancestors, so the latest ancestor is the last
1063 1063 # node.
1064 1064 highestrev = len(self) - 1
1065 1065 # Set ancestors to None to signal that every node is an ancestor.
1066 1066 ancestors = None
1067 1067 # Set heads to an empty dictionary for later discovery of heads
1068 1068 heads = {}
1069 1069 else:
1070 1070 heads = list(heads)
1071 1071 if not heads:
1072 1072 return nonodes
1073 1073 ancestors = set()
1074 1074 # Turn heads into a dictionary so we can remove 'fake' heads.
1075 1075 # Also, later we will be using it to filter out the heads we can't
1076 1076 # find from roots.
1077 1077 heads = dict.fromkeys(heads, False)
1078 1078 # Start at the top and keep marking parents until we're done.
1079 1079 nodestotag = set(heads)
1080 1080 # Remember where the top was so we can use it as a limit later.
1081 1081 highestrev = max([self.rev(n) for n in nodestotag])
1082 1082 while nodestotag:
1083 1083 # grab a node to tag
1084 1084 n = nodestotag.pop()
1085 1085 # Never tag nullid
1086 1086 if n == self.nullid:
1087 1087 continue
1088 1088 # A node's revision number represents its place in a
1089 1089 # topologically sorted list of nodes.
1090 1090 r = self.rev(n)
1091 1091 if r >= lowestrev:
1092 1092 if n not in ancestors:
1093 1093 # If we are possibly a descendant of one of the roots
1094 1094 # and we haven't already been marked as an ancestor
1095 1095 ancestors.add(n) # Mark as ancestor
1096 1096 # Add non-nullid parents to list of nodes to tag.
1097 1097 nodestotag.update(
1098 1098 [p for p in self.parents(n) if p != self.nullid]
1099 1099 )
1100 1100 elif n in heads: # We've seen it before, is it a fake head?
1101 1101 # So it is, real heads should not be the ancestors of
1102 1102 # any other heads.
1103 1103 heads.pop(n)
1104 1104 if not ancestors:
1105 1105 return nonodes
1106 1106 # Now that we have our set of ancestors, we want to remove any
1107 1107 # roots that are not ancestors.
1108 1108
1109 1109 # If one of the roots was nullid, everything is included anyway.
1110 1110 if lowestrev > nullrev:
1111 1111 # But, since we weren't, let's recompute the lowest rev to not
1112 1112 # include roots that aren't ancestors.
1113 1113
1114 1114 # Filter out roots that aren't ancestors of heads
1115 1115 roots = [root for root in roots if root in ancestors]
1116 1116 # Recompute the lowest revision
1117 1117 if roots:
1118 1118 lowestrev = min([self.rev(root) for root in roots])
1119 1119 else:
1120 1120 # No more roots? Return empty list
1121 1121 return nonodes
1122 1122 else:
1123 1123 # We are descending from nullid, and don't need to care about
1124 1124 # any other roots.
1125 1125 lowestrev = nullrev
1126 1126 roots = [self.nullid]
1127 1127 # Transform our roots list into a set.
1128 1128 descendants = set(roots)
1129 1129 # Also, keep the original roots so we can filter out roots that aren't
1130 1130 # 'real' roots (i.e. are descended from other roots).
1131 1131 roots = descendants.copy()
1132 1132 # Our topologically sorted list of output nodes.
1133 1133 orderedout = []
1134 1134 # Don't start at nullid since we don't want nullid in our output list,
1135 1135 # and if nullid shows up in descendants, empty parents will look like
1136 1136 # they're descendants.
1137 1137 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1138 1138 n = self.node(r)
1139 1139 isdescendant = False
1140 1140 if lowestrev == nullrev: # Everybody is a descendant of nullid
1141 1141 isdescendant = True
1142 1142 elif n in descendants:
1143 1143 # n is already a descendant
1144 1144 isdescendant = True
1145 1145 # This check only needs to be done here because all the roots
1146 1146 # will start being marked as descendants before the loop.
1147 1147 if n in roots:
1148 1148 # If n was a root, check if it's a 'real' root.
1149 1149 p = tuple(self.parents(n))
1150 1150 # If any of its parents are descendants, it's not a root.
1151 1151 if (p[0] in descendants) or (p[1] in descendants):
1152 1152 roots.remove(n)
1153 1153 else:
1154 1154 p = tuple(self.parents(n))
1155 1155 # A node is a descendant if either of its parents is a
1156 1156 # descendant. (We seeded the descendants set with the roots
1157 1157 # up there, remember?)
1158 1158 if (p[0] in descendants) or (p[1] in descendants):
1159 1159 descendants.add(n)
1160 1160 isdescendant = True
1161 1161 if isdescendant and ((ancestors is None) or (n in ancestors)):
1162 1162 # Only include nodes that are both descendants and ancestors.
1163 1163 orderedout.append(n)
1164 1164 if (ancestors is not None) and (n in heads):
1165 1165 # We're trying to figure out which heads are reachable
1166 1166 # from roots.
1167 1167 # Mark this head as having been reached
1168 1168 heads[n] = True
1169 1169 elif ancestors is None:
1170 1170 # Otherwise, we're trying to discover the heads.
1171 1171 # Assume this is a head because if it isn't, the next step
1172 1172 # will eventually remove it.
1173 1173 heads[n] = True
1174 1174 # But, obviously its parents aren't.
1175 1175 for p in self.parents(n):
1176 1176 heads.pop(p, None)
1177 1177 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1178 1178 roots = list(roots)
1179 1179 assert orderedout
1180 1180 assert roots
1181 1181 assert heads
1182 1182 return (orderedout, roots, heads)
1183 1183
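    # A hedged micro-example of the tuple above (hypothetical linear
    # history 0 -> 1 -> 2, n0..n2 being the nodes):
    #   nodesbetween(roots=[n0], heads=[n2])
    #   -> ([n0, n1, n2], [n0], [n2])
    # i.e. every node on the path, the surviving roots, and the heads
    # that were actually reached.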
1184 1184 def headrevs(self, revs=None):
1185 1185 if revs is None:
1186 1186 try:
1187 1187 return self.index.headrevs()
1188 1188 except AttributeError:
1189 1189 return self._headrevs()
1190 1190 if rustdagop is not None:
1191 1191 return rustdagop.headrevs(self.index, revs)
1192 1192 return dagop.headrevs(revs, self._uncheckedparentrevs)
1193 1193
1194 1194 def computephases(self, roots):
1195 1195 return self.index.computephasesmapsets(roots)
1196 1196
1197 1197 def _headrevs(self):
1198 1198 count = len(self)
1199 1199 if not count:
1200 1200 return [nullrev]
1201 1201 # we won't iterate over filtered revs, so nobody is a head at start
1202 1202 ishead = [0] * (count + 1)
1203 1203 index = self.index
1204 1204 for r in self:
1205 1205 ishead[r] = 1 # I may be a head
1206 1206 e = index[r]
1207 1207 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1208 1208 return [r for r, val in enumerate(ishead) if val]
1209 1209
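    # A small worked example of the marking pass in _headrevs() (assumed
    # history, not from the source): with revs 0..3 where 1 and 2 are
    # children of 0 and 3 is a child of 1, every slot of `ishead` starts
    # at 1; visiting each rev clears its parents' slots, leaving only
    # revs 2 and 3 set, so _headrevs() returns [2, 3].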
1210 1210 def heads(self, start=None, stop=None):
1211 1211 """return the list of all nodes that have no children
1212 1212
1213 1213 if start is specified, only heads that are descendants of
1214 1214 start will be returned
1215 1215 if stop is specified, it will consider all the revs from stop
1216 1216 as if they had no children
1217 1217 """
1218 1218 if start is None and stop is None:
1219 1219 if not len(self):
1220 1220 return [self.nullid]
1221 1221 return [self.node(r) for r in self.headrevs()]
1222 1222
1223 1223 if start is None:
1224 1224 start = nullrev
1225 1225 else:
1226 1226 start = self.rev(start)
1227 1227
1228 1228 stoprevs = {self.rev(n) for n in stop or []}
1229 1229
1230 1230 revs = dagop.headrevssubset(
1231 1231 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1232 1232 )
1233 1233
1234 1234 return [self.node(rev) for rev in revs]
1235 1235
1236 1236 def children(self, node):
1237 1237 """find the children of a given node"""
1238 1238 c = []
1239 1239 p = self.rev(node)
1240 1240 for r in self.revs(start=p + 1):
1241 1241 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1242 1242 if prevs:
1243 1243 for pr in prevs:
1244 1244 if pr == p:
1245 1245 c.append(self.node(r))
1246 1246 elif p == nullrev:
1247 1247 c.append(self.node(r))
1248 1248 return c
1249 1249
1250 1250 def commonancestorsheads(self, a, b):
1251 1251 """calculate all the heads of the common ancestors of nodes a and b"""
1252 1252 a, b = self.rev(a), self.rev(b)
1253 1253 ancs = self._commonancestorsheads(a, b)
1254 1254 return pycompat.maplist(self.node, ancs)
1255 1255
1256 1256 def _commonancestorsheads(self, *revs):
1257 1257 """calculate all the heads of the common ancestors of revs"""
1258 1258 try:
1259 1259 ancs = self.index.commonancestorsheads(*revs)
1260 1260 except (AttributeError, OverflowError): # C implementation failed
1261 1261 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1262 1262 return ancs
1263 1263
1264 1264 def isancestor(self, a, b):
1265 1265 """return True if node a is an ancestor of node b
1266 1266
1267 1267 A revision is considered an ancestor of itself."""
1268 1268 a, b = self.rev(a), self.rev(b)
1269 1269 return self.isancestorrev(a, b)
1270 1270
1271 1271 def isancestorrev(self, a, b):
1272 1272 """return True if revision a is an ancestor of revision b
1273 1273
1274 1274 A revision is considered an ancestor of itself.
1275 1275
1276 1276 The implementation of this is trivial but the use of
1277 1277 reachableroots is not."""
1278 1278 if a == nullrev:
1279 1279 return True
1280 1280 elif a == b:
1281 1281 return True
1282 1282 elif a > b:
1283 1283 return False
1284 1284 return bool(self.reachableroots(a, [b], [a], includepath=False))
1285 1285
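    # A hedged usage sketch (`rlog` is a hypothetical revlog instance):
    #   rlog.isancestorrev(nullrev, 7)  -> True   (nullrev precedes everything)
    #   rlog.isancestorrev(7, 7)        -> True   (self-ancestor)
    #   rlog.isancestorrev(9, 7)        -> False  (a parent's rev is always
    #                                              smaller than its child's)
    # Only the remaining a < b case needs the reachableroots() walk.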
1286 1286 def reachableroots(self, minroot, heads, roots, includepath=False):
1287 1287 """return (heads(::(<roots> and <roots>::<heads>)))
1288 1288
1289 1289 If includepath is True, return (<roots>::<heads>)."""
1290 1290 try:
1291 1291 return self.index.reachableroots2(
1292 1292 minroot, heads, roots, includepath
1293 1293 )
1294 1294 except AttributeError:
1295 1295 return dagop._reachablerootspure(
1296 1296 self.parentrevs, minroot, roots, heads, includepath
1297 1297 )
1298 1298
1299 1299 def ancestor(self, a, b):
1300 1300 """calculate the "best" common ancestor of nodes a and b"""
1301 1301
1302 1302 a, b = self.rev(a), self.rev(b)
1303 1303 try:
1304 1304 ancs = self.index.ancestors(a, b)
1305 1305 except (AttributeError, OverflowError):
1306 1306 ancs = ancestor.ancestors(self.parentrevs, a, b)
1307 1307 if ancs:
1308 1308 # choose a consistent winner when there's a tie
1309 1309 return min(map(self.node, ancs))
1310 1310 return self.nullid
1311 1311
1312 1312 def _match(self, id):
1313 1313 if isinstance(id, int):
1314 1314 # rev
1315 1315 return self.node(id)
1316 1316 if len(id) == self.nodeconstants.nodelen:
1317 1317 # possibly a binary node
1318 1318 # odds of a binary node being all hex in ASCII are 1 in 10**25
1319 1319 try:
1320 1320 node = id
1321 1321 self.rev(node) # quick search the index
1322 1322 return node
1323 1323 except error.LookupError:
1324 1324 pass # may be partial hex id
1325 1325 try:
1326 1326 # str(rev)
1327 1327 rev = int(id)
1328 1328 if b"%d" % rev != id:
1329 1329 raise ValueError
1330 1330 if rev < 0:
1331 1331 rev = len(self) + rev
1332 1332 if rev < 0 or rev >= len(self):
1333 1333 raise ValueError
1334 1334 return self.node(rev)
1335 1335 except (ValueError, OverflowError):
1336 1336 pass
1337 1337 if len(id) == 2 * self.nodeconstants.nodelen:
1338 1338 try:
1339 1339 # a full hex nodeid?
1340 1340 node = bin(id)
1341 1341 self.rev(node)
1342 1342 return node
1343 1343 except (TypeError, error.LookupError):
1344 1344 pass
1345 1345
1346 1346 def _partialmatch(self, id):
1347 1347 # we don't care about wdirfilenodeids as they should always be full hashes
1348 1348 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1349 1349 try:
1350 1350 partial = self.index.partialmatch(id)
1351 1351 if partial and self.hasnode(partial):
1352 1352 if maybewdir:
1353 1353 # single 'ff...' match in radix tree, ambiguous with wdir
1354 1354 raise error.RevlogError
1355 1355 return partial
1356 1356 if maybewdir:
1357 1357 # no 'ff...' match in radix tree, wdir identified
1358 1358 raise error.WdirUnsupported
1359 1359 return None
1360 1360 except error.RevlogError:
1361 1361 # parsers.c radix tree lookup gave multiple matches
1362 1362 # fast path: for unfiltered changelog, radix tree is accurate
1363 1363 if not getattr(self, 'filteredrevs', None):
1364 1364 raise error.AmbiguousPrefixLookupError(
1365 1365 id, self.indexfile, _(b'ambiguous identifier')
1366 1366 )
1367 1367 # fall through to slow path that filters hidden revisions
1368 1368 except (AttributeError, ValueError):
1369 1369 # we are pure python, or key was too short to search radix tree
1370 1370 pass
1371 1371
1372 1372 if id in self._pcache:
1373 1373 return self._pcache[id]
1374 1374
1375 1375 if len(id) <= 40:
1376 1376 try:
1377 1377 # hex(node)[:...]
1378 1378 l = len(id) // 2 # grab an even number of digits
1379 1379 prefix = bin(id[: l * 2])
1380 1380 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1381 1381 nl = [
1382 1382 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1383 1383 ]
1384 1384 if self.nodeconstants.nullhex.startswith(id):
1385 1385 nl.append(self.nullid)
1386 1386 if len(nl) > 0:
1387 1387 if len(nl) == 1 and not maybewdir:
1388 1388 self._pcache[id] = nl[0]
1389 1389 return nl[0]
1390 1390 raise error.AmbiguousPrefixLookupError(
1391 1391 id, self.indexfile, _(b'ambiguous identifier')
1392 1392 )
1393 1393 if maybewdir:
1394 1394 raise error.WdirUnsupported
1395 1395 return None
1396 1396 except TypeError:
1397 1397 pass
1398 1398
1399 1399 def lookup(self, id):
1400 1400 """locate a node based on:
1401 1401 - revision number or str(revision number)
1402 1402 - nodeid or subset of hex nodeid
1403 1403 """
1404 1404 n = self._match(id)
1405 1405 if n is not None:
1406 1406 return n
1407 1407 n = self._partialmatch(id)
1408 1408 if n:
1409 1409 return n
1410 1410
1411 1411 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1412 1412
1413 1413 def shortest(self, node, minlength=1):
1414 1414 """Find the shortest unambiguous prefix that matches node."""
1415 1415
1416 1416 def isvalid(prefix):
1417 1417 try:
1418 1418 matchednode = self._partialmatch(prefix)
1419 1419 except error.AmbiguousPrefixLookupError:
1420 1420 return False
1421 1421 except error.WdirUnsupported:
1422 1422 # single 'ff...' match
1423 1423 return True
1424 1424 if matchednode is None:
1425 1425 raise error.LookupError(node, self.indexfile, _(b'no node'))
1426 1426 return True
1427 1427
1428 1428 def maybewdir(prefix):
1429 1429 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1430 1430
1431 1431 hexnode = hex(node)
1432 1432
1433 1433 def disambiguate(hexnode, minlength):
1434 1434 """Disambiguate against wdirid."""
1435 1435 for length in range(minlength, len(hexnode) + 1):
1436 1436 prefix = hexnode[:length]
1437 1437 if not maybewdir(prefix):
1438 1438 return prefix
1439 1439
1440 1440 if not getattr(self, 'filteredrevs', None):
1441 1441 try:
1442 1442 length = max(self.index.shortest(node), minlength)
1443 1443 return disambiguate(hexnode, length)
1444 1444 except error.RevlogError:
1445 1445 if node != self.nodeconstants.wdirid:
1446 1446 raise error.LookupError(node, self.indexfile, _(b'no node'))
1447 1447 except AttributeError:
1448 1448 # Fall through to pure code
1449 1449 pass
1450 1450
1451 1451 if node == self.nodeconstants.wdirid:
1452 1452 for length in range(minlength, len(hexnode) + 1):
1453 1453 prefix = hexnode[:length]
1454 1454 if isvalid(prefix):
1455 1455 return prefix
1456 1456
1457 1457 for length in range(minlength, len(hexnode) + 1):
1458 1458 prefix = hexnode[:length]
1459 1459 if isvalid(prefix):
1460 1460 return disambiguate(hexnode, length)
1461 1461
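    # A hedged walk-through of shortest(): prefixes of increasing length
    # are probed until _partialmatch() stops raising
    # AmbiguousPrefixLookupError, then disambiguate() extends the winner
    # past any all-'f' prefix so it cannot collide with wdirid. E.g. for a
    # node 1a2b3c... whose 1-hex prefix is shared with another node,
    # shortest(node) would come back as b'1a' (assuming b'1a' is unique).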
1462 1462 def cmp(self, node, text):
1463 1463 """compare text with a given file revision
1464 1464
1465 1465 returns True if text is different from what is stored.
1466 1466 """
1467 1467 p1, p2 = self.parents(node)
1468 1468 return storageutil.hashrevisionsha1(text, p1, p2) != node
1469 1469
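    # A minimal sketch of the check above: instead of reading the stored
    # text, cmp() recomputes hashrevisionsha1(text, p1, p2) and compares it
    # with the stored node, so equality means "unchanged" without any
    # decompression of the stored revision.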
1470 1470 def _cachesegment(self, offset, data):
1471 1471 """Add a segment to the revlog cache.
1472 1472
1473 1473 Accepts an absolute offset and the data that is at that location.
1474 1474 """
1475 1475 o, d = self._chunkcache
1476 1476 # try to add to existing cache
1477 1477 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1478 1478 self._chunkcache = o, d + data
1479 1479 else:
1480 1480 self._chunkcache = offset, data
1481 1481
1482 1482 def _readsegment(self, offset, length, df=None):
1483 1483 """Load a segment of raw data from the revlog.
1484 1484
1485 1485 Accepts an absolute offset, length to read, and an optional existing
1486 1486 file handle to read from.
1487 1487
1488 1488 If an existing file handle is passed, it will be seeked and the
1489 1489 original seek position will NOT be restored.
1490 1490
1491 1491 Returns a str or buffer of raw byte data.
1492 1492
1493 1493 Raises if the requested number of bytes could not be read.
1494 1494 """
1495 1495 # Cache data both forward and backward around the requested
1496 1496 # data, in a fixed size window. This helps speed up operations
1497 1497 # involving reading the revlog backwards.
1498 1498 cachesize = self._chunkcachesize
1499 1499 realoffset = offset & ~(cachesize - 1)
1500 1500 reallength = (
1501 1501 (offset + length + cachesize) & ~(cachesize - 1)
1502 1502 ) - realoffset
1503 1503 with self._datareadfp(df) as df:
1504 1504 df.seek(realoffset)
1505 1505 d = df.read(reallength)
1506 1506
1507 1507 self._cachesegment(realoffset, d)
1508 1508 if offset != realoffset or reallength != length:
1509 1509 startoffset = offset - realoffset
1510 1510 if len(d) - startoffset < length:
1511 1511 raise error.RevlogError(
1512 1512 _(
1513 1513 b'partial read of revlog %s; expected %d bytes from '
1514 1514 b'offset %d, got %d'
1515 1515 )
1516 1516 % (
1517 1517 self.indexfile if self._inline else self.datafile,
1518 1518 length,
1519 1519 realoffset,
1520 1520 len(d) - startoffset,
1521 1521 )
1522 1522 )
1523 1523
1524 1524 return util.buffer(d, startoffset, length)
1525 1525
1526 1526 if len(d) < length:
1527 1527 raise error.RevlogError(
1528 1528 _(
1529 1529 b'partial read of revlog %s; expected %d bytes from offset '
1530 1530 b'%d, got %d'
1531 1531 )
1532 1532 % (
1533 1533 self.indexfile if self._inline else self.datafile,
1534 1534 length,
1535 1535 offset,
1536 1536 len(d),
1537 1537 )
1538 1538 )
1539 1539
1540 1540 return d
1541 1541
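    # A worked example of the window alignment above (hypothetical numbers,
    # assuming _chunkcachesize = 65536): for offset=70000, length=100,
    #   realoffset = 70000 & ~65535                           -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536 -> 65536
    # so one aligned 64KiB window [65536, 131072) is read and cached, and
    # the requested 100 bytes are sliced out of it.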
1542 1542 def _getsegment(self, offset, length, df=None):
1543 1543 """Obtain a segment of raw data from the revlog.
1544 1544
1545 1545 Accepts an absolute offset, length of bytes to obtain, and an
1546 1546 optional file handle to the already-opened revlog. If the file
1547 1547 handle is used, its original seek position will not be preserved.
1548 1548
1549 1549 Requests for data may be returned from a cache.
1550 1550
1551 1551 Returns a str or a buffer instance of raw byte data.
1552 1552 """
1553 1553 o, d = self._chunkcache
1554 1554 l = len(d)
1555 1555
1556 1556 # is it in the cache?
1557 1557 cachestart = offset - o
1558 1558 cacheend = cachestart + length
1559 1559 if cachestart >= 0 and cacheend <= l:
1560 1560 if cachestart == 0 and cacheend == l:
1561 1561 return d # avoid a copy
1562 1562 return util.buffer(d, cachestart, cacheend - cachestart)
1563 1563
1564 1564 return self._readsegment(offset, length, df=df)
1565 1565
1566 1566 def _getsegmentforrevs(self, startrev, endrev, df=None):
1567 1567 """Obtain a segment of raw data corresponding to a range of revisions.
1568 1568
1569 1569 Accepts the start and end revisions and an optional already-open
1570 1570 file handle to be used for reading. If the file handle is used, its
1571 1571 seek position will not be preserved.
1572 1572
1573 1573 Requests for data may be satisfied by a cache.
1574 1574
1575 1575 Returns a 2-tuple of (offset, data) for the requested range of
1576 1576 revisions. Offset is the integer offset from the beginning of the
1577 1577 revlog and data is a str or buffer of the raw byte data.
1578 1578
1579 1579 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1580 1580 to determine where each revision's data begins and ends.
1581 1581 """
1582 1582 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1583 1583 # (functions are expensive).
1584 1584 index = self.index
1585 1585 istart = index[startrev]
1586 1586 start = int(istart[0] >> 16)
1587 1587 if startrev == endrev:
1588 1588 end = start + istart[1]
1589 1589 else:
1590 1590 iend = index[endrev]
1591 1591 end = int(iend[0] >> 16) + iend[1]
1592 1592
1593 1593 if self._inline:
1594 1594 start += (startrev + 1) * self.index.entry_size
1595 1595 end += (endrev + 1) * self.index.entry_size
1596 1596 length = end - start
1597 1597
1598 1598 return start, self._getsegment(start, length, df=df)
1599 1599
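    # A hedged note on the inline adjustment above: in an inline revlog each
    # revision's data is interleaved with its index entry, so the flat data
    # offset of rev r is shifted by (r + 1) * entry_size. E.g. assuming a
    # 64-byte entry, the data of rev 0 starts at byte 64 rather than 0.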
1600 1600 def _chunk(self, rev, df=None):
1601 1601 """Obtain a single decompressed chunk for a revision.
1602 1602
1603 1603 Accepts an integer revision and an optional already-open file handle
1604 1604 to be used for reading. If used, the seek position of the file will not
1605 1605 be preserved.
1606 1606
1607 1607 Returns a str holding uncompressed data for the requested revision.
1608 1608 """
1609 1609 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1610 1610
1611 1611 def _chunks(self, revs, df=None, targetsize=None):
1612 1612 """Obtain decompressed chunks for the specified revisions.
1613 1613
1614 1614 Accepts an iterable of numeric revisions that are assumed to be in
1615 1615 ascending order. Also accepts an optional already-open file handle
1616 1616 to be used for reading. If used, the seek position of the file will
1617 1617 not be preserved.
1618 1618
1619 1619 This function is similar to calling ``self._chunk()`` multiple times,
1620 1620 but is faster.
1621 1621
1622 1622 Returns a list with decompressed data for each requested revision.
1623 1623 """
1624 1624 if not revs:
1625 1625 return []
1626 1626 start = self.start
1627 1627 length = self.length
1628 1628 inline = self._inline
1629 1629 iosize = self.index.entry_size
1630 1630 buffer = util.buffer
1631 1631
1632 1632 l = []
1633 1633 ladd = l.append
1634 1634
1635 1635 if not self._withsparseread:
1636 1636 slicedchunks = (revs,)
1637 1637 else:
1638 1638 slicedchunks = deltautil.slicechunk(
1639 1639 self, revs, targetsize=targetsize
1640 1640 )
1641 1641
1642 1642 for revschunk in slicedchunks:
1643 1643 firstrev = revschunk[0]
1644 1644 # Skip trailing revisions with empty diff
1645 1645 for lastrev in revschunk[::-1]:
1646 1646 if length(lastrev) != 0:
1647 1647 break
1648 1648
1649 1649 try:
1650 1650 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1651 1651 except OverflowError:
1652 1652 # issue4215 - we can't cache a run of chunks greater than
1653 1653 # 2G on Windows
1654 1654 return [self._chunk(rev, df=df) for rev in revschunk]
1655 1655
1656 1656 decomp = self.decompress
1657 1657 for rev in revschunk:
1658 1658 chunkstart = start(rev)
1659 1659 if inline:
1660 1660 chunkstart += (rev + 1) * iosize
1661 1661 chunklength = length(rev)
1662 1662 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1663 1663
1664 1664 return l
1665 1665
1666 1666 def _chunkclear(self):
1667 1667 """Clear the raw chunk cache."""
1668 1668 self._chunkcache = (0, b'')
1669 1669
1670 1670 def deltaparent(self, rev):
1671 1671 """return deltaparent of the given revision"""
1672 1672 base = self.index[rev][3]
1673 1673 if base == rev:
1674 1674 return nullrev
1675 1675 elif self._generaldelta:
1676 1676 return base
1677 1677 else:
1678 1678 return rev - 1
1679 1679
1680 1680 def issnapshot(self, rev):
1681 1681 """tells whether rev is a snapshot"""
1682 1682 if not self._sparserevlog:
1683 1683 return self.deltaparent(rev) == nullrev
1684 1684 elif util.safehasattr(self.index, b'issnapshot'):
1685 1685 # directly assign the method to cache the testing and access
1686 1686 self.issnapshot = self.index.issnapshot
1687 1687 return self.issnapshot(rev)
1688 1688 if rev == nullrev:
1689 1689 return True
1690 1690 entry = self.index[rev]
1691 1691 base = entry[3]
1692 1692 if base == rev:
1693 1693 return True
1694 1694 if base == nullrev:
1695 1695 return True
1696 1696 p1 = entry[5]
1697 1697 p2 = entry[6]
1698 1698 if base == p1 or base == p2:
1699 1699 return False
1700 1700 return self.issnapshot(base)
1701 1701
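    # A non-authoritative summary of the recursion above: a revision is a
    # snapshot iff nothing in its base chain deltas against a parent:
    #   base == rev or base == nullrev  -> full text, a snapshot
    #   base == p1 or base == p2        -> a plain delta, not a snapshot
    #   otherwise                       -> recurse on base (an
    #                                      "intermediate snapshot" chain)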
1702 1702 def snapshotdepth(self, rev):
1703 1703 """number of snapshots in the chain before this one"""
1704 1704 if not self.issnapshot(rev):
1705 1705 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1706 1706 return len(self._deltachain(rev)[0]) - 1
1707 1707
1708 1708 def revdiff(self, rev1, rev2):
1709 1709 """return or calculate a delta between two revisions
1710 1710
1711 1711 The delta calculated is in binary form and is intended to be written to
1712 1712 revlog data directly. So this function needs raw revision data.
1713 1713 """
1714 1714 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1715 1715 return bytes(self._chunk(rev2))
1716 1716
1717 1717 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1718 1718
1719 1719 def _processflags(self, text, flags, operation, raw=False):
1720 1720 """deprecated entry point to access flag processors"""
1721 1721 msg = b'_processflag(...) use the specialized variant'
1722 1722 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1723 1723 if raw:
1724 1724 return text, flagutil.processflagsraw(self, text, flags)
1725 1725 elif operation == b'read':
1726 1726 return flagutil.processflagsread(self, text, flags)
1727 1727 else: # write operation
1728 1728 return flagutil.processflagswrite(self, text, flags)
1729 1729
1730 1730 def revision(self, nodeorrev, _df=None, raw=False):
1731 1731 """return an uncompressed revision of a given node or revision
1732 1732 number.
1733 1733
1734 1734 _df - an existing file handle to read from. (internal-only)
1735 1735 raw - an optional argument specifying if the revision data is to be
1736 1736 treated as raw data when applying flag transforms. 'raw' should be set
1737 1737 to True when generating changegroups or in debug commands.
1738 1738 """
1739 1739 if raw:
1740 1740 msg = (
1741 1741 b'revlog.revision(..., raw=True) is deprecated, '
1742 1742 b'use revlog.rawdata(...)'
1743 1743 )
1744 1744 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1745 1745 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1746 1746
1747 1747 def sidedata(self, nodeorrev, _df=None):
1748 1748 """a map of extra data related to the changeset but not part of the hash
1749 1749
1750 1750 This function currently returns a dictionary. However, a more
1751 1751 advanced mapping object will likely be used in the future for more
1752 1752 efficient/lazy code.
1753 1753 """
1754 1754 return self._revisiondata(nodeorrev, _df)[1]
1755 1755
1756 1756 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1757 1757 # deal with <nodeorrev> argument type
1758 1758 if isinstance(nodeorrev, int):
1759 1759 rev = nodeorrev
1760 1760 node = self.node(rev)
1761 1761 else:
1762 1762 node = nodeorrev
1763 1763 rev = None
1764 1764
1765 1765 # fast path the special `nullid` rev
1766 1766 if node == self.nullid:
1767 1767 return b"", {}
1768 1768
1769 1769 # ``rawtext`` is the text as stored inside the revlog. Might be the
1770 1770 # revision or might need to be processed to retrieve the revision.
1771 1771 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1772 1772
1773 1773 if self.version & 0xFFFF == REVLOGV2:
1774 1774 if rev is None:
1775 1775 rev = self.rev(node)
1776 1776 sidedata = self._sidedata(rev)
1777 1777 else:
1778 1778 sidedata = {}
1779 1779
1780 1780 if raw and validated:
1781 1781 # if we don't want to process the raw text and that raw
1782 1782 # text is cached, we can exit early.
1783 1783 return rawtext, sidedata
1784 1784 if rev is None:
1785 1785 rev = self.rev(node)
1786 1786 # the revlog's flag for this revision
1787 1787 # (usually alter its state or content)
1788 1788 flags = self.flags(rev)
1789 1789
1790 1790 if validated and flags == REVIDX_DEFAULT_FLAGS:
1791 1791 # no extra flags set, no flag processor runs, text = rawtext
1792 1792 return rawtext, sidedata
1793 1793
1794 1794 if raw:
1795 1795 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1796 1796 text = rawtext
1797 1797 else:
1798 1798 r = flagutil.processflagsread(self, rawtext, flags)
1799 1799 text, validatehash = r
1800 1800 if validatehash:
1801 1801 self.checkhash(text, node, rev=rev)
1802 1802 if not validated:
1803 1803 self._revisioncache = (node, rev, rawtext)
1804 1804
1805 1805 return text, sidedata
1806 1806
1807 1807 def _rawtext(self, node, rev, _df=None):
1808 1808 """return the possibly unvalidated rawtext for a revision
1809 1809
1810 1810 returns (rev, rawtext, validated)
1811 1811 """
1812 1812
1813 1813 # revision in the cache (could be useful to apply delta)
1814 1814 cachedrev = None
1815 1815 # An intermediate text to apply deltas to
1816 1816 basetext = None
1817 1817
1818 1818 # Check if we have the entry in cache
1819 1819 # The cache entry looks like (node, rev, rawtext)
1820 1820 if self._revisioncache:
1821 1821 if self._revisioncache[0] == node:
1822 1822 return (rev, self._revisioncache[2], True)
1823 1823 cachedrev = self._revisioncache[1]
1824 1824
1825 1825 if rev is None:
1826 1826 rev = self.rev(node)
1827 1827
1828 1828 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1829 1829 if stopped:
1830 1830 basetext = self._revisioncache[2]
1831 1831
1832 1832 # drop cache to save memory, the caller is expected to
1833 1833 # update self._revisioncache after validating the text
1834 1834 self._revisioncache = None
1835 1835
1836 1836 targetsize = None
1837 1837 rawsize = self.index[rev][2]
1838 1838 if 0 <= rawsize:
1839 1839 targetsize = 4 * rawsize
1840 1840
1841 1841 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1842 1842 if basetext is None:
1843 1843 basetext = bytes(bins[0])
1844 1844 bins = bins[1:]
1845 1845
1846 1846 rawtext = mdiff.patches(basetext, bins)
1847 1847 del basetext # let us have a chance to free memory early
1848 1848 return (rev, rawtext, False)
1849 1849
1850 1850 def _sidedata(self, rev):
1851 1851 """Return the sidedata for a given revision number."""
1852 1852 index_entry = self.index[rev]
1853 1853 sidedata_offset = index_entry[8]
1854 1854 sidedata_size = index_entry[9]
1855 1855
1856 1856 if self._inline:
1857 1857 sidedata_offset += self.index.entry_size * (1 + rev)
1858 1858 if sidedata_size == 0:
1859 1859 return {}
1860 1860
1861 1861 segment = self._getsegment(sidedata_offset, sidedata_size)
1862 1862 sidedata = sidedatautil.deserialize_sidedata(segment)
1863 1863 return sidedata
1864 1864
1865 1865 def rawdata(self, nodeorrev, _df=None):
1866 1866 """return an uncompressed raw data of a given node or revision number.
1867 1867
1868 1868 _df - an existing file handle to read from. (internal-only)
1869 1869 """
1870 1870 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1871 1871
1872 1872 def hash(self, text, p1, p2):
1873 1873 """Compute a node hash.
1874 1874
1875 1875 Available as a function so that subclasses can replace the hash
1876 1876 as needed.
1877 1877 """
1878 1878 return storageutil.hashrevisionsha1(text, p1, p2)
1879 1879
1880 1880 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1881 1881 """Check node hash integrity.
1882 1882
1883 1883 Available as a function so that subclasses can extend hash mismatch
1884 1884 behaviors as needed.
1885 1885 """
1886 1886 try:
1887 1887 if p1 is None and p2 is None:
1888 1888 p1, p2 = self.parents(node)
1889 1889 if node != self.hash(text, p1, p2):
1890 1890 # Clear the revision cache on hash failure. The revision cache
1891 1891 # only stores the raw revision and clearing the cache does have
1892 1892 # the side-effect that we won't have a cache hit when the raw
1893 1893 # revision data is accessed. But this case should be rare and
1894 1894 # it is extra work to teach the cache about the hash
1895 1895 # verification state.
1896 1896 if self._revisioncache and self._revisioncache[0] == node:
1897 1897 self._revisioncache = None
1898 1898
1899 1899 revornode = rev
1900 1900 if revornode is None:
1901 1901 revornode = templatefilters.short(hex(node))
1902 1902 raise error.RevlogError(
1903 1903 _(b"integrity check failed on %s:%s")
1904 1904 % (self.indexfile, pycompat.bytestr(revornode))
1905 1905 )
1906 1906 except error.RevlogError:
1907 1907 if self._censorable and storageutil.iscensoredtext(text):
1908 1908 raise error.CensoredNodeError(self.indexfile, node, text)
1909 1909 raise
1910 1910
1911 1911 def _enforceinlinesize(self, tr, fp=None):
1912 1912 """Check if the revlog is too big for inline and convert if so.
1913 1913
1914 1914 This should be called after revisions are added to the revlog. If the
1915 1915 revlog has grown too large to be an inline revlog, it will convert it
1916 1916 to use multiple index and data files.
1917 1917 """
1918 1918 tiprev = len(self) - 1
1919 1919 if (
1920 1920 not self._inline
1921 1921 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1922 1922 ):
1923 1923 return
1924 1924
1925 1925 troffset = tr.findoffset(self.indexfile)
1926 1926 if troffset is None:
1927 1927 raise error.RevlogError(
1928 1928 _(b"%s not found in the transaction") % self.indexfile
1929 1929 )
1930 1930 trindex = 0
1931 1931 tr.add(self.datafile, 0)
1932 1932
1933 1933 if fp:
1934 1934 fp.flush()
1935 1935 fp.close()
1936 1936 # We can't use the cached file handle after close(). So prevent
1937 1937 # its usage.
1938 1938 self._writinghandles = None
1939 1939
1940 1940 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1941 1941 for r in self:
1942 1942 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1943 1943 if troffset <= self.start(r):
1944 1944 trindex = r
1945 1945
1946 1946 with self._indexfp(b'w') as fp:
1947 1947 self.version &= ~FLAG_INLINE_DATA
1948 1948 self._inline = False
1949 1949 for i in self:
1950 1950 e = self.index.entry_binary(i)
1951 1951 if i == 0:
1952 1952 header = self.index.pack_header(self.version)
1953 1953 e = header + e
1954 1954 fp.write(e)
1955 1955
1956 1956 # the temp file replaces the real index when we exit the context
1957 1957 # manager
1958 1958
1959 1959 tr.replace(self.indexfile, trindex * self.index.entry_size)
1960 1960 nodemaputil.setup_persistent_nodemap(tr, self)
1961 1961 self._chunkclear()
1962 1962
1963 1963 def _nodeduplicatecallback(self, transaction, node):
1964 1964 """called when trying to add a node already stored."""
1965 1965
1966 1966 def addrevision(
1967 1967 self,
1968 1968 text,
1969 1969 transaction,
1970 1970 link,
1971 1971 p1,
1972 1972 p2,
1973 1973 cachedelta=None,
1974 1974 node=None,
1975 1975 flags=REVIDX_DEFAULT_FLAGS,
1976 1976 deltacomputer=None,
1977 1977 sidedata=None,
1978 1978 ):
1979 1979 """add a revision to the log
1980 1980
1981 1981 text - the revision data to add
1982 1982 transaction - the transaction object used for rollback
1983 1983 link - the linkrev data to add
1984 1984 p1, p2 - the parent nodeids of the revision
1985 1985 cachedelta - an optional precomputed delta
1986 1986 node - nodeid of revision; typically node is not specified, and it is
1987 1987 computed by default as hash(text, p1, p2); however, subclasses might
1988 1988 use a different hashing method (and override checkhash() in that case)
1989 1989 flags - the known flags to set on the revision
1990 1990 deltacomputer - an optional deltacomputer instance shared between
1991 1991 multiple calls
1992 1992 """
1993 1993 if link == nullrev:
1994 1994 raise error.RevlogError(
1995 1995 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1996 1996 )
1997 1997
1998 1998 if sidedata is None:
1999 1999 sidedata = {}
2000 2000 elif not self.hassidedata:
2001 2001 raise error.ProgrammingError(
2002 2002 _(b"trying to add sidedata to a revlog that does not support it")
2003 2003 )
2004 2004
2005 2005 if flags:
2006 2006 node = node or self.hash(text, p1, p2)
2007 2007
2008 2008 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2009 2009
2010 2010 # If the flag processor modifies the revision data, ignore any provided
2011 2011 # cachedelta.
2012 2012 if rawtext != text:
2013 2013 cachedelta = None
2014 2014
2015 2015 if len(rawtext) > _maxentrysize:
2016 2016 raise error.RevlogError(
2017 2017 _(
2018 2018 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2019 2019 )
2020 2020 % (self.indexfile, len(rawtext))
2021 2021 )
2022 2022
2023 2023 node = node or self.hash(rawtext, p1, p2)
2024 2024 rev = self.index.get_rev(node)
2025 2025 if rev is not None:
2026 2026 return rev
2027 2027
2028 2028 if validatehash:
2029 2029 self.checkhash(rawtext, node, p1=p1, p2=p2)
2030 2030
2031 2031 return self.addrawrevision(
2032 2032 rawtext,
2033 2033 transaction,
2034 2034 link,
2035 2035 p1,
2036 2036 p2,
2037 2037 node,
2038 2038 flags,
2039 2039 cachedelta=cachedelta,
2040 2040 deltacomputer=deltacomputer,
2041 2041 sidedata=sidedata,
2042 2042 )
2043 2043
2044 2044 def addrawrevision(
2045 2045 self,
2046 2046 rawtext,
2047 2047 transaction,
2048 2048 link,
2049 2049 p1,
2050 2050 p2,
2051 2051 node,
2052 2052 flags,
2053 2053 cachedelta=None,
2054 2054 deltacomputer=None,
2055 2055 sidedata=None,
2056 2056 ):
2057 2057 """add a raw revision with known flags, node and parents
2058 2058 useful when reusing a revision not stored in this revlog (ex: received
2059 2059 over wire, or read from an external bundle).
2060 2060 """
2061 2061 dfh = None
2062 2062 if not self._inline:
2063 2063 dfh = self._datafp(b"a+")
2064 2064 ifh = self._indexfp(b"a+")
2065 2065 try:
2066 2066 return self._addrevision(
2067 2067 node,
2068 2068 rawtext,
2069 2069 transaction,
2070 2070 link,
2071 2071 p1,
2072 2072 p2,
2073 2073 flags,
2074 2074 cachedelta,
2075 2075 ifh,
2076 2076 dfh,
2077 2077 deltacomputer=deltacomputer,
2078 2078 sidedata=sidedata,
2079 2079 )
2080 2080 finally:
2081 2081 if dfh:
2082 2082 dfh.close()
2083 2083 ifh.close()
2084 2084
2085 2085 def compress(self, data):
2086 2086 """Generate a possibly-compressed representation of data."""
2087 2087 if not data:
2088 2088 return b'', data
2089 2089
2090 2090 compressed = self._compressor.compress(data)
2091 2091
2092 2092 if compressed:
2093 2093 # The revlog compressor added the header in the returned data.
2094 2094 return b'', compressed
2095 2095
2096 2096 if data[0:1] == b'\0':
2097 2097 return b'', data
2098 2098 return b'u', data
2099 2099
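    # A hedged illustration of the convention above: the returned 2-tuple is
    # (header, payload). b'' means the payload already carries a compressor
    # header (or starts with \0), while b'u' marks data stored verbatim.
    # E.g. compress(b'some text') may yield (b'u', b'some text') when the
    # compressor reports no worthwhile gain.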
2100 2100 def decompress(self, data):
2101 2101 """Decompress a revlog chunk.
2102 2102
2103 2103 The chunk is expected to begin with a header identifying the
2104 2104 format type so it can be routed to an appropriate decompressor.
2105 2105 """
2106 2106 if not data:
2107 2107 return data
2108 2108
2109 2109 # Revlogs are read much more frequently than they are written and many
2110 2110 # chunks only take microseconds to decompress, so performance is
2111 2111 # important here.
2112 2112 #
2113 2113 # We can make a few assumptions about revlogs:
2114 2114 #
2115 2115 # 1) the majority of chunks will be compressed (as opposed to inline
2116 2116 # raw data).
2117 2117 # 2) decompressing *any* data will likely be at least 10x slower than
2118 2118 # returning raw inline data.
2119 2119 # 3) we want to prioritize common and officially supported compression
2120 2120 # engines
2121 2121 #
2122 2122 # It follows that we want to optimize for "decompress compressed data
2123 2123 # when encoded with common and officially supported compression engines"
2124 2124 # case over "raw data" and "data encoded by less common or non-official
2125 2125 # compression engines." That is why we have the inline lookup first
2126 2126 # followed by the compengines lookup.
2127 2127 #
2128 2128 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2129 2129 # compressed chunks. And this matters for changelog and manifest reads.
2130 2130 t = data[0:1]
2131 2131
2132 2132 if t == b'x':
2133 2133 try:
2134 2134 return _zlibdecompress(data)
2135 2135 except zlib.error as e:
2136 2136 raise error.RevlogError(
2137 2137 _(b'revlog decompress error: %s')
2138 2138 % stringutil.forcebytestr(e)
2139 2139 )
2140 2140 # '\0' is more common than 'u' so it goes first.
2141 2141 elif t == b'\0':
2142 2142 return data
2143 2143 elif t == b'u':
2144 2144 return util.buffer(data, 1)
2145 2145
2146 2146 try:
2147 2147 compressor = self._decompressors[t]
2148 2148 except KeyError:
2149 2149 try:
2150 2150 engine = util.compengines.forrevlogheader(t)
2151 2151 compressor = engine.revlogcompressor(self._compengineopts)
2152 2152 self._decompressors[t] = compressor
2153 2153 except KeyError:
2154 2154 raise error.RevlogError(
2155 2155 _(b'unknown compression type %s') % binascii.hexlify(t)
2156 2156 )
2157 2157
2158 2158 return compressor.decompress(data)
2159 2159
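    # Header-byte routing in the code above, stated as a hedged summary:
    #   b'x'   -> zlib-compressed chunk
    #   b'\0'  -> raw data stored verbatim (the common raw case)
    #   b'u'   -> raw data behind an explicit "uncompressed" marker byte
    #   other  -> resolved through util.compengines by revlog header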
2160 2160 def _addrevision(
2161 2161 self,
2162 2162 node,
2163 2163 rawtext,
2164 2164 transaction,
2165 2165 link,
2166 2166 p1,
2167 2167 p2,
2168 2168 flags,
2169 2169 cachedelta,
2170 2170 ifh,
2171 2171 dfh,
2172 2172 alwayscache=False,
2173 2173 deltacomputer=None,
2174 2174 sidedata=None,
2175 2175 ):
2176 2176 """internal function to add revisions to the log
2177 2177
2178 2178 see addrevision for argument descriptions.
2179 2179
2180 2180 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2181 2181
2182 2182 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2183 2183 be used.
2184 2184
2185 2185 invariants:
2186 2186 - rawtext is optional (can be None); if not set, cachedelta must be set.
2187 2187 if both are set, they must correspond to each other.
2188 2188 """
2189 2189 if node == self.nullid:
2190 2190 raise error.RevlogError(
2191 2191 _(b"%s: attempt to add null revision") % self.indexfile
2192 2192 )
2193 2193 if (
2194 2194 node == self.nodeconstants.wdirid
2195 2195 or node in self.nodeconstants.wdirfilenodeids
2196 2196 ):
2197 2197 raise error.RevlogError(
2198 2198 _(b"%s: attempt to add wdir revision") % self.indexfile
2199 2199 )
2200 2200
2201 2201 if self._inline:
2202 2202 fh = ifh
2203 2203 else:
2204 2204 fh = dfh
2205 2205
2206 2206 btext = [rawtext]
2207 2207
2208 2208 curr = len(self)
2209 2209 prev = curr - 1
2210 2210
2211 2211 offset = self._get_data_offset(prev)
2212 2212
2213 2213 if self._concurrencychecker:
2214 2214 if self._inline:
2215 2215 # offset is "as if" it were in the .d file, so we need to add on
2216 2216 # the size of the entry metadata.
2217 2217 self._concurrencychecker(
2218 2218 ifh, self.indexfile, offset + curr * self.index.entry_size
2219 2219 )
2220 2220 else:
2221 2221 # Entries in the .i are a consistent size.
2222 2222 self._concurrencychecker(
2223 2223 ifh, self.indexfile, curr * self.index.entry_size
2224 2224 )
2225 2225 self._concurrencychecker(dfh, self.datafile, offset)
2226 2226
2227 2227 p1r, p2r = self.rev(p1), self.rev(p2)
2228 2228
2229 2229 # full versions are inserted when the needed deltas
2230 2230 # become comparable to the uncompressed text
2231 2231 if rawtext is None:
2232 2232 # need rawtext size, before changed by flag processors, which is
2233 2233 # the non-raw size. use revlog explicitly to avoid filelog's extra
2234 2234 # logic that might remove metadata size.
2235 2235 textlen = mdiff.patchedsize(
2236 2236 revlog.size(self, cachedelta[0]), cachedelta[1]
2237 2237 )
2238 2238 else:
2239 2239 textlen = len(rawtext)
2240 2240
2241 2241 if deltacomputer is None:
2242 2242 deltacomputer = deltautil.deltacomputer(self)
2243 2243
2244 2244 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2245 2245
2246 2246 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2247 2247
2248 2248 if sidedata and self.version & 0xFFFF == REVLOGV2:
2249 2249 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2250 2250 sidedata_offset = offset + deltainfo.deltalen
2251 2251 else:
2252 2252 serialized_sidedata = b""
2253 2253 # Don't store the offset if the sidedata is empty, that way
2254 2254 # we can easily detect empty sidedata and it will be no different
2255 2255 # from sidedata we manually add.
2256 2256 sidedata_offset = 0
2257 2257
2258 2258 e = (
2259 2259 offset_type(offset, flags),
2260 2260 deltainfo.deltalen,
2261 2261 textlen,
2262 2262 deltainfo.base,
2263 2263 link,
2264 2264 p1r,
2265 2265 p2r,
2266 2266 node,
2267 2267 sidedata_offset,
2268 2268 len(serialized_sidedata),
2269 2269 )
2270 2270
2271 2271 if self.version & 0xFFFF != REVLOGV2:
2272 2272 e = e[:8]
2273 2273
2274 2274 self.index.append(e)
2275 2275 entry = self.index.entry_binary(curr)
2276 2276 if curr == 0:
2277 2277 header = self.index.pack_header(self.version)
2278 2278 entry = header + entry
2279 2279 self._writeentry(
2280 2280 transaction,
2281 2281 ifh,
2282 2282 dfh,
2283 2283 entry,
2284 2284 deltainfo.data,
2285 2285 link,
2286 2286 offset,
2287 2287 serialized_sidedata,
2288 2288 )
2289 2289
2290 2290 rawtext = btext[0]
2291 2291
2292 2292 if alwayscache and rawtext is None:
2293 2293 rawtext = deltacomputer.buildtext(revinfo, fh)
2294 2294
2295 2295 if type(rawtext) == bytes: # only accept immutable objects
2296 2296 self._revisioncache = (node, curr, rawtext)
2297 2297 self._chainbasecache[curr] = deltainfo.chainbase
2298 2298 return curr
2299 2299
2300 2300 def _get_data_offset(self, prev):
2301 2301 """Returns the current offset in the (in-transaction) data file.
2302 2302 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2303 2303 file to store that information: since sidedata can be rewritten to the
2304 2304 end of the data file within a transaction, you can have cases where, for
2305 2305 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2306 2306 to `n - 1`'s sidedata being written after `n`'s data.
2307 2307
2308 2308 TODO cache this in a docket file before getting out of experimental."""
2309 2309 if self.version & 0xFFFF != REVLOGV2:
2310 2310 return self.end(prev)
2311 2311
2312 2312 offset = 0
2313 2313 for rev, entry in enumerate(self.index):
2314 2314 sidedata_end = entry[8] + entry[9]
2315 2315 # Sidedata for a previous rev has potentially been written after
2316 2316 # this rev's end, so take the max.
2317 2317 offset = max(self.end(rev), offset, sidedata_end)
2318 2318 return offset
2319 2319
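    # A worked example of the max() scan above (hypothetical layout): if
    # rev 1's sidedata was rewritten after rev 2 was appended, the data file
    # may look like [rev0][rev1][rev2][rev1 sidedata]; end(2) alone would
    # undercount, but max(self.end(rev), entry[8] + entry[9]) over all revs
    # lands past the rewritten sidedata.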
2320 2320 def _writeentry(
2321 2321 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2322 2322 ):
2323 2323 # Files opened in a+ mode have inconsistent behavior on various
2324 2324 # platforms. Windows requires that a file positioning call be made
2325 2325 # when the file handle transitions between reads and writes. See
2326 2326 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2327 2327 # platforms, Python or the platform itself can be buggy. Some versions
2328 2328 # of Solaris have been observed to not append at the end of the file
2329 2329 # if the file was seeked to before the end. See issue4943 for more.
2330 2330 #
2331 2331 # We work around this issue by inserting a seek() before writing.
2332 2332 # Note: This is likely not necessary on Python 3. However, because
2333 2333 # the file handle is reused for reads and may be seeked there, we need
2334 2334 # to be careful before changing this.
2335 2335 ifh.seek(0, os.SEEK_END)
2336 2336 if dfh:
2337 2337 dfh.seek(0, os.SEEK_END)
2338 2338
2339 2339 curr = len(self) - 1
2340 2340 if not self._inline:
2341 2341 transaction.add(self.datafile, offset)
2342 2342 transaction.add(self.indexfile, curr * len(entry))
2343 2343 if data[0]:
2344 2344 dfh.write(data[0])
2345 2345 dfh.write(data[1])
2346 2346 if sidedata:
2347 2347 dfh.write(sidedata)
2348 2348 ifh.write(entry)
2349 2349 else:
2350 2350 offset += curr * self.index.entry_size
2351 2351 transaction.add(self.indexfile, offset)
2352 2352 ifh.write(entry)
2353 2353 ifh.write(data[0])
2354 2354 ifh.write(data[1])
2355 2355 if sidedata:
2356 2356 ifh.write(sidedata)
2357 2357 self._enforceinlinesize(transaction, ifh)
2358 2358 nodemaputil.setup_persistent_nodemap(transaction, self)
2359 2359
2360 2360 def addgroup(
2361 2361 self,
2362 2362 deltas,
2363 2363 linkmapper,
2364 2364 transaction,
2365 2365 alwayscache=False,
2366 2366 addrevisioncb=None,
2367 2367 duplicaterevisioncb=None,
2368 2368 ):
2369 2369 """
2370 2370 add a delta group
2371 2371
2372 2372 Given a set of deltas, add them to the revision log. The
2373 2373 first delta is against its parent, which should be in our
2374 2374 log; the rest are against the previous delta.
2375 2375
2376 2376 If ``addrevisioncb`` is defined, it will be called with arguments of
2377 2377 this revlog and the node that was added.
2378 2378 """
2379 2379
2380 2380 if self._writinghandles:
2381 2381 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2382 2382
2383 2383 r = len(self)
2384 2384 end = 0
2385 2385 if r:
2386 2386 end = self.end(r - 1)
2387 2387 ifh = self._indexfp(b"a+")
2388 2388 isize = r * self.index.entry_size
2389 2389 if self._inline:
2390 2390 transaction.add(self.indexfile, end + isize)
2391 2391 dfh = None
2392 2392 else:
2393 2393 transaction.add(self.indexfile, isize)
2394 2394 transaction.add(self.datafile, end)
2395 2395 dfh = self._datafp(b"a+")
2396 2396
2397 2397 def flush():
2398 2398 if dfh:
2399 2399 dfh.flush()
2400 2400 ifh.flush()
2401 2401
2402 2402 self._writinghandles = (ifh, dfh)
2403 2403 empty = True
2404 2404
2405 2405 try:
2406 2406 deltacomputer = deltautil.deltacomputer(self)
2407 2407 # loop through our set of deltas
2408 2408 for data in deltas:
2409 2409 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2410 2410 link = linkmapper(linknode)
2411 2411 flags = flags or REVIDX_DEFAULT_FLAGS
2412 2412
2413 2413 rev = self.index.get_rev(node)
2414 2414 if rev is not None:
2415 2415 # this can happen if two branches make the same change
2416 2416 self._nodeduplicatecallback(transaction, rev)
2417 2417 if duplicaterevisioncb:
2418 2418 duplicaterevisioncb(self, rev)
2419 2419 empty = False
2420 2420 continue
2421 2421
2422 2422 for p in (p1, p2):
2423 2423 if not self.index.has_node(p):
2424 2424 raise error.LookupError(
2425 2425 p, self.indexfile, _(b'unknown parent')
2426 2426 )
2427 2427
2428 2428 if not self.index.has_node(deltabase):
2429 2429 raise error.LookupError(
2430 2430 deltabase, self.indexfile, _(b'unknown delta base')
2431 2431 )
2432 2432
2433 2433 baserev = self.rev(deltabase)
2434 2434
2435 2435 if baserev != nullrev and self.iscensored(baserev):
2436 2436 # if base is censored, delta must be full replacement in a
2437 2437 # single patch operation
2438 2438 hlen = struct.calcsize(b">lll")
2439 2439 oldlen = self.rawsize(baserev)
2440 2440 newlen = len(delta) - hlen
2441 2441 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2442 2442 raise error.CensoredBaseError(
2443 2443 self.indexfile, self.node(baserev)
2444 2444 )
2445 2445
2446 2446 if not flags and self._peek_iscensored(baserev, delta, flush):
2447 2447 flags |= REVIDX_ISCENSORED
2448 2448
2449 2449 # We assume consumers of addrevisioncb will want to retrieve
2450 2450 # the added revision, which will require a call to
2451 2451 # revision(). revision() will fast path if there is a cache
2452 2452 # hit. So, we tell _addrevision() to always cache in this case.
2453 2453 # We're only using addgroup() in the context of changegroup
2454 2454 # generation so the revision data can always be handled as raw
2455 2455 # by the flagprocessor.
2456 2456 rev = self._addrevision(
2457 2457 node,
2458 2458 None,
2459 2459 transaction,
2460 2460 link,
2461 2461 p1,
2462 2462 p2,
2463 2463 flags,
2464 2464 (baserev, delta),
2465 2465 ifh,
2466 2466 dfh,
2467 2467 alwayscache=alwayscache,
2468 2468 deltacomputer=deltacomputer,
2469 2469 sidedata=sidedata,
2470 2470 )
2471 2471
2472 2472 if addrevisioncb:
2473 2473 addrevisioncb(self, rev)
2474 2474 empty = False
2475 2475
2476 2476 if not dfh and not self._inline:
2477 2477 # addrevision switched from inline to conventional
2478 2478 # reopen the index
2479 2479 ifh.close()
2480 2480 dfh = self._datafp(b"a+")
2481 2481 ifh = self._indexfp(b"a+")
2482 2482 self._writinghandles = (ifh, dfh)
2483 2483 finally:
2484 2484 self._writinghandles = None
2485 2485
2486 2486 if dfh:
2487 2487 dfh.close()
2488 2488 ifh.close()
2489 2489 return not empty
2490 2490
2491 2491 def iscensored(self, rev):
2492 2492 """Check if a file revision is censored."""
2493 2493 if not self._censorable:
2494 2494 return False
2495 2495
2496 2496 return self.flags(rev) & REVIDX_ISCENSORED
2497 2497
2498 2498 def _peek_iscensored(self, baserev, delta, flush):
2499 2499 """Quickly check if a delta produces a censored revision."""
2500 2500 if not self._censorable:
2501 2501 return False
2502 2502
2503 2503 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2504 2504
2505 2505 def getstrippoint(self, minlink):
2506 2506 """find the minimum rev that must be stripped to strip the linkrev
2507 2507
2508 2508 Returns a tuple containing the minimum rev and a set of all revs that
2509 2509 have linkrevs that will be broken by this strip.
2510 2510 """
2511 2511 return storageutil.resolvestripinfo(
2512 2512 minlink,
2513 2513 len(self) - 1,
2514 2514 self.headrevs(),
2515 2515 self.linkrev,
2516 2516 self.parentrevs,
2517 2517 )
2518 2518
2519 2519 def strip(self, minlink, transaction):
2520 2520 """truncate the revlog on the first revision with a linkrev >= minlink
2521 2521
2522 2522 This function is called when we're stripping revision minlink and
2523 2523 its descendants from the repository.
2524 2524
2525 2525 We have to remove all revisions with linkrev >= minlink, because
2526 2526 the equivalent changelog revisions will be renumbered after the
2527 2527 strip.
2528 2528
2529 2529 So we truncate the revlog on the first of these revisions, and
2530 2530 trust that the caller has saved the revisions that shouldn't be
2531 2531 removed and that it'll re-add them after this truncation.
2532 2532 """
2533 2533 if len(self) == 0:
2534 2534 return
2535 2535
2536 2536 rev, _ = self.getstrippoint(minlink)
2537 2537 if rev == len(self):
2538 2538 return
2539 2539
2540 2540 # first truncate the files on disk
2541 2541 end = self.start(rev)
2542 2542 if not self._inline:
2543 2543 transaction.add(self.datafile, end)
2544 2544 end = rev * self.index.entry_size
2545 2545 else:
2546 2546 end += rev * self.index.entry_size
2547 2547
2548 2548 transaction.add(self.indexfile, end)
2549 2549
2550 2550 # then reset internal state in memory to forget those revisions
2551 2551 self._revisioncache = None
2552 2552 self._chaininfocache = util.lrucachedict(500)
2553 2553 self._chunkclear()
2554 2554
2555 2555 del self.index[rev:-1]
2556 2556
2557 2557 def checksize(self):
2558 2558 """Check size of index and data files
2559 2559
2560 2560 return a (dd, di) tuple.
2561 2561 - dd: extra bytes for the "data" file
2562 2562 - di: extra bytes for the "index" file
2563 2563
2564 2564 A healthy revlog will return (0, 0).
2565 2565 """
2566 2566 expected = 0
2567 2567 if len(self):
2568 2568 expected = max(0, self.end(len(self) - 1))
2569 2569
2570 2570 try:
2571 2571 with self._datafp() as f:
2572 2572 f.seek(0, io.SEEK_END)
2573 2573 actual = f.tell()
2574 2574 dd = actual - expected
2575 2575 except IOError as inst:
2576 2576 if inst.errno != errno.ENOENT:
2577 2577 raise
2578 2578 dd = 0
2579 2579
2580 2580 try:
2581 2581 f = self.opener(self.indexfile)
2582 2582 f.seek(0, io.SEEK_END)
2583 2583 actual = f.tell()
2584 2584 f.close()
2585 2585 s = self.index.entry_size
2586 2586 i = max(0, actual // s)
2587 2587 di = actual - (i * s)
2588 2588 if self._inline:
2589 2589 databytes = 0
2590 2590 for r in self:
2591 2591 databytes += max(0, self.length(r))
2592 2592 dd = 0
2593 2593 di = actual - len(self) * s - databytes
2594 2594 except IOError as inst:
2595 2595 if inst.errno != errno.ENOENT:
2596 2596 raise
2597 2597 di = 0
2598 2598
2599 2599 return (dd, di)
2600 2600
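    # A hedged reading of the tuple above: dd/di count trailing bytes not
    # accounted for by the index, so (0, 0) is a healthy revlog, (5, 0)
    # means 5 stray bytes at the end of the data file, and a negative value
    # means a file is shorter than the index claims.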
2601 2601 def files(self):
2602 2602 res = [self.indexfile]
2603 2603 if not self._inline:
2604 2604 res.append(self.datafile)
2605 2605 return res
2606 2606
2607 2607 def emitrevisions(
2608 2608 self,
2609 2609 nodes,
2610 2610 nodesorder=None,
2611 2611 revisiondata=False,
2612 2612 assumehaveparentrevisions=False,
2613 2613 deltamode=repository.CG_DELTAMODE_STD,
2614 2614 sidedata_helpers=None,
2615 2615 ):
2616 2616 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2617 2617 raise error.ProgrammingError(
2618 2618 b'unhandled value for nodesorder: %s' % nodesorder
2619 2619 )
2620 2620
2621 2621 if nodesorder is None and not self._generaldelta:
2622 2622 nodesorder = b'storage'
2623 2623
2624 2624 if (
2625 2625 not self._storedeltachains
2626 2626 and deltamode != repository.CG_DELTAMODE_PREV
2627 2627 ):
2628 2628 deltamode = repository.CG_DELTAMODE_FULL
2629 2629
2630 2630 return storageutil.emitrevisions(
2631 2631 self,
2632 2632 nodes,
2633 2633 nodesorder,
2634 2634 revlogrevisiondelta,
2635 2635 deltaparentfn=self.deltaparent,
2636 2636 candeltafn=self.candelta,
2637 2637 rawsizefn=self.rawsize,
2638 2638 revdifffn=self.revdiff,
2639 2639 flagsfn=self.flags,
2640 2640 deltamode=deltamode,
2641 2641 revisiondata=revisiondata,
2642 2642 assumehaveparentrevisions=assumehaveparentrevisions,
2643 2643 sidedata_helpers=sidedata_helpers,
2644 2644 )
2645 2645
2646 2646 DELTAREUSEALWAYS = b'always'
2647 2647 DELTAREUSESAMEREVS = b'samerevs'
2648 2648 DELTAREUSENEVER = b'never'
2649 2649
2650 2650 DELTAREUSEFULLADD = b'fulladd'
2651 2651
2652 2652 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2653 2653
2654 2654 def clone(
2655 2655 self,
2656 2656 tr,
2657 2657 destrevlog,
2658 2658 addrevisioncb=None,
2659 2659 deltareuse=DELTAREUSESAMEREVS,
2660 2660 forcedeltabothparents=None,
2661 2661 sidedatacompanion=None,
2662 2662 ):
2663 2663 """Copy this revlog to another, possibly with format changes.
2664 2664
2665 2665 The destination revlog will contain the same revisions and nodes.
2666 2666 However, it may not be bit-for-bit identical due to e.g. delta encoding
2667 2667 differences.
2668 2668
2669 2669 The ``deltareuse`` argument controls how deltas from the existing revlog
2670 2670 are preserved in the destination revlog. The argument can have the
2671 2671 following values:
2672 2672
2673 2673 DELTAREUSEALWAYS
2674 2674 Deltas will always be reused (if possible), even if the destination
2675 2675 revlog would not select the same revisions for the delta. This is the
2676 2676 fastest mode of operation.
2677 2677 DELTAREUSESAMEREVS
2678 2678 Deltas will be reused if the destination revlog would pick the same
2679 2679 revisions for the delta. This mode strikes a balance between speed
2680 2680 and optimization.
2681 2681 DELTAREUSENEVER
2682 2682 Deltas will never be reused. This is the slowest mode of execution.
2683 2683 This mode can be used to recompute deltas (e.g. if the diff/delta
2684 2684 algorithm changes).
2685 2685 DELTAREUSEFULLADD
2686 2686 Revisions will be re-added as if they were new content. This is
2687 2687 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2688 2688 e.g. large file detection and handling.
2689 2689
2690 2690 Delta computation can be slow, so the choice of delta reuse policy can
2691 2691 significantly affect run time.
2692 2692
2693 2693 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2694 2694 two extremes. Deltas will be reused if they are appropriate. But if a
2695 2695 better delta base could be chosen, it will be. This means if you
2696 2696 are converting a non-generaldelta revlog to a generaldelta revlog,
2697 2697 deltas will be recomputed if the delta's parent isn't a parent of the
2698 2698 revision.
2699 2699
2700 2700 In addition to the delta policy, the ``forcedeltabothparents``
 2701 2701     argument controls whether to force computing deltas against both parents
 2702 2702     for merges. If None, the destination revlog's current setting is kept.
2703 2703
 2704 2704     If not None, `sidedatacompanion` is a callable that accepts two
2705 2705 arguments:
2706 2706
2707 2707 (srcrevlog, rev)
2708 2708
 2709 2709     and returns a quintet that controls changes to sidedata content from the
2710 2710 old revision to the new clone result:
2711 2711
2712 2712 (dropall, filterout, update, new_flags, dropped_flags)
2713 2713
2714 2714 * if `dropall` is True, all sidedata should be dropped
2715 2715 * `filterout` is a set of sidedata keys that should be dropped
 2716 2716     * `update` is a mapping of additional/new key -> value
 2717 2717     * `new_flags` is a bitfield of new flags that the revision should get
 2718 2718     * `dropped_flags` is a bitfield of flags the revision should no longer have (see the sketch after this docstring)
2719 2719 """
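A minimal sketch of a companion obeying the quintet contract above; ``SD_EXAMPLE`` and ``EXAMPLE_FLAG`` are placeholders for illustration, not real Mercurial constants:

    # Hypothetical companion: drop one sidedata key and clear one flag bit.
    SD_EXAMPLE = 42          # placeholder sidedata key
    EXAMPLE_FLAG = 1 << 14   # placeholder flag bit

    def example_companion(srcrevlog, rev):
        dropall = False               # keep the other sidedata entries
        filterout = {SD_EXAMPLE}      # keys to drop
        update = {}                   # no new key -> value pairs
        new_flags = 0                 # nothing to add
        dropped_flags = EXAMPLE_FLAG  # bits the revision should lose
        return dropall, filterout, update, new_flags, dropped_flags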
2720 2720 if deltareuse not in self.DELTAREUSEALL:
2721 2721 raise ValueError(
2722 2722 _(b'value for deltareuse invalid: %s') % deltareuse
2723 2723 )
2724 2724
2725 2725 if len(destrevlog):
2726 2726 raise ValueError(_(b'destination revlog is not empty'))
2727 2727
2728 2728 if getattr(self, 'filteredrevs', None):
2729 2729 raise ValueError(_(b'source revlog has filtered revisions'))
2730 2730 if getattr(destrevlog, 'filteredrevs', None):
2731 2731 raise ValueError(_(b'destination revlog has filtered revisions'))
2732 2732
2733 2733 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2734 2734 # if possible.
2735 2735 oldlazydelta = destrevlog._lazydelta
2736 2736 oldlazydeltabase = destrevlog._lazydeltabase
2737 2737 oldamd = destrevlog._deltabothparents
2738 2738
2739 2739 try:
2740 2740 if deltareuse == self.DELTAREUSEALWAYS:
2741 2741 destrevlog._lazydeltabase = True
2742 2742 destrevlog._lazydelta = True
2743 2743 elif deltareuse == self.DELTAREUSESAMEREVS:
2744 2744 destrevlog._lazydeltabase = False
2745 2745 destrevlog._lazydelta = True
2746 2746 elif deltareuse == self.DELTAREUSENEVER:
2747 2747 destrevlog._lazydeltabase = False
2748 2748 destrevlog._lazydelta = False
2749 2749
2750 2750 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2751 2751
2752 2752 self._clone(
2753 2753 tr,
2754 2754 destrevlog,
2755 2755 addrevisioncb,
2756 2756 deltareuse,
2757 2757 forcedeltabothparents,
2758 2758 sidedatacompanion,
2759 2759 )
2760 2760
2761 2761 finally:
2762 2762 destrevlog._lazydelta = oldlazydelta
2763 2763 destrevlog._lazydeltabase = oldlazydeltabase
2764 2764 destrevlog._deltabothparents = oldamd
2765 2765
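For instance, forcing every delta to be recomputed during a copy could look like this (``tr``, ``src`` and ``dest`` are assumed to already exist, with ``dest`` empty):

    # Hypothetical call recomputing all deltas while cloning.
    src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)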
2766 2766 def _clone(
2767 2767 self,
2768 2768 tr,
2769 2769 destrevlog,
2770 2770 addrevisioncb,
2771 2771 deltareuse,
2772 2772 forcedeltabothparents,
2773 2773 sidedatacompanion,
2774 2774 ):
2775 2775 """perform the core duty of `revlog.clone` after parameter processing"""
2776 2776 deltacomputer = deltautil.deltacomputer(destrevlog)
2777 2777 index = self.index
2778 2778 for rev in self:
2779 2779 entry = index[rev]
2780 2780
2781 2781 # Some classes override linkrev to take filtered revs into
2782 2782 # account. Use raw entry from index.
2783 2783 flags = entry[0] & 0xFFFF
2784 2784 linkrev = entry[4]
2785 2785 p1 = index[entry[5]][7]
2786 2786 p2 = index[entry[6]][7]
2787 2787 node = entry[7]
2788 2788
2789 2789 sidedataactions = (False, [], {}, 0, 0)
2790 2790 if sidedatacompanion is not None:
2791 2791 sidedataactions = sidedatacompanion(self, rev)
2792 2792
2793 2793 # (Possibly) reuse the delta from the revlog if allowed and
2794 2794 # the revlog chunk is a delta.
2795 2795 cachedelta = None
2796 2796 rawtext = None
2797 2797 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2798 2798 dropall = sidedataactions[0]
2799 2799 filterout = sidedataactions[1]
2800 2800 update = sidedataactions[2]
2801 2801 new_flags = sidedataactions[3]
2802 2802 dropped_flags = sidedataactions[4]
2803 2803 text, sidedata = self._revisiondata(rev)
2804 2804 if dropall:
2805 2805 sidedata = {}
2806 2806 for key in filterout:
2807 2807 sidedata.pop(key, None)
2808 2808 sidedata.update(update)
2809 2809 if not sidedata:
2810 2810 sidedata = None
2811 2811
2812 2812 flags |= new_flags
2813 2813 flags &= ~dropped_flags
2814 2814
2815 2815 destrevlog.addrevision(
2816 2816 text,
2817 2817 tr,
2818 2818 linkrev,
2819 2819 p1,
2820 2820 p2,
2821 2821 cachedelta=cachedelta,
2822 2822 node=node,
2823 2823 flags=flags,
2824 2824 deltacomputer=deltacomputer,
2825 2825 sidedata=sidedata,
2826 2826 )
2827 2827 else:
2828 2828 if destrevlog._lazydelta:
2829 2829 dp = self.deltaparent(rev)
2830 2830 if dp != nullrev:
2831 2831 cachedelta = (dp, bytes(self._chunk(rev)))
2832 2832
2833 2833 if not cachedelta:
2834 2834 rawtext = self.rawdata(rev)
2835 2835
2836 2836 ifh = destrevlog.opener(
2837 2837 destrevlog.indexfile, b'a+', checkambig=False
2838 2838 )
2839 2839 dfh = None
2840 2840 if not destrevlog._inline:
2841 2841 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2842 2842 try:
2843 2843 destrevlog._addrevision(
2844 2844 node,
2845 2845 rawtext,
2846 2846 tr,
2847 2847 linkrev,
2848 2848 p1,
2849 2849 p2,
2850 2850 flags,
2851 2851 cachedelta,
2852 2852 ifh,
2853 2853 dfh,
2854 2854 deltacomputer=deltacomputer,
2855 2855 )
2856 2856 finally:
2857 2857 if dfh:
2858 2858 dfh.close()
2859 2859 ifh.close()
2860 2860
2861 2861 if addrevisioncb:
2862 2862 addrevisioncb(self, rev, node)
2863 2863
2864 2864 def censorrevision(self, tr, censornode, tombstone=b''):
2865 2865 if (self.version & 0xFFFF) == REVLOGV0:
2866 2866 raise error.RevlogError(
2867 2867 _(b'cannot censor with version %d revlogs') % self.version
2868 2868 )
2869 2869
2870 2870 censorrev = self.rev(censornode)
2871 2871 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2872 2872
2873 2873 if len(tombstone) > self.rawsize(censorrev):
2874 2874 raise error.Abort(
2875 2875 _(b'censor tombstone must be no longer than censored data')
2876 2876 )
2877 2877
2878 2878 # Rewriting the revlog in place is hard. Our strategy for censoring is
2879 2879 # to create a new revlog, copy all revisions to it, then replace the
2880 2880 # revlogs on transaction close.
2881 2881
2882 2882 newindexfile = self.indexfile + b'.tmpcensored'
2883 2883 newdatafile = self.datafile + b'.tmpcensored'
2884 2884
2885 2885 # This is a bit dangerous. We could easily have a mismatch of state.
2886 2886 newrl = revlog(
2887 2887 self.opener,
2888 2888 target=self.target,
2889 2889 indexfile=newindexfile,
2890 2890 datafile=newdatafile,
2891 2891 censorable=True,
2892 2892 )
2893 2893 newrl.version = self.version
2894 2894 newrl._generaldelta = self._generaldelta
2895 2895 newrl._parse_index = self._parse_index
2896 2896
2897 2897 for rev in self.revs():
2898 2898 node = self.node(rev)
2899 2899 p1, p2 = self.parents(node)
2900 2900
2901 2901 if rev == censorrev:
2902 2902 newrl.addrawrevision(
2903 2903 tombstone,
2904 2904 tr,
2905 2905 self.linkrev(censorrev),
2906 2906 p1,
2907 2907 p2,
2908 2908 censornode,
2909 2909 REVIDX_ISCENSORED,
2910 2910 )
2911 2911
2912 2912 if newrl.deltaparent(rev) != nullrev:
2913 2913 raise error.Abort(
2914 2914 _(
2915 2915 b'censored revision stored as delta; '
2916 2916 b'cannot censor'
2917 2917 ),
2918 2918 hint=_(
2919 2919 b'censoring of revlogs is not '
2920 2920 b'fully implemented; please report '
2921 2921 b'this bug'
2922 2922 ),
2923 2923 )
2924 2924 continue
2925 2925
2926 2926 if self.iscensored(rev):
2927 2927 if self.deltaparent(rev) != nullrev:
2928 2928 raise error.Abort(
2929 2929 _(
2930 2930 b'cannot censor due to censored '
2931 2931 b'revision having delta stored'
2932 2932 )
2933 2933 )
2934 2934 rawtext = self._chunk(rev)
2935 2935 else:
2936 2936 rawtext = self.rawdata(rev)
2937 2937
2938 2938 newrl.addrawrevision(
2939 2939 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2940 2940 )
2941 2941
2942 2942 tr.addbackup(self.indexfile, location=b'store')
2943 2943 if not self._inline:
2944 2944 tr.addbackup(self.datafile, location=b'store')
2945 2945
2946 2946 self.opener.rename(newrl.indexfile, self.indexfile)
2947 2947 if not self._inline:
2948 2948 self.opener.rename(newrl.datafile, self.datafile)
2949 2949
2950 2950 self.clearcaches()
2951 2951 self._loadindex()
2952 2952
2953 2953 def verifyintegrity(self, state):
2954 2954 """Verifies the integrity of the revlog.
2955 2955
2956 2956 Yields ``revlogproblem`` instances describing problems that are
2957 2957 found.
2958 2958 """
2959 2959 dd, di = self.checksize()
2960 2960 if dd:
2961 2961 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2962 2962 if di:
2963 2963 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2964 2964
2965 2965 version = self.version & 0xFFFF
2966 2966
2967 2967 # The verifier tells us what version revlog we should be.
2968 2968 if version != state[b'expectedversion']:
2969 2969 yield revlogproblem(
2970 2970 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2971 2971 % (self.indexfile, version, state[b'expectedversion'])
2972 2972 )
2973 2973
2974 2974 state[b'skipread'] = set()
2975 2975 state[b'safe_renamed'] = set()
2976 2976
2977 2977 for rev in self:
2978 2978 node = self.node(rev)
2979 2979
2980 2980 # Verify contents. 4 cases to care about:
2981 2981 #
2982 2982 # common: the most common case
2983 2983 # rename: with a rename
2984 2984 # meta: file content starts with b'\1\n', the metadata
2985 2985 # header defined in filelog.py, but without a rename
2986 2986 # ext: content stored externally
2987 2987 #
2988 2988 # More formally, their differences are shown below:
2989 2989 #
2990 2990 # | common | rename | meta | ext
2991 2991 # -------------------------------------------------------
2992 2992 # flags() | 0 | 0 | 0 | not 0
2993 2993 # renamed() | False | True | False | ?
2994 2994 # rawtext[0:2]=='\1\n'| False | True | True | ?
2995 2995 #
2996 2996 # "rawtext" means the raw text stored in revlog data, which
2997 2997 # could be retrieved by "rawdata(rev)". "text"
2998 2998 # mentioned below is "revision(rev)".
2999 2999 #
3000 3000 # There are 3 different lengths stored physically:
3001 3001 # 1. L1: rawsize, stored in revlog index
3002 3002 # 2. L2: len(rawtext), stored in revlog data
3003 3003 # 3. L3: len(text), stored in revlog data if flags==0, or
3004 3004 # possibly somewhere else if flags!=0
3005 3005 #
3006 3006 # L1 should be equal to L2. L3 could be different from them.
3007 3007 # "text" may or may not affect commit hash depending on flag
3008 3008 # processors (see flagutil.addflagprocessor).
3009 3009 #
3010 3010 # | common | rename | meta | ext
3011 3011 # -------------------------------------------------
3012 3012 # rawsize() | L1 | L1 | L1 | L1
3013 3013 # size() | L1 | L2-LM | L1(*) | L1 (?)
3014 3014 # len(rawtext) | L2 | L2 | L2 | L2
3015 3015 # len(text) | L2 | L2 | L2 | L3
3016 3016 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3017 3017 #
3018 3018 # LM: length of metadata, depending on rawtext
3019 3019 # (*): not ideal, see comment in filelog.size
3020 3020 # (?): could be "- len(meta)" if the resolved content has
3021 3021 # rename metadata
3022 3022 #
3023 3023 # Checks needed to be done:
3024 3024 # 1. length check: L1 == L2, in all cases.
3025 3025 # 2. hash check: depending on flag processor, we may need to
3026 3026 # use either "text" (external), or "rawtext" (in revlog).
3027 3027
3028 3028 try:
3029 3029 skipflags = state.get(b'skipflags', 0)
3030 3030 if skipflags:
3031 3031 skipflags &= self.flags(rev)
3032 3032
3033 3033 _verify_revision(self, skipflags, state, node)
3034 3034
3035 3035 l1 = self.rawsize(rev)
3036 3036 l2 = len(self.rawdata(node))
3037 3037
3038 3038 if l1 != l2:
3039 3039 yield revlogproblem(
3040 3040 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3041 3041 node=node,
3042 3042 )
3043 3043
3044 3044 except error.CensoredNodeError:
3045 3045 if state[b'erroroncensored']:
3046 3046 yield revlogproblem(
3047 3047 error=_(b'censored file data'), node=node
3048 3048 )
3049 3049 state[b'skipread'].add(node)
3050 3050 except Exception as e:
3051 3051 yield revlogproblem(
3052 3052 error=_(b'unpacking %s: %s')
3053 3053 % (short(node), stringutil.forcebytestr(e)),
3054 3054 node=node,
3055 3055 )
3056 3056 state[b'skipread'].add(node)
3057 3057
3058 3058 def storageinfo(
3059 3059 self,
3060 3060 exclusivefiles=False,
3061 3061 sharedfiles=False,
3062 3062 revisionscount=False,
3063 3063 trackedsize=False,
3064 3064 storedsize=False,
3065 3065 ):
3066 3066 d = {}
3067 3067
3068 3068 if exclusivefiles:
3069 3069 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3070 3070 if not self._inline:
3071 3071 d[b'exclusivefiles'].append((self.opener, self.datafile))
3072 3072
3073 3073 if sharedfiles:
3074 3074 d[b'sharedfiles'] = []
3075 3075
3076 3076 if revisionscount:
3077 3077 d[b'revisionscount'] = len(self)
3078 3078
3079 3079 if trackedsize:
3080 3080 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3081 3081
3082 3082 if storedsize:
3083 3083 d[b'storedsize'] = sum(
3084 3084 self.opener.stat(path).st_size for path in self.files()
3085 3085 )
3086 3086
3087 3087 return d
3088 3088
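Each key in the returned dict is opt-in; a hedged usage sketch, assuming ``rl`` is an existing revlog instance:

    # Only the requested metrics are computed and returned.
    info = rl.storageinfo(revisionscount=True, trackedsize=True)
    assert set(info) == {b'revisionscount', b'trackedsize'}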
3089 3089 def rewrite_sidedata(self, helpers, startrev, endrev):
3090 3090 if self.version & 0xFFFF != REVLOGV2:
3091 3091 return
 3092 3092         # inline revlogs are not yet supported because they suffer from an issue when
3093 3093 # rewriting them (since it's not an append-only operation).
3094 3094 # See issue6485.
3095 3095 assert not self._inline
3096 3096 if not helpers[1] and not helpers[2]:
3097 3097 # Nothing to generate or remove
3098 3098 return
3099 3099
3100 3100 new_entries = []
3101 3101 # append the new sidedata
3102 3102 with self._datafp(b'a+') as fp:
3103 3103 # Maybe this bug still exists, see revlog._writeentry
3104 3104 fp.seek(0, os.SEEK_END)
3105 3105 current_offset = fp.tell()
3106 3106 for rev in range(startrev, endrev + 1):
3107 3107 entry = self.index[rev]
3108 new_sidedata = storageutil.run_sidedata_helpers(
3108 new_sidedata, flags = storageutil.run_sidedata_helpers(
3109 3109 store=self,
3110 3110 sidedata_helpers=helpers,
3111 3111 sidedata={},
3112 3112 rev=rev,
3113 3113 )
3114 3114
3115 3115 serialized_sidedata = sidedatautil.serialize_sidedata(
3116 3116 new_sidedata
3117 3117 )
3118 3118 if entry[8] != 0 or entry[9] != 0:
3119 3119 # rewriting entries that already have sidedata is not
3120 3120 # supported yet, because it introduces garbage data in the
3121 3121 # revlog.
3122 3122 msg = b"Rewriting existing sidedata is not supported yet"
3123 3123 raise error.Abort(msg)
3124 entry = entry[:8]
3124
3125 # Apply (potential) flags to add and to remove after running
3126 # the sidedata helpers
3127 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3128 entry = (new_offset_flags,) + entry[1:8]
3125 3129 entry += (current_offset, len(serialized_sidedata))
3126 3130
3127 3131 fp.write(serialized_sidedata)
3128 3132 new_entries.append(entry)
3129 3133 current_offset += len(serialized_sidedata)
3130 3134
3131 3135 # rewrite the new index entries
3132 3136 with self._indexfp(b'w+') as fp:
3133 3137 fp.seek(startrev * self.index.entry_size)
3134 for i, entry in enumerate(new_entries):
3138 for i, e in enumerate(new_entries):
3135 3139 rev = startrev + i
3136 self.index.replace_sidedata_info(rev, entry[8], entry[9])
3140 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3137 3141 packed = self.index.entry_binary(rev)
3138 3142 if rev == 0:
3139 3143 header = self.index.pack_header(self.version)
3140 3144 packed = header + packed
3141 3145 fp.write(packed)
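The flag arithmetic above deserves a worked example: since ``&`` binds tighter than ``|`` in Python, the flags to remove only mask the bits being added, not bits already present on the entry. Values below are arbitrary:

    # Illustrates entry[0] | flags[0] & ~flags[1].
    offset_flags = 0b0101                 # existing entry[0]
    to_add, to_remove = 0b0011, 0b0101    # flags[0], flags[1]
    new_offset_flags = offset_flags | to_add & ~to_remove
    assert new_offset_flags == 0b0111     # bit 0 survives: it was already set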
@@ -1,572 +1,585 b''
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import re
11 11 import struct
12 12
13 13 from ..i18n import _
14 14 from ..node import (
15 15 bin,
16 16 nullrev,
17 17 sha1nodeconstants,
18 18 )
19 19 from .. import (
20 20 dagop,
21 21 error,
22 22 mdiff,
23 23 pycompat,
24 24 )
25 25 from ..interfaces import repository
26 26 from ..revlogutils import sidedata as sidedatamod
27 27 from ..utils import hashutil
28 28
29 29 _nullhash = hashutil.sha1(sha1nodeconstants.nullid)
30 30
31 31 # revision data contains extra metadata not part of the official digest
32 32 # Only used in changegroup >= v4.
33 33 CG_FLAG_SIDEDATA = 1
34 34
35 35
36 36 def hashrevisionsha1(text, p1, p2):
37 37 """Compute the SHA-1 for revision data and its parents.
38 38
39 39 This hash combines both the current file contents and its history
40 40 in a manner that makes it easy to distinguish nodes with the same
41 41 content in the revision graph.
42 42 """
 43 43     # As of now, if one of the parent nodes is null, p2 is null
44 44 if p2 == sha1nodeconstants.nullid:
45 45 # deep copy of a hash is faster than creating one
46 46 s = _nullhash.copy()
47 47 s.update(p1)
48 48 else:
49 49 # none of the parent nodes are nullid
50 50 if p1 < p2:
51 51 a = p1
52 52 b = p2
53 53 else:
54 54 a = p2
55 55 b = p1
56 56 s = hashutil.sha1(a)
57 57 s.update(b)
58 58 s.update(text)
59 59 return s.digest()
60 60
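A standalone sketch of the null-parent fast path above, using only hashlib (values are arbitrary):

    import hashlib

    nullid = b'\0' * 20                    # sha1nodeconstants.nullid
    p1 = hashlib.sha1(b'parent').digest()  # some 20-byte parent node
    text = b'file contents'

    # Since nullid sorts before any other node, both branches reduce to
    # sha1(nullid + p1 + text) when p2 is null.
    node = hashlib.sha1(nullid + p1 + text).digest()
    assert len(node) == 20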
61 61
62 62 METADATA_RE = re.compile(b'\x01\n')
63 63
64 64
65 65 def parsemeta(text):
66 66 """Parse metadata header from revision data.
67 67
68 68 Returns a 2-tuple of (metadata, offset), where both can be None if there
69 69 is no metadata.
70 70 """
71 71 # text can be buffer, so we can't use .startswith or .index
72 72 if text[:2] != b'\x01\n':
73 73 return None, None
74 74 s = METADATA_RE.search(text, 2).start()
75 75 mtext = text[2:s]
76 76 meta = {}
77 77 for l in mtext.splitlines():
78 78 k, v = l.split(b': ', 1)
79 79 meta[k] = v
80 80 return meta, s + 2
81 81
82 82
83 83 def packmeta(meta, text):
84 84 """Add metadata to fulltext to produce revision text."""
85 85 keys = sorted(meta)
86 86 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
87 87 return b'\x01\n%s\x01\n%s' % (metatext, text)
88 88
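A quick round trip through the two helpers above (keys and values are arbitrary):

    meta = {b'copy': b'old/path', b'copyrev': b'0' * 40}
    text = packmeta(meta, b'file contents')

    parsed, offset = parsemeta(text)
    assert parsed == meta
    assert text[offset:] == b'file contents'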
89 89
90 90 def iscensoredtext(text):
91 91 meta = parsemeta(text)[0]
92 92 return meta and b'censored' in meta
93 93
94 94
95 95 def filtermetadata(text):
96 96 """Extract just the revision data from source text.
97 97
98 98 Returns ``text`` unless it has a metadata header, in which case we return
99 99 a new buffer without hte metadata.
100 100 """
101 101 if not text.startswith(b'\x01\n'):
102 102 return text
103 103
104 104 offset = text.index(b'\x01\n', 2)
105 105 return text[offset + 2 :]
106 106
107 107
108 108 def filerevisioncopied(store, node):
109 109 """Resolve file revision copy metadata.
110 110
111 111 Returns ``False`` if the file has no copy metadata. Otherwise a
112 112 2-tuple of the source filename and node.
113 113 """
114 114 if store.parents(node)[0] != sha1nodeconstants.nullid:
115 115 return False
116 116
117 117 meta = parsemeta(store.revision(node))[0]
118 118
119 119 # copy and copyrev occur in pairs. In rare cases due to old bugs,
120 120 # one can occur without the other. So ensure both are present to flag
121 121 # as a copy.
122 122 if meta and b'copy' in meta and b'copyrev' in meta:
123 123 return meta[b'copy'], bin(meta[b'copyrev'])
124 124
125 125 return False
126 126
127 127
128 128 def filedataequivalent(store, node, filedata):
129 129 """Determines whether file data is equivalent to a stored node.
130 130
131 131 Returns True if the passed file data would hash to the same value
132 132 as a stored revision and False otherwise.
133 133
134 134 When a stored revision is censored, filedata must be empty to have
135 135 equivalence.
136 136
137 137 When a stored revision has copy metadata, it is ignored as part
138 138 of the compare.
139 139 """
140 140
141 141 if filedata.startswith(b'\x01\n'):
142 142 revisiontext = b'\x01\n\x01\n' + filedata
143 143 else:
144 144 revisiontext = filedata
145 145
146 146 p1, p2 = store.parents(node)
147 147
148 148 computednode = hashrevisionsha1(revisiontext, p1, p2)
149 149
150 150 if computednode == node:
151 151 return True
152 152
153 153 # Censored files compare against the empty file.
154 154 if store.iscensored(store.rev(node)):
155 155 return filedata == b''
156 156
157 157 # Renaming a file produces a different hash, even if the data
158 158 # remains unchanged. Check if that's the case.
159 159 if store.renamed(node):
160 160 return store.read(node) == filedata
161 161
162 162 return False
163 163
164 164
165 165 def iterrevs(storelen, start=0, stop=None):
166 166 """Iterate over revision numbers in a store."""
167 167 step = 1
168 168
169 169 if stop is not None:
170 170 if start > stop:
171 171 step = -1
172 172 stop += step
173 173 if stop > storelen:
174 174 stop = storelen
175 175 else:
176 176 stop = storelen
177 177
178 178 return pycompat.xrange(start, stop, step)
179 179
180 180
181 181 def fileidlookup(store, fileid, identifier):
182 182 """Resolve the file node for a value.
183 183
184 184 ``store`` is an object implementing the ``ifileindex`` interface.
185 185
186 186 ``fileid`` can be:
187 187
188 188 * A 20 or 32 byte binary node.
189 189 * An integer revision number
190 190 * A 40 or 64 byte hex node.
191 191 * A bytes that can be parsed as an integer representing a revision number.
192 192
193 193 ``identifier`` is used to populate ``error.LookupError`` with an identifier
194 194 for the store.
195 195
196 196 Raises ``error.LookupError`` on failure.
197 197 """
198 198 if isinstance(fileid, int):
199 199 try:
200 200 return store.node(fileid)
201 201 except IndexError:
202 202 raise error.LookupError(
203 203 b'%d' % fileid, identifier, _(b'no match found')
204 204 )
205 205
206 206 if len(fileid) in (20, 32):
207 207 try:
208 208 store.rev(fileid)
209 209 return fileid
210 210 except error.LookupError:
211 211 pass
212 212
213 213 if len(fileid) in (40, 64):
214 214 try:
215 215 rawnode = bin(fileid)
216 216 store.rev(rawnode)
217 217 return rawnode
218 218 except TypeError:
219 219 pass
220 220
221 221 try:
222 222 rev = int(fileid)
223 223
224 224 if b'%d' % rev != fileid:
225 225 raise ValueError
226 226
227 227 try:
228 228 return store.node(rev)
229 229 except (IndexError, TypeError):
230 230 pass
231 231 except (ValueError, OverflowError):
232 232 pass
233 233
234 234 raise error.LookupError(fileid, identifier, _(b'no match found'))
235 235
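All of the accepted forms resolve to the same node; a hedged sketch, assuming ``store`` implements ``ifileindex``:

    from mercurial.node import hex

    node = fileidlookup(store, 0, b'path/to/file')                   # integer rev
    assert fileidlookup(store, b'0', b'path/to/file') == node        # rev as bytes
    assert fileidlookup(store, node, b'path/to/file') == node        # binary node
    assert fileidlookup(store, hex(node), b'path/to/file') == node   # hex node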
236 236
237 237 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
238 238 """Resolve information needed to strip revisions.
239 239
240 240 Finds the minimum revision number that must be stripped in order to
241 241 strip ``minlinkrev``.
242 242
243 243 Returns a 2-tuple of the minimum revision number to do that and a set
244 244 of all revision numbers that have linkrevs that would be broken
245 245 by that strip.
246 246
247 247 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
248 248 ``headrevs`` is an iterable of head revisions.
249 249 ``linkrevfn`` is a callable that receives a revision and returns a linked
250 250 revision.
251 251 ``parentrevsfn`` is a callable that receives a revision number and returns
252 252 an iterable of its parent revision numbers.
253 253 """
254 254 brokenrevs = set()
255 255 strippoint = tiprev + 1
256 256
257 257 heads = {}
258 258 futurelargelinkrevs = set()
259 259 for head in headrevs:
260 260 headlinkrev = linkrevfn(head)
261 261 heads[head] = headlinkrev
262 262 if headlinkrev >= minlinkrev:
263 263 futurelargelinkrevs.add(headlinkrev)
264 264
265 265 # This algorithm involves walking down the rev graph, starting at the
266 266 # heads. Since the revs are topologically sorted according to linkrev,
267 267 # once all head linkrevs are below the minlink, we know there are
268 268 # no more revs that could have a linkrev greater than minlink.
269 269 # So we can stop walking.
270 270 while futurelargelinkrevs:
271 271 strippoint -= 1
272 272 linkrev = heads.pop(strippoint)
273 273
274 274 if linkrev < minlinkrev:
275 275 brokenrevs.add(strippoint)
276 276 else:
277 277 futurelargelinkrevs.remove(linkrev)
278 278
279 279 for p in parentrevsfn(strippoint):
280 280 if p != nullrev:
281 281 plinkrev = linkrevfn(p)
282 282 heads[p] = plinkrev
283 283 if plinkrev >= minlinkrev:
284 284 futurelargelinkrevs.add(plinkrev)
285 285
286 286 return strippoint, brokenrevs
287 287
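A tiny worked example of the walk above: four revisions on two branches (parents ``1->0``, ``2->0``, ``3->2``) with linkrevs ``[0, 3, 1, 2]``; stripping linkrevs ``>= 2`` forces the strip point down to rev 1 and leaves rev 2 with a broken linkrev:

    nullrev = -1                       # mercurial.node.nullrev
    linkrevs = [0, 3, 1, 2]
    parents = {0: [nullrev], 1: [0], 2: [0], 3: [2]}

    strippoint, broken = resolvestripinfo(
        2,                             # minlinkrev
        3,                             # tiprev
        [1, 3],                        # head revisions
        lambda rev: linkrevs[rev],
        lambda rev: parents[rev],
    )
    assert strippoint == 1             # store revs >= 1 must be stripped
    assert broken == {2}               # rev 2 is stripped although its linkrev (1) survives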
288 288
289 289 def emitrevisions(
290 290 store,
291 291 nodes,
292 292 nodesorder,
293 293 resultcls,
294 294 deltaparentfn=None,
295 295 candeltafn=None,
296 296 rawsizefn=None,
297 297 revdifffn=None,
298 298 flagsfn=None,
299 299 deltamode=repository.CG_DELTAMODE_STD,
300 300 revisiondata=False,
301 301 assumehaveparentrevisions=False,
302 302 sidedata_helpers=None,
303 303 ):
304 304 """Generic implementation of ifiledata.emitrevisions().
305 305
306 306 Emitting revision data is subtly complex. This function attempts to
307 307 encapsulate all the logic for doing so in a backend-agnostic way.
308 308
309 309 ``store``
310 310 Object conforming to ``ifilestorage`` interface.
311 311
312 312 ``nodes``
313 313 List of revision nodes whose data to emit.
314 314
315 315 ``resultcls``
316 316 A type implementing the ``irevisiondelta`` interface that will be
317 317 constructed and returned.
318 318
319 319 ``deltaparentfn`` (optional)
320 320 Callable receiving a revision number and returning the revision number
321 321 of a revision that the internal delta is stored against. This delta
322 322 will be preferred over computing a new arbitrary delta.
323 323
324 324 If not defined, a delta will always be computed from raw revision
325 325 data.
326 326
327 327 ``candeltafn`` (optional)
328 328 Callable receiving a pair of revision numbers that returns a bool
329 329 indicating whether a delta between them can be produced.
330 330
331 331 If not defined, it is assumed that any two revisions can delta with
332 332 each other.
333 333
334 334 ``rawsizefn`` (optional)
335 335 Callable receiving a revision number and returning the length of the
336 336 ``store.rawdata(rev)``.
337 337
338 338 If not defined, ``len(store.rawdata(rev))`` will be called.
339 339
340 340 ``revdifffn`` (optional)
341 341 Callable receiving a pair of revision numbers that returns a delta
342 342 between them.
343 343
344 344 If not defined, a delta will be computed by invoking mdiff code
345 345 on ``store.revision()`` results.
346 346
347 347 Defining this function allows a precomputed or stored delta to be
 348 348     used without having to compute one.
349 349
350 350 ``flagsfn`` (optional)
351 351 Callable receiving a revision number and returns the integer flags
352 352 value for it. If not defined, flags value will be 0.
353 353
354 354 ``deltamode``
 355 355     constraint on deltas to be sent:
356 356 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
357 357 * CG_DELTAMODE_PREV - only delta against "prev",
358 358 * CG_DELTAMODE_FULL - only issue full snapshot.
359 359
360 360 Whether to send fulltext revisions instead of deltas, if allowed.
361 361
362 362 ``nodesorder``
363 363 ``revisiondata``
364 364 ``assumehaveparentrevisions``
365 365 ``sidedata_helpers`` (optional)
366 366 If not None, means that sidedata should be included.
367 367 A dictionary of revlog type to tuples of `(repo, computers, removers)`:
368 368 * `repo` is used as an argument for computers
369 * `computers` is a list of `(category, (keys, computer)` that
 369   * `computers` is a list of `(category, (keys, computer, flags))` that
370 370 compute the missing sidedata categories that were asked:
371 371 * `category` is the sidedata category
372 372 * `keys` are the sidedata keys to be affected
373 * `flags` is a bitmask (an integer) of flags to remove when
374 removing the category.
373 375 * `computer` is the function `(repo, store, rev, sidedata)` that
374 returns a new sidedata dict.
376 returns a tuple of
377 `(new sidedata dict, (flags to add, flags to remove))`.
378 For example, it will return `({}, (0, 1 << 15))` to return no
379 sidedata, with no flags to add and one flag to remove.
375 380 * `removers` will remove the keys corresponding to the categories
376 381 that are present, but not needed.
377 382 If both `computers` and `removers` are empty, sidedata are simply not
378 383 transformed.
379 384 Revlog types are `changelog`, `manifest` or `filelog`.
380 385 """
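A hedged sketch of a ``sidedata_helpers`` value, shaped the way ``run_sidedata_helpers`` below unpacks it; ``SD_EXAMPLE`` and ``EXAMPLE_FLAG`` are placeholders, not real constants:

    SD_EXAMPLE = 42          # placeholder sidedata category/key
    EXAMPLE_FLAG = 1 << 15   # placeholder flag bit

    def compute_example(repo, store, rev, sidedata):
        sidedata = dict(sidedata)
        sidedata[SD_EXAMPLE] = b'some payload'
        return sidedata, (EXAMPLE_FLAG, 0)  # (flags to add, flags to remove)

    computers = {b'changelog': [((SD_EXAMPLE,), compute_example, EXAMPLE_FLAG)]}
    removers = {}            # nothing to strip in this sketch
    sidedata_helpers = (repo, computers, removers)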
381 386
382 387 fnode = store.node
383 388 frev = store.rev
384 389
385 390 if nodesorder == b'nodes':
386 391 revs = [frev(n) for n in nodes]
387 392 elif nodesorder == b'linear':
388 393 revs = {frev(n) for n in nodes}
389 394 revs = dagop.linearize(revs, store.parentrevs)
390 395 else: # storage and default
391 396 revs = sorted(frev(n) for n in nodes)
392 397
393 398 prevrev = None
394 399
395 400 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
396 401 prevrev = store.parentrevs(revs[0])[0]
397 402
398 403 # Set of revs available to delta against.
399 404 available = set()
400 405
401 406 for rev in revs:
402 407 if rev == nullrev:
403 408 continue
404 409
405 410 node = fnode(rev)
406 411 p1rev, p2rev = store.parentrevs(rev)
407 412
408 413 if deltaparentfn:
409 414 deltaparentrev = deltaparentfn(rev)
410 415 else:
411 416 deltaparentrev = nullrev
412 417
413 418 # Forced delta against previous mode.
414 419 if deltamode == repository.CG_DELTAMODE_PREV:
415 420 baserev = prevrev
416 421
417 422 # We're instructed to send fulltext. Honor that.
418 423 elif deltamode == repository.CG_DELTAMODE_FULL:
419 424 baserev = nullrev
420 425 # We're instructed to use p1. Honor that
421 426 elif deltamode == repository.CG_DELTAMODE_P1:
422 427 baserev = p1rev
423 428
424 429 # There is a delta in storage. We try to use that because it
425 430 # amounts to effectively copying data from storage and is
426 431 # therefore the fastest.
427 432 elif deltaparentrev != nullrev:
428 433 # Base revision was already emitted in this group. We can
429 434 # always safely use the delta.
430 435 if deltaparentrev in available:
431 436 baserev = deltaparentrev
432 437
433 438 # Base revision is a parent that hasn't been emitted already.
434 439 # Use it if we can assume the receiver has the parent revision.
435 440 elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev):
436 441 baserev = deltaparentrev
437 442
438 443 # No guarantee the receiver has the delta parent. Send delta
439 444 # against last revision (if possible), which in the common case
440 445 # should be similar enough to this revision that the delta is
441 446 # reasonable.
442 447 elif prevrev is not None:
443 448 baserev = prevrev
444 449 else:
445 450 baserev = nullrev
446 451
447 452 # Storage has a fulltext revision.
448 453
449 454 # Let's use the previous revision, which is as good a guess as any.
450 455 # There is definitely room to improve this logic.
451 456 elif prevrev is not None:
452 457 baserev = prevrev
453 458 else:
454 459 baserev = nullrev
455 460
456 461 # But we can't actually use our chosen delta base for whatever
457 462 # reason. Reset to fulltext.
458 463 if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
459 464 baserev = nullrev
460 465
461 466 revision = None
462 467 delta = None
463 468 baserevisionsize = None
464 469
465 470 if revisiondata:
466 471 if store.iscensored(baserev) or store.iscensored(rev):
467 472 try:
468 473 revision = store.rawdata(node)
469 474 except error.CensoredNodeError as e:
470 475 revision = e.tombstone
471 476
472 477 if baserev != nullrev:
473 478 if rawsizefn:
474 479 baserevisionsize = rawsizefn(baserev)
475 480 else:
476 481 baserevisionsize = len(store.rawdata(baserev))
477 482
478 483 elif (
479 484 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
480 485 ):
481 486 revision = store.rawdata(node)
482 487 available.add(rev)
483 488 else:
484 489 if revdifffn:
485 490 delta = revdifffn(baserev, rev)
486 491 else:
487 492 delta = mdiff.textdiff(
488 493 store.rawdata(baserev), store.rawdata(rev)
489 494 )
490 495
491 496 available.add(rev)
492 497
493 498 serialized_sidedata = None
499 sidedata_flags = (0, 0)
494 500 if sidedata_helpers:
495 sidedata = store.sidedata(rev)
496 sidedata = run_sidedata_helpers(
501 old_sidedata = store.sidedata(rev)
502 sidedata, sidedata_flags = run_sidedata_helpers(
497 503 store=store,
498 504 sidedata_helpers=sidedata_helpers,
499 sidedata=sidedata,
505 sidedata=old_sidedata,
500 506 rev=rev,
501 507 )
502 508 if sidedata:
503 509 serialized_sidedata = sidedatamod.serialize_sidedata(sidedata)
504 510
505 511 flags = flagsfn(rev) if flagsfn else 0
506 512 protocol_flags = 0
507 513 if serialized_sidedata:
508 514 # Advertise that sidedata exists to the other side
509 515 protocol_flags |= CG_FLAG_SIDEDATA
516 # Computers and removers can return flags to add and/or remove
517 flags = flags | sidedata_flags[0] & ~sidedata_flags[1]
510 518
511 519 yield resultcls(
512 520 node=node,
513 521 p1node=fnode(p1rev),
514 522 p2node=fnode(p2rev),
515 523 basenode=fnode(baserev),
516 524 flags=flags,
517 525 baserevisionsize=baserevisionsize,
518 526 revision=revision,
519 527 delta=delta,
520 528 sidedata=serialized_sidedata,
521 529 protocol_flags=protocol_flags,
522 530 )
523 531
524 532 prevrev = rev
525 533
526 534
527 535 def run_sidedata_helpers(store, sidedata_helpers, sidedata, rev):
528 536 """Returns the sidedata for the given revision after running through
529 537 the given helpers.
530 538 - `store`: the revlog this applies to (changelog, manifest, or filelog
531 539 instance)
532 540 - `sidedata_helpers`: see `storageutil.emitrevisions`
533 541 - `sidedata`: previous sidedata at the given rev, if any
534 542 - `rev`: affected rev of `store`
535 543 """
536 544 repo, sd_computers, sd_removers = sidedata_helpers
537 545 kind = store.revlog_kind
538 for _keys, sd_computer in sd_computers.get(kind, []):
539 sidedata = sd_computer(repo, store, rev, sidedata)
540 for keys, _computer in sd_removers.get(kind, []):
546 flags_to_add = 0
547 flags_to_remove = 0
548 for _keys, sd_computer, _flags in sd_computers.get(kind, []):
549 sidedata, flags = sd_computer(repo, store, rev, sidedata)
550 flags_to_add |= flags[0]
551 flags_to_remove |= flags[1]
552 for keys, _computer, flags in sd_removers.get(kind, []):
541 553 for key in keys:
542 554 sidedata.pop(key, None)
543 return sidedata
555 flags_to_remove |= flags
556 return sidedata, (flags_to_add, flags_to_remove)
544 557
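Continuing the sketch from the ``emitrevisions`` docstring, applying the helpers to one revision and folding the returned bits into the revision flags (``store``, ``rev`` and ``flags`` are assumed to exist, with ``store`` exposing ``revlog_kind`` and ``sidedata(rev)``):

    new_sd, (to_add, to_remove) = run_sidedata_helpers(
        store=store,
        sidedata_helpers=sidedata_helpers,
        sidedata=store.sidedata(rev),
        rev=rev,
    )
    flags = flags | to_add & ~to_remove   # same arithmetic as emitrevisions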
545 558
546 559 def deltaiscensored(delta, baserev, baselenfn):
547 560 """Determine if a delta represents censored revision data.
548 561
549 562 ``baserev`` is the base revision this delta is encoded against.
550 563 ``baselenfn`` is a callable receiving a revision number that resolves the
551 564 length of the revision fulltext.
552 565
553 566 Returns a bool indicating if the result of the delta represents a censored
554 567 revision.
555 568 """
556 569 # Fragile heuristic: unless new file meta keys are added alphabetically
557 570 # preceding "censored", all censored revisions are prefixed by
558 571 # "\1\ncensored:". A delta producing such a censored revision must be a
559 572 # full-replacement delta, so we inspect the first and only patch in the
560 573 # delta for this prefix.
561 574 hlen = struct.calcsize(b">lll")
562 575 if len(delta) <= hlen:
563 576 return False
564 577
565 578 oldlen = baselenfn(baserev)
566 579 newlen = len(delta) - hlen
567 580 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
568 581 return False
569 582
570 583 add = b"\1\ncensored:"
571 584 addlen = len(add)
572 585 return newlen >= addlen and delta[hlen : hlen + addlen] == add
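An illustration of the heuristic above: a full-replacement delta carries a single ``(start, end, newlen)`` header (what ``mdiff.replacediffheader`` packs) followed by the censored metadata. Values are arbitrary:

    import struct

    oldlen, tombstone = 12, b'\x01\ncensored: gone\n\x01\n'
    # Equivalent to mdiff.replacediffheader(oldlen, len(tombstone)).
    delta = struct.pack(b'>lll', 0, oldlen, len(tombstone)) + tombstone

    hlen = struct.calcsize(b'>lll')
    prefix = b'\x01\ncensored:'
    assert len(delta) > hlen
    assert delta[hlen : hlen + len(prefix)] == prefix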
@@ -1,51 +1,55 b''
1 1 # coding: utf8
2 2 # ext-sidedata-2.py - small extension to test (differently) the sidedata logic
3 3 #
4 4 # Simulates a client for a complex sidedata exchange.
5 5 #
6 6 # Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
7 7 #
8 8 # This software may be used and distributed according to the terms of the
9 9 # GNU General Public License version 2 or any later version.
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import hashlib
14 14 import struct
15 15
16 16 from mercurial.revlogutils import sidedata as sidedatamod
17 17 from mercurial.revlogutils import constants
18 18
 19 NO_FLAGS = (0, 0)
20
19 21
20 22 def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
21 23 sidedata = sidedata.copy()
22 24 if text is None:
23 25 text = revlog.revision(rev)
24 26 sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
25 return sidedata
27 return sidedata, NO_FLAGS
26 28
27 29
28 30 def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
29 31 sidedata = sidedata.copy()
30 32 if text is None:
31 33 text = revlog.revision(rev)
32 34 sha256 = hashlib.sha256(text).digest()
33 35 sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
34 return sidedata
36 return sidedata, NO_FLAGS
35 37
36 38
37 39 def reposetup(ui, repo):
38 40 # Sidedata keys happen to be the same as the categories, easier for testing.
39 41 for kind in constants.ALL_KINDS:
40 42 repo.register_sidedata_computer(
41 43 kind,
42 44 sidedatamod.SD_TEST1,
43 45 (sidedatamod.SD_TEST1,),
44 46 compute_sidedata_1,
47 0,
45 48 )
46 49 repo.register_sidedata_computer(
47 50 kind,
48 51 sidedatamod.SD_TEST2,
49 52 (sidedatamod.SD_TEST2,),
50 53 compute_sidedata_2,
54 0,
51 55 )
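The trailing ``0`` added to each registration above is the new flags bitmask argument; a hypothetical computer that also sets a flag bit would register like this (``EXAMPLE_FLAG`` is a placeholder, not a real ``REVIDX_*`` constant):

    EXAMPLE_FLAG = 1 << 15   # placeholder flag bit

    repo.register_sidedata_computer(
        kind,
        sidedatamod.SD_TEST1,
        (sidedatamod.SD_TEST1,),
        compute_sidedata_1,
        EXAMPLE_FLAG,        # revisions touched by this computer gain this bit
    )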
@@ -1,89 +1,94 b''
1 1 # coding: utf8
2 2 # ext-sidedata-3.py - small extension to test (differently still) the sidedata
3 3 # logic
4 4 #
5 5 # Simulates a client for a complex sidedata exchange.
6 6 #
7 7 # Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
8 8 #
9 9 # This software may be used and distributed according to the terms of the
10 10 # GNU General Public License version 2 or any later version.
11 11
12 12 from __future__ import absolute_import
13 13
14 14 import hashlib
15 15 import struct
16 16
17 17 from mercurial import (
18 18 extensions,
19 19 revlog,
20 20 )
21 21
22 22 from mercurial.revlogutils import sidedata as sidedatamod
23 23 from mercurial.revlogutils import constants
24 24
25 NO_FLAGS = (0, 0)
26
25 27
26 28 def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
27 29 sidedata = sidedata.copy()
28 30 if text is None:
29 31 text = revlog.revision(rev)
30 32 sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
31 return sidedata
33 return sidedata, NO_FLAGS
32 34
33 35
34 36 def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
35 37 sidedata = sidedata.copy()
36 38 if text is None:
37 39 text = revlog.revision(rev)
38 40 sha256 = hashlib.sha256(text).digest()
39 41 sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
40 return sidedata
42 return sidedata, NO_FLAGS
41 43
42 44
43 45 def compute_sidedata_3(repo, revlog, rev, sidedata, text=None):
44 46 sidedata = sidedata.copy()
45 47 if text is None:
46 48 text = revlog.revision(rev)
47 49 sha384 = hashlib.sha384(text).digest()
48 50 sidedata[sidedatamod.SD_TEST3] = struct.pack('>48s', sha384)
49 return sidedata
51 return sidedata, NO_FLAGS
50 52
51 53
52 54 def wrapaddrevision(
53 55 orig, self, text, transaction, link, p1, p2, *args, **kwargs
54 56 ):
55 57 if kwargs.get('sidedata') is None:
56 58 kwargs['sidedata'] = {}
57 59 sd = kwargs['sidedata']
58 sd = compute_sidedata_1(None, self, None, sd, text=text)
59 kwargs['sidedata'] = compute_sidedata_2(None, self, None, sd, text=text)
60 sd, flags = compute_sidedata_1(None, self, None, sd, text=text)
61 kwargs['sidedata'] = compute_sidedata_2(None, self, None, sd, text=text)[0]
60 62 return orig(self, text, transaction, link, p1, p2, *args, **kwargs)
61 63
62 64
63 65 def extsetup(ui):
64 66 extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
65 67
66 68
67 69 def reposetup(ui, repo):
68 70 # Sidedata keys happen to be the same as the categories, easier for testing.
69 71 for kind in constants.ALL_KINDS:
70 72 repo.register_sidedata_computer(
71 73 kind,
72 74 sidedatamod.SD_TEST1,
73 75 (sidedatamod.SD_TEST1,),
74 76 compute_sidedata_1,
77 0,
75 78 )
76 79 repo.register_sidedata_computer(
77 80 kind,
78 81 sidedatamod.SD_TEST2,
79 82 (sidedatamod.SD_TEST2,),
80 83 compute_sidedata_2,
84 0,
81 85 )
82 86 repo.register_sidedata_computer(
83 87 kind,
84 88 sidedatamod.SD_TEST3,
85 89 (sidedatamod.SD_TEST3,),
86 90 compute_sidedata_3,
91 0,
87 92 )
88 93 repo.register_wanted_sidedata(sidedatamod.SD_TEST1)
89 94 repo.register_wanted_sidedata(sidedatamod.SD_TEST2)
@@ -1,82 +1,86 b''
1 1 # coding: utf8
2 2 # ext-sidedata-5.py - small extension to test (differently still) the sidedata
3 3 # logic
4 4 #
5 5 # Simulates a server for a simple sidedata exchange.
6 6 #
7 7 # Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
8 8 #
9 9 # This software may be used and distributed according to the terms of the
10 10 # GNU General Public License version 2 or any later version.
11 11
12 12 from __future__ import absolute_import
13 13
14 14 import hashlib
15 15 import struct
16 16
17 17 from mercurial import (
18 18 extensions,
19 19 revlog,
20 20 )
21 21
22 22
23 23 from mercurial.revlogutils import sidedata as sidedatamod
24 24 from mercurial.revlogutils import constants
25 25
26 NO_FLAGS = (0, 0)
27
26 28
27 29 def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
28 30 sidedata = sidedata.copy()
29 31 if text is None:
30 32 text = revlog.revision(rev)
31 33 sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
32 return sidedata
34 return sidedata, NO_FLAGS
33 35
34 36
35 37 def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
36 38 sidedata = sidedata.copy()
37 39 if text is None:
38 40 text = revlog.revision(rev)
39 41 sha256 = hashlib.sha256(text).digest()
40 42 sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
41 return sidedata
43 return sidedata, NO_FLAGS
42 44
43 45
44 46 def reposetup(ui, repo):
45 47 # Sidedata keys happen to be the same as the categories, easier for testing.
46 48 for kind in constants.ALL_KINDS:
47 49 repo.register_sidedata_computer(
48 50 kind,
49 51 sidedatamod.SD_TEST1,
50 52 (sidedatamod.SD_TEST1,),
51 53 compute_sidedata_1,
54 0,
52 55 )
53 56 repo.register_sidedata_computer(
54 57 kind,
55 58 sidedatamod.SD_TEST2,
56 59 (sidedatamod.SD_TEST2,),
57 60 compute_sidedata_2,
61 0,
58 62 )
59 63
60 64 # We don't register sidedata computers because we don't care within these
61 65 # tests
62 66 repo.register_wanted_sidedata(sidedatamod.SD_TEST1)
63 67 repo.register_wanted_sidedata(sidedatamod.SD_TEST2)
64 68
65 69
66 70 def wrapaddrevision(
67 71 orig, self, text, transaction, link, p1, p2, *args, **kwargs
68 72 ):
69 73 if kwargs.get('sidedata') is None:
70 74 kwargs['sidedata'] = {}
71 75 sd = kwargs['sidedata']
72 76 ## let's store some arbitrary data just for testing
73 77 # text length
74 78 sd[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
75 79 # and sha2 hashes
76 80 sha256 = hashlib.sha256(text).digest()
77 81 sd[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
78 82 return orig(self, text, transaction, link, p1, p2, *args, **kwargs)
79 83
80 84
81 85 def extsetup(ui):
82 86 extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)