##// END OF EJS Templates
manifest: tighter manifest parsing and flag use...
Joerg Sonnenberger -
r45678:d0ef8c1d default
parent child Browse files
Show More
@@ -1,1006 +1,1014 b''
1 1 /*
2 2 * manifest.c - manifest type that does on-demand parsing.
3 3 *
4 4 * Copyright 2015, Google Inc.
5 5 *
6 6 * This software may be used and distributed according to the terms of
7 7 * the GNU General Public License, incorporated herein by reference.
8 8 */
9 9 #include <Python.h>
10 10
11 11 #include <assert.h>
12 12 #include <stdlib.h>
13 13 #include <string.h>
14 14
15 15 #include "charencode.h"
16 16 #include "util.h"
17 17
18 18 #define DEFAULT_LINES 100000
19 19
20 20 typedef struct {
21 21 char *start;
22 22 Py_ssize_t len; /* length of line including terminal newline */
23 23 char hash_suffix;
24 24 bool from_malloc;
25 25 bool deleted;
26 26 } line;
27 27
28 28 typedef struct {
29 29 PyObject_HEAD
30 30 PyObject *pydata;
31 31 line *lines;
32 32 int numlines; /* number of line entries */
33 33 int livelines; /* number of non-deleted lines */
34 34 int maxlines; /* allocated number of lines */
35 35 bool dirty;
36 36 } lazymanifest;
37 37
38 38 #define MANIFEST_OOM -1
39 39 #define MANIFEST_NOT_SORTED -2
40 40 #define MANIFEST_MALFORMED -3
41 41 #define MANIFEST_BOGUS_FILENAME -4
42 42 #define MANIFEST_TOO_SHORT_LINE -5
43 43
44 44 /* get the length of the path for a line */
45 45 static Py_ssize_t pathlen(line *l)
46 46 {
47 47 const char *end = memchr(l->start, '\0', l->len);
48 48 return (end) ? (Py_ssize_t)(end - l->start) : l->len;
49 49 }
50 50
51 51 /* get the node value of a single line */
52 static PyObject *nodeof(line *l)
52 static PyObject *nodeof(line *l, char *flag)
53 53 {
54 54 char *s = l->start;
55 55 Py_ssize_t llen = pathlen(l);
56 56 Py_ssize_t hlen = l->len - llen - 2;
57 Py_ssize_t hlen_raw = 20;
57 Py_ssize_t hlen_raw;
58 58 PyObject *hash;
59 59 if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */
60 60 PyErr_SetString(PyExc_ValueError, "manifest line too short");
61 61 return NULL;
62 62 }
63 /* Detect flags after the hash first. */
64 switch (s[llen + hlen]) {
65 case 'l':
66 case 't':
67 case 'x':
68 *flag = s[llen + hlen];
69 --hlen;
70 break;
71 default:
72 *flag = '\0';
73 break;
74 }
75
63 76 switch (hlen) {
64 77 case 40: /* sha1 */
65 case 41: /* sha1 with cruft for a merge */
78 hlen_raw = 20;
66 79 break;
67 80 case 64: /* new hash */
68 case 65: /* new hash with cruft for a merge */
69 81 hlen_raw = 32;
70 82 break;
71 83 default:
72 84 PyErr_SetString(PyExc_ValueError, "invalid node length in manifest");
73 85 return NULL;
74 86 }
75 87 hash = unhexlify(s + llen + 1, hlen_raw * 2);
76 88 if (!hash) {
77 89 return NULL;
78 90 }
79 91 if (l->hash_suffix != '\0') {
80 92 char newhash[33];
81 93 memcpy(newhash, PyBytes_AsString(hash), hlen_raw);
82 94 Py_DECREF(hash);
83 95 newhash[hlen_raw] = l->hash_suffix;
84 96 hash = PyBytes_FromStringAndSize(newhash, hlen_raw+1);
85 97 }
86 98 return hash;
87 99 }
88 100
89 101 /* get the node hash and flags of a line as a tuple */
90 102 static PyObject *hashflags(line *l)
91 103 {
92 char *s = l->start;
93 Py_ssize_t plen = pathlen(l);
94 PyObject *hash = nodeof(l);
104 char flag;
105 PyObject *hash = nodeof(l, &flag);
95 106 ssize_t hlen;
96 107 Py_ssize_t hplen, flen;
97 108 PyObject *flags;
98 109 PyObject *tup;
99 110
100 111 if (!hash)
101 112 return NULL;
102 /* hash is either 20 or 21 bytes for an old hash, so we use a
103 ternary here to get the "real" hexlified sha length. */
104 hlen = PyBytes_GET_SIZE(hash) < 22 ? 40 : 64;
105 /* 1 for null byte, 1 for newline */
106 hplen = plen + hlen + 2;
107 flen = l->len - hplen;
108
109 flags = PyBytes_FromStringAndSize(s + hplen - 1, flen);
113 flags = PyBytes_FromStringAndSize(&flag, flag ? 1 : 0);
110 114 if (!flags) {
111 115 Py_DECREF(hash);
112 116 return NULL;
113 117 }
114 118 tup = PyTuple_Pack(2, hash, flags);
115 119 Py_DECREF(flags);
116 120 Py_DECREF(hash);
117 121 return tup;
118 122 }
119 123
120 124 /* if we're about to run out of space in the line index, add more */
121 125 static bool realloc_if_full(lazymanifest *self)
122 126 {
123 127 if (self->numlines == self->maxlines) {
124 128 self->maxlines *= 2;
125 129 self->lines = realloc(self->lines, self->maxlines * sizeof(line));
126 130 }
127 131 return !!self->lines;
128 132 }
129 133
130 134 /*
131 135 * Find the line boundaries in the manifest that 'data' points to and store
132 136 * information about each line in 'self'.
133 137 */
134 138 static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
135 139 {
136 140 char *prev = NULL;
137 141 while (len > 0) {
138 142 line *l;
139 143 char *next;
140 144 if (*data == '\0') {
141 145 /* It's implausible there's no filename, don't
142 146 * even bother looking for the newline. */
143 147 return MANIFEST_BOGUS_FILENAME;
144 148 }
145 149 next = memchr(data, '\n', len);
146 150 if (!next) {
147 151 return MANIFEST_MALFORMED;
148 152 }
149 153 if ((next - data) < 42) {
150 154 /* We should have at least 42 bytes in a line:
151 155 1 byte filename
152 156 1 NUL
153 157 40 bytes of hash
154 158 so we can give up here.
155 159 */
156 160 return MANIFEST_TOO_SHORT_LINE;
157 161 }
158 162 next++; /* advance past newline */
159 163 if (prev && strcmp(prev, data) > -1) {
160 164 /* This data isn't sorted, so we have to abort. */
161 165 return MANIFEST_NOT_SORTED;
162 166 }
163 167 if (!realloc_if_full(self)) {
164 168 return MANIFEST_OOM; /* no memory */
165 169 }
166 170 l = self->lines + ((self->numlines)++);
167 171 l->start = data;
168 172 l->len = next - data;
169 173 l->hash_suffix = '\0';
170 174 l->from_malloc = false;
171 175 l->deleted = false;
172 176 len = len - l->len;
173 177 prev = data;
174 178 data = next;
175 179 }
176 180 self->livelines = self->numlines;
177 181 return 0;
178 182 }
179 183
180 184 static void lazymanifest_init_early(lazymanifest *self)
181 185 {
182 186 self->pydata = NULL;
183 187 self->lines = NULL;
184 188 self->numlines = 0;
185 189 self->maxlines = 0;
186 190 }
187 191
188 192 static int lazymanifest_init(lazymanifest *self, PyObject *args)
189 193 {
190 194 char *data;
191 195 Py_ssize_t len;
192 196 int err, ret;
193 197 PyObject *pydata;
194 198
195 199 lazymanifest_init_early(self);
196 200 if (!PyArg_ParseTuple(args, "S", &pydata)) {
197 201 return -1;
198 202 }
199 203 err = PyBytes_AsStringAndSize(pydata, &data, &len);
200 204
201 205 self->dirty = false;
202 206 if (err == -1)
203 207 return -1;
204 208 self->pydata = pydata;
205 209 Py_INCREF(self->pydata);
206 210 Py_BEGIN_ALLOW_THREADS
207 211 self->lines = malloc(DEFAULT_LINES * sizeof(line));
208 212 self->maxlines = DEFAULT_LINES;
209 213 self->numlines = 0;
210 214 if (!self->lines)
211 215 ret = MANIFEST_OOM;
212 216 else
213 217 ret = find_lines(self, data, len);
214 218 Py_END_ALLOW_THREADS
215 219 switch (ret) {
216 220 case 0:
217 221 break;
218 222 case MANIFEST_OOM:
219 223 PyErr_NoMemory();
220 224 break;
221 225 case MANIFEST_NOT_SORTED:
222 226 PyErr_Format(PyExc_ValueError,
223 227 "Manifest lines not in sorted order.");
224 228 break;
225 229 case MANIFEST_MALFORMED:
226 230 PyErr_Format(PyExc_ValueError,
227 231 "Manifest did not end in a newline.");
228 232 break;
229 233 case MANIFEST_BOGUS_FILENAME:
230 234 PyErr_Format(
231 235 PyExc_ValueError,
232 236 "Manifest had an entry with a zero-length filename.");
233 237 break;
234 238 case MANIFEST_TOO_SHORT_LINE:
235 239 PyErr_Format(
236 240 PyExc_ValueError,
237 241 "Manifest had implausibly-short line.");
238 242 break;
239 243 default:
240 244 PyErr_Format(PyExc_ValueError,
241 245 "Unknown problem parsing manifest.");
242 246 }
243 247 return ret == 0 ? 0 : -1;
244 248 }
245 249
246 250 static void lazymanifest_dealloc(lazymanifest *self)
247 251 {
248 252 /* free any extra lines we had to allocate */
249 253 int i;
250 254 for (i = 0; self->lines && (i < self->numlines); i++) {
251 255 if (self->lines[i].from_malloc) {
252 256 free(self->lines[i].start);
253 257 }
254 258 }
255 259 free(self->lines);
256 260 self->lines = NULL;
257 261 if (self->pydata) {
258 262 Py_DECREF(self->pydata);
259 263 self->pydata = NULL;
260 264 }
261 265 PyObject_Del(self);
262 266 }
263 267
264 268 /* iteration support */
265 269
266 270 typedef struct {
267 271 PyObject_HEAD lazymanifest *m;
268 272 Py_ssize_t pos;
269 273 } lmIter;
270 274
271 275 static void lmiter_dealloc(PyObject *o)
272 276 {
273 277 lmIter *self = (lmIter *)o;
274 278 Py_DECREF(self->m);
275 279 PyObject_Del(self);
276 280 }
277 281
278 282 static line *lmiter_nextline(lmIter *self)
279 283 {
280 284 do {
281 285 self->pos++;
282 286 if (self->pos >= self->m->numlines) {
283 287 return NULL;
284 288 }
285 289 /* skip over deleted manifest entries */
286 290 } while (self->m->lines[self->pos].deleted);
287 291 return self->m->lines + self->pos;
288 292 }
289 293
290 294 static PyObject *lmiter_iterentriesnext(PyObject *o)
291 295 {
292 296 Py_ssize_t pl;
293 297 line *l;
298 char flag;
294 299 Py_ssize_t consumed;
295 300 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
296 301 l = lmiter_nextline((lmIter *)o);
297 302 if (!l) {
298 303 goto done;
299 304 }
300 305 pl = pathlen(l);
301 306 path = PyBytes_FromStringAndSize(l->start, pl);
302 hash = nodeof(l);
307 hash = nodeof(l, &flag);
303 308 if (!path || !hash) {
304 309 goto done;
305 310 }
306 consumed = pl + 41;
307 flags = PyBytes_FromStringAndSize(l->start + consumed,
308 l->len - consumed - 1);
311 flags = PyBytes_FromStringAndSize(&flag, flag ? 1 : 0);
309 312 if (!flags) {
310 313 goto done;
311 314 }
312 315 ret = PyTuple_Pack(3, path, hash, flags);
313 316 done:
314 317 Py_XDECREF(path);
315 318 Py_XDECREF(hash);
316 319 Py_XDECREF(flags);
317 320 return ret;
318 321 }
319 322
320 323 #ifdef IS_PY3K
321 324 #define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
322 325 #else
323 326 #define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
324 327 | Py_TPFLAGS_HAVE_ITER
325 328 #endif
326 329
327 330 static PyTypeObject lazymanifestEntriesIterator = {
328 331 PyVarObject_HEAD_INIT(NULL, 0) /* header */
329 332 "parsers.lazymanifest.entriesiterator", /*tp_name */
330 333 sizeof(lmIter), /*tp_basicsize */
331 334 0, /*tp_itemsize */
332 335 lmiter_dealloc, /*tp_dealloc */
333 336 0, /*tp_print */
334 337 0, /*tp_getattr */
335 338 0, /*tp_setattr */
336 339 0, /*tp_compare */
337 340 0, /*tp_repr */
338 341 0, /*tp_as_number */
339 342 0, /*tp_as_sequence */
340 343 0, /*tp_as_mapping */
341 344 0, /*tp_hash */
342 345 0, /*tp_call */
343 346 0, /*tp_str */
344 347 0, /*tp_getattro */
345 348 0, /*tp_setattro */
346 349 0, /*tp_as_buffer */
347 350 LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */
348 351 "Iterator for 3-tuples in a lazymanifest.", /* tp_doc */
349 352 0, /* tp_traverse */
350 353 0, /* tp_clear */
351 354 0, /* tp_richcompare */
352 355 0, /* tp_weaklistoffset */
353 356 PyObject_SelfIter, /* tp_iter: __iter__() method */
354 357 lmiter_iterentriesnext, /* tp_iternext: next() method */
355 358 };
356 359
357 360 static PyObject *lmiter_iterkeysnext(PyObject *o)
358 361 {
359 362 Py_ssize_t pl;
360 363 line *l = lmiter_nextline((lmIter *)o);
361 364 if (!l) {
362 365 return NULL;
363 366 }
364 367 pl = pathlen(l);
365 368 return PyBytes_FromStringAndSize(l->start, pl);
366 369 }
367 370
368 371 #ifdef IS_PY3K
369 372 #define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
370 373 #else
371 374 #define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
372 375 | Py_TPFLAGS_HAVE_ITER
373 376 #endif
374 377
375 378 static PyTypeObject lazymanifestKeysIterator = {
376 379 PyVarObject_HEAD_INIT(NULL, 0) /* header */
377 380 "parsers.lazymanifest.keysiterator", /*tp_name */
378 381 sizeof(lmIter), /*tp_basicsize */
379 382 0, /*tp_itemsize */
380 383 lmiter_dealloc, /*tp_dealloc */
381 384 0, /*tp_print */
382 385 0, /*tp_getattr */
383 386 0, /*tp_setattr */
384 387 0, /*tp_compare */
385 388 0, /*tp_repr */
386 389 0, /*tp_as_number */
387 390 0, /*tp_as_sequence */
388 391 0, /*tp_as_mapping */
389 392 0, /*tp_hash */
390 393 0, /*tp_call */
391 394 0, /*tp_str */
392 395 0, /*tp_getattro */
393 396 0, /*tp_setattro */
394 397 0, /*tp_as_buffer */
395 398 LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */
396 399 "Keys iterator for a lazymanifest.", /* tp_doc */
397 400 0, /* tp_traverse */
398 401 0, /* tp_clear */
399 402 0, /* tp_richcompare */
400 403 0, /* tp_weaklistoffset */
401 404 PyObject_SelfIter, /* tp_iter: __iter__() method */
402 405 lmiter_iterkeysnext, /* tp_iternext: next() method */
403 406 };
404 407
405 408 static lazymanifest *lazymanifest_copy(lazymanifest *self);
406 409
407 410 static PyObject *lazymanifest_getentriesiter(lazymanifest *self)
408 411 {
409 412 lmIter *i = NULL;
410 413 lazymanifest *t = lazymanifest_copy(self);
411 414 if (!t) {
412 415 PyErr_NoMemory();
413 416 return NULL;
414 417 }
415 418 i = PyObject_New(lmIter, &lazymanifestEntriesIterator);
416 419 if (i) {
417 420 i->m = t;
418 421 i->pos = -1;
419 422 } else {
420 423 Py_DECREF(t);
421 424 PyErr_NoMemory();
422 425 }
423 426 return (PyObject *)i;
424 427 }
425 428
426 429 static PyObject *lazymanifest_getkeysiter(lazymanifest *self)
427 430 {
428 431 lmIter *i = NULL;
429 432 lazymanifest *t = lazymanifest_copy(self);
430 433 if (!t) {
431 434 PyErr_NoMemory();
432 435 return NULL;
433 436 }
434 437 i = PyObject_New(lmIter, &lazymanifestKeysIterator);
435 438 if (i) {
436 439 i->m = t;
437 440 i->pos = -1;
438 441 } else {
439 442 Py_DECREF(t);
440 443 PyErr_NoMemory();
441 444 }
442 445 return (PyObject *)i;
443 446 }
444 447
445 448 /* __getitem__ and __setitem__ support */
446 449
447 450 static Py_ssize_t lazymanifest_size(lazymanifest *self)
448 451 {
449 452 return self->livelines;
450 453 }
451 454
452 455 static int linecmp(const void *left, const void *right)
453 456 {
454 457 return strcmp(((const line *)left)->start,
455 458 ((const line *)right)->start);
456 459 }
457 460
458 461 static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
459 462 {
460 463 line needle;
461 464 line *hit;
462 465 if (!PyBytes_Check(key)) {
463 466 PyErr_Format(PyExc_TypeError,
464 467 "getitem: manifest keys must be a string.");
465 468 return NULL;
466 469 }
467 470 needle.start = PyBytes_AsString(key);
468 471 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
469 472 &linecmp);
470 473 if (!hit || hit->deleted) {
471 474 PyErr_Format(PyExc_KeyError, "No such manifest entry.");
472 475 return NULL;
473 476 }
474 477 return hashflags(hit);
475 478 }
476 479
477 480 static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
478 481 {
479 482 line needle;
480 483 line *hit;
481 484 if (!PyBytes_Check(key)) {
482 485 PyErr_Format(PyExc_TypeError,
483 486 "delitem: manifest keys must be a string.");
484 487 return -1;
485 488 }
486 489 needle.start = PyBytes_AsString(key);
487 490 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
488 491 &linecmp);
489 492 if (!hit || hit->deleted) {
490 493 PyErr_Format(PyExc_KeyError,
491 494 "Tried to delete nonexistent manifest entry.");
492 495 return -1;
493 496 }
494 497 self->dirty = true;
495 498 hit->deleted = true;
496 499 self->livelines--;
497 500 return 0;
498 501 }
499 502
500 503 /* Do a binary search for the insertion point for new, creating the
501 504 * new entry if needed. */
502 505 static int internalsetitem(lazymanifest *self, line *new)
503 506 {
504 507 int start = 0, end = self->numlines;
505 508 while (start < end) {
506 509 int pos = start + (end - start) / 2;
507 510 int c = linecmp(new, self->lines + pos);
508 511 if (c < 0)
509 512 end = pos;
510 513 else if (c > 0)
511 514 start = pos + 1;
512 515 else {
513 516 if (self->lines[pos].deleted)
514 517 self->livelines++;
515 518 if (self->lines[pos].from_malloc)
516 519 free(self->lines[pos].start);
517 520 start = pos;
518 521 goto finish;
519 522 }
520 523 }
521 524 /* being here means we need to do an insert */
522 525 if (!realloc_if_full(self)) {
523 526 PyErr_NoMemory();
524 527 return -1;
525 528 }
526 529 memmove(self->lines + start + 1, self->lines + start,
527 530 (self->numlines - start) * sizeof(line));
528 531 self->numlines++;
529 532 self->livelines++;
530 533 finish:
531 534 self->lines[start] = *new;
532 535 self->dirty = true;
533 536 return 0;
534 537 }
535 538
536 539 static int lazymanifest_setitem(
537 540 lazymanifest *self, PyObject *key, PyObject *value)
538 541 {
539 542 char *path;
540 543 Py_ssize_t plen;
541 544 PyObject *pyhash;
542 545 Py_ssize_t hlen;
543 546 char *hash;
544 547 PyObject *pyflags;
545 548 char *flags;
546 549 Py_ssize_t flen;
547 550 Py_ssize_t dlen;
548 551 char *dest;
549 552 int i;
550 553 line new;
551 554 if (!PyBytes_Check(key)) {
552 555 PyErr_Format(PyExc_TypeError,
553 556 "setitem: manifest keys must be a string.");
554 557 return -1;
555 558 }
556 559 if (!value) {
557 560 return lazymanifest_delitem(self, key);
558 561 }
559 562 if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
560 563 PyErr_Format(PyExc_TypeError,
561 564 "Manifest values must be a tuple of (node, flags).");
562 565 return -1;
563 566 }
564 567 if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) {
565 568 return -1;
566 569 }
567 570
568 571 pyhash = PyTuple_GetItem(value, 0);
569 572 if (!PyBytes_Check(pyhash)) {
570 573 PyErr_Format(PyExc_TypeError,
571 "node must be a 20-byte string");
574 "node must be a 20 or 32 bytes string");
572 575 return -1;
573 576 }
574 577 hlen = PyBytes_Size(pyhash);
575 /* Some parts of the codebase try and set 21 or 22
576 * byte "hash" values in order to perturb things for
577 * status. We have to preserve at least the 21st
578 * byte. Sigh. If there's a 22nd byte, we drop it on
579 * the floor, which works fine.
580 */
581 if (hlen != 20 && hlen != 21 && hlen != 22) {
578 if (hlen != 20 && hlen != 32) {
582 579 PyErr_Format(PyExc_TypeError,
583 "node must be a 20-byte string");
580 "node must be a 20 or 32 bytes string");
584 581 return -1;
585 582 }
586 583 hash = PyBytes_AsString(pyhash);
587 584
588 585 pyflags = PyTuple_GetItem(value, 1);
589 586 if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
590 587 PyErr_Format(PyExc_TypeError,
591 "flags must a 0 or 1 byte string");
588 "flags must a 0 or 1 bytes string");
592 589 return -1;
593 590 }
594 591 if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
595 592 return -1;
596 593 }
594 if (flen == 1) {
595 switch (*flags) {
596 case 'l':
597 case 't':
598 case 'x':
599 break;
600 default:
601 PyErr_Format(PyExc_TypeError, "invalid manifest flag");
602 return -1;
603 }
604 }
597 605 /* one null byte and one newline */
598 dlen = plen + 41 + flen + 1;
606 dlen = plen + hlen * 2 + 1 + flen + 1;
599 607 dest = malloc(dlen);
600 608 if (!dest) {
601 609 PyErr_NoMemory();
602 610 return -1;
603 611 }
604 612 memcpy(dest, path, plen + 1);
605 for (i = 0; i < 20; i++) {
613 for (i = 0; i < hlen; i++) {
606 614 /* Cast to unsigned, so it will not get sign-extended when promoted
607 615 * to int (as is done when passing to a variadic function)
608 616 */
609 617 sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
610 618 }
611 memcpy(dest + plen + 41, flags, flen);
612 dest[plen + 41 + flen] = '\n';
619 memcpy(dest + plen + 2 * hlen + 1, flags, flen);
620 dest[plen + 2 * hlen + 1 + flen] = '\n';
613 621 new.start = dest;
614 622 new.len = dlen;
615 623 new.hash_suffix = '\0';
616 624 if (hlen > 20) {
617 625 new.hash_suffix = hash[20];
618 626 }
619 627 new.from_malloc = true; /* is `start` a pointer we allocated? */
620 628 new.deleted = false; /* is this entry deleted? */
621 629 if (internalsetitem(self, &new)) {
622 630 return -1;
623 631 }
624 632 return 0;
625 633 }
626 634
627 635 static PyMappingMethods lazymanifest_mapping_methods = {
628 636 (lenfunc)lazymanifest_size, /* mp_length */
629 637 (binaryfunc)lazymanifest_getitem, /* mp_subscript */
630 638 (objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */
631 639 };
632 640
633 641 /* sequence methods (important or __contains__ builds an iterator) */
634 642
635 643 static int lazymanifest_contains(lazymanifest *self, PyObject *key)
636 644 {
637 645 line needle;
638 646 line *hit;
639 647 if (!PyBytes_Check(key)) {
640 648 /* Our keys are always strings, so if the contains
641 649 * check is for a non-string, just return false. */
642 650 return 0;
643 651 }
644 652 needle.start = PyBytes_AsString(key);
645 653 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
646 654 &linecmp);
647 655 if (!hit || hit->deleted) {
648 656 return 0;
649 657 }
650 658 return 1;
651 659 }
652 660
653 661 static PySequenceMethods lazymanifest_seq_meths = {
654 662 (lenfunc)lazymanifest_size, /* sq_length */
655 663 0, /* sq_concat */
656 664 0, /* sq_repeat */
657 665 0, /* sq_item */
658 666 0, /* sq_slice */
659 667 0, /* sq_ass_item */
660 668 0, /* sq_ass_slice */
661 669 (objobjproc)lazymanifest_contains, /* sq_contains */
662 670 0, /* sq_inplace_concat */
663 671 0, /* sq_inplace_repeat */
664 672 };
665 673
666 674
667 675 /* Other methods (copy, diff, etc) */
668 676 static PyTypeObject lazymanifestType;
669 677
670 678 /* If the manifest has changes, build the new manifest text and reindex it. */
671 679 static int compact(lazymanifest *self)
672 680 {
673 681 int i;
674 682 ssize_t need = 0;
675 683 char *data;
676 684 line *src, *dst;
677 685 PyObject *pydata;
678 686 if (!self->dirty)
679 687 return 0;
680 688 for (i = 0; i < self->numlines; i++) {
681 689 if (!self->lines[i].deleted) {
682 690 need += self->lines[i].len;
683 691 }
684 692 }
685 693 pydata = PyBytes_FromStringAndSize(NULL, need);
686 694 if (!pydata)
687 695 return -1;
688 696 data = PyBytes_AsString(pydata);
689 697 if (!data) {
690 698 return -1;
691 699 }
692 700 src = self->lines;
693 701 dst = self->lines;
694 702 for (i = 0; i < self->numlines; i++, src++) {
695 703 char *tofree = NULL;
696 704 if (src->from_malloc) {
697 705 tofree = src->start;
698 706 }
699 707 if (!src->deleted) {
700 708 memcpy(data, src->start, src->len);
701 709 *dst = *src;
702 710 dst->start = data;
703 711 dst->from_malloc = false;
704 712 data += dst->len;
705 713 dst++;
706 714 }
707 715 free(tofree);
708 716 }
709 717 Py_DECREF(self->pydata);
710 718 self->pydata = pydata;
711 719 self->numlines = self->livelines;
712 720 self->dirty = false;
713 721 return 0;
714 722 }
715 723
716 724 static PyObject *lazymanifest_text(lazymanifest *self)
717 725 {
718 726 if (compact(self) != 0) {
719 727 PyErr_NoMemory();
720 728 return NULL;
721 729 }
722 730 Py_INCREF(self->pydata);
723 731 return self->pydata;
724 732 }
725 733
726 734 static lazymanifest *lazymanifest_copy(lazymanifest *self)
727 735 {
728 736 lazymanifest *copy = NULL;
729 737 if (compact(self) != 0) {
730 738 goto nomem;
731 739 }
732 740 copy = PyObject_New(lazymanifest, &lazymanifestType);
733 741 if (!copy) {
734 742 goto nomem;
735 743 }
736 744 lazymanifest_init_early(copy);
737 745 copy->numlines = self->numlines;
738 746 copy->livelines = self->livelines;
739 747 copy->dirty = false;
740 748 copy->lines = malloc(self->maxlines *sizeof(line));
741 749 if (!copy->lines) {
742 750 goto nomem;
743 751 }
744 752 memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
745 753 copy->maxlines = self->maxlines;
746 754 copy->pydata = self->pydata;
747 755 Py_INCREF(copy->pydata);
748 756 return copy;
749 757 nomem:
750 758 PyErr_NoMemory();
751 759 Py_XDECREF(copy);
752 760 return NULL;
753 761 }
754 762
755 763 static lazymanifest *lazymanifest_filtercopy(
756 764 lazymanifest *self, PyObject *matchfn)
757 765 {
758 766 lazymanifest *copy = NULL;
759 767 int i;
760 768 if (!PyCallable_Check(matchfn)) {
761 769 PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
762 770 return NULL;
763 771 }
764 772 /* compact ourselves first to avoid double-frees later when we
765 773 * compact tmp so that it doesn't have random pointers to our
766 774 * underlying from_malloc-data (self->pydata is safe) */
767 775 if (compact(self) != 0) {
768 776 goto nomem;
769 777 }
770 778 copy = PyObject_New(lazymanifest, &lazymanifestType);
771 779 if (!copy) {
772 780 goto nomem;
773 781 }
774 782 lazymanifest_init_early(copy);
775 783 copy->dirty = true;
776 784 copy->lines = malloc(self->maxlines * sizeof(line));
777 785 if (!copy->lines) {
778 786 goto nomem;
779 787 }
780 788 copy->maxlines = self->maxlines;
781 789 copy->numlines = 0;
782 790 copy->pydata = self->pydata;
783 791 Py_INCREF(copy->pydata);
784 792 for (i = 0; i < self->numlines; i++) {
785 793 PyObject *arglist = NULL, *result = NULL;
786 794 arglist = Py_BuildValue(PY23("(s)", "(y)"),
787 795 self->lines[i].start);
788 796 if (!arglist) {
789 797 goto bail;
790 798 }
791 799 result = PyObject_CallObject(matchfn, arglist);
792 800 Py_DECREF(arglist);
793 801 /* if the callback raised an exception, just let it
794 802 * through and give up */
795 803 if (!result) {
796 804 goto bail;
797 805 }
798 806 if (PyObject_IsTrue(result)) {
799 807 assert(!(self->lines[i].from_malloc));
800 808 copy->lines[copy->numlines++] = self->lines[i];
801 809 }
802 810 Py_DECREF(result);
803 811 }
804 812 copy->livelines = copy->numlines;
805 813 return copy;
806 814 nomem:
807 815 PyErr_NoMemory();
808 816 bail:
809 817 Py_XDECREF(copy);
810 818 return NULL;
811 819 }
812 820
813 821 static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
814 822 {
815 823 lazymanifest *other;
816 824 PyObject *pyclean = NULL;
817 825 bool listclean;
818 826 PyObject *emptyTup = NULL, *ret = NULL;
819 827 PyObject *es;
820 828 int sneedle = 0, oneedle = 0;
821 829 if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
822 830 return NULL;
823 831 }
824 832 listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
825 833 es = PyBytes_FromString("");
826 834 if (!es) {
827 835 goto nomem;
828 836 }
829 837 emptyTup = PyTuple_Pack(2, Py_None, es);
830 838 Py_DECREF(es);
831 839 if (!emptyTup) {
832 840 goto nomem;
833 841 }
834 842 ret = PyDict_New();
835 843 if (!ret) {
836 844 goto nomem;
837 845 }
838 846 while (sneedle != self->numlines || oneedle != other->numlines) {
839 847 line *left = self->lines + sneedle;
840 848 line *right = other->lines + oneedle;
841 849 int result;
842 850 PyObject *key;
843 851 PyObject *outer;
844 852 /* If we're looking at a deleted entry and it's not
845 853 * the end of the manifest, just skip it. */
846 854 if (sneedle < self->numlines && left->deleted) {
847 855 sneedle++;
848 856 continue;
849 857 }
850 858 if (oneedle < other->numlines && right->deleted) {
851 859 oneedle++;
852 860 continue;
853 861 }
854 862 /* if we're at the end of either manifest, then we
855 863 * know the remaining items are adds so we can skip
856 864 * the strcmp. */
857 865 if (sneedle == self->numlines) {
858 866 result = 1;
859 867 } else if (oneedle == other->numlines) {
860 868 result = -1;
861 869 } else {
862 870 result = linecmp(left, right);
863 871 }
864 872 key = result <= 0 ?
865 873 PyBytes_FromString(left->start) :
866 874 PyBytes_FromString(right->start);
867 875 if (!key)
868 876 goto nomem;
869 877 if (result < 0) {
870 878 PyObject *l = hashflags(left);
871 879 if (!l) {
872 880 goto nomem;
873 881 }
874 882 outer = PyTuple_Pack(2, l, emptyTup);
875 883 Py_DECREF(l);
876 884 if (!outer) {
877 885 goto nomem;
878 886 }
879 887 PyDict_SetItem(ret, key, outer);
880 888 Py_DECREF(outer);
881 889 sneedle++;
882 890 } else if (result > 0) {
883 891 PyObject *r = hashflags(right);
884 892 if (!r) {
885 893 goto nomem;
886 894 }
887 895 outer = PyTuple_Pack(2, emptyTup, r);
888 896 Py_DECREF(r);
889 897 if (!outer) {
890 898 goto nomem;
891 899 }
892 900 PyDict_SetItem(ret, key, outer);
893 901 Py_DECREF(outer);
894 902 oneedle++;
895 903 } else {
896 904 /* file exists in both manifests */
897 905 if (left->len != right->len
898 906 || memcmp(left->start, right->start, left->len)
899 907 || left->hash_suffix != right->hash_suffix) {
900 908 PyObject *l = hashflags(left);
901 909 PyObject *r;
902 910 if (!l) {
903 911 goto nomem;
904 912 }
905 913 r = hashflags(right);
906 914 if (!r) {
907 915 Py_DECREF(l);
908 916 goto nomem;
909 917 }
910 918 outer = PyTuple_Pack(2, l, r);
911 919 Py_DECREF(l);
912 920 Py_DECREF(r);
913 921 if (!outer) {
914 922 goto nomem;
915 923 }
916 924 PyDict_SetItem(ret, key, outer);
917 925 Py_DECREF(outer);
918 926 } else if (listclean) {
919 927 PyDict_SetItem(ret, key, Py_None);
920 928 }
921 929 sneedle++;
922 930 oneedle++;
923 931 }
924 932 Py_DECREF(key);
925 933 }
926 934 Py_DECREF(emptyTup);
927 935 return ret;
928 936 nomem:
929 937 PyErr_NoMemory();
930 938 Py_XDECREF(ret);
931 939 Py_XDECREF(emptyTup);
932 940 return NULL;
933 941 }
934 942
935 943 static PyMethodDef lazymanifest_methods[] = {
936 944 {"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS,
937 945 "Iterate over file names in this lazymanifest."},
938 946 {"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS,
939 947 "Iterate over (path, nodeid, flags) tuples in this lazymanifest."},
940 948 {"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
941 949 "Make a copy of this lazymanifest."},
942 950 {"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
943 951 "Make a copy of this manifest filtered by matchfn."},
944 952 {"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
945 953 "Compare this lazymanifest to another one."},
946 954 {"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
947 955 "Encode this manifest to text."},
948 956 {NULL},
949 957 };
950 958
951 959 #ifdef IS_PY3K
952 960 #define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT
953 961 #else
954 962 #define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN
955 963 #endif
956 964
957 965 static PyTypeObject lazymanifestType = {
958 966 PyVarObject_HEAD_INIT(NULL, 0) /* header */
959 967 "parsers.lazymanifest", /* tp_name */
960 968 sizeof(lazymanifest), /* tp_basicsize */
961 969 0, /* tp_itemsize */
962 970 (destructor)lazymanifest_dealloc, /* tp_dealloc */
963 971 0, /* tp_print */
964 972 0, /* tp_getattr */
965 973 0, /* tp_setattr */
966 974 0, /* tp_compare */
967 975 0, /* tp_repr */
968 976 0, /* tp_as_number */
969 977 &lazymanifest_seq_meths, /* tp_as_sequence */
970 978 &lazymanifest_mapping_methods, /* tp_as_mapping */
971 979 0, /* tp_hash */
972 980 0, /* tp_call */
973 981 0, /* tp_str */
974 982 0, /* tp_getattro */
975 983 0, /* tp_setattro */
976 984 0, /* tp_as_buffer */
977 985 LAZYMANIFEST_TPFLAGS, /* tp_flags */
978 986 "TODO(augie)", /* tp_doc */
979 987 0, /* tp_traverse */
980 988 0, /* tp_clear */
981 989 0, /* tp_richcompare */
982 990 0, /* tp_weaklistoffset */
983 991 (getiterfunc)lazymanifest_getkeysiter, /* tp_iter */
984 992 0, /* tp_iternext */
985 993 lazymanifest_methods, /* tp_methods */
986 994 0, /* tp_members */
987 995 0, /* tp_getset */
988 996 0, /* tp_base */
989 997 0, /* tp_dict */
990 998 0, /* tp_descr_get */
991 999 0, /* tp_descr_set */
992 1000 0, /* tp_dictoffset */
993 1001 (initproc)lazymanifest_init, /* tp_init */
994 1002 0, /* tp_alloc */
995 1003 };
996 1004
997 1005 void manifest_module_init(PyObject * mod)
998 1006 {
999 1007 lazymanifestType.tp_new = PyType_GenericNew;
1000 1008 if (PyType_Ready(&lazymanifestType) < 0)
1001 1009 return;
1002 1010 Py_INCREF(&lazymanifestType);
1003 1011
1004 1012 PyModule_AddObject(mod, "lazymanifest",
1005 1013 (PyObject *)&lazymanifestType);
1006 1014 }
@@ -1,2329 +1,2340 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from .pycompat import getattr
23 23 from . import (
24 24 encoding,
25 25 error,
26 26 match as matchmod,
27 27 mdiff,
28 28 pathutil,
29 29 policy,
30 30 pycompat,
31 31 revlog,
32 32 util,
33 33 )
34 34 from .interfaces import (
35 35 repository,
36 36 util as interfaceutil,
37 37 )
38 38
39 39 parsers = policy.importmod('parsers')
40 40 propertycache = util.propertycache
41 41
42 42 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
43 43 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
44 44
45 45
46 46 def _parse(data):
47 47 # This method does a little bit of excessive-looking
48 48 # precondition checking. This is so that the behavior of this
49 49 # class exactly matches its C counterpart to try and help
50 50 # prevent surprise breakage for anyone that develops against
51 51 # the pure version.
52 52 if data and data[-1:] != b'\n':
53 53 raise ValueError(b'Manifest did not end in a newline.')
54 54 prev = None
55 55 for l in data.splitlines():
56 56 if prev is not None and prev > l:
57 57 raise ValueError(b'Manifest lines not in sorted order.')
58 58 prev = l
59 59 f, n = l.split(b'\0')
60 60 nl = len(n)
61 61 if 64 < nl:
62 62 # modern hash, full width
63 63 yield f, bin(n[:64]), n[64:]
64 64 elif 40 < nl < 45:
65 65 # legacy hash, always sha1
66 66 yield f, bin(n[:40]), n[40:]
67 67 else:
68 68 yield f, bin(n), b''
69 69
70 70
71 71 def _text(it):
72 72 files = []
73 73 lines = []
74 74 for f, n, fl in it:
75 75 files.append(f)
76 76 # if this is changed to support newlines in filenames,
77 77 # be sure to check the templates/ dir again (especially *-raw.tmpl)
78 78 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
79 79
80 80 _checkforbidden(files)
81 81 return b''.join(lines)
82 82
83 83
84 84 class lazymanifestiter(object):
85 85 def __init__(self, lm):
86 86 self.pos = 0
87 87 self.lm = lm
88 88
89 89 def __iter__(self):
90 90 return self
91 91
92 92 def next(self):
93 93 try:
94 94 data, pos = self.lm._get(self.pos)
95 95 except IndexError:
96 96 raise StopIteration
97 97 if pos == -1:
98 98 self.pos += 1
99 99 return data[0]
100 100 self.pos += 1
101 101 zeropos = data.find(b'\x00', pos)
102 102 return data[pos:zeropos]
103 103
104 104 __next__ = next
105 105
106 106
107 107 class lazymanifestiterentries(object):
108 108 def __init__(self, lm):
109 109 self.lm = lm
110 110 self.pos = 0
111 111
112 112 def __iter__(self):
113 113 return self
114 114
115 115 def next(self):
116 116 try:
117 117 data, pos = self.lm._get(self.pos)
118 118 except IndexError:
119 119 raise StopIteration
120 120 if pos == -1:
121 121 self.pos += 1
122 122 return data
123 123 zeropos = data.find(b'\x00', pos)
124 hashval = unhexlify(data, self.lm.extrainfo[self.pos], zeropos + 1, 40)
125 flags = self.lm._getflags(data, self.pos, zeropos)
124 nlpos = data.find(b'\n', pos)
125 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
126 raise error.StorageError(b'Invalid manifest line')
127 flags = data[nlpos - 1 : nlpos]
128 if flags in _manifestflags:
129 hlen = nlpos - zeropos - 2
130 else:
131 hlen = nlpos - zeropos - 1
132 flags = b''
133 if hlen not in (40, 64):
134 raise error.StorageError(b'Invalid manifest line')
135 hashval = unhexlify(
136 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
137 )
126 138 self.pos += 1
127 139 return (data[pos:zeropos], hashval, flags)
128 140
129 141 __next__ = next
130 142
131 143
132 144 def unhexlify(data, extra, pos, length):
133 145 s = bin(data[pos : pos + length])
134 146 if extra:
135 147 s += chr(extra & 0xFF)
136 148 return s
137 149
138 150
139 151 def _cmp(a, b):
140 152 return (a > b) - (a < b)
141 153
142 154
155 _manifestflags = {b'', b'l', b't', b'x'}
156
157
143 158 class _lazymanifest(object):
144 159 """A pure python manifest backed by a byte string. It is supplimented with
145 160 internal lists as it is modified, until it is compacted back to a pure byte
146 161 string.
147 162
148 163 ``data`` is the initial manifest data.
149 164
150 165 ``positions`` is a list of offsets, one per manifest entry. Positive
151 166 values are offsets into ``data``, negative values are offsets into the
152 167 ``extradata`` list. When an entry is removed, its entry is dropped from
153 168 ``positions``. The values are encoded such that when walking the list and
154 169 indexing into ``data`` or ``extradata`` as appropriate, the entries are
155 170 sorted by filename.
156 171
157 172 ``extradata`` is a list of (key, hash, flags) for entries that were added or
158 173 modified since the manifest was created or compacted.
159 174 """
160 175
161 176 def __init__(
162 177 self,
163 178 data,
164 179 positions=None,
165 180 extrainfo=None,
166 181 extradata=None,
167 182 hasremovals=False,
168 183 ):
169 184 if positions is None:
170 185 self.positions = self.findlines(data)
171 186 self.extrainfo = [0] * len(self.positions)
172 187 self.data = data
173 188 self.extradata = []
174 189 self.hasremovals = False
175 190 else:
176 191 self.positions = positions[:]
177 192 self.extrainfo = extrainfo[:]
178 193 self.extradata = extradata[:]
179 194 self.data = data
180 195 self.hasremovals = hasremovals
181 196
182 197 def findlines(self, data):
183 198 if not data:
184 199 return []
185 200 pos = data.find(b"\n")
186 201 if pos == -1 or data[-1:] != b'\n':
187 202 raise ValueError(b"Manifest did not end in a newline.")
188 203 positions = [0]
189 204 prev = data[: data.find(b'\x00')]
190 205 while pos < len(data) - 1 and pos != -1:
191 206 positions.append(pos + 1)
192 207 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
193 208 if nexts < prev:
194 209 raise ValueError(b"Manifest lines not in sorted order.")
195 210 prev = nexts
196 211 pos = data.find(b"\n", pos + 1)
197 212 return positions
198 213
199 214 def _get(self, index):
200 215 # get the position encoded in pos:
201 216 # positive number is an index in 'data'
202 217 # negative number is in extrapieces
203 218 pos = self.positions[index]
204 219 if pos >= 0:
205 220 return self.data, pos
206 221 return self.extradata[-pos - 1], -1
207 222
208 223 def _getkey(self, pos):
209 224 if pos >= 0:
210 225 return self.data[pos : self.data.find(b'\x00', pos + 1)]
211 226 return self.extradata[-pos - 1][0]
212 227
213 228 def bsearch(self, key):
214 229 first = 0
215 230 last = len(self.positions) - 1
216 231
217 232 while first <= last:
218 233 midpoint = (first + last) // 2
219 234 nextpos = self.positions[midpoint]
220 235 candidate = self._getkey(nextpos)
221 236 r = _cmp(key, candidate)
222 237 if r == 0:
223 238 return midpoint
224 239 else:
225 240 if r < 0:
226 241 last = midpoint - 1
227 242 else:
228 243 first = midpoint + 1
229 244 return -1
230 245
231 246 def bsearch2(self, key):
232 247 # same as the above, but will always return the position
233 248 # done for performance reasons
234 249 first = 0
235 250 last = len(self.positions) - 1
236 251
237 252 while first <= last:
238 253 midpoint = (first + last) // 2
239 254 nextpos = self.positions[midpoint]
240 255 candidate = self._getkey(nextpos)
241 256 r = _cmp(key, candidate)
242 257 if r == 0:
243 258 return (midpoint, True)
244 259 else:
245 260 if r < 0:
246 261 last = midpoint - 1
247 262 else:
248 263 first = midpoint + 1
249 264 return (first, False)
250 265
251 266 def __contains__(self, key):
252 267 return self.bsearch(key) != -1
253 268
254 def _getflags(self, data, needle, pos):
255 start = pos + 41
256 end = data.find(b"\n", start)
257 if end == -1:
258 end = len(data) - 1
259 if start == end:
260 return b''
261 return self.data[start:end]
262
263 269 def __getitem__(self, key):
264 270 if not isinstance(key, bytes):
265 271 raise TypeError(b"getitem: manifest keys must be a bytes.")
266 272 needle = self.bsearch(key)
267 273 if needle == -1:
268 274 raise KeyError
269 275 data, pos = self._get(needle)
270 276 if pos == -1:
271 277 return (data[1], data[2])
272 278 zeropos = data.find(b'\x00', pos)
273 279 nlpos = data.find(b'\n', zeropos)
274 280 assert 0 <= needle <= len(self.positions)
275 281 assert len(self.extrainfo) == len(self.positions)
282 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
283 raise error.StorageError(b'Invalid manifest line')
276 284 hlen = nlpos - zeropos - 1
277 # Hashes sometimes have an extra byte tucked on the end, so
278 # detect that.
279 if hlen % 2:
285 flags = data[nlpos - 1 : nlpos]
286 if flags in _manifestflags:
280 287 hlen -= 1
288 else:
289 flags = b''
290 if hlen not in (40, 64):
291 raise error.StorageError(b'Invalid manifest line')
281 292 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
282 flags = self._getflags(data, needle, zeropos)
283 293 return (hashval, flags)
284 294
285 295 def __delitem__(self, key):
286 296 needle, found = self.bsearch2(key)
287 297 if not found:
288 298 raise KeyError
289 299 cur = self.positions[needle]
290 300 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
291 301 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
292 302 if cur >= 0:
293 303 # This does NOT unsort the list as far as the search functions are
294 304 # concerned, as they only examine lines mapped by self.positions.
295 305 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
296 306 self.hasremovals = True
297 307
298 308 def __setitem__(self, key, value):
299 309 if not isinstance(key, bytes):
300 310 raise TypeError(b"setitem: manifest keys must be a byte string.")
301 311 if not isinstance(value, tuple) or len(value) != 2:
302 312 raise TypeError(
303 313 b"Manifest values must be a tuple of (node, flags)."
304 314 )
305 315 hashval = value[0]
306 316 # hashes are either 20 or 32 bytes (sha1 or its replacement),
307 317 # and allow one extra byte taht won't be persisted to disk but
308 318 # is sometimes used in memory.
309 319 if not isinstance(hashval, bytes) or not (
310 320 20 <= len(hashval) <= 22 or 32 <= len(hashval) <= 34
311 321 ):
312 322 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
313 323 flags = value[1]
314 324 if len(hashval) == 22:
315 325 hashval = hashval[:-1]
316 326 if not isinstance(flags, bytes) or len(flags) > 1:
317 327 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
318 328 needle, found = self.bsearch2(key)
319 329 if found:
320 330 # put the item
321 331 pos = self.positions[needle]
322 332 if pos < 0:
323 333 self.extradata[-pos - 1] = (key, hashval, value[1])
324 334 else:
325 335 # just don't bother
326 336 self.extradata.append((key, hashval, value[1]))
327 337 self.positions[needle] = -len(self.extradata)
328 338 else:
329 339 # not found, put it in with extra positions
330 340 self.extradata.append((key, hashval, value[1]))
331 341 self.positions = (
332 342 self.positions[:needle]
333 343 + [-len(self.extradata)]
334 344 + self.positions[needle:]
335 345 )
336 346 self.extrainfo = (
337 347 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
338 348 )
339 349
340 350 def copy(self):
341 351 # XXX call _compact like in C?
342 352 return _lazymanifest(
343 353 self.data,
344 354 self.positions,
345 355 self.extrainfo,
346 356 self.extradata,
347 357 self.hasremovals,
348 358 )
349 359
350 360 def _compact(self):
351 361 # hopefully not called TOO often
352 362 if len(self.extradata) == 0 and not self.hasremovals:
353 363 return
354 364 l = []
355 365 i = 0
356 366 offset = 0
357 367 self.extrainfo = [0] * len(self.positions)
358 368 while i < len(self.positions):
359 369 if self.positions[i] >= 0:
360 370 cur = self.positions[i]
361 371 last_cut = cur
362 372
363 373 # Collect all contiguous entries in the buffer at the current
364 374 # offset, breaking out only for added/modified items held in
365 375 # extradata, or a deleted line prior to the next position.
366 376 while True:
367 377 self.positions[i] = offset
368 378 i += 1
369 379 if i == len(self.positions) or self.positions[i] < 0:
370 380 break
371 381
372 382 # A removed file has no positions[] entry, but does have an
373 383 # overwritten first byte. Break out and find the end of the
374 384 # current good entry/entries if there is a removed file
375 385 # before the next position.
376 386 if (
377 387 self.hasremovals
378 388 and self.data.find(b'\n\x00', cur, self.positions[i])
379 389 != -1
380 390 ):
381 391 break
382 392
383 393 offset += self.positions[i] - cur
384 394 cur = self.positions[i]
385 395 end_cut = self.data.find(b'\n', cur)
386 396 if end_cut != -1:
387 397 end_cut += 1
388 398 offset += end_cut - cur
389 399 l.append(self.data[last_cut:end_cut])
390 400 else:
391 401 while i < len(self.positions) and self.positions[i] < 0:
392 402 cur = self.positions[i]
393 403 t = self.extradata[-cur - 1]
394 404 l.append(self._pack(t))
395 405 self.positions[i] = offset
396 406 # Hashes are either 20 bytes (old sha1s) or 32
397 407 # bytes (new non-sha1).
398 408 hlen = 20
399 409 if len(t[1]) > 25:
400 410 hlen = 32
401 411 if len(t[1]) > hlen:
402 412 self.extrainfo[i] = ord(t[1][hlen + 1])
403 413 offset += len(l[-1])
404 414 i += 1
405 415 self.data = b''.join(l)
406 416 self.hasremovals = False
407 417 self.extradata = []
408 418
409 419 def _pack(self, d):
410 420 n = d[1]
411 if len(n) == 21 or len(n) == 33:
412 n = n[:-1]
413 assert len(n) == 20 or len(n) == 32
421 assert len(n) in (20, 32)
414 422 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
415 423
416 424 def text(self):
417 425 self._compact()
418 426 return self.data
419 427
420 428 def diff(self, m2, clean=False):
421 429 '''Finds changes between the current manifest and m2.'''
422 430 # XXX think whether efficiency matters here
423 431 diff = {}
424 432
425 433 for fn, e1, flags in self.iterentries():
426 434 if fn not in m2:
427 435 diff[fn] = (e1, flags), (None, b'')
428 436 else:
429 437 e2 = m2[fn]
430 438 if (e1, flags) != e2:
431 439 diff[fn] = (e1, flags), e2
432 440 elif clean:
433 441 diff[fn] = None
434 442
435 443 for fn, e2, flags in m2.iterentries():
436 444 if fn not in self:
437 445 diff[fn] = (None, b''), (e2, flags)
438 446
439 447 return diff
440 448
441 449 def iterentries(self):
442 450 return lazymanifestiterentries(self)
443 451
444 452 def iterkeys(self):
445 453 return lazymanifestiter(self)
446 454
447 455 def __iter__(self):
448 456 return lazymanifestiter(self)
449 457
450 458 def __len__(self):
451 459 return len(self.positions)
452 460
453 461 def filtercopy(self, filterfn):
454 462 # XXX should be optimized
455 463 c = _lazymanifest(b'')
456 464 for f, n, fl in self.iterentries():
457 465 if filterfn(f):
458 466 c[f] = n, fl
459 467 return c
460 468
461 469
462 470 try:
463 471 _lazymanifest = parsers.lazymanifest
464 472 except AttributeError:
465 473 pass
466 474
467 475
468 476 @interfaceutil.implementer(repository.imanifestdict)
469 477 class manifestdict(object):
470 478 def __init__(self, data=b''):
471 479 self._lm = _lazymanifest(data)
472 480
473 481 def __getitem__(self, key):
474 482 return self._lm[key][0]
475 483
476 484 def find(self, key):
477 485 return self._lm[key]
478 486
479 487 def __len__(self):
480 488 return len(self._lm)
481 489
482 490 def __nonzero__(self):
483 491 # nonzero is covered by the __len__ function, but implementing it here
484 492 # makes it easier for extensions to override.
485 493 return len(self._lm) != 0
486 494
487 495 __bool__ = __nonzero__
488 496
489 497 def __setitem__(self, key, node):
490 498 self._lm[key] = node, self.flags(key)
491 499
492 500 def __contains__(self, key):
493 501 if key is None:
494 502 return False
495 503 return key in self._lm
496 504
497 505 def __delitem__(self, key):
498 506 del self._lm[key]
499 507
500 508 def __iter__(self):
501 509 return self._lm.__iter__()
502 510
503 511 def iterkeys(self):
504 512 return self._lm.iterkeys()
505 513
506 514 def keys(self):
507 515 return list(self.iterkeys())
508 516
509 517 def filesnotin(self, m2, match=None):
510 518 '''Set of files in this manifest that are not in the other'''
511 519 if match is not None:
512 520 match = matchmod.badmatch(match, lambda path, msg: None)
513 521 sm2 = set(m2.walk(match))
514 522 return {f for f in self.walk(match) if f not in sm2}
515 523 return {f for f in self if f not in m2}
516 524
517 525 @propertycache
518 526 def _dirs(self):
519 527 return pathutil.dirs(self)
520 528
521 529 def dirs(self):
522 530 return self._dirs
523 531
524 532 def hasdir(self, dir):
525 533 return dir in self._dirs
526 534
527 535 def _filesfastpath(self, match):
528 536 '''Checks whether we can correctly and quickly iterate over matcher
529 537 files instead of over manifest files.'''
530 538 files = match.files()
531 539 return len(files) < 100 and (
532 540 match.isexact()
533 541 or (match.prefix() and all(fn in self for fn in files))
534 542 )
535 543
536 544 def walk(self, match):
537 545 '''Generates matching file names.
538 546
539 547 Equivalent to manifest.matches(match).iterkeys(), but without creating
540 548 an entirely new manifest.
541 549
542 550 It also reports nonexistent files by marking them bad with match.bad().
543 551 '''
544 552 if match.always():
545 553 for f in iter(self):
546 554 yield f
547 555 return
548 556
549 557 fset = set(match.files())
550 558
551 559 # avoid the entire walk if we're only looking for specific files
552 560 if self._filesfastpath(match):
553 561 for fn in sorted(fset):
554 562 if fn in self:
555 563 yield fn
556 564 return
557 565
558 566 for fn in self:
559 567 if fn in fset:
560 568 # specified pattern is the exact name
561 569 fset.remove(fn)
562 570 if match(fn):
563 571 yield fn
564 572
565 573 # for dirstate.walk, files=[''] means "walk the whole tree".
566 574 # follow that here, too
567 575 fset.discard(b'')
568 576
569 577 for fn in sorted(fset):
570 578 if not self.hasdir(fn):
571 579 match.bad(fn, None)
572 580
573 581 def _matches(self, match):
574 582 '''generate a new manifest filtered by the match argument'''
575 583 if match.always():
576 584 return self.copy()
577 585
578 586 if self._filesfastpath(match):
579 587 m = manifestdict()
580 588 lm = self._lm
581 589 for fn in match.files():
582 590 if fn in lm:
583 591 m._lm[fn] = lm[fn]
584 592 return m
585 593
586 594 m = manifestdict()
587 595 m._lm = self._lm.filtercopy(match)
588 596 return m
589 597
590 598 def diff(self, m2, match=None, clean=False):
591 599 '''Finds changes between the current manifest and m2.
592 600
593 601 Args:
594 602 m2: the manifest to which this manifest should be compared.
595 603 clean: if true, include files unchanged between these manifests
596 604 with a None value in the returned dictionary.
597 605
598 606 The result is returned as a dict with filename as key and
599 607 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
600 608 nodeid in the current/other manifest and fl1/fl2 is the flag
601 609 in the current/other manifest. Where the file does not exist,
602 610 the nodeid will be None and the flags will be the empty
603 611 string.
604 612 '''
605 613 if match:
606 614 m1 = self._matches(match)
607 615 m2 = m2._matches(match)
608 616 return m1.diff(m2, clean=clean)
609 617 return self._lm.diff(m2._lm, clean)
610 618
611 619 def setflag(self, key, flag):
620 if flag not in _manifestflags:
621 raise TypeError(b"Invalid manifest flag set.")
612 622 self._lm[key] = self[key], flag
613 623
614 624 def get(self, key, default=None):
615 625 try:
616 626 return self._lm[key][0]
617 627 except KeyError:
618 628 return default
619 629
620 630 def flags(self, key):
621 631 try:
622 632 return self._lm[key][1]
623 633 except KeyError:
624 634 return b''
625 635
626 636 def copy(self):
627 637 c = manifestdict()
628 638 c._lm = self._lm.copy()
629 639 return c
630 640
631 641 def items(self):
632 642 return (x[:2] for x in self._lm.iterentries())
633 643
634 644 def iteritems(self):
635 645 return (x[:2] for x in self._lm.iterentries())
636 646
637 647 def iterentries(self):
638 648 return self._lm.iterentries()
639 649
640 650 def text(self):
641 651 # most likely uses native version
642 652 return self._lm.text()
643 653
644 654 def fastdelta(self, base, changes):
645 655 """Given a base manifest text as a bytearray and a list of changes
646 656 relative to that text, compute a delta that can be used by revlog.
647 657 """
648 658 delta = []
649 659 dstart = None
650 660 dend = None
651 661 dline = [b""]
652 662 start = 0
653 663 # zero copy representation of base as a buffer
654 664 addbuf = util.buffer(base)
655 665
656 666 changes = list(changes)
657 667 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
658 668 # start with a readonly loop that finds the offset of
659 669 # each line and creates the deltas
660 670 for f, todelete in changes:
661 671 # bs will either be the index of the item or the insert point
662 672 start, end = _msearch(addbuf, f, start)
663 673 if not todelete:
664 674 h, fl = self._lm[f]
665 675 l = b"%s\0%s%s\n" % (f, hex(h), fl)
666 676 else:
667 677 if start == end:
668 678 # item we want to delete was not found, error out
669 679 raise AssertionError(
670 680 _(b"failed to remove %s from manifest") % f
671 681 )
672 682 l = b""
673 683 if dstart is not None and dstart <= start and dend >= start:
674 684 if dend < end:
675 685 dend = end
676 686 if l:
677 687 dline.append(l)
678 688 else:
679 689 if dstart is not None:
680 690 delta.append([dstart, dend, b"".join(dline)])
681 691 dstart = start
682 692 dend = end
683 693 dline = [l]
684 694
685 695 if dstart is not None:
686 696 delta.append([dstart, dend, b"".join(dline)])
687 697 # apply the delta to the base, and get a delta for addrevision
688 698 deltatext, arraytext = _addlistdelta(base, delta)
689 699 else:
690 700 # For large changes, it's much cheaper to just build the text and
691 701 # diff it.
692 702 arraytext = bytearray(self.text())
693 703 deltatext = mdiff.textdiff(
694 704 util.buffer(base), util.buffer(arraytext)
695 705 )
696 706
697 707 return arraytext, deltatext
698 708
699 709
700 710 def _msearch(m, s, lo=0, hi=None):
701 711 '''return a tuple (start, end) that says where to find s within m.
702 712
703 713 If the string is found m[start:end] are the line containing
704 714 that string. If start == end the string was not found and
705 715 they indicate the proper sorted insertion point.
706 716
707 717 m should be a buffer, a memoryview or a byte string.
708 718 s is a byte string'''
709 719
710 720 def advance(i, c):
711 721 while i < lenm and m[i : i + 1] != c:
712 722 i += 1
713 723 return i
714 724
715 725 if not s:
716 726 return (lo, lo)
717 727 lenm = len(m)
718 728 if not hi:
719 729 hi = lenm
720 730 while lo < hi:
721 731 mid = (lo + hi) // 2
722 732 start = mid
723 733 while start > 0 and m[start - 1 : start] != b'\n':
724 734 start -= 1
725 735 end = advance(start, b'\0')
726 736 if bytes(m[start:end]) < s:
727 737 # we know that after the null there are 40 bytes of sha1
728 738 # this translates to the bisect lo = mid + 1
729 739 lo = advance(end + 40, b'\n') + 1
730 740 else:
731 741 # this translates to the bisect hi = mid
732 742 hi = start
733 743 end = advance(lo, b'\0')
734 744 found = m[lo:end]
735 745 if s == found:
736 746 # we know that after the null there are 40 bytes of sha1
737 747 end = advance(end + 40, b'\n')
738 748 return (lo, end + 1)
739 749 else:
740 750 return (lo, lo)
741 751
742 752
743 753 def _checkforbidden(l):
744 754 """Check filenames for illegal characters."""
745 755 for f in l:
746 756 if b'\n' in f or b'\r' in f:
747 757 raise error.StorageError(
748 758 _(b"'\\n' and '\\r' disallowed in filenames: %r")
749 759 % pycompat.bytestr(f)
750 760 )
751 761
752 762
753 763 # apply the changes collected during the bisect loop to our addlist
754 764 # return a delta suitable for addrevision
755 765 def _addlistdelta(addlist, x):
756 766 # for large addlist arrays, building a new array is cheaper
757 767 # than repeatedly modifying the existing one
758 768 currentposition = 0
759 769 newaddlist = bytearray()
760 770
761 771 for start, end, content in x:
762 772 newaddlist += addlist[currentposition:start]
763 773 if content:
764 774 newaddlist += bytearray(content)
765 775
766 776 currentposition = end
767 777
768 778 newaddlist += addlist[currentposition:]
769 779
770 780 deltatext = b"".join(
771 781 struct.pack(b">lll", start, end, len(content)) + content
772 782 for start, end, content in x
773 783 )
774 784 return deltatext, newaddlist
775 785
776 786
777 787 def _splittopdir(f):
778 788 if b'/' in f:
779 789 dir, subpath = f.split(b'/', 1)
780 790 return dir + b'/', subpath
781 791 else:
782 792 return b'', f
783 793
784 794
785 795 _noop = lambda s: None
786 796
787 797
788 798 @interfaceutil.implementer(repository.imanifestdict)
789 799 class treemanifest(object):
790 800 def __init__(self, dir=b'', text=b''):
791 801 self._dir = dir
792 802 self._node = nullid
793 803 self._loadfunc = _noop
794 804 self._copyfunc = _noop
795 805 self._dirty = False
796 806 self._dirs = {}
797 807 self._lazydirs = {}
798 808 # Using _lazymanifest here is a little slower than plain old dicts
799 809 self._files = {}
800 810 self._flags = {}
801 811 if text:
802 812
803 813 def readsubtree(subdir, subm):
804 814 raise AssertionError(
805 815 b'treemanifest constructor only accepts flat manifests'
806 816 )
807 817
808 818 self.parse(text, readsubtree)
809 819 self._dirty = True # Mark flat manifest dirty after parsing
810 820
811 821 def _subpath(self, path):
812 822 return self._dir + path
813 823
814 824 def _loadalllazy(self):
815 825 selfdirs = self._dirs
816 826 for d, (path, node, readsubtree, docopy) in pycompat.iteritems(
817 827 self._lazydirs
818 828 ):
819 829 if docopy:
820 830 selfdirs[d] = readsubtree(path, node).copy()
821 831 else:
822 832 selfdirs[d] = readsubtree(path, node)
823 833 self._lazydirs = {}
824 834
825 835 def _loadlazy(self, d):
826 836 v = self._lazydirs.get(d)
827 837 if v:
828 838 path, node, readsubtree, docopy = v
829 839 if docopy:
830 840 self._dirs[d] = readsubtree(path, node).copy()
831 841 else:
832 842 self._dirs[d] = readsubtree(path, node)
833 843 del self._lazydirs[d]
834 844
835 845 def _loadchildrensetlazy(self, visit):
836 846 if not visit:
837 847 return None
838 848 if visit == b'all' or visit == b'this':
839 849 self._loadalllazy()
840 850 return None
841 851
842 852 loadlazy = self._loadlazy
843 853 for k in visit:
844 854 loadlazy(k + b'/')
845 855 return visit
846 856
847 857 def _loaddifflazy(self, t1, t2):
848 858 """load items in t1 and t2 if they're needed for diffing.
849 859
850 860 The criteria currently is:
851 861 - if it's not present in _lazydirs in either t1 or t2, load it in the
852 862 other (it may already be loaded or it may not exist, doesn't matter)
853 863 - if it's present in _lazydirs in both, compare the nodeid; if it
854 864 differs, load it in both
855 865 """
856 866 toloadlazy = []
857 867 for d, v1 in pycompat.iteritems(t1._lazydirs):
858 868 v2 = t2._lazydirs.get(d)
859 869 if not v2 or v2[1] != v1[1]:
860 870 toloadlazy.append(d)
861 871 for d, v1 in pycompat.iteritems(t2._lazydirs):
862 872 if d not in t1._lazydirs:
863 873 toloadlazy.append(d)
864 874
865 875 for d in toloadlazy:
866 876 t1._loadlazy(d)
867 877 t2._loadlazy(d)
868 878
869 879 def __len__(self):
870 880 self._load()
871 881 size = len(self._files)
872 882 self._loadalllazy()
873 883 for m in self._dirs.values():
874 884 size += m.__len__()
875 885 return size
876 886
877 887 def __nonzero__(self):
878 888 # Faster than "__len() != 0" since it avoids loading sub-manifests
879 889 return not self._isempty()
880 890
881 891 __bool__ = __nonzero__
882 892
883 893 def _isempty(self):
884 894 self._load() # for consistency; already loaded by all callers
885 895 # See if we can skip loading everything.
886 896 if self._files or (
887 897 self._dirs and any(not m._isempty() for m in self._dirs.values())
888 898 ):
889 899 return False
890 900 self._loadalllazy()
891 901 return not self._dirs or all(m._isempty() for m in self._dirs.values())
892 902
893 903 @encoding.strmethod
894 904 def __repr__(self):
895 905 return (
896 906 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
897 907 % (
898 908 self._dir,
899 909 hex(self._node),
900 910 bool(self._loadfunc is _noop),
901 911 self._dirty,
902 912 id(self),
903 913 )
904 914 )
905 915
906 916 def dir(self):
907 917 '''The directory that this tree manifest represents, including a
908 918 trailing '/'. Empty string for the repo root directory.'''
909 919 return self._dir
910 920
911 921 def node(self):
912 922 '''This node of this instance. nullid for unsaved instances. Should
913 923 be updated when the instance is read or written from a revlog.
914 924 '''
915 925 assert not self._dirty
916 926 return self._node
917 927
918 928 def setnode(self, node):
919 929 self._node = node
920 930 self._dirty = False
921 931
922 932 def iterentries(self):
923 933 self._load()
924 934 self._loadalllazy()
925 935 for p, n in sorted(
926 936 itertools.chain(self._dirs.items(), self._files.items())
927 937 ):
928 938 if p in self._files:
929 939 yield self._subpath(p), n, self._flags.get(p, b'')
930 940 else:
931 941 for x in n.iterentries():
932 942 yield x
933 943
934 944 def items(self):
935 945 self._load()
936 946 self._loadalllazy()
937 947 for p, n in sorted(
938 948 itertools.chain(self._dirs.items(), self._files.items())
939 949 ):
940 950 if p in self._files:
941 951 yield self._subpath(p), n
942 952 else:
943 953 for f, sn in pycompat.iteritems(n):
944 954 yield f, sn
945 955
946 956 iteritems = items
947 957
948 958 def iterkeys(self):
949 959 self._load()
950 960 self._loadalllazy()
951 961 for p in sorted(itertools.chain(self._dirs, self._files)):
952 962 if p in self._files:
953 963 yield self._subpath(p)
954 964 else:
955 965 for f in self._dirs[p]:
956 966 yield f
957 967
958 968 def keys(self):
959 969 return list(self.iterkeys())
960 970
961 971 def __iter__(self):
962 972 return self.iterkeys()
963 973
964 974 def __contains__(self, f):
965 975 if f is None:
966 976 return False
967 977 self._load()
968 978 dir, subpath = _splittopdir(f)
969 979 if dir:
970 980 self._loadlazy(dir)
971 981
972 982 if dir not in self._dirs:
973 983 return False
974 984
975 985 return self._dirs[dir].__contains__(subpath)
976 986 else:
977 987 return f in self._files
978 988
979 989 def get(self, f, default=None):
980 990 self._load()
981 991 dir, subpath = _splittopdir(f)
982 992 if dir:
983 993 self._loadlazy(dir)
984 994
985 995 if dir not in self._dirs:
986 996 return default
987 997 return self._dirs[dir].get(subpath, default)
988 998 else:
989 999 return self._files.get(f, default)
990 1000
991 1001 def __getitem__(self, f):
992 1002 self._load()
993 1003 dir, subpath = _splittopdir(f)
994 1004 if dir:
995 1005 self._loadlazy(dir)
996 1006
997 1007 return self._dirs[dir].__getitem__(subpath)
998 1008 else:
999 1009 return self._files[f]
1000 1010
1001 1011 def flags(self, f):
1002 1012 self._load()
1003 1013 dir, subpath = _splittopdir(f)
1004 1014 if dir:
1005 1015 self._loadlazy(dir)
1006 1016
1007 1017 if dir not in self._dirs:
1008 1018 return b''
1009 1019 return self._dirs[dir].flags(subpath)
1010 1020 else:
1011 1021 if f in self._lazydirs or f in self._dirs:
1012 1022 return b''
1013 1023 return self._flags.get(f, b'')
1014 1024
1015 1025 def find(self, f):
1016 1026 self._load()
1017 1027 dir, subpath = _splittopdir(f)
1018 1028 if dir:
1019 1029 self._loadlazy(dir)
1020 1030
1021 1031 return self._dirs[dir].find(subpath)
1022 1032 else:
1023 1033 return self._files[f], self._flags.get(f, b'')
1024 1034
1025 1035 def __delitem__(self, f):
1026 1036 self._load()
1027 1037 dir, subpath = _splittopdir(f)
1028 1038 if dir:
1029 1039 self._loadlazy(dir)
1030 1040
1031 1041 self._dirs[dir].__delitem__(subpath)
1032 1042 # If the directory is now empty, remove it
1033 1043 if self._dirs[dir]._isempty():
1034 1044 del self._dirs[dir]
1035 1045 else:
1036 1046 del self._files[f]
1037 1047 if f in self._flags:
1038 1048 del self._flags[f]
1039 1049 self._dirty = True
1040 1050
1041 1051 def __setitem__(self, f, n):
1042 1052 assert n is not None
1043 1053 self._load()
1044 1054 dir, subpath = _splittopdir(f)
1045 1055 if dir:
1046 1056 self._loadlazy(dir)
1047 1057 if dir not in self._dirs:
1048 1058 self._dirs[dir] = treemanifest(self._subpath(dir))
1049 1059 self._dirs[dir].__setitem__(subpath, n)
1050 1060 else:
1051 1061 # manifest nodes are either 20 bytes or 32 bytes,
1052 # depending on the hash in use. An extra byte is
1053 # occasionally used by hg, but won't ever be
1054 # persisted. Trim to 21 or 33 bytes as appropriate.
1055 trim = 21 if len(n) < 25 else 33
1056 self._files[f] = n[:trim] # to match manifestdict's behavior
1062 # depending on the hash in use. Assert this as historically
1063 # sometimes extra bytes were added.
1064 assert len(n) in (20, 32)
1065 self._files[f] = n
1057 1066 self._dirty = True
1058 1067
1059 1068 def _load(self):
1060 1069 if self._loadfunc is not _noop:
1061 1070 lf, self._loadfunc = self._loadfunc, _noop
1062 1071 lf(self)
1063 1072 elif self._copyfunc is not _noop:
1064 1073 cf, self._copyfunc = self._copyfunc, _noop
1065 1074 cf(self)
1066 1075
1067 1076 def setflag(self, f, flags):
1068 1077 """Set the flags (symlink, executable) for path f."""
1078 if flags not in _manifestflags:
1079 raise TypeError(b"Invalid manifest flag set.")
1069 1080 self._load()
1070 1081 dir, subpath = _splittopdir(f)
1071 1082 if dir:
1072 1083 self._loadlazy(dir)
1073 1084 if dir not in self._dirs:
1074 1085 self._dirs[dir] = treemanifest(self._subpath(dir))
1075 1086 self._dirs[dir].setflag(subpath, flags)
1076 1087 else:
1077 1088 self._flags[f] = flags
1078 1089 self._dirty = True
1079 1090
    def copy(self):
        """Return a copy of this tree.

        The duplication of ``_dirs``/``_files``/``_flags`` is deferred via
        ``_copyfunc`` until the copy is first accessed, unless this
        manifest is already loaded (then it is copied eagerly).
        """
        copy = treemanifest(self._dir)
        copy._node = self._node
        copy._dirty = self._dirty
        if self._copyfunc is _noop:

            def _copyfunc(s):
                self._load()
                # Lazy subdirs stay shared with the source; the True marks
                # them as "needs copy before mutation".
                s._lazydirs = {
                    d: (p, n, r, True)
                    for d, (p, n, r, c) in pycompat.iteritems(self._lazydirs)
                }
                sdirs = s._dirs
                for d, v in pycompat.iteritems(self._dirs):
                    sdirs[d] = v.copy()
                s._files = dict.copy(self._files)
                s._flags = dict.copy(self._flags)

            if self._loadfunc is _noop:
                # Already loaded: perform the copy now.
                _copyfunc(copy)
            else:
                # Not loaded yet: defer the copy until first access.
                copy._copyfunc = _copyfunc
        else:
            copy._copyfunc = self._copyfunc
        return copy
1105 1116
    def filesnotin(self, m2, match=None):
        '''Set of files in this manifest that are not in the other'''
        if match and not match.always():
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.filesnotin(m2)

        files = set()

        def _filesnotin(t1, t2):
            # Subtrees with identical nodes and no pending edits cannot
            # differ; skip them entirely.
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)
            for d, m1 in pycompat.iteritems(t1._dirs):
                if d in t2._dirs:
                    m2 = t2._dirs[d]
                    _filesnotin(m1, m2)
                else:
                    # Entire subtree is missing from t2.
                    files.update(m1.iterkeys())

            for fn in t1._files:
                if fn not in t2._files:
                    files.add(t1._subpath(fn))

        _filesnotin(self, m2)
        return files
1134 1145
    @propertycache
    def _alldirs(self):
        # Cached directory-membership structure for this manifest.
        return pathutil.dirs(self)
1138 1149
    def dirs(self):
        """Return the (cached) directory structure of this manifest."""
        return self._alldirs
1141 1152
1142 1153 def hasdir(self, dir):
1143 1154 self._load()
1144 1155 topdir, subdir = _splittopdir(dir)
1145 1156 if topdir:
1146 1157 self._loadlazy(topdir)
1147 1158 if topdir in self._dirs:
1148 1159 return self._dirs[topdir].hasdir(subdir)
1149 1160 return False
1150 1161 dirslash = dir + b'/'
1151 1162 return dirslash in self._dirs or dirslash in self._lazydirs
1152 1163
1153 1164 def walk(self, match):
1154 1165 '''Generates matching file names.
1155 1166
1156 1167 It also reports nonexistent files by marking them bad with match.bad().
1157 1168 '''
1158 1169 if match.always():
1159 1170 for f in iter(self):
1160 1171 yield f
1161 1172 return
1162 1173
1163 1174 fset = set(match.files())
1164 1175
1165 1176 for fn in self._walk(match):
1166 1177 if fn in fset:
1167 1178 # specified pattern is the exact name
1168 1179 fset.remove(fn)
1169 1180 yield fn
1170 1181
1171 1182 # for dirstate.walk, files=[''] means "walk the whole tree".
1172 1183 # follow that here, too
1173 1184 fset.discard(b'')
1174 1185
1175 1186 for fn in sorted(fset):
1176 1187 if not self.hasdir(fn):
1177 1188 match.bad(fn, None)
1178 1189
1179 1190 def _walk(self, match):
1180 1191 '''Recursively generates matching file names for walk().'''
1181 1192 visit = match.visitchildrenset(self._dir[:-1])
1182 1193 if not visit:
1183 1194 return
1184 1195
1185 1196 # yield this dir's files and walk its submanifests
1186 1197 self._load()
1187 1198 visit = self._loadchildrensetlazy(visit)
1188 1199 for p in sorted(list(self._dirs) + list(self._files)):
1189 1200 if p in self._files:
1190 1201 fullp = self._subpath(p)
1191 1202 if match(fullp):
1192 1203 yield fullp
1193 1204 else:
1194 1205 if not visit or p[:-1] in visit:
1195 1206 for f in self._dirs[p]._walk(match):
1196 1207 yield f
1197 1208
    def _matches(self, match):
        '''recursively generate a new manifest filtered by the match argument.
        '''
        if match.always():
            # No filtering needed; hand back a (lazy) copy.
            return self.copy()
        return self._matches_inner(match)
1204 1215
    def _matches_inner(self, match):
        """Recursive helper for _matches(): build a filtered treemanifest."""
        if match.always():
            return self.copy()

        visit = match.visitchildrenset(self._dir[:-1])
        if visit == b'all':
            return self.copy()
        ret = treemanifest(self._dir)
        if not visit:
            return ret

        self._load()
        for fn in self._files:
            # While visitchildrenset *usually* lists only subdirs, this is
            # actually up to the matcher and may have some files in the set().
            # If visit == 'this', we should obviously look at the files in this
            # directory; if visit is a set, and fn is in it, we should inspect
            # fn (but no need to inspect things not in the set).
            if visit != b'this' and fn not in visit:
                continue
            fullp = self._subpath(fn)
            # visitchildrenset isn't perfect, we still need to call the regular
            # matcher code to further filter results.
            if not match(fullp):
                continue
            ret._files[fn] = self._files[fn]
            if fn in self._flags:
                ret._flags[fn] = self._flags[fn]

        visit = self._loadchildrensetlazy(visit)
        for dir, subm in pycompat.iteritems(self._dirs):
            if visit and dir[:-1] not in visit:
                continue
            m = subm._matches_inner(match)
            # Only keep subtrees that actually contain something.
            if not m._isempty():
                ret._dirs[dir] = m

        if not ret._isempty():
            ret._dirty = True
        return ret
1245 1256
    def fastdelta(self, base, changes):
        # Tree manifests cannot compute a fast delta; callers catch this
        # and fall back to a full-text write (see manifestrevlog.add).
        raise FastdeltaUnavailable()
1248 1259
    def diff(self, m2, match=None, clean=False):
        '''Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        '''
        if match and not match.always():
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.diff(m2, clean=clean)
        result = {}
        emptytree = treemanifest()

        def _iterativediff(t1, t2, stack):
            """compares two tree manifests and append new tree-manifests which
            needs to be compared to stack"""
            # Identical, unmodified subtrees cannot differ; skip them.
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)

            for d, m1 in pycompat.iteritems(t1._dirs):
                m2 = t2._dirs.get(d, emptytree)
                stack.append((m1, m2))

            for d, m2 in pycompat.iteritems(t2._dirs):
                if d not in t1._dirs:
                    stack.append((emptytree, m2))

            for fn, n1 in pycompat.iteritems(t1._files):
                fl1 = t1._flags.get(fn, b'')
                n2 = t2._files.get(fn, None)
                fl2 = t2._flags.get(fn, b'')
                if n1 != n2 or fl1 != fl2:
                    result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
                elif clean:
                    result[t1._subpath(fn)] = None

            for fn, n2 in pycompat.iteritems(t2._files):
                if fn not in t1._files:
                    fl2 = t2._flags.get(fn, b'')
                    result[t2._subpath(fn)] = ((None, b''), (n2, fl2))

        stackls = []
        _iterativediff(self, m2, stackls)
        while stackls:
            t1, t2 = stackls.pop()
            # stackls is populated in the function call
            _iterativediff(t1, t2, stackls)
        return result
1309 1320
    def unmodifiedsince(self, m2):
        """True when this tree is unchanged relative to ``m2``."""
        return not self._dirty and not m2._dirty and self._node == m2._node
1312 1323
    def parse(self, text, readsubtree):
        """Populate this tree from serialized manifest ``text``.

        Directory entries (flag b't') are registered lazily and fetched
        with ``readsubtree`` on first access.
        """
        selflazy = self._lazydirs
        subpath = self._subpath
        for f, n, fl in _parse(text):
            if fl == b't':
                f = f + b'/'
                # False below means "doesn't need to be copied" and can use the
                # cached value from readsubtree directly.
                selflazy[f] = (subpath(f), n, readsubtree, False)
            elif b'/' in f:
                # This is a flat manifest, so use __setitem__ and setflag rather
                # than assigning directly to _files and _flags, so we can
                # assign a path in a subdirectory, and to mark dirty (compared
                # to nullid).
                self[f] = n
                if fl:
                    self.setflag(f, fl)
            else:
                # Assigning to _files and _flags avoids marking as dirty,
                # and should be a little faster.
                self._files[f] = n
                if fl:
                    self._flags[f] = fl
1336 1347
    def text(self):
        """Get the full data of this manifest as a bytestring."""
        # _load() runs any deferred parse/copy callback first.
        self._load()
        return _text(self.iterentries())
1341 1352
1342 1353 def dirtext(self):
1343 1354 """Get the full data of this directory as a bytestring. Make sure that
1344 1355 any submanifests have been written first, so their nodeids are correct.
1345 1356 """
1346 1357 self._load()
1347 1358 flags = self.flags
1348 1359 lazydirs = [
1349 1360 (d[:-1], v[1], b't') for d, v in pycompat.iteritems(self._lazydirs)
1350 1361 ]
1351 1362 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1352 1363 files = [(f, self._files[f], flags(f)) for f in self._files]
1353 1364 return _text(sorted(dirs + files + lazydirs))
1354 1365
    def read(self, gettext, readsubtree):
        """Install a deferred loader; parsing happens on first _load()."""
        def _load_for_read(s):
            s.parse(gettext(), readsubtree)
            # Freshly parsed content matches storage; not dirty.
            s._dirty = False

        self._loadfunc = _load_for_read
1361 1372
    def writesubtrees(self, m1, m2, writesubtree, match):
        """Invoke ``writesubtree`` for each child directory selected by
        ``match``, passing the matching subtree nodes from parents
        ``m1`` and ``m2``."""
        self._load()  # for consistency; should never have any effect here
        m1._load()
        m2._load()
        emptytree = treemanifest()

        def getnode(m, d):
            # Prefer the node recorded for an unloaded (lazy) subtree;
            # otherwise use the loaded subtree's node (or the empty tree's).
            ld = m._lazydirs.get(d)
            if ld:
                return ld[1]
            return m._dirs.get(d, emptytree)._node

        # let's skip investigating things that `match` says we do not need.
        visit = match.visitchildrenset(self._dir[:-1])
        visit = self._loadchildrensetlazy(visit)
        if visit == b'this' or visit == b'all':
            visit = None
        for d, subm in pycompat.iteritems(self._dirs):
            if visit and d[:-1] not in visit:
                continue
            subp1 = getnode(m1, d)
            subp2 = getnode(m2, d)
            if subp1 == nullid:
                # Keep a non-null node in the p1 slot when possible.
                subp1, subp2 = subp2, subp1
            writesubtree(subm, subp1, subp2, match)
1387 1398
1388 1399 def walksubtrees(self, matcher=None):
1389 1400 """Returns an iterator of the subtrees of this manifest, including this
1390 1401 manifest itself.
1391 1402
1392 1403 If `matcher` is provided, it only returns subtrees that match.
1393 1404 """
1394 1405 if matcher and not matcher.visitdir(self._dir[:-1]):
1395 1406 return
1396 1407 if not matcher or matcher(self._dir[:-1]):
1397 1408 yield self
1398 1409
1399 1410 self._load()
1400 1411 # OPT: use visitchildrenset to avoid loading everything.
1401 1412 self._loadalllazy()
1402 1413 for d, subm in pycompat.iteritems(self._dirs):
1403 1414 for subtree in subm.walksubtrees(matcher=matcher):
1404 1415 yield subtree
1405 1416
1406 1417
class manifestfulltextcache(util.lrucachedict):
    """File-backed LRU cache for the manifest cache

    File consists of entries, up to EOF:

    - 20 bytes node, 4 bytes length, <length> manifest data

    These are written in reverse cache order (oldest to newest).

    """

    # Cache file name inside the wcache vfs.
    _file = b'manifestfulltextcache'

    def __init__(self, max):
        super(manifestfulltextcache, self).__init__(max)
        self._dirty = False
        self._read = False
        self._opener = None

    def read(self):
        # Only read once, and only when a backing vfs is attached.
        if self._read or self._opener is None:
            return

        try:
            with self._opener(self._file) as fp:
                set = super(manifestfulltextcache, self).__setitem__
                # ignore trailing data, this is a cache, corruption is skipped
                while True:
                    # TODO do we need to do work here for sha1 portability?
                    node = fp.read(20)
                    if len(node) < 20:
                        break
                    try:
                        size = struct.unpack(b'>L', fp.read(4))[0]
                    except struct.error:
                        break
                    value = bytearray(fp.read(size))
                    if len(value) != size:
                        # Truncated entry: stop reading.
                        break
                    set(node, value)
        except IOError:
            # the file is allowed to be missing
            pass

        self._read = True
        self._dirty = False

    def write(self):
        if not self._dirty or self._opener is None:
            return
        # rotate backwards to the first used node
        try:
            with self._opener(
                self._file, b'w', atomictemp=True, checkambig=True
            ) as fp:
                node = self._head.prev
                while True:
                    if node.key in self._cache:
                        fp.write(node.key)
                        fp.write(struct.pack(b'>L', len(node.value)))
                        fp.write(node.value)
                    if node is self._head:
                        break
                    node = node.prev
        except IOError:
            # We could not write the cache (eg: permission error)
            # the content can be missing.
            #
            # We could try harder and see if we could recreate a wcache
            # directory where we could write too.
            #
            # XXX the error passes silently; having some way to issue an
            # error log `ui.log` would be nice.
            pass

    def __len__(self):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__len__()

    def __contains__(self, k):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__contains__(k)

    def __iter__(self):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__iter__()

    def __getitem__(self, k):
        if not self._read:
            self.read()
        # the cache lru order can change on read
        setdirty = self._cache.get(k) is not self._head
        value = super(manifestfulltextcache, self).__getitem__(k)
        if setdirty:
            self._dirty = True
        return value

    def __setitem__(self, k, v):
        if not self._read:
            self.read()
        super(manifestfulltextcache, self).__setitem__(k, v)
        self._dirty = True

    def __delitem__(self, k):
        if not self._read:
            self.read()
        super(manifestfulltextcache, self).__delitem__(k)
        self._dirty = True

    def get(self, k, default=None):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).get(k, default=default)

    def clear(self, clear_persisted_data=False):
        super(manifestfulltextcache, self).clear()
        if clear_persisted_data:
            # Persist the (now empty) cache to disk as well.
            self._dirty = True
            self.write()
        self._read = False
1530 1541
1531 1542
# An upper bound of what we expect from compression
# (real-life value seems to be "3")
MAXCOMPRESSION = 3
1535 1546
1536 1547
class FastdeltaUnavailable(Exception):
    """Exception raised when fastdelta isn't usable on a manifest."""
1540 1551
@interfaceutil.implementer(repository.imanifeststorage)
class manifestrevlog(object):
    '''A revlog that stores manifest texts. This is responsible for caching the
    full-text manifest contents.
    '''

    def __init__(
        self,
        opener,
        tree=b'',
        dirlogcache=None,
        indexfile=None,
        treemanifest=False,
    ):
        """Constructs a new manifest revlog

        `indexfile` - used by extensions to have two manifests at once, like
        when transitioning between flat manifests and treemanifests.

        `treemanifest` - used to indicate this is a tree manifest revlog. Opener
        options can also be used to make this a tree manifest revlog. The opener
        option takes precedence, so if it is set to True, we ignore whatever
        value is passed in to the constructor.
        """
        # During normal operations, we expect to deal with not more than four
        # revs at a time (such as during commit --amend). When rebasing large
        # stacks of commits, the number can go up, hence the config knob below.
        cachesize = 4
        optiontreemanifest = False
        opts = getattr(opener, 'options', None)
        if opts is not None:
            cachesize = opts.get(b'manifestcachesize', cachesize)
            optiontreemanifest = opts.get(b'treemanifest', False)

        self._treeondisk = optiontreemanifest or treemanifest

        self._fulltextcache = manifestfulltextcache(cachesize)

        if tree:
            assert self._treeondisk, b'opts is %r' % opts

        if indexfile is None:
            indexfile = b'00manifest.i'
            if tree:
                indexfile = b"meta/" + tree + indexfile

        self.tree = tree

        # The dirlogcache is kept on the root manifest log
        if tree:
            self._dirlogcache = dirlogcache
        else:
            self._dirlogcache = {b'': self}

        self._revlog = revlog.revlog(
            opener,
            indexfile,
            # only root indexfile is cached
            checkambig=not bool(tree),
            mmaplargeindex=True,
            upperboundcomp=MAXCOMPRESSION,
            persistentnodemap=opener.options.get(b'persistent-nodemap', False),
        )

        self.index = self._revlog.index
        self.version = self._revlog.version
        self._generaldelta = self._revlog._generaldelta

    def _setupmanifestcachehooks(self, repo):
        """Persist the manifestfulltextcache on lock release"""
        if not util.safehasattr(repo, b'_wlockref'):
            return

        self._fulltextcache._opener = repo.wcachevfs
        if repo._currentlock(repo._wlockref) is None:
            return

        # Weak refs so the hook does not keep the repo or revlog alive.
        reporef = weakref.ref(repo)
        manifestrevlogref = weakref.ref(self)

        def persistmanifestcache(success):
            # Repo is in an unknown state, do not persist.
            if not success:
                return

            repo = reporef()
            self = manifestrevlogref()
            if repo is None or self is None:
                return
            if repo.manifestlog.getstorage(b'') is not self:
                # there's a different manifest in play now, abort
                return
            self._fulltextcache.write()

        repo._afterlock(persistmanifestcache)

    @property
    def fulltextcache(self):
        return self._fulltextcache

    def clearcaches(self, clear_persisted_data=False):
        self._revlog.clearcaches()
        self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
        self._dirlogcache = {self.tree: self}

    def dirlog(self, d):
        """Return (creating if needed) the manifestrevlog for directory d."""
        if d:
            assert self._treeondisk
        if d not in self._dirlogcache:
            mfrevlog = manifestrevlog(
                self.opener, d, self._dirlogcache, treemanifest=self._treeondisk
            )
            self._dirlogcache[d] = mfrevlog
        return self._dirlogcache[d]

    def add(
        self,
        m,
        transaction,
        link,
        p1,
        p2,
        added,
        removed,
        readtree=None,
        match=None,
    ):
        """add some manifest entry into the manifest log

        input:

        m: the manifest dict we want to store
        transaction: the open transaction
        p1: manifest-node of p1
        p2: manifest-node of p2
        added: file added/changed compared to parent
        removed: file removed compared to parent

        tree manifest input:

        readtree: a function to read a subtree
        match: a filematcher for the subpart of the tree manifest
        """
        try:
            if p1 not in self.fulltextcache:
                raise FastdeltaUnavailable()
            # If our first parent is in the manifest cache, we can
            # compute a delta here using properties we know about the
            # manifest up-front, which may save time later for the
            # revlog layer.

            _checkforbidden(added)
            # combine the changed lists into one sorted iterator
            work = heapq.merge(
                [(x, False) for x in sorted(added)],
                [(x, True) for x in sorted(removed)],
            )

            arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
            cachedelta = self._revlog.rev(p1), deltatext
            text = util.buffer(arraytext)
            n = self._revlog.addrevision(
                text, transaction, link, p1, p2, cachedelta
            )
        except FastdeltaUnavailable:
            # The first parent manifest isn't already loaded or the
            # manifest implementation doesn't support fastdelta, so
            # we'll just encode a fulltext of the manifest and pass
            # that through to the revlog layer, and let it handle the
            # delta process.
            if self._treeondisk:
                assert readtree, b"readtree must be set for treemanifest writes"
                assert match, b"match must be specified for treemanifest writes"
                m1 = readtree(self.tree, p1)
                m2 = readtree(self.tree, p2)
                n = self._addtree(
                    m, transaction, link, m1, m2, readtree, match=match
                )
                arraytext = None
            else:
                text = m.text()
                n = self._revlog.addrevision(text, transaction, link, p1, p2)
                arraytext = bytearray(text)

        if arraytext is not None:
            self.fulltextcache[n] = arraytext

        return n

    def _addtree(self, m, transaction, link, m1, m2, readtree, match):
        # If the manifest is unchanged compared to one parent,
        # don't write a new revision
        if self.tree != b'' and (
            m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
        ):
            return m.node()

        def writesubtree(subm, subp1, subp2, match):
            # Recursively store each dirty child subtree in its own revlog.
            sublog = self.dirlog(subm.dir())
            sublog.add(
                subm,
                transaction,
                link,
                subp1,
                subp2,
                None,
                None,
                readtree=readtree,
                match=match,
            )

        m.writesubtrees(m1, m2, writesubtree, match)
        text = m.dirtext()
        n = None
        if self.tree != b'':
            # Double-check whether contents are unchanged to one parent
            if text == m1.dirtext():
                n = m1.node()
            elif text == m2.dirtext():
                n = m2.node()

        if not n:
            n = self._revlog.addrevision(
                text, transaction, link, m1.node(), m2.node()
            )

        # Save nodeid so parent manifest can calculate its nodeid
        m.setnode(n)
        return n

    # The methods below simply delegate to the underlying revlog.

    def __len__(self):
        return len(self._revlog)

    def __iter__(self):
        return self._revlog.__iter__()

    def rev(self, node):
        return self._revlog.rev(node)

    def node(self, rev):
        return self._revlog.node(rev)

    def lookup(self, value):
        return self._revlog.lookup(value)

    def parentrevs(self, rev):
        return self._revlog.parentrevs(rev)

    def parents(self, node):
        return self._revlog.parents(node)

    def linkrev(self, rev):
        return self._revlog.linkrev(rev)

    def checksize(self):
        return self._revlog.checksize()

    def revision(self, node, _df=None, raw=False):
        return self._revlog.revision(node, _df=_df, raw=raw)

    def rawdata(self, node, _df=None):
        return self._revlog.rawdata(node, _df=_df)

    def revdiff(self, rev1, rev2):
        return self._revlog.revdiff(rev1, rev2)

    def cmp(self, node, text):
        return self._revlog.cmp(node, text)

    def deltaparent(self, rev):
        return self._revlog.deltaparent(rev)

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
    ):
        return self._revlog.emitrevisions(
            nodes,
            nodesorder=nodesorder,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltamode=deltamode,
        )

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        return self._revlog.addgroup(
            deltas, linkmapper, transaction, addrevisioncb=addrevisioncb
        )

    def rawsize(self, rev):
        return self._revlog.rawsize(rev)

    def getstrippoint(self, minlink):
        return self._revlog.getstrippoint(minlink)

    def strip(self, minlink, transaction):
        return self._revlog.strip(minlink, transaction)

    def files(self):
        return self._revlog.files()

    def clone(self, tr, destrevlog, **kwargs):
        if not isinstance(destrevlog, manifestrevlog):
            raise error.ProgrammingError(b'expected manifestrevlog to clone()')

        return self._revlog.clone(tr, destrevlog._revlog, **kwargs)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        return self._revlog.storageinfo(
            exclusivefiles=exclusivefiles,
            sharedfiles=sharedfiles,
            revisionscount=revisionscount,
            trackedsize=trackedsize,
            storedsize=storedsize,
        )

    @property
    def indexfile(self):
        return self._revlog.indexfile

    @indexfile.setter
    def indexfile(self, value):
        self._revlog.indexfile = value

    @property
    def opener(self):
        return self._revlog.opener

    @opener.setter
    def opener(self, value):
        self._revlog.opener = value
1883 1894
1884 1895
@interfaceutil.implementer(repository.imanifestlog)
class manifestlog(object):
    """A collection class representing the collection of manifest snapshots
    referenced by commits in the repository.

    In this situation, 'manifest' refers to the abstract concept of a snapshot
    of the list of files in the given commit. Consumers of the output of this
    class do not care about the implementation details of the actual manifests
    they receive (i.e. tree or flat or lazily loaded, etc)."""

    def __init__(self, opener, repo, rootstore, narrowmatch):
        usetreemanifest = False
        cachesize = 4

        opts = getattr(opener, 'options', None)
        if opts is not None:
            usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
            cachesize = opts.get(b'manifestcachesize', cachesize)

        self._treemanifests = usetreemanifest

        self._rootstore = rootstore
        self._rootstore._setupmanifestcachehooks(repo)
        self._narrowmatch = narrowmatch

        # A cache of the manifestctx or treemanifestctx for each directory
        self._dirmancache = {}
        self._dirmancache[b''] = util.lrucachedict(cachesize)

        self._cachesize = cachesize

    def __getitem__(self, node):
        """Retrieves the manifest instance for the given node. Throws a
        LookupError if not found.
        """
        return self.get(b'', node)

    def get(self, tree, node, verify=True):
        """Retrieves the manifest instance for the given node. Throws a
        LookupError if not found.

        `verify` - if True an exception will be thrown if the node is not in
        the revlog
        """
        if node in self._dirmancache.get(tree, ()):
            return self._dirmancache[tree][node]

        if not self._narrowmatch.always():
            if not self._narrowmatch.visitdir(tree[:-1]):
                # presumably a directory excluded by a narrow clone; a
                # placeholder ctx is returned — verify against caller
                return excludeddirmanifestctx(tree, node)
        if tree:
            if self._rootstore._treeondisk:
                if verify:
                    # Side-effect is LookupError is raised if node doesn't
                    # exist.
                    self.getstorage(tree).rev(node)

                m = treemanifestctx(self, tree, node)
            else:
                raise error.Abort(
                    _(
                        b"cannot ask for manifest directory '%s' in a flat "
                        b"manifest"
                    )
                    % tree
                )
        else:
            if verify:
                # Side-effect is LookupError is raised if node doesn't exist.
                self._rootstore.rev(node)

            if self._treemanifests:
                m = treemanifestctx(self, b'', node)
            else:
                m = manifestctx(self, node)

        if node != nullid:
            mancache = self._dirmancache.get(tree)
            if not mancache:
                mancache = util.lrucachedict(self._cachesize)
                self._dirmancache[tree] = mancache
            mancache[node] = m
        return m

    def getstorage(self, tree):
        return self._rootstore.dirlog(tree)

    def clearcaches(self, clear_persisted_data=False):
        self._dirmancache.clear()
        self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)

    def rev(self, node):
        return self._rootstore.rev(node)

    def update_caches(self, transaction):
        return self._rootstore._revlog.update_caches(transaction=transaction)
1981 1992
1982 1993
@interfaceutil.implementer(repository.imanifestrevisionwritable)
class memmanifestctx(object):
    """A writable, in-memory flat manifest revision."""

    def __init__(self, manifestlog):
        self._manifestlog = manifestlog
        self._manifestdict = manifestdict()

    def _storage(self):
        """Flat manifests always live in the root (b'') storage."""
        return self._manifestlog.getstorage(b'')

    def read(self):
        """Return the mutable manifestdict held by this context."""
        return self._manifestdict

    def copy(self):
        """Return an independent writable copy of this context."""
        clone = memmanifestctx(self._manifestlog)
        clone._manifestdict = self.read().copy()
        return clone

    def write(self, transaction, link, p1, p2, added, removed, match=None):
        """Persist this manifest to storage; returns the new node."""
        storage = self._storage()
        return storage.add(
            self._manifestdict,
            transaction,
            link,
            p1,
            p2,
            added,
            removed,
            match=match,
        )
2011 2022
2012 2023
@interfaceutil.implementer(repository.imanifestrevisionstored)
class manifestctx(object):
    """A class representing a single revision of a manifest, including its
    contents, its parent revs, and its linkrev.
    """

    def __init__(self, manifestlog, node):
        self._manifestlog = manifestlog
        # Parsed manifestdict; populated lazily by read().
        self._data = None

        self._node = node

        # TODO: We eventually want p1, p2, and linkrev exposed on this class,
        # but let's add it later when something needs it and we can load it
        # lazily.
        # self.p1, self.p2 = store.parents(node)
        # rev = store.rev(node)
        # self.linkrev = store.linkrev(rev)

    def _storage(self):
        # Flat manifests always live in the root (b'') storage.
        return self._manifestlog.getstorage(b'')

    def node(self):
        return self._node

    def copy(self):
        # Return a writable in-memory copy of this revision's contents.
        memmf = memmanifestctx(self._manifestlog)
        memmf._manifestdict = self.read().copy()
        return memmf

    @propertycache
    def parents(self):
        return self._storage().parents(self._node)

    def read(self):
        """Parse and return this revision's manifestdict (cached after the
        first call; the null node yields an empty manifestdict)."""
        if self._data is None:
            if self._node == nullid:
                self._data = manifestdict()
            else:
                store = self._storage()
                if self._node in store.fulltextcache:
                    text = pycompat.bytestr(store.fulltextcache[self._node])
                else:
                    text = store.revision(self._node)
                    arraytext = bytearray(text)
                    store.fulltextcache[self._node] = arraytext
                self._data = manifestdict(text)
        return self._data

    def readfast(self, shallow=False):
        '''Calls either readdelta or read, based on which would be less work.
        readdelta is called if the delta is against the p1, and therefore can be
        read quickly.

        If `shallow` is True, nothing changes since this is a flat manifest.
        '''
        store = self._storage()
        r = store.rev(self._node)
        deltaparent = store.deltaparent(r)
        # Only take the delta shortcut when the stored delta base is a parent.
        if deltaparent != nullrev and deltaparent in store.parentrevs(r):
            return self.readdelta()
        return self.read()

    def readdelta(self, shallow=False):
        '''Returns a manifest containing just the entries that are present
        in this manifest, but not in its p1 manifest. This is efficient to read
        if the revlog delta is already p1.

        Changing the value of `shallow` has no effect on flat manifests.
        '''
        store = self._storage()
        r = store.rev(self._node)
        d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
        return manifestdict(d)

    def find(self, key):
        return self.read().find(key)
2090 2101
2091 2102
@interfaceutil.implementer(repository.imanifestrevisionwritable)
class memtreemanifestctx(object):
    """A writable, in-memory tree manifest revision."""

    def __init__(self, manifestlog, dir=b''):
        self._manifestlog = manifestlog
        self._dir = dir
        self._treemanifest = treemanifest()

    def _storage(self):
        """Writes go through the root (b'') storage."""
        return self._manifestlog.getstorage(b'')

    def read(self):
        """Return the mutable treemanifest held by this context."""
        return self._treemanifest

    def copy(self):
        """Return an independent writable copy of this context."""
        clone = memtreemanifestctx(self._manifestlog, dir=self._dir)
        clone._treemanifest = self._treemanifest.copy()
        return clone

    def write(self, transaction, link, p1, p2, added, removed, match=None):
        """Persist this tree manifest to storage; returns the new node."""

        def readtree(dir, node):
            # Subtree loader used by storage when writing nested trees.
            return self._manifestlog.get(dir, node).read()

        storage = self._storage()
        return storage.add(
            self._treemanifest,
            transaction,
            link,
            p1,
            p2,
            added,
            removed,
            readtree=readtree,
            match=match,
        )
2125 2136
2126 2137
@interfaceutil.implementer(repository.imanifestrevisionstored)
class treemanifestctx(object):
    """A single revision of a tree manifest, loaded lazily from storage."""

    def __init__(self, manifestlog, dir, node):
        self._manifestlog = manifestlog
        self._dir = dir
        # Parsed treemanifest; populated lazily by read().
        self._data = None

        self._node = node

        # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
        # we can instantiate treemanifestctx objects for directories we don't
        # have on disk.
        # self.p1, self.p2 = store.parents(node)
        # rev = store.rev(node)
        # self.linkrev = store.linkrev(rev)

    def _storage(self):
        # Directories outside the narrowspec get a write-rejecting stand-in.
        narrowmatch = self._manifestlog._narrowmatch
        if not narrowmatch.always():
            if not narrowmatch.visitdir(self._dir[:-1]):
                return excludedmanifestrevlog(self._dir)
        return self._manifestlog.getstorage(self._dir)

    def read(self):
        """Parse and return this revision's treemanifest (cached after the
        first call; the null node yields an empty treemanifest)."""
        if self._data is None:
            store = self._storage()
            if self._node == nullid:
                self._data = treemanifest()
            # TODO accessing non-public API
            elif store._treeondisk:
                m = treemanifest(dir=self._dir)

                def gettext():
                    return store.revision(self._node)

                def readsubtree(dir, subm):
                    # Set verify to False since we need to be able to create
                    # subtrees for trees that don't exist on disk.
                    return self._manifestlog.get(dir, subm, verify=False).read()

                m.read(gettext, readsubtree)
                m.setnode(self._node)
                self._data = m
            else:
                if self._node in store.fulltextcache:
                    text = pycompat.bytestr(store.fulltextcache[self._node])
                else:
                    text = store.revision(self._node)
                    arraytext = bytearray(text)
                    store.fulltextcache[self._node] = arraytext
                self._data = treemanifest(dir=self._dir, text=text)

        return self._data

    def node(self):
        return self._node

    def copy(self):
        # Return a writable in-memory copy of this revision's contents.
        memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
        memmf._treemanifest = self.read().copy()
        return memmf

    @propertycache
    def parents(self):
        return self._storage().parents(self._node)

    def readdelta(self, shallow=False):
        '''Returns a manifest containing just the entries that are present
        in this manifest, but not in its p1 manifest. This is efficient to read
        if the revlog delta is already p1.

        If `shallow` is True, this will read the delta for this directory,
        without recursively reading subdirectory manifests. Instead, any
        subdirectory entry will be reported as it appears in the manifest, i.e.
        the subdirectory will be reported among files and distinguished only by
        its 't' flag.
        '''
        store = self._storage()
        if shallow:
            r = store.rev(self._node)
            d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
            return manifestdict(d)
        else:
            # Need to perform a slow delta
            r0 = store.deltaparent(store.rev(self._node))
            m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
            m1 = self.read()
            md = treemanifest(dir=self._dir)
            for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
                if n1:
                    md[f] = n1
                    if fl1:
                        md.setflag(f, fl1)
            return md

    def readfast(self, shallow=False):
        '''Calls either readdelta or read, based on which would be less work.
        readdelta is called if the delta is against the p1, and therefore can be
        read quickly.

        If `shallow` is True, it only returns the entries from this manifest,
        and not any submanifests.
        '''
        store = self._storage()
        r = store.rev(self._node)
        deltaparent = store.deltaparent(r)
        # Only take the delta shortcut when the stored delta base is a parent.
        if deltaparent != nullrev and deltaparent in store.parentrevs(r):
            return self.readdelta(shallow=shallow)

        if shallow:
            return manifestdict(store.revision(self._node))
        else:
            return self.read()

    def find(self, key):
        return self.read().find(key)
2243 2254
2244 2255
class excludeddir(treemanifest):
    """Stand-in for a directory excluded by the narrowspec.

    With narrowing active on a treemanifest repository, some directory
    revlogs are not present in the clone. Internals still need something
    to hold the directory's node (e.g. to detect merge conflicts outside
    the narrowspec), so this class represents a directory whose node is
    known but whose contents are not.
    """

    def __init__(self, dir, node):
        super(excludeddir, self).__init__(dir)
        self._node = node
        # Register a single empty-named entry so iterators report the
        # directory itself (appearing as something like "dir/").
        self._files[b''] = node
        self._flags[b''] = b't'

    def copy(self):
        # Manifests outside the narrowspec are never modified, so skip the
        # copy entirely. This is a measurable win with many excluded
        # directories, and keeps the copy the same type as the original
        # (the base class copy() would not).
        return self
2272 2283
2273 2284
class excludeddirmanifestctx(treemanifestctx):
    """Context wrapper for excludeddir - see that docstring for rationale.

    read() materializes a fresh excludeddir stand-in; writing is forbidden.
    """

    def __init__(self, dir, node):
        # Deliberately does not call treemanifestctx.__init__: excluded
        # directories carry no manifestlog.
        self._dir = dir
        self._node = node

    def read(self):
        """Return a stand-in manifest for this excluded directory."""
        return excludeddir(self._dir, self._node)

    def write(self, *args):
        """Always refuse: excluded directories must never be written."""
        raise error.ProgrammingError(
            b'attempt to write manifest from excluded dir %s' % self._dir
        )
2288 2299
2289 2300
class excludedmanifestrevlog(manifestrevlog):
    """Stand-in for excluded treemanifest revlogs.

    With narrowing active on a treemanifest repository we hold references
    to directories whose revlogs were skipped. This class satisfies the
    manifestrevlog interface for those directories while proactively
    rejecting reads and writes outside the narrowspec.
    """

    def __init__(self, dir):
        # Deliberately does not call manifestrevlog.__init__: there is no
        # backing revlog for an excluded directory.
        self._dir = dir

    def __len__(self):
        """Always refuse: the revlog contents are unknown."""
        raise error.ProgrammingError(
            b'attempt to get length of excluded dir %s' % self._dir
        )

    def rev(self, node):
        """Always refuse: the revlog contents are unknown."""
        raise error.ProgrammingError(
            b'attempt to get rev from excluded dir %s' % self._dir
        )

    def linkrev(self, node):
        """Always refuse: the revlog contents are unknown."""
        raise error.ProgrammingError(
            b'attempt to get linkrev from excluded dir %s' % self._dir
        )

    def node(self, rev):
        """Always refuse: the revlog contents are unknown."""
        raise error.ProgrammingError(
            b'attempt to get node from excluded dir %s' % self._dir
        )

    def add(self, *args, **kwargs):
        """Silently ignore writes.

        We should never write entries in dirlogs outside the narrow clone.
        However, the method still gets called from writesubtree() in
        _addtree(), so we need to handle it. We should possibly make that
        avoid calling add() with a clean manifest (_dirty is always False
        in excludeddir instances).
        """
        pass
@@ -1,2083 +1,2082 b''
1 1 # merge.py - directory-level update/merge handling for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import stat
12 12 import struct
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 addednodeid,
17 17 modifiednodeid,
18 18 nullid,
19 19 nullrev,
20 20 )
21 21 from .thirdparty import attr
22 22 from . import (
23 23 copies,
24 24 encoding,
25 25 error,
26 26 filemerge,
27 27 match as matchmod,
28 28 mergestate as mergestatemod,
29 29 obsutil,
30 30 pathutil,
31 31 pycompat,
32 32 scmutil,
33 33 subrepoutil,
34 34 util,
35 35 worker,
36 36 )
37 37
38 38 _pack = struct.pack
39 39 _unpack = struct.unpack
40 40
41 41
42 42 def _getcheckunknownconfig(repo, section, name):
43 43 config = repo.ui.config(section, name)
44 44 valid = [b'abort', b'ignore', b'warn']
45 45 if config not in valid:
46 46 validstr = b', '.join([b"'" + v + b"'" for v in valid])
47 47 raise error.ConfigError(
48 48 _(b"%s.%s not valid ('%s' is none of %s)")
49 49 % (section, name, config, validstr)
50 50 )
51 51 return config
52 52
53 53
54 54 def _checkunknownfile(repo, wctx, mctx, f, f2=None):
55 55 if wctx.isinmemory():
56 56 # Nothing to do in IMM because nothing in the "working copy" can be an
57 57 # unknown file.
58 58 #
59 59 # Note that we should bail out here, not in ``_checkunknownfiles()``,
60 60 # because that function does other useful work.
61 61 return False
62 62
63 63 if f2 is None:
64 64 f2 = f
65 65 return (
66 66 repo.wvfs.audit.check(f)
67 67 and repo.wvfs.isfileorlink(f)
68 68 and repo.dirstate.normalize(f) not in repo.dirstate
69 69 and mctx[f2].cmp(wctx[f])
70 70 )
71 71
72 72
class _unknowndirschecker(object):
    """
    Look for any unknown files or directories that may have a path conflict
    with a file. If any path prefix of the file exists as a file or link,
    then it conflicts. If the file itself is a directory that contains any
    file that is not tracked, then it conflicts.

    Returns the shortest path at which a conflict occurs, or None if there is
    no conflict.
    """

    def __init__(self):
        # A set of paths known to be good. This prevents repeated checking of
        # dirs. It will be updated with any new dirs that are checked and found
        # to be safe.
        self._unknowndircache = set()

        # A set of paths that are known to be absent. This prevents repeated
        # checking of subdirectories that are known not to exist. It will be
        # updated with any new dirs that are checked and found to be absent.
        self._missingdircache = set()

    def __call__(self, repo, wctx, f):
        # Returns the shortest conflicting path, or None/no value when clean.
        if wctx.isinmemory():
            # Nothing to do in IMM for the same reason as ``_checkunknownfile``.
            return False

        # Check for path prefixes that exist as unknown files.
        # reversed() walks from the shortest prefix outward, so the first
        # hit is the shortest conflicting path.
        for p in reversed(list(pathutil.finddirs(f))):
            if p in self._missingdircache:
                return
            if p in self._unknowndircache:
                continue
            if repo.wvfs.audit.check(p):
                if (
                    repo.wvfs.isfileorlink(p)
                    and repo.dirstate.normalize(p) not in repo.dirstate
                ):
                    return p
                if not repo.wvfs.lexists(p):
                    # Prefix is absent; deeper prefixes cannot exist either.
                    self._missingdircache.add(p)
                    return
                self._unknowndircache.add(p)

        # Check if the file conflicts with a directory containing unknown files.
        if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f):
            # Does the directory contain any files that are not in the dirstate?
            for p, dirs, files in repo.wvfs.walk(f):
                for fn in files:
                    relf = util.pconvert(repo.wvfs.reljoin(p, fn))
                    relf = repo.dirstate.normalize(relf, isknown=True)
                    if relf not in repo.dirstate:
                        return f
        return None
127 127
128 128
def _checkunknownfiles(repo, wctx, mctx, force, actions, mergeforce):
    """
    Considers any actions that care about the presence of conflicting unknown
    files. For some actions, the result is to abort; for others, it is to
    choose a different action.

    Mutates ``actions`` in place; raises error.Abort when conflicting
    untracked files exist and the configured policy is b'abort'.
    """
    fileconflicts = set()
    pathconflicts = set()
    warnconflicts = set()
    abortconflicts = set()
    unknownconfig = _getcheckunknownconfig(repo, b'merge', b'checkunknown')
    ignoredconfig = _getcheckunknownconfig(repo, b'merge', b'checkignored')
    pathconfig = repo.ui.configbool(
        b'experimental', b'merge.checkpathconflicts'
    )
    if not force:

        def collectconflicts(conflicts, config):
            if config == b'abort':
                abortconflicts.update(conflicts)
            elif config == b'warn':
                warnconflicts.update(conflicts)

        checkunknowndirs = _unknowndirschecker()
        for f, (m, args, msg) in pycompat.iteritems(actions):
            if m in (
                mergestatemod.ACTION_CREATED,
                mergestatemod.ACTION_DELETED_CHANGED,
            ):
                if _checkunknownfile(repo, wctx, mctx, f):
                    fileconflicts.add(f)
                elif pathconfig and f not in wctx:
                    path = checkunknowndirs(repo, wctx, f)
                    if path is not None:
                        pathconflicts.add(path)
            elif m == mergestatemod.ACTION_LOCAL_DIR_RENAME_GET:
                if _checkunknownfile(repo, wctx, mctx, f, args[0]):
                    fileconflicts.add(f)

        allconflicts = fileconflicts | pathconflicts
        ignoredconflicts = {c for c in allconflicts if repo.dirstate._ignore(c)}
        unknownconflicts = allconflicts - ignoredconflicts
        collectconflicts(ignoredconflicts, ignoredconfig)
        collectconflicts(unknownconflicts, unknownconfig)
    else:
        for f, (m, args, msg) in pycompat.iteritems(actions):
            if m == mergestatemod.ACTION_CREATED_MERGE:
                fl2, anc = args
                different = _checkunknownfile(repo, wctx, mctx, f)
                if repo.dirstate._ignore(f):
                    config = ignoredconfig
                else:
                    config = unknownconfig

                # The behavior when force is True is described by this table:
                #  config  different  mergeforce  |    action    backup
                #    *         n          *       |      get        n
                #    *         y          y       |     merge       -
                #   abort      y          n       |     merge       -   (1)
                #   warn       y          n       |  warn + get     y
                #  ignore      y          n       |      get        y
                #
                # (1) this is probably the wrong behavior here -- we should
                #     probably abort, but some actions like rebases currently
                #     don't like an abort happening in the middle of
                #     merge.update.
                if not different:
                    actions[f] = (
                        mergestatemod.ACTION_GET,
                        (fl2, False),
                        b'remote created',
                    )
                elif mergeforce or config == b'abort':
                    actions[f] = (
                        mergestatemod.ACTION_MERGE,
                        (f, f, None, False, anc),
                        b'remote differs from untracked local',
                    )
                else:
                    # NOTE: a previous `elif config == b'abort':
                    # abortconflicts.add(f)` branch here was unreachable,
                    # since the branch above already captures every
                    # config == b'abort' case (see (1) in the table); it has
                    # been removed without changing behavior.
                    if config == b'warn':
                        warnconflicts.add(f)
                    actions[f] = (
                        mergestatemod.ACTION_GET,
                        (fl2, True),
                        b'remote created',
                    )

    for f in sorted(abortconflicts):
        warn = repo.ui.warn
        if f in pathconflicts:
            if repo.wvfs.isfileorlink(f):
                warn(_(b"%s: untracked file conflicts with directory\n") % f)
            else:
                warn(_(b"%s: untracked directory conflicts with file\n") % f)
        else:
            warn(_(b"%s: untracked file differs\n") % f)
    if abortconflicts:
        raise error.Abort(
            _(
                b"untracked files in working directory "
                b"differ from files in requested revision"
            )
        )

    for f in sorted(warnconflicts):
        if repo.wvfs.isfileorlink(f):
            repo.ui.warn(_(b"%s: replacing untracked file\n") % f)
        else:
            repo.ui.warn(_(b"%s: replacing untracked files in directory\n") % f)

    # Flag created files whose paths collide with a conflict so the caller
    # knows to back up the untracked version before overwriting it.
    for f, (m, args, msg) in pycompat.iteritems(actions):
        if m == mergestatemod.ACTION_CREATED:
            backup = (
                f in fileconflicts
                or f in pathconflicts
                or any(p in pathconflicts for p in pathutil.finddirs(f))
            )
            (flags,) = args
            actions[f] = (mergestatemod.ACTION_GET, (flags, backup), msg)
250 250
251 251
def _forgetremoved(wctx, mctx, branchmerge):
    """
    Forget removed files

    If we're jumping between revisions (as opposed to merging), and if
    neither the working directory nor the target rev has the file,
    then we need to remove it from the dirstate, to prevent the
    dirstate from listing the file when it is no longer in the
    manifest.

    If we're merging, and the other revision has removed a file
    that is not present in the working directory, we need to mark it
    as removed.
    """
    result = {}
    if branchmerge:
        deletedaction = mergestatemod.ACTION_REMOVE
    else:
        deletedaction = mergestatemod.ACTION_FORGET
    for path in wctx.deleted():
        if path not in mctx:
            result[path] = (deletedaction, None, b"forget deleted")

    if not branchmerge:
        for path in wctx.removed():
            if path not in mctx:
                result[path] = (
                    mergestatemod.ACTION_FORGET,
                    None,
                    b"forget removed",
                )

    return result
285 285
286 286
def _checkcollision(repo, wmf, actions):
    """
    Check for case-folding collisions.

    Builds the provisional post-merge manifest from ``wmf`` plus the
    pending ``actions`` and raises error.Abort on any case-insensitive
    filename or directory collision.
    """
    # If the repo is narrowed, filter out files outside the narrowspec.
    narrowmatch = repo.narrowmatch()
    if not narrowmatch.always():
        pmmf = set(wmf.walk(narrowmatch))
        if actions:
            narrowactions = {}
            for m, actionsfortype in pycompat.iteritems(actions):
                narrowactions[m] = []
                for (f, args, msg) in actionsfortype:
                    if narrowmatch(f):
                        narrowactions[m].append((f, args, msg))
            actions = narrowactions
    else:
        # build provisional merged manifest up
        pmmf = set(wmf)

    if actions:
        # KEEP and EXEC are no-op
        for m in (
            mergestatemod.ACTION_ADD,
            mergestatemod.ACTION_ADD_MODIFIED,
            mergestatemod.ACTION_FORGET,
            mergestatemod.ACTION_GET,
            mergestatemod.ACTION_CHANGED_DELETED,
            mergestatemod.ACTION_DELETED_CHANGED,
        ):
            for f, args, msg in actions[m]:
                pmmf.add(f)
        for f, args, msg in actions[mergestatemod.ACTION_REMOVE]:
            pmmf.discard(f)
        for f, args, msg in actions[mergestatemod.ACTION_DIR_RENAME_MOVE_LOCAL]:
            # Local file f2 is renamed to f.
            f2, flags = args
            pmmf.discard(f2)
            pmmf.add(f)
        for f, args, msg in actions[mergestatemod.ACTION_LOCAL_DIR_RENAME_GET]:
            pmmf.add(f)
        for f, args, msg in actions[mergestatemod.ACTION_MERGE]:
            # A merge with move removes the source file f1.
            f1, f2, fa, move, anc = args
            if move:
                pmmf.discard(f1)
            pmmf.add(f)

    # check case-folding collision in provisional merged manifest
    foldmap = {}
    for f in pmmf:
        fold = util.normcase(f)
        if fold in foldmap:
            raise error.Abort(
                _(b"case-folding collision between %s and %s")
                % (f, foldmap[fold])
            )
        foldmap[fold] = f

    # check case-folding of directories
    foldprefix = unfoldprefix = lastfull = b''
    for fold, f in sorted(foldmap.items()):
        if fold.startswith(foldprefix) and not f.startswith(unfoldprefix):
            # the folded prefix matches but actual casing is different
            raise error.Abort(
                _(b"case-folding collision between %s and directory of %s")
                % (lastfull, f)
            )
        foldprefix = fold + b'/'
        unfoldprefix = f + b'/'
        lastfull = f
356 356
357 357
def driverpreprocess(repo, ms, wctx, labels=None):
    """Run the merge driver's preprocess step, if one exists.

    No driver is implemented in core -- this is an extension point.
    Always reports success.
    """
    # Stub: nothing to do, so the step trivially succeeds.
    return True
363 363
364 364
def driverconclude(repo, ms, wctx, labels=None):
    """Run the merge driver's conclude step, if one exists.

    No driver is implemented in core -- this is an extension point.
    Always reports success.
    """
    # Stub: nothing to do, so the step trivially succeeds.
    return True
370 370
371 371
def _filesindirs(repo, manifest, dirs):
    """Yield ``(file, dir)`` for each manifest file under a dir in ``dirs``.

    Each file is reported at most once, paired with the first matching
    ancestor directory produced by pathutil.finddirs().
    """
    for path in manifest:
        for parent in pathutil.finddirs(path):
            if parent not in dirs:
                continue
            yield path, parent
            break
383 383
384 384
def checkpathconflicts(repo, wctx, mctx, actions):
    """
    Check if any actions introduce path conflicts in the repository, updating
    actions to record or handle the path conflict accordingly.

    Mutates ``actions`` in place; raises error.Abort when the destination
    manifest itself is internally inconsistent (a path is both a file and a
    directory).
    """
    mf = wctx.manifest()

    # The set of local files that conflict with a remote directory.
    localconflicts = set()

    # The set of directories that conflict with a remote file, and so may cause
    # conflicts if they still contain any files after the merge.
    remoteconflicts = set()

    # The set of directories that appear as both a file and a directory in the
    # remote manifest. These indicate an invalid remote manifest, which
    # can't be updated to cleanly.
    invalidconflicts = set()

    # The set of directories that contain files that are being created.
    createdfiledirs = set()

    # The set of files deleted by all the actions.
    deletedfiles = set()

    for f, (m, args, msg) in actions.items():
        if m in (
            mergestatemod.ACTION_CREATED,
            mergestatemod.ACTION_DELETED_CHANGED,
            mergestatemod.ACTION_MERGE,
            mergestatemod.ACTION_CREATED_MERGE,
        ):
            # This action may create a new local file.
            createdfiledirs.update(pathutil.finddirs(f))
            if mf.hasdir(f):
                # The file aliases a local directory. This might be ok if all
                # the files in the local directory are being deleted. This
                # will be checked once we know what all the deleted files are.
                remoteconflicts.add(f)
        # Track the names of all deleted files.
        if m == mergestatemod.ACTION_REMOVE:
            deletedfiles.add(f)
        if m == mergestatemod.ACTION_MERGE:
            f1, f2, fa, move, anc = args
            if move:
                deletedfiles.add(f1)
        if m == mergestatemod.ACTION_DIR_RENAME_MOVE_LOCAL:
            f2, flags = args
            deletedfiles.add(f2)

    # Check all directories that contain created files for path conflicts.
    for p in createdfiledirs:
        if p in mf:
            if p in mctx:
                # A file is in a directory which aliases both a local
                # and a remote file. This is an internal inconsistency
                # within the remote manifest.
                invalidconflicts.add(p)
            else:
                # A file is in a directory which aliases a local file.
                # We will need to rename the local file.
                localconflicts.add(p)
        if p in actions and actions[p][0] in (
            mergestatemod.ACTION_CREATED,
            mergestatemod.ACTION_DELETED_CHANGED,
            mergestatemod.ACTION_MERGE,
            mergestatemod.ACTION_CREATED_MERGE,
        ):
            # The file is in a directory which aliases a remote file.
            # This is an internal inconsistency within the remote
            # manifest.
            invalidconflicts.add(p)

    # Rename all local conflicting files that have not been deleted.
    for p in localconflicts:
        if p not in deletedfiles:
            ctxname = bytes(wctx).rstrip(b'+')
            pnew = util.safename(p, ctxname, wctx, set(actions.keys()))
            porig = wctx[p].copysource() or p
            actions[pnew] = (
                mergestatemod.ACTION_PATH_CONFLICT_RESOLVE,
                (p, porig),
                b'local path conflict',
            )
            actions[p] = (
                mergestatemod.ACTION_PATH_CONFLICT,
                (pnew, b'l'),
                b'path conflict',
            )

    if remoteconflicts:
        # Check if all files in the conflicting directories have been removed.
        ctxname = bytes(mctx).rstrip(b'+')
        for f, p in _filesindirs(repo, mf, remoteconflicts):
            if f not in deletedfiles:
                m, args, msg = actions[p]
                pnew = util.safename(p, ctxname, wctx, set(actions.keys()))
                if m in (
                    mergestatemod.ACTION_DELETED_CHANGED,
                    mergestatemod.ACTION_MERGE,
                ):
                    # Action was merge, just update target.
                    actions[pnew] = (m, args, msg)
                else:
                    # Action was create, change to renamed get action.
                    fl = args[0]
                    actions[pnew] = (
                        mergestatemod.ACTION_LOCAL_DIR_RENAME_GET,
                        (p, fl),
                        b'remote path conflict',
                    )
                actions[p] = (
                    mergestatemod.ACTION_PATH_CONFLICT,
                    (pnew, mergestatemod.ACTION_REMOVE),
                    b'path conflict',
                )
                remoteconflicts.remove(p)
                break

    if invalidconflicts:
        for p in invalidconflicts:
            repo.ui.warn(_(b"%s: is both a file and a directory\n") % p)
        raise error.Abort(_(b"destination manifest contains path conflicts"))
508 508
509 509
def _filternarrowactions(narrowmatch, branchmerge, actions):
    """Drop merge actions that can be ignored because the repo is narrowed.

    Actions touching files inside the narrow spec are kept untouched.
    Outside the spec, a plain update simply discards the action, while a
    branch merge discards no-op actions and aborts on anything else.
    """
    nooptypes = {b'k'}  # TODO: handle with nonconflicttypes
    nonconflicttypes = set(b'a am c cm f g gs r e'.split())
    # Snapshot the items: entries may be deleted while we iterate.
    for fname, act in list(actions.items()):
        if narrowmatch(fname):
            continue
        if not branchmerge:
            # just updating, ignore changes outside clone
            del actions[fname]
        elif act[0] in nooptypes:
            # merge does not affect file
            del actions[fname]
        elif act[0] in nonconflicttypes:
            raise error.Abort(
                _(
                    b'merge affects file \'%s\' outside narrow, '
                    b'which is not yet supported'
                )
                % fname,
                hint=_(b'merging in the other direction may work'),
            )
        else:
            raise error.Abort(
                _(b'conflict in file \'%s\' is outside narrow clone') % fname
            )
542 542
def manifestmerge(
    repo,
    wctx,
    p2,
    pa,
    branchmerge,
    force,
    matcher,
    acceptremote,
    followcopies,
    forcefulldiff=False,
):
    """
    Merge wctx and p2 with ancestor pa and generate merge action list

    branchmerge and force are as passed in to update
    matcher = matcher to filter file lists
    acceptremote = accept the incoming changes without prompting

    Returns a tuple (actions, diverge, renamedelete) where ``actions`` maps
    each filename to an (action-type, args, message) triple.
    """
    if matcher is not None and matcher.always():
        matcher = None

    # manifests fetched in order are going to be faster, so prime the caches
    [
        x.manifest()
        for x in sorted(wctx.parents() + [p2, pa], key=scmutil.intrev)
    ]

    branch_copies1 = copies.branch_copies()
    branch_copies2 = copies.branch_copies()
    diverge = {}
    if followcopies:
        branch_copies1, branch_copies2, diverge = copies.mergecopies(
            repo, wctx, p2, pa
        )

    boolbm = pycompat.bytestr(bool(branchmerge))
    boolf = pycompat.bytestr(bool(force))
    boolm = pycompat.bytestr(bool(matcher))
    repo.ui.note(_(b"resolving manifests\n"))
    repo.ui.debug(
        b" branchmerge: %s, force: %s, partial: %s\n" % (boolbm, boolf, boolm)
    )
    repo.ui.debug(b" ancestor: %s, local: %s, remote: %s\n" % (pa, wctx, p2))

    m1, m2, ma = wctx.manifest(), p2.manifest(), pa.manifest()
    copied1 = set(branch_copies1.copy.values())
    copied1.update(branch_copies1.movewithdir.values())
    copied2 = set(branch_copies2.copy.values())
    copied2.update(branch_copies2.movewithdir.values())

    if b'.hgsubstate' in m1 and wctx.rev() is None:
        # Check whether sub state is modified, and overwrite the manifest
        # to flag the change. If wctx is a committed revision, we shouldn't
        # care for the dirty state of the working directory.
        if any(wctx.sub(s).dirty() for s in wctx.substate):
            m1[b'.hgsubstate'] = modifiednodeid

    # Don't use m2-vs-ma optimization if:
    # - ma is the same as m1 or m2, which we're just going to diff again later
    # - The caller specifically asks for a full diff, which is useful during bid
    # merge.
    if pa not in ([wctx, p2] + wctx.parents()) and not forcefulldiff:
        # Identify which files are relevant to the merge, so we can limit the
        # total m1-vs-m2 diff to just those files. This has significant
        # performance benefits in large repositories.
        relevantfiles = set(ma.diff(m2).keys())

        # For copied and moved files, we need to add the source file too.
        for copykey, copyvalue in pycompat.iteritems(branch_copies1.copy):
            if copyvalue in relevantfiles:
                relevantfiles.add(copykey)
        for movedirkey in branch_copies1.movewithdir:
            relevantfiles.add(movedirkey)
        filesmatcher = scmutil.matchfiles(repo, relevantfiles)
        matcher = matchmod.intersectmatchers(matcher, filesmatcher)

    # diff maps filename -> ((n1, fl1), (n2, fl2)); a None node means the
    # file is absent on that side.
    diff = m1.diff(m2, match=matcher)

    actions = {}
    for f, ((n1, fl1), (n2, fl2)) in pycompat.iteritems(diff):
        if n1 and n2:  # file exists on both local and remote side
            if f not in ma:
                # TODO: what if they're renamed from different sources?
                fa = branch_copies1.copy.get(
                    f, None
                ) or branch_copies2.copy.get(f, None)
                if fa is not None:
                    actions[f] = (
                        mergestatemod.ACTION_MERGE,
                        (f, f, fa, False, pa.node()),
                        b'both renamed from %s' % fa,
                    )
                else:
                    actions[f] = (
                        mergestatemod.ACTION_MERGE,
                        (f, f, None, False, pa.node()),
                        b'both created',
                    )
            else:
                a = ma[f]
                fla = ma.flags(f)
                # nol: no symlink flag involved on either side or ancestor
                nol = b'l' not in fl1 + fl2 + fla
                if n2 == a and fl2 == fla:
                    actions[f] = (
                        mergestatemod.ACTION_KEEP,
                        (),
                        b'remote unchanged',
                    )
                elif n1 == a and fl1 == fla:  # local unchanged - use remote
                    if n1 == n2:  # optimization: keep local content
                        actions[f] = (
                            mergestatemod.ACTION_EXEC,
                            (fl2,),
                            b'update permissions',
                        )
                    else:
                        actions[f] = (
                            mergestatemod.ACTION_GET_OTHER_AND_STORE
                            if branchmerge
                            else mergestatemod.ACTION_GET,
                            (fl2, False),
                            b'remote is newer',
                        )
                elif nol and n2 == a:  # remote only changed 'x'
                    actions[f] = (
                        mergestatemod.ACTION_EXEC,
                        (fl2,),
                        b'update permissions',
                    )
                elif nol and n1 == a:  # local only changed 'x'
                    actions[f] = (
                        mergestatemod.ACTION_GET_OTHER_AND_STORE
                        if branchmerge
                        else mergestatemod.ACTION_GET,
                        (fl1, False),
                        b'remote is newer',
                    )
                else:  # both changed something
                    actions[f] = (
                        mergestatemod.ACTION_MERGE,
                        (f, f, f, False, pa.node()),
                        b'versions differ',
                    )
        elif n1:  # file exists only on local side
            if f in copied2:
                pass  # we'll deal with it on m2 side
            elif (
                f in branch_copies1.movewithdir
            ):  # directory rename, move local
                f2 = branch_copies1.movewithdir[f]
                if f2 in m2:
                    actions[f2] = (
                        mergestatemod.ACTION_MERGE,
                        (f, f2, None, True, pa.node()),
                        b'remote directory rename, both created',
                    )
                else:
                    actions[f2] = (
                        mergestatemod.ACTION_DIR_RENAME_MOVE_LOCAL,
                        (f, fl1),
                        b'remote directory rename - move from %s' % f,
                    )
            elif f in branch_copies1.copy:
                f2 = branch_copies1.copy[f]
                actions[f] = (
                    mergestatemod.ACTION_MERGE,
                    (f, f2, f2, False, pa.node()),
                    b'local copied/moved from %s' % f2,
                )
            elif f in ma:  # clean, a different, no remote
                if n1 != ma[f]:
                    if acceptremote:
                        actions[f] = (
                            mergestatemod.ACTION_REMOVE,
                            None,
                            b'remote delete',
                        )
                    else:
                        actions[f] = (
                            mergestatemod.ACTION_CHANGED_DELETED,
                            (f, None, f, False, pa.node()),
                            b'prompt changed/deleted',
                        )
                elif n1 == addednodeid:
                    # This file was locally added. We should forget it instead of
                    # deleting it.
                    actions[f] = (
                        mergestatemod.ACTION_FORGET,
                        None,
                        b'remote deleted',
                    )
                else:
                    actions[f] = (
                        mergestatemod.ACTION_REMOVE,
                        None,
                        b'other deleted',
                    )
        elif n2:  # file exists only on remote side
            if f in copied1:
                pass  # we'll deal with it on m1 side
            elif f in branch_copies2.movewithdir:
                f2 = branch_copies2.movewithdir[f]
                if f2 in m1:
                    actions[f2] = (
                        mergestatemod.ACTION_MERGE,
                        (f2, f, None, False, pa.node()),
                        b'local directory rename, both created',
                    )
                else:
                    actions[f2] = (
                        mergestatemod.ACTION_LOCAL_DIR_RENAME_GET,
                        (f, fl2),
                        b'local directory rename - get from %s' % f,
                    )
            elif f in branch_copies2.copy:
                f2 = branch_copies2.copy[f]
                if f2 in m2:
                    actions[f] = (
                        mergestatemod.ACTION_MERGE,
                        (f2, f, f2, False, pa.node()),
                        b'remote copied from %s' % f2,
                    )
                else:
                    actions[f] = (
                        mergestatemod.ACTION_MERGE,
                        (f2, f, f2, True, pa.node()),
                        b'remote moved from %s' % f2,
                    )
            elif f not in ma:
                # local unknown, remote created: the logic is described by the
                # following table:
                #
                # force  branchmerge  different  |  action
                #   n         *           *      |   create
                #   y         n           *      |   create
                #   y         y           n      |   create
                #   y         y           y      |   merge
                #
                # Checking whether the files are different is expensive, so we
                # don't do that when we can avoid it.
                if not force:
                    actions[f] = (
                        mergestatemod.ACTION_CREATED,
                        (fl2,),
                        b'remote created',
                    )
                elif not branchmerge:
                    actions[f] = (
                        mergestatemod.ACTION_CREATED,
                        (fl2,),
                        b'remote created',
                    )
                else:
                    actions[f] = (
                        mergestatemod.ACTION_CREATED_MERGE,
                        (fl2, pa.node()),
                        b'remote created, get or merge',
                    )
            elif n2 != ma[f]:
                df = None
                for d in branch_copies1.dirmove:
                    if f.startswith(d):
                        # new file added in a directory that was moved
                        df = branch_copies1.dirmove[d] + f[len(d) :]
                        break
                if df is not None and df in m1:
                    actions[df] = (
                        mergestatemod.ACTION_MERGE,
                        (df, f, f, False, pa.node()),
                        b'local directory rename - respect move '
                        b'from %s' % f,
                    )
                elif acceptremote:
                    actions[f] = (
                        mergestatemod.ACTION_CREATED,
                        (fl2,),
                        b'remote recreating',
                    )
                else:
                    actions[f] = (
                        mergestatemod.ACTION_DELETED_CHANGED,
                        (None, f, f, False, pa.node()),
                        b'prompt deleted/changed',
                    )

    if repo.ui.configbool(b'experimental', b'merge.checkpathconflicts'):
        # If we are merging, look for path conflicts.
        checkpathconflicts(repo, wctx, p2, actions)

    narrowmatch = repo.narrowmatch()
    if not narrowmatch.always():
        # Updates "actions" in place
        _filternarrowactions(narrowmatch, branchmerge, actions)

    renamedelete = branch_copies1.renamedelete
    renamedelete.update(branch_copies2.renamedelete)

    return actions, diverge, renamedelete
843 842
844 843
def _resolvetrivial(repo, wctx, mctx, ancestor, actions):
    """Downgrade false conflicts whose nodeid changed while the file
    content stayed identical to the ancestor's."""
    # Iterate over a snapshot because entries are rewritten or removed
    # in place as trivial conflicts are resolved.
    for fname, (kind, args, msg) in list(actions.items()):
        if fname not in ancestor:
            continue
        if kind == mergestatemod.ACTION_CHANGED_DELETED:
            if not wctx[fname].cmp(ancestor[fname]):
                # local did change but ended up with same content
                actions[fname] = (
                    mergestatemod.ACTION_REMOVE,
                    None,
                    b'prompt same',
                )
        elif kind == mergestatemod.ACTION_DELETED_CHANGED:
            if not mctx[fname].cmp(ancestor[fname]):
                # remote did change but ended up with same content
                del actions[fname]  # don't get = keep local deleted
866 865
def calculateupdates(
    repo,
    wctx,
    mctx,
    ancestors,
    branchmerge,
    force,
    acceptremote,
    followcopies,
    matcher=None,
    mergeforce=False,
):
    """Calculate the actions needed to merge mctx into wctx using ancestors

    With a single ancestor this is a straight manifestmerge(); with several
    ancestors each one produces a "bid" per file and an auction picks the
    winning action.  Returns (prunedactions, diverge, renamedelete).
    """
    # Avoid cycle.
    from . import sparse

    if len(ancestors) == 1:  # default
        actions, diverge, renamedelete = manifestmerge(
            repo,
            wctx,
            mctx,
            ancestors[0],
            branchmerge,
            force,
            matcher,
            acceptremote,
            followcopies,
        )
        _checkunknownfiles(repo, wctx, mctx, force, actions, mergeforce)

    else:  # only when merge.preferancestor=* - the default
        repo.ui.note(
            _(b"note: merging %s and %s using bids from ancestors %s\n")
            % (
                wctx,
                mctx,
                _(b' and ').join(pycompat.bytestr(anc) for anc in ancestors),
            )
        )

        # Call for bids
        fbids = (
            {}
        )  # mapping filename to bids (action method to list af actions)
        diverge, renamedelete = None, None
        for ancestor in ancestors:
            repo.ui.note(_(b'\ncalculating bids for ancestor %s\n') % ancestor)
            actions, diverge1, renamedelete1 = manifestmerge(
                repo,
                wctx,
                mctx,
                ancestor,
                branchmerge,
                force,
                matcher,
                acceptremote,
                followcopies,
                forcefulldiff=True,
            )
            _checkunknownfiles(repo, wctx, mctx, force, actions, mergeforce)

            # Track the shortest set of warning on the theory that bid
            # merge will correctly incorporate more information
            if diverge is None or len(diverge1) < len(diverge):
                diverge = diverge1
            if renamedelete is None or len(renamedelete) < len(renamedelete1):
                renamedelete = renamedelete1

            # Fold this ancestor's actions into the per-file bid table.
            for f, a in sorted(pycompat.iteritems(actions)):
                m, args, msg = a
                if m == mergestatemod.ACTION_GET_OTHER_AND_STORE:
                    m = mergestatemod.ACTION_GET
                repo.ui.debug(b' %s: %s -> %s\n' % (f, msg, m))
                if f in fbids:
                    d = fbids[f]
                    if m in d:
                        d[m].append(a)
                    else:
                        d[m] = [a]
                else:
                    fbids[f] = {m: [a]}

        # Pick the best bid for each file
        repo.ui.note(_(b'\nauction for merging merge bids\n'))
        actions = {}
        for f, bids in sorted(fbids.items()):
            # bids is a mapping from action method to list af actions
            # Consensus?
            if len(bids) == 1:  # all bids are the same kind of method
                m, l = list(bids.items())[0]
                if all(a == l[0] for a in l[1:]):  # len(bids) is > 1
                    repo.ui.note(_(b" %s: consensus for %s\n") % (f, m))
                    actions[f] = l[0]
                    continue
            # If keep is an option, just do it.
            if mergestatemod.ACTION_KEEP in bids:
                repo.ui.note(_(b" %s: picking 'keep' action\n") % f)
                actions[f] = bids[mergestatemod.ACTION_KEEP][0]
                continue
            # If there are gets and they all agree [how could they not?], do it.
            if mergestatemod.ACTION_GET in bids:
                ga0 = bids[mergestatemod.ACTION_GET][0]
                if all(a == ga0 for a in bids[mergestatemod.ACTION_GET][1:]):
                    repo.ui.note(_(b" %s: picking 'get' action\n") % f)
                    actions[f] = ga0
                    continue
            # TODO: Consider other simple actions such as mode changes
            # Handle inefficient democrazy.
            repo.ui.note(_(b' %s: multiple bids for merge action:\n') % f)
            for m, l in sorted(bids.items()):
                for _f, args, msg in l:
                    repo.ui.note(b'  %s -> %s\n' % (msg, m))
            # Pick random action. TODO: Instead, prompt user when resolving
            m, l = list(bids.items())[0]
            repo.ui.warn(
                _(b' %s: ambiguous merge - picked %s action\n') % (f, m)
            )
            actions[f] = l[0]
            continue
        repo.ui.note(_(b'end of auction\n\n'))

    if wctx.rev() is None:
        fractions = _forgetremoved(wctx, mctx, branchmerge)
        actions.update(fractions)

    prunedactions = sparse.filterupdatesactions(
        repo, wctx, mctx, branchmerge, actions
    )
    # NOTE(review): _resolvetrivial mutates ``actions`` after
    # ``prunedactions`` has already been computed from it — confirm whether
    # the trivial resolutions are intentionally excluded from the returned
    # (pruned) action dict.
    _resolvetrivial(repo, wctx, mctx, ancestors[0], actions)

    return prunedactions, diverge, renamedelete
998 997
999 998
def _getcwd():
    """Return the current working directory, or None if it has been
    deleted out from under us (ENOENT)."""
    try:
        return encoding.getcwd()
    except OSError as err:
        if err.errno != errno.ENOENT:
            raise
        return None
1008 1007
def batchremove(repo, wctx, actions):
    """apply removes to the working directory

    ``actions`` is an iterable of (filename, args, message) triples.
    yields tuples (count, filename) for progress updates; removal failures
    are reported as warnings rather than raised.
    """
    verbose = repo.ui.verbose
    # Remember whether cwd existed before we start deleting things.
    cwd = _getcwd()
    i = 0
    for f, args, msg in actions:
        repo.ui.debug(b" %s: %s -> r\n" % (f, msg))
        if verbose:
            repo.ui.note(_(b"removing %s\n") % f)
        wctx[f].audit()
        try:
            wctx[f].remove(ignoremissing=True)
        except OSError as inst:
            repo.ui.warn(
                _(b"update failed to remove %s: %s!\n") % (f, inst.strerror)
            )
        # Emit a progress tuple roughly every 100 files, then reset the
        # counter; the remainder is flushed after the loop.
        if i == 100:
            yield i, f
            i = 0
        i += 1
    if i > 0:
        yield i, f

    if cwd and not _getcwd():
        # cwd was removed in the course of removing files; print a helpful
        # warning.
        repo.ui.warn(
            _(
                b"current directory was removed\n"
                b"(consider changing to repo root: %s)\n"
            )
            % repo.root
        )
1045 1044
1046 1045
def batchget(repo, mctx, wctx, wantfiledata, actions):
    """apply gets to the working directory

    mctx is the context to get from

    Yields arbitrarily many (False, tuple) for progress updates, followed by
    exactly one (True, filedata). When wantfiledata is false, filedata is an
    empty dict. When wantfiledata is true, filedata[f] is a triple (mode, size,
    mtime) of the file f written for each action.
    """
    filedata = {}
    verbose = repo.ui.verbose
    fctx = mctx.filectx
    ui = repo.ui
    i = 0
    with repo.wvfs.backgroundclosing(ui, expectedcount=len(actions)):
        for f, (flags, backup), msg in actions:
            repo.ui.debug(b" %s: %s -> g\n" % (f, msg))
            if verbose:
                repo.ui.note(_(b"getting %s\n") % f)

            if backup:
                # If a file or directory exists with the same name, back that
                # up. Otherwise, look to see if there is a file that conflicts
                # with a directory this file is in, and if so, back that up.
                conflicting = f
                if not repo.wvfs.lexists(f):
                    for p in pathutil.finddirs(f):
                        if repo.wvfs.isfileorlink(p):
                            conflicting = p
                            break
                if repo.wvfs.lexists(conflicting):
                    orig = scmutil.backuppath(ui, repo, conflicting)
                    util.rename(repo.wjoin(conflicting), orig)
            wfctx = wctx[f]
            # Forget any unknown-file record before overwriting the path.
            wfctx.clearunknown()
            atomictemp = ui.configbool(b"experimental", b"update.atomic-file")
            size = wfctx.write(
                fctx(f).data(),
                flags,
                backgroundclose=True,
                atomictemp=atomictemp,
            )
            if wantfiledata:
                s = wfctx.lstat()
                mode = s.st_mode
                mtime = s[stat.ST_MTIME]
                filedata[f] = (mode, size, mtime)  # for dirstate.normal
            # Emit a progress tuple roughly every 100 files written.
            if i == 100:
                yield False, (i, f)
                i = 0
            i += 1
        if i > 0:
            yield False, (i, f)
    yield True, filedata
1102 1101
1103 1102
def _prefetchfiles(repo, ctx, actions):
    """Prefetch the file contents ``ctx`` contributes to the merge.

    ``ctx`` is the context being merged in.  Only action kinds that read
    data from that context are considered; 'a', 'am', 'f', 'r', 'dm', 'e',
    'k', 'p' and 'pr' never touch it, and 'cd' never resolves to something
    from the remote side, so those are skipped.
    """
    relevant = (
        mergestatemod.ACTION_GET,
        mergestatemod.ACTION_DELETED_CHANGED,
        mergestatemod.ACTION_LOCAL_DIR_RENAME_GET,
        mergestatemod.ACTION_MERGE,
    )
    # Collect every filename named by a relevant action.
    files = []
    for kind in relevant:
        for fname, args, msg in actions[kind]:
            files.append(fname)
    scmutil.prefetchfiles(
        repo, [(ctx.rev(), scmutil.matchfiles(repo, files))]
    )
1133 1132
1134 1133
@attr.s(frozen=True)
class updateresult(object):
    """Immutable summary of how many files an update/merge touched."""

    updatedcount = attr.ib()
    mergedcount = attr.ib()
    removedcount = attr.ib()
    unresolvedcount = attr.ib()

    def isempty(self):
        """True when nothing was updated, merged or removed and no
        conflicts remain."""
        counts = (
            self.updatedcount,
            self.mergedcount,
            self.removedcount,
            self.unresolvedcount,
        )
        return not any(counts)
1149 1148
1150 1149
def emptyactions():
    """create an actions dict, to be populated and passed to applyupdates()"""
    actions = {}
    for actionkind in (
        mergestatemod.ACTION_ADD,
        mergestatemod.ACTION_ADD_MODIFIED,
        mergestatemod.ACTION_FORGET,
        mergestatemod.ACTION_GET,
        mergestatemod.ACTION_CHANGED_DELETED,
        mergestatemod.ACTION_DELETED_CHANGED,
        mergestatemod.ACTION_REMOVE,
        mergestatemod.ACTION_DIR_RENAME_MOVE_LOCAL,
        mergestatemod.ACTION_LOCAL_DIR_RENAME_GET,
        mergestatemod.ACTION_MERGE,
        mergestatemod.ACTION_EXEC,
        mergestatemod.ACTION_KEEP,
        mergestatemod.ACTION_PATH_CONFLICT,
        mergestatemod.ACTION_PATH_CONFLICT_RESOLVE,
        mergestatemod.ACTION_GET_OTHER_AND_STORE,
    ):
        # each key gets its own fresh list — they must not share one instance
        actions[actionkind] = []
    return actions
1173 1172
1174 1173
def applyupdates(
    repo, actions, wctx, mctx, overwrite, wantfiledata, labels=None
):
    """apply the merge action list to the working directory

    wctx is the working copy context
    mctx is the context to be merged into the working copy

    Return a tuple of (counts, filedata), where counts is a tuple
    (updated, merged, removed, unresolved) that describes how many
    files were affected by the update, and filedata is as described in
    batchget.
    """

    _prefetchfiles(repo, mctx, actions)

    updated, merged, removed = 0, 0, 0
    ms = mergestatemod.mergestate.clean(
        repo, wctx.p1().node(), mctx.node(), labels
    )

    # add ACTION_GET_OTHER_AND_STORE to mergestate
    for e in actions[mergestatemod.ACTION_GET_OTHER_AND_STORE]:
        ms.addmergedother(e[0])

    moves = []
    # Sort each action list in place for deterministic processing order.
    for m, l in actions.items():
        l.sort()

    # 'cd' and 'dc' actions are treated like other merge conflicts
    mergeactions = sorted(actions[mergestatemod.ACTION_CHANGED_DELETED])
    mergeactions.extend(sorted(actions[mergestatemod.ACTION_DELETED_CHANGED]))
    mergeactions.extend(actions[mergestatemod.ACTION_MERGE])
    # Register each merge in the merge state before touching the files.
    for f, args, msg in mergeactions:
        f1, f2, fa, move, anc = args
        if f == b'.hgsubstate':  # merged internally
            continue
        if f1 is None:
            fcl = filemerge.absentfilectx(wctx, fa)
        else:
            repo.ui.debug(b" preserving %s for resolve of %s\n" % (f1, f))
            fcl = wctx[f1]
        if f2 is None:
            fco = filemerge.absentfilectx(mctx, fa)
        else:
            fco = mctx[f2]
        actx = repo[anc]
        if fa in actx:
            fca = actx[fa]
        else:
            # TODO: move to absentfilectx
            fca = repo.filectx(f1, fileid=nullrev)
        ms.add(fcl, fco, fca, f)
        if f1 != f and move:
            moves.append(f1)

    # remove renamed files after safely stored
    for f in moves:
        if wctx[f].lexists():
            repo.ui.debug(b"removing %s\n" % f)
            wctx[f].audit()
            wctx[f].remove()

    numupdates = sum(
        len(l) for m, l in actions.items() if m != mergestatemod.ACTION_KEEP
    )
    progress = repo.ui.makeprogress(
        _(b'updating'), unit=_(b'files'), total=numupdates
    )

    if [
        a
        for a in actions[mergestatemod.ACTION_REMOVE]
        if a[0] == b'.hgsubstate'
    ]:
        subrepoutil.submerge(repo, wctx, mctx, wctx, overwrite, labels)

    # record path conflicts
    for f, args, msg in actions[mergestatemod.ACTION_PATH_CONFLICT]:
        f1, fo = args
        s = repo.ui.status
        s(
            _(
                b"%s: path conflict - a file or link has the same name as a "
                b"directory\n"
            )
            % f
        )
        if fo == b'l':
            s(_(b"the local file has been renamed to %s\n") % f1)
        else:
            s(_(b"the remote file has been renamed to %s\n") % f1)
        s(_(b"resolve manually then use 'hg resolve --mark %s'\n") % f)
        ms.addpath(f, f1, fo)
        progress.increment(item=f)

    # When merging in-memory, we can't support worker processes, so set the
    # per-item cost at 0 in that case.
    cost = 0 if wctx.isinmemory() else 0.001

    # remove in parallel (must come before resolving path conflicts and getting)
    prog = worker.worker(
        repo.ui,
        cost,
        batchremove,
        (repo, wctx),
        actions[mergestatemod.ACTION_REMOVE],
    )
    for i, item in prog:
        progress.increment(step=i, item=item)
    removed = len(actions[mergestatemod.ACTION_REMOVE])

    # resolve path conflicts (must come before getting)
    for f, args, msg in actions[mergestatemod.ACTION_PATH_CONFLICT_RESOLVE]:
        repo.ui.debug(b" %s: %s -> pr\n" % (f, msg))
        (f0, origf0) = args
        if wctx[f0].lexists():
            repo.ui.note(_(b"moving %s to %s\n") % (f0, f))
            wctx[f].audit()
            wctx[f].write(wctx.filectx(f0).data(), wctx.filectx(f0).flags())
            wctx[f0].remove()
        progress.increment(item=f)

    # get in parallel.
    threadsafe = repo.ui.configbool(
        b'experimental', b'worker.wdir-get-thread-safe'
    )
    prog = worker.worker(
        repo.ui,
        cost,
        batchget,
        (repo, mctx, wctx, wantfiledata),
        actions[mergestatemod.ACTION_GET],
        threadsafe=threadsafe,
        hasretval=True,
    )
    getfiledata = {}
    # batchget yields (False, progress) tuples and one final (True, filedata).
    for final, res in prog:
        if final:
            getfiledata = res
        else:
            i, item = res
            progress.increment(step=i, item=item)
    updated = len(actions[mergestatemod.ACTION_GET])

    if [a for a in actions[mergestatemod.ACTION_GET] if a[0] == b'.hgsubstate']:
        subrepoutil.submerge(repo, wctx, mctx, wctx, overwrite, labels)

    # forget (manifest only, just log it) (must come first)
    for f, args, msg in actions[mergestatemod.ACTION_FORGET]:
        repo.ui.debug(b" %s: %s -> f\n" % (f, msg))
        progress.increment(item=f)

    # re-add (manifest only, just log it)
    for f, args, msg in actions[mergestatemod.ACTION_ADD]:
        repo.ui.debug(b" %s: %s -> a\n" % (f, msg))
        progress.increment(item=f)

    # re-add/mark as modified (manifest only, just log it)
    for f, args, msg in actions[mergestatemod.ACTION_ADD_MODIFIED]:
        repo.ui.debug(b" %s: %s -> am\n" % (f, msg))
        progress.increment(item=f)

    # keep (noop, just log it)
    for f, args, msg in actions[mergestatemod.ACTION_KEEP]:
        repo.ui.debug(b" %s: %s -> k\n" % (f, msg))
        # no progress

    # directory rename, move local
    for f, args, msg in actions[mergestatemod.ACTION_DIR_RENAME_MOVE_LOCAL]:
        repo.ui.debug(b" %s: %s -> dm\n" % (f, msg))
        progress.increment(item=f)
        f0, flags = args
        repo.ui.note(_(b"moving %s to %s\n") % (f0, f))
        wctx[f].audit()
        wctx[f].write(wctx.filectx(f0).data(), flags)
        wctx[f0].remove()
        updated += 1

    # local directory rename, get
    for f, args, msg in actions[mergestatemod.ACTION_LOCAL_DIR_RENAME_GET]:
        repo.ui.debug(b" %s: %s -> dg\n" % (f, msg))
        progress.increment(item=f)
        f0, flags = args
        repo.ui.note(_(b"getting %s to %s\n") % (f0, f))
        wctx[f].write(mctx.filectx(f0).data(), flags)
        updated += 1

    # exec
    for f, args, msg in actions[mergestatemod.ACTION_EXEC]:
        repo.ui.debug(b" %s: %s -> e\n" % (f, msg))
        progress.increment(item=f)
        (flags,) = args
        wctx[f].audit()
        wctx[f].setflags(b'l' in flags, b'x' in flags)
        updated += 1

    # the ordering is important here -- ms.mergedriver will raise if the merge
    # driver has changed, and we want to be able to bypass it when overwrite is
    # True
    usemergedriver = not overwrite and mergeactions and ms.mergedriver

    if usemergedriver:
        if wctx.isinmemory():
            raise error.InMemoryMergeConflictsError(
                b"in-memory merge does not support mergedriver"
            )
        ms.commit()
        proceed = driverpreprocess(repo, ms, wctx, labels=labels)
        # the driver might leave some files unresolved
        unresolvedf = set(ms.unresolved())
        if not proceed:
            # XXX setting unresolved to at least 1 is a hack to make sure we
            # error out
            return updateresult(
                updated, merged, removed, max(len(unresolvedf), 1)
            )
        # Keep only the merge actions the driver left unresolved.
        newactions = []
        for f, args, msg in mergeactions:
            if f in unresolvedf:
                newactions.append((f, args, msg))
        mergeactions = newactions

    try:
        # premerge
        tocomplete = []
        for f, args, msg in mergeactions:
            repo.ui.debug(b" %s: %s -> m (premerge)\n" % (f, msg))
            progress.increment(item=f)
            if f == b'.hgsubstate':  # subrepo states need updating
                subrepoutil.submerge(
                    repo, wctx, mctx, wctx.ancestor(mctx), overwrite, labels
                )
                continue
            wctx[f].audit()
            complete, r = ms.preresolve(f, wctx)
            if not complete:
                numupdates += 1
                tocomplete.append((f, args, msg))

        # merge
        for f, args, msg in tocomplete:
            repo.ui.debug(b" %s: %s -> m (merge)\n" % (f, msg))
            progress.increment(item=f, total=numupdates)
            ms.resolve(f, wctx)

    finally:
        ms.commit()

    unresolved = ms.unresolvedcount()

    if (
        usemergedriver
        and not unresolved
        and ms.mdstate() != mergestatemod.MERGE_DRIVER_STATE_SUCCESS
    ):
        if not driverconclude(repo, ms, wctx, labels=labels):
            # XXX setting unresolved to at least 1 is a hack to make sure we
            # error out
            unresolved = max(unresolved, 1)

        ms.commit()

    msupdated, msmerged, msremoved = ms.counts()
    updated += msupdated
    merged += msmerged
    removed += msremoved

    extraactions = ms.actions()
    if extraactions:
        mfiles = {a[0] for a in actions[mergestatemod.ACTION_MERGE]}
        for k, acts in pycompat.iteritems(extraactions):
            actions[k].extend(acts)
            if k == mergestatemod.ACTION_GET and wantfiledata:
                # no filedata until mergestate is updated to provide it
                for a in acts:
                    getfiledata[a[0]] = None
            # Remove these files from actions[ACTION_MERGE] as well. This is
            # important because in recordupdates, files in actions[ACTION_MERGE]
            # are processed after files in other actions, and the merge driver
            # might add files to those actions via extraactions above. This can
            # lead to a file being recorded twice, with poor results. This is
            # especially problematic for actions[ACTION_REMOVE] (currently only
            # possible with the merge driver in the initial merge process;
            # interrupted merges don't go through this flow).
            #
            # The real fix here is to have indexes by both file and action so
            # that when the action for a file is changed it is automatically
            # reflected in the other action lists. But that involves a more
            # complex data structure, so this will do for now.
            #
            # We don't need to do the same operation for 'dc' and 'cd' because
            # those lists aren't consulted again.
            mfiles.difference_update(a[0] for a in acts)

        actions[mergestatemod.ACTION_MERGE] = [
            a for a in actions[mergestatemod.ACTION_MERGE] if a[0] in mfiles
        ]

    progress.complete()
    assert len(getfiledata) == (
        len(actions[mergestatemod.ACTION_GET]) if wantfiledata else 0
    )
    return updateresult(updated, merged, removed, unresolved), getfiledata
1479 1478
1480 1479
1481 1480 UPDATECHECK_ABORT = b'abort' # handled at higher layers
1482 1481 UPDATECHECK_NONE = b'none'
1483 1482 UPDATECHECK_LINEAR = b'linear'
1484 1483 UPDATECHECK_NO_CONFLICT = b'noconflict'
1485 1484
1486 1485
1487 1486 def update(
1488 1487 repo,
1489 1488 node,
1490 1489 branchmerge,
1491 1490 force,
1492 1491 ancestor=None,
1493 1492 mergeancestor=False,
1494 1493 labels=None,
1495 1494 matcher=None,
1496 1495 mergeforce=False,
1497 1496 updatedirstate=True,
1498 1497 updatecheck=None,
1499 1498 wc=None,
1500 1499 ):
1501 1500 """
1502 1501 Perform a merge between the working directory and the given node
1503 1502
1504 1503 node = the node to update to
1505 1504 branchmerge = whether to merge between branches
1506 1505 force = whether to force branch merging or file overwriting
1507 1506 matcher = a matcher to filter file lists (dirstate not updated)
1508 1507 mergeancestor = whether it is merging with an ancestor. If true,
1509 1508 we should accept the incoming changes for any prompts that occur.
1510 1509 If false, merging with an ancestor (fast-forward) is only allowed
1511 1510 between different named branches. This flag is used by rebase extension
1512 1511 as a temporary fix and should be avoided in general.
1513 1512 labels = labels to use for base, local and other
1514 1513 mergeforce = whether the merge was run with 'merge --force' (deprecated): if
1515 1514 this is True, then 'force' should be True as well.
1516 1515
1517 1516 The table below shows all the behaviors of the update command given the
1518 1517 -c/--check and -C/--clean or no options, whether the working directory is
1519 1518 dirty, whether a revision is specified, and the relationship of the parent
1520 1519 rev to the target rev (linear or not). Match from top first. The -n
1521 1520 option doesn't exist on the command line, but represents the
1522 1521 experimental.updatecheck=noconflict option.
1523 1522
1524 1523 This logic is tested by test-update-branches.t.
1525 1524
1526 1525 -c -C -n -m dirty rev linear | result
1527 1526 y y * * * * * | (1)
1528 1527 y * y * * * * | (1)
1529 1528 y * * y * * * | (1)
1530 1529 * y y * * * * | (1)
1531 1530 * y * y * * * | (1)
1532 1531 * * y y * * * | (1)
1533 1532 * * * * * n n | x
1534 1533 * * * * n * * | ok
1535 1534 n n n n y * y | merge
1536 1535 n n n n y y n | (2)
1537 1536 n n n y y * * | merge
1538 1537 n n y n y * * | merge if no conflict
1539 1538 n y n n y * * | discard
1540 1539 y n n n y * * | (3)
1541 1540
1542 1541 x = can't happen
1543 1542 * = don't-care
1544 1543 1 = incompatible options (checked in commands.py)
1545 1544 2 = abort: uncommitted changes (commit or update --clean to discard changes)
1546 1545 3 = abort: uncommitted changes (checked in commands.py)
1547 1546
1548 1547 The merge is performed inside ``wc``, a workingctx-like objects. It defaults
1549 1548 to repo[None] if None is passed.
1550 1549
1551 1550 Return the same tuple as applyupdates().
1552 1551 """
1553 1552 # Avoid cycle.
1554 1553 from . import sparse
1555 1554
1556 1555 # This function used to find the default destination if node was None, but
1557 1556 # that's now in destutil.py.
1558 1557 assert node is not None
1559 1558 if not branchmerge and not force:
1560 1559 # TODO: remove the default once all callers that pass branchmerge=False
1561 1560 # and force=False pass a value for updatecheck. We may want to allow
1562 1561 # updatecheck='abort' to better suppport some of these callers.
1563 1562 if updatecheck is None:
1564 1563 updatecheck = UPDATECHECK_LINEAR
1565 1564 if updatecheck not in (
1566 1565 UPDATECHECK_NONE,
1567 1566 UPDATECHECK_LINEAR,
1568 1567 UPDATECHECK_NO_CONFLICT,
1569 1568 ):
1570 1569 raise ValueError(
1571 1570 r'Invalid updatecheck %r (can accept %r)'
1572 1571 % (
1573 1572 updatecheck,
1574 1573 (
1575 1574 UPDATECHECK_NONE,
1576 1575 UPDATECHECK_LINEAR,
1577 1576 UPDATECHECK_NO_CONFLICT,
1578 1577 ),
1579 1578 )
1580 1579 )
1581 1580 if wc is not None and wc.isinmemory():
1582 1581 maybe_wlock = util.nullcontextmanager()
1583 1582 else:
1584 1583 maybe_wlock = repo.wlock()
1585 1584 with maybe_wlock:
1586 1585 if wc is None:
1587 1586 wc = repo[None]
1588 1587 pl = wc.parents()
1589 1588 p1 = pl[0]
1590 1589 p2 = repo[node]
1591 1590 if ancestor is not None:
1592 1591 pas = [repo[ancestor]]
1593 1592 else:
1594 1593 if repo.ui.configlist(b'merge', b'preferancestor') == [b'*']:
1595 1594 cahs = repo.changelog.commonancestorsheads(p1.node(), p2.node())
1596 1595 pas = [repo[anc] for anc in (sorted(cahs) or [nullid])]
1597 1596 else:
1598 1597 pas = [p1.ancestor(p2, warn=branchmerge)]
1599 1598
1600 1599 fp1, fp2, xp1, xp2 = p1.node(), p2.node(), bytes(p1), bytes(p2)
1601 1600
1602 1601 overwrite = force and not branchmerge
1603 1602 ### check phase
1604 1603 if not overwrite:
1605 1604 if len(pl) > 1:
1606 1605 raise error.Abort(_(b"outstanding uncommitted merge"))
1607 1606 ms = mergestatemod.mergestate.read(repo)
1608 1607 if list(ms.unresolved()):
1609 1608 raise error.Abort(
1610 1609 _(b"outstanding merge conflicts"),
1611 1610 hint=_(b"use 'hg resolve' to resolve"),
1612 1611 )
1613 1612 if branchmerge:
1614 1613 if pas == [p2]:
1615 1614 raise error.Abort(
1616 1615 _(
1617 1616 b"merging with a working directory ancestor"
1618 1617 b" has no effect"
1619 1618 )
1620 1619 )
1621 1620 elif pas == [p1]:
1622 1621 if not mergeancestor and wc.branch() == p2.branch():
1623 1622 raise error.Abort(
1624 1623 _(b"nothing to merge"),
1625 1624 hint=_(b"use 'hg update' or check 'hg heads'"),
1626 1625 )
1627 1626 if not force and (wc.files() or wc.deleted()):
1628 1627 raise error.Abort(
1629 1628 _(b"uncommitted changes"),
1630 1629 hint=_(b"use 'hg status' to list changes"),
1631 1630 )
1632 1631 if not wc.isinmemory():
1633 1632 for s in sorted(wc.substate):
1634 1633 wc.sub(s).bailifchanged()
1635 1634
1636 1635 elif not overwrite:
1637 1636 if p1 == p2: # no-op update
1638 1637 # call the hooks and exit early
1639 1638 repo.hook(b'preupdate', throw=True, parent1=xp2, parent2=b'')
1640 1639 repo.hook(b'update', parent1=xp2, parent2=b'', error=0)
1641 1640 return updateresult(0, 0, 0, 0)
1642 1641
1643 1642 if updatecheck == UPDATECHECK_LINEAR and pas not in (
1644 1643 [p1],
1645 1644 [p2],
1646 1645 ): # nonlinear
1647 1646 dirty = wc.dirty(missing=True)
1648 1647 if dirty:
1649 1648 # Branching is a bit strange to ensure we do the minimal
1650 1649 # amount of call to obsutil.foreground.
1651 1650 foreground = obsutil.foreground(repo, [p1.node()])
1652 1651 # note: the <node> variable contains a random identifier
1653 1652 if repo[node].node() in foreground:
1654 1653 pass # allow updating to successors
1655 1654 else:
1656 1655 msg = _(b"uncommitted changes")
1657 1656 hint = _(b"commit or update --clean to discard changes")
1658 1657 raise error.UpdateAbort(msg, hint=hint)
1659 1658 else:
1660 1659 # Allow jumping branches if clean and specific rev given
1661 1660 pass
1662 1661
1663 1662 if overwrite:
1664 1663 pas = [wc]
1665 1664 elif not branchmerge:
1666 1665 pas = [p1]
1667 1666
1668 1667 # deprecated config: merge.followcopies
1669 1668 followcopies = repo.ui.configbool(b'merge', b'followcopies')
1670 1669 if overwrite:
1671 1670 followcopies = False
1672 1671 elif not pas[0]:
1673 1672 followcopies = False
1674 1673 if not branchmerge and not wc.dirty(missing=True):
1675 1674 followcopies = False
1676 1675
1677 1676 ### calculate phase
1678 1677 actionbyfile, diverge, renamedelete = calculateupdates(
1679 1678 repo,
1680 1679 wc,
1681 1680 p2,
1682 1681 pas,
1683 1682 branchmerge,
1684 1683 force,
1685 1684 mergeancestor,
1686 1685 followcopies,
1687 1686 matcher=matcher,
1688 1687 mergeforce=mergeforce,
1689 1688 )
1690 1689
1691 1690 if updatecheck == UPDATECHECK_NO_CONFLICT:
1692 1691 for f, (m, args, msg) in pycompat.iteritems(actionbyfile):
1693 1692 if m not in (
1694 1693 mergestatemod.ACTION_GET,
1695 1694 mergestatemod.ACTION_KEEP,
1696 1695 mergestatemod.ACTION_EXEC,
1697 1696 mergestatemod.ACTION_REMOVE,
1698 1697 mergestatemod.ACTION_PATH_CONFLICT_RESOLVE,
1699 1698 mergestatemod.ACTION_GET_OTHER_AND_STORE,
1700 1699 ):
1701 1700 msg = _(b"conflicting changes")
1702 1701 hint = _(b"commit or update --clean to discard changes")
1703 1702 raise error.Abort(msg, hint=hint)
1704 1703
1705 1704 # Prompt and create actions. Most of this is in the resolve phase
1706 1705 # already, but we can't handle .hgsubstate in filemerge or
1707 1706 # subrepoutil.submerge yet so we have to keep prompting for it.
1708 1707 if b'.hgsubstate' in actionbyfile:
1709 1708 f = b'.hgsubstate'
1710 1709 m, args, msg = actionbyfile[f]
1711 1710 prompts = filemerge.partextras(labels)
1712 1711 prompts[b'f'] = f
1713 1712 if m == mergestatemod.ACTION_CHANGED_DELETED:
1714 1713 if repo.ui.promptchoice(
1715 1714 _(
1716 1715 b"local%(l)s changed %(f)s which other%(o)s deleted\n"
1717 1716 b"use (c)hanged version or (d)elete?"
1718 1717 b"$$ &Changed $$ &Delete"
1719 1718 )
1720 1719 % prompts,
1721 1720 0,
1722 1721 ):
1723 1722 actionbyfile[f] = (
1724 1723 mergestatemod.ACTION_REMOVE,
1725 1724 None,
1726 1725 b'prompt delete',
1727 1726 )
1728 1727 elif f in p1:
1729 1728 actionbyfile[f] = (
1730 1729 mergestatemod.ACTION_ADD_MODIFIED,
1731 1730 None,
1732 1731 b'prompt keep',
1733 1732 )
1734 1733 else:
1735 1734 actionbyfile[f] = (
1736 1735 mergestatemod.ACTION_ADD,
1737 1736 None,
1738 1737 b'prompt keep',
1739 1738 )
1740 1739 elif m == mergestatemod.ACTION_DELETED_CHANGED:
1741 1740 f1, f2, fa, move, anc = args
1742 1741 flags = p2[f2].flags()
1743 1742 if (
1744 1743 repo.ui.promptchoice(
1745 1744 _(
1746 1745 b"other%(o)s changed %(f)s which local%(l)s deleted\n"
1747 1746 b"use (c)hanged version or leave (d)eleted?"
1748 1747 b"$$ &Changed $$ &Deleted"
1749 1748 )
1750 1749 % prompts,
1751 1750 0,
1752 1751 )
1753 1752 == 0
1754 1753 ):
1755 1754 actionbyfile[f] = (
1756 1755 mergestatemod.ACTION_GET,
1757 1756 (flags, False),
1758 1757 b'prompt recreating',
1759 1758 )
1760 1759 else:
1761 1760 del actionbyfile[f]
1762 1761
1763 1762 # Convert to dictionary-of-lists format
1764 1763 actions = emptyactions()
1765 1764 for f, (m, args, msg) in pycompat.iteritems(actionbyfile):
1766 1765 if m not in actions:
1767 1766 actions[m] = []
1768 1767 actions[m].append((f, args, msg))
1769 1768
1770 1769 # ACTION_GET_OTHER_AND_STORE is a mergestatemod.ACTION_GET + store in mergestate
1771 1770 for e in actions[mergestatemod.ACTION_GET_OTHER_AND_STORE]:
1772 1771 actions[mergestatemod.ACTION_GET].append(e)
1773 1772
1774 1773 if not util.fscasesensitive(repo.path):
1775 1774 # check collision between files only in p2 for clean update
1776 1775 if not branchmerge and (
1777 1776 force or not wc.dirty(missing=True, branch=False)
1778 1777 ):
1779 1778 _checkcollision(repo, p2.manifest(), None)
1780 1779 else:
1781 1780 _checkcollision(repo, wc.manifest(), actions)
1782 1781
1783 1782 # divergent renames
1784 1783 for f, fl in sorted(pycompat.iteritems(diverge)):
1785 1784 repo.ui.warn(
1786 1785 _(
1787 1786 b"note: possible conflict - %s was renamed "
1788 1787 b"multiple times to:\n"
1789 1788 )
1790 1789 % f
1791 1790 )
1792 1791 for nf in sorted(fl):
1793 1792 repo.ui.warn(b" %s\n" % nf)
1794 1793
1795 1794 # rename and delete
1796 1795 for f, fl in sorted(pycompat.iteritems(renamedelete)):
1797 1796 repo.ui.warn(
1798 1797 _(
1799 1798 b"note: possible conflict - %s was deleted "
1800 1799 b"and renamed to:\n"
1801 1800 )
1802 1801 % f
1803 1802 )
1804 1803 for nf in sorted(fl):
1805 1804 repo.ui.warn(b" %s\n" % nf)
1806 1805
1807 1806 ### apply phase
1808 1807 if not branchmerge: # just jump to the new rev
1809 1808 fp1, fp2, xp1, xp2 = fp2, nullid, xp2, b''
1810 1809 # If we're doing a partial update, we need to skip updating
1811 1810 # the dirstate.
1812 1811 always = matcher is None or matcher.always()
1813 1812 updatedirstate = updatedirstate and always and not wc.isinmemory()
1814 1813 if updatedirstate:
1815 1814 repo.hook(b'preupdate', throw=True, parent1=xp1, parent2=xp2)
1816 1815 # note that we're in the middle of an update
1817 1816 repo.vfs.write(b'updatestate', p2.hex())
1818 1817
1819 1818 # Advertise fsmonitor when its presence could be useful.
1820 1819 #
1821 1820 # We only advertise when performing an update from an empty working
1822 1821 # directory. This typically only occurs during initial clone.
1823 1822 #
1824 1823 # We give users a mechanism to disable the warning in case it is
1825 1824 # annoying.
1826 1825 #
1827 1826 # We only allow on Linux and MacOS because that's where fsmonitor is
1828 1827 # considered stable.
1829 1828 fsmonitorwarning = repo.ui.configbool(b'fsmonitor', b'warn_when_unused')
1830 1829 fsmonitorthreshold = repo.ui.configint(
1831 1830 b'fsmonitor', b'warn_update_file_count'
1832 1831 )
1833 1832 try:
1834 1833 # avoid cycle: extensions -> cmdutil -> merge
1835 1834 from . import extensions
1836 1835
1837 1836 extensions.find(b'fsmonitor')
1838 1837 fsmonitorenabled = repo.ui.config(b'fsmonitor', b'mode') != b'off'
1839 1838 # We intentionally don't look at whether fsmonitor has disabled
1840 1839 # itself because a) fsmonitor may have already printed a warning
1841 1840 # b) we only care about the config state here.
1842 1841 except KeyError:
1843 1842 fsmonitorenabled = False
1844 1843
1845 1844 if (
1846 1845 fsmonitorwarning
1847 1846 and not fsmonitorenabled
1848 1847 and p1.node() == nullid
1849 1848 and len(actions[mergestatemod.ACTION_GET]) >= fsmonitorthreshold
1850 1849 and pycompat.sysplatform.startswith((b'linux', b'darwin'))
1851 1850 ):
1852 1851 repo.ui.warn(
1853 1852 _(
1854 1853 b'(warning: large working directory being used without '
1855 1854 b'fsmonitor enabled; enable fsmonitor to improve performance; '
1856 1855 b'see "hg help -e fsmonitor")\n'
1857 1856 )
1858 1857 )
1859 1858
1860 1859 wantfiledata = updatedirstate and not branchmerge
1861 1860 stats, getfiledata = applyupdates(
1862 1861 repo, actions, wc, p2, overwrite, wantfiledata, labels=labels
1863 1862 )
1864 1863
1865 1864 if updatedirstate:
1866 1865 with repo.dirstate.parentchange():
1867 1866 repo.setparents(fp1, fp2)
1868 1867 mergestatemod.recordupdates(
1869 1868 repo, actions, branchmerge, getfiledata
1870 1869 )
1871 1870 # update completed, clear state
1872 1871 util.unlink(repo.vfs.join(b'updatestate'))
1873 1872
1874 1873 if not branchmerge:
1875 1874 repo.dirstate.setbranch(p2.branch())
1876 1875
1877 1876 # If we're updating to a location, clean up any stale temporary includes
1878 1877 # (ex: this happens during hg rebase --abort).
1879 1878 if not branchmerge:
1880 1879 sparse.prunetemporaryincludes(repo)
1881 1880
1882 1881 if updatedirstate:
1883 1882 repo.hook(
1884 1883 b'update', parent1=xp1, parent2=xp2, error=stats.unresolvedcount
1885 1884 )
1886 1885 return stats
1887 1886
1888 1887
1889 1888 def merge(ctx, labels=None, force=False, wc=None):
1890 1889 """Merge another topological branch into the working copy.
1891 1890
1892 1891 force = whether the merge was run with 'merge --force' (deprecated)
1893 1892 """
1894 1893
1895 1894 return update(
1896 1895 ctx.repo(),
1897 1896 ctx.rev(),
1898 1897 labels=labels,
1899 1898 branchmerge=True,
1900 1899 force=force,
1901 1900 mergeforce=force,
1902 1901 wc=wc,
1903 1902 )
1904 1903
1905 1904
1906 1905 def clean_update(ctx, wc=None):
1907 1906 """Do a clean update to the given commit.
1908 1907
1909 1908 This involves updating to the commit and discarding any changes in the
1910 1909 working copy.
1911 1910 """
1912 1911 return update(ctx.repo(), ctx.rev(), branchmerge=False, force=True, wc=wc)
1913 1912
1914 1913
1915 1914 def revert_to(ctx, matcher=None, wc=None):
1916 1915 """Revert the working copy to the given commit.
1917 1916
1918 1917 The working copy will keep its current parent(s) but its content will
1919 1918 be the same as in the given commit.
1920 1919 """
1921 1920
1922 1921 return update(
1923 1922 ctx.repo(),
1924 1923 ctx.rev(),
1925 1924 branchmerge=False,
1926 1925 force=True,
1927 1926 updatedirstate=False,
1928 1927 matcher=matcher,
1929 1928 wc=wc,
1930 1929 )
1931 1930
1932 1931
1933 1932 def graft(
1934 1933 repo,
1935 1934 ctx,
1936 1935 base=None,
1937 1936 labels=None,
1938 1937 keepparent=False,
1939 1938 keepconflictparent=False,
1940 1939 wctx=None,
1941 1940 ):
1942 1941 """Do a graft-like merge.
1943 1942
1944 1943 This is a merge where the merge ancestor is chosen such that one
1945 1944 or more changesets are grafted onto the current changeset. In
1946 1945 addition to the merge, this fixes up the dirstate to include only
1947 1946 a single parent (if keepparent is False) and tries to duplicate any
1948 1947 renames/copies appropriately.
1949 1948
1950 1949 ctx - changeset to rebase
1951 1950 base - merge base, or ctx.p1() if not specified
1952 1951 labels - merge labels eg ['local', 'graft']
1953 1952 keepparent - keep second parent if any
1954 1953 keepconflictparent - if unresolved, keep parent used for the merge
1955 1954
1956 1955 """
1957 1956 # If we're grafting a descendant onto an ancestor, be sure to pass
1958 1957 # mergeancestor=True to update. This does two things: 1) allows the merge if
1959 1958 # the destination is the same as the parent of the ctx (so we can use graft
1960 1959 # to copy commits), and 2) informs update that the incoming changes are
1961 1960 # newer than the destination so it doesn't prompt about "remote changed foo
1962 1961 # which local deleted".
1963 1962 # We also pass mergeancestor=True when base is the same revision as p1. 2)
1964 1963 # doesn't matter as there can't possibly be conflicts, but 1) is necessary.
1965 1964 wctx = wctx or repo[None]
1966 1965 pctx = wctx.p1()
1967 1966 base = base or ctx.p1()
1968 1967 mergeancestor = (
1969 1968 repo.changelog.isancestor(pctx.node(), ctx.node())
1970 1969 or pctx.rev() == base.rev()
1971 1970 )
1972 1971
1973 1972 stats = update(
1974 1973 repo,
1975 1974 ctx.node(),
1976 1975 True,
1977 1976 True,
1978 1977 base.node(),
1979 1978 mergeancestor=mergeancestor,
1980 1979 labels=labels,
1981 1980 wc=wctx,
1982 1981 )
1983 1982
1984 1983 if keepconflictparent and stats.unresolvedcount:
1985 1984 pother = ctx.node()
1986 1985 else:
1987 1986 pother = nullid
1988 1987 parents = ctx.parents()
1989 1988 if keepparent and len(parents) == 2 and base in parents:
1990 1989 parents.remove(base)
1991 1990 pother = parents[0].node()
1992 1991 # Never set both parents equal to each other
1993 1992 if pother == pctx.node():
1994 1993 pother = nullid
1995 1994
1996 1995 if wctx.isinmemory():
1997 1996 wctx.setparents(pctx.node(), pother)
1998 1997 # fix up dirstate for copies and renames
1999 1998 copies.graftcopies(wctx, ctx, base)
2000 1999 else:
2001 2000 with repo.dirstate.parentchange():
2002 2001 repo.setparents(pctx.node(), pother)
2003 2002 repo.dirstate.write(repo.currenttransaction())
2004 2003 # fix up dirstate for copies and renames
2005 2004 copies.graftcopies(wctx, ctx, base)
2006 2005 return stats
2007 2006
2008 2007
2009 2008 def purge(
2010 2009 repo,
2011 2010 matcher,
2012 2011 unknown=True,
2013 2012 ignored=False,
2014 2013 removeemptydirs=True,
2015 2014 removefiles=True,
2016 2015 abortonerror=False,
2017 2016 noop=False,
2018 2017 ):
2019 2018 """Purge the working directory of untracked files.
2020 2019
2021 2020 ``matcher`` is a matcher configured to scan the working directory -
2022 2021 potentially a subset.
2023 2022
2024 2023 ``unknown`` controls whether unknown files should be purged.
2025 2024
2026 2025 ``ignored`` controls whether ignored files should be purged.
2027 2026
2028 2027 ``removeemptydirs`` controls whether empty directories should be removed.
2029 2028
2030 2029 ``removefiles`` controls whether files are removed.
2031 2030
2032 2031 ``abortonerror`` causes an exception to be raised if an error occurs
2033 2032 deleting a file or directory.
2034 2033
2035 2034 ``noop`` controls whether to actually remove files. If not defined, actions
2036 2035 will be taken.
2037 2036
2038 2037 Returns an iterable of relative paths in the working directory that were
2039 2038 or would be removed.
2040 2039 """
2041 2040
2042 2041 def remove(removefn, path):
2043 2042 try:
2044 2043 removefn(path)
2045 2044 except OSError:
2046 2045 m = _(b'%s cannot be removed') % path
2047 2046 if abortonerror:
2048 2047 raise error.Abort(m)
2049 2048 else:
2050 2049 repo.ui.warn(_(b'warning: %s\n') % m)
2051 2050
2052 2051 # There's no API to copy a matcher. So mutate the passed matcher and
2053 2052 # restore it when we're done.
2054 2053 oldtraversedir = matcher.traversedir
2055 2054
2056 2055 res = []
2057 2056
2058 2057 try:
2059 2058 if removeemptydirs:
2060 2059 directories = []
2061 2060 matcher.traversedir = directories.append
2062 2061
2063 2062 status = repo.status(match=matcher, ignored=ignored, unknown=unknown)
2064 2063
2065 2064 if removefiles:
2066 2065 for f in sorted(status.unknown + status.ignored):
2067 2066 if not noop:
2068 2067 repo.ui.note(_(b'removing file %s\n') % f)
2069 2068 remove(repo.wvfs.unlink, f)
2070 2069 res.append(f)
2071 2070
2072 2071 if removeemptydirs:
2073 2072 for f in sorted(directories, reverse=True):
2074 2073 if matcher(f) and not repo.wvfs.listdir(f):
2075 2074 if not noop:
2076 2075 repo.ui.note(_(b'removing directory %s\n') % f)
2077 2076 remove(repo.wvfs.rmdir, f)
2078 2077 res.append(f)
2079 2078
2080 2079 return res
2081 2080
2082 2081 finally:
2083 2082 matcher.traversedir = oldtraversedir
@@ -1,487 +1,454 b''
1 1 from __future__ import absolute_import
2 2
3 3 import binascii
4 4 import itertools
5 5 import silenttestrunner
6 6 import unittest
7 7 import zlib
8 8
9 9 from mercurial import (
10 10 manifest as manifestmod,
11 11 match as matchmod,
12 12 util,
13 13 )
14 14
15 15 EMTPY_MANIFEST = b''
16 16
17 17 HASH_1 = b'1' * 40
18 18 BIN_HASH_1 = binascii.unhexlify(HASH_1)
19 19 HASH_2 = b'f' * 40
20 20 BIN_HASH_2 = binascii.unhexlify(HASH_2)
21 21 HASH_3 = b'1234567890abcdef0987654321deadbeef0fcafe'
22 22 BIN_HASH_3 = binascii.unhexlify(HASH_3)
23 23 A_SHORT_MANIFEST = (
24 24 b'bar/baz/qux.py\0%(hash2)s%(flag2)s\n' b'foo\0%(hash1)s%(flag1)s\n'
25 25 ) % {b'hash1': HASH_1, b'flag1': b'', b'hash2': HASH_2, b'flag2': b'l',}
26 26
27 27 A_DEEPER_MANIFEST = (
28 28 b'a/b/c/bar.py\0%(hash3)s%(flag1)s\n'
29 29 b'a/b/c/bar.txt\0%(hash1)s%(flag1)s\n'
30 30 b'a/b/c/foo.py\0%(hash3)s%(flag1)s\n'
31 31 b'a/b/c/foo.txt\0%(hash2)s%(flag2)s\n'
32 32 b'a/b/d/baz.py\0%(hash3)s%(flag1)s\n'
33 33 b'a/b/d/qux.py\0%(hash1)s%(flag2)s\n'
34 34 b'a/b/d/ten.txt\0%(hash3)s%(flag2)s\n'
35 35 b'a/b/dog.py\0%(hash3)s%(flag1)s\n'
36 36 b'a/b/fish.py\0%(hash2)s%(flag1)s\n'
37 37 b'a/c/london.py\0%(hash3)s%(flag2)s\n'
38 38 b'a/c/paper.txt\0%(hash2)s%(flag2)s\n'
39 39 b'a/c/paris.py\0%(hash2)s%(flag1)s\n'
40 40 b'a/d/apple.py\0%(hash3)s%(flag1)s\n'
41 41 b'a/d/pizza.py\0%(hash3)s%(flag2)s\n'
42 42 b'a/green.py\0%(hash1)s%(flag2)s\n'
43 43 b'a/purple.py\0%(hash2)s%(flag1)s\n'
44 44 b'app.py\0%(hash3)s%(flag1)s\n'
45 45 b'readme.txt\0%(hash2)s%(flag1)s\n'
46 46 ) % {
47 47 b'hash1': HASH_1,
48 48 b'flag1': b'',
49 49 b'hash2': HASH_2,
50 50 b'flag2': b'l',
51 51 b'hash3': HASH_3,
52 52 }
53 53
54 54 HUGE_MANIFEST_ENTRIES = 200001
55 55
56 56 izip = getattr(itertools, 'izip', zip)
57 57 if 'xrange' not in globals():
58 58 xrange = range
59 59
60 60 A_HUGE_MANIFEST = b''.join(
61 61 sorted(
62 62 b'file%d\0%s%s\n' % (i, h, f)
63 63 for i, h, f in izip(
64 64 xrange(200001),
65 65 itertools.cycle((HASH_1, HASH_2)),
66 66 itertools.cycle((b'', b'x', b'l')),
67 67 )
68 68 )
69 69 )
70 70
71 71
72 72 class basemanifesttests(object):
73 73 def parsemanifest(self, text):
74 74 raise NotImplementedError('parsemanifest not implemented by test case')
75 75
76 76 def testEmptyManifest(self):
77 77 m = self.parsemanifest(EMTPY_MANIFEST)
78 78 self.assertEqual(0, len(m))
79 79 self.assertEqual([], list(m))
80 80
81 81 def testManifest(self):
82 82 m = self.parsemanifest(A_SHORT_MANIFEST)
83 83 self.assertEqual([b'bar/baz/qux.py', b'foo'], list(m))
84 84 self.assertEqual(BIN_HASH_2, m[b'bar/baz/qux.py'])
85 85 self.assertEqual(b'l', m.flags(b'bar/baz/qux.py'))
86 86 self.assertEqual(BIN_HASH_1, m[b'foo'])
87 87 self.assertEqual(b'', m.flags(b'foo'))
88 88 with self.assertRaises(KeyError):
89 89 m[b'wat']
90 90
91 91 def testManifestLongHashes(self):
92 92 m = self.parsemanifest(b'a\0' + b'f' * 64 + b'\n')
93 93 self.assertEqual(binascii.unhexlify(b'f' * 64), m[b'a'])
94 94
95 95 def testSetItem(self):
96 96 want = BIN_HASH_1
97 97
98 98 m = self.parsemanifest(EMTPY_MANIFEST)
99 99 m[b'a'] = want
100 100 self.assertIn(b'a', m)
101 101 self.assertEqual(want, m[b'a'])
102 102 self.assertEqual(b'a\0' + HASH_1 + b'\n', m.text())
103 103
104 104 m = self.parsemanifest(A_SHORT_MANIFEST)
105 105 m[b'a'] = want
106 106 self.assertEqual(want, m[b'a'])
107 107 self.assertEqual(b'a\0' + HASH_1 + b'\n' + A_SHORT_MANIFEST, m.text())
108 108
109 109 def testSetFlag(self):
110 110 want = b'x'
111 111
112 112 m = self.parsemanifest(EMTPY_MANIFEST)
113 113 # first add a file; a file-less flag makes no sense
114 114 m[b'a'] = BIN_HASH_1
115 115 m.setflag(b'a', want)
116 116 self.assertEqual(want, m.flags(b'a'))
117 117 self.assertEqual(b'a\0' + HASH_1 + want + b'\n', m.text())
118 118
119 119 m = self.parsemanifest(A_SHORT_MANIFEST)
120 120 # first add a file; a file-less flag makes no sense
121 121 m[b'a'] = BIN_HASH_1
122 122 m.setflag(b'a', want)
123 123 self.assertEqual(want, m.flags(b'a'))
124 124 self.assertEqual(
125 125 b'a\0' + HASH_1 + want + b'\n' + A_SHORT_MANIFEST, m.text()
126 126 )
127 127
128 128 def testCopy(self):
129 129 m = self.parsemanifest(A_SHORT_MANIFEST)
130 130 m[b'a'] = BIN_HASH_1
131 131 m2 = m.copy()
132 132 del m
133 133 del m2 # make sure we don't double free() anything
134 134
135 135 def testCompaction(self):
136 136 unhex = binascii.unhexlify
137 137 h1, h2 = unhex(HASH_1), unhex(HASH_2)
138 138 m = self.parsemanifest(A_SHORT_MANIFEST)
139 139 m[b'alpha'] = h1
140 140 m[b'beta'] = h2
141 141 del m[b'foo']
142 142 want = b'alpha\0%s\nbar/baz/qux.py\0%sl\nbeta\0%s\n' % (
143 143 HASH_1,
144 144 HASH_2,
145 145 HASH_2,
146 146 )
147 147 self.assertEqual(want, m.text())
148 148 self.assertEqual(3, len(m))
149 149 self.assertEqual([b'alpha', b'bar/baz/qux.py', b'beta'], list(m))
150 150 self.assertEqual(h1, m[b'alpha'])
151 151 self.assertEqual(h2, m[b'bar/baz/qux.py'])
152 152 self.assertEqual(h2, m[b'beta'])
153 153 self.assertEqual(b'', m.flags(b'alpha'))
154 154 self.assertEqual(b'l', m.flags(b'bar/baz/qux.py'))
155 155 self.assertEqual(b'', m.flags(b'beta'))
156 156 with self.assertRaises(KeyError):
157 157 m[b'foo']
158 158
159 def testSetGetNodeSuffix(self):
160 clean = self.parsemanifest(A_SHORT_MANIFEST)
161 m = self.parsemanifest(A_SHORT_MANIFEST)
162 h = m[b'foo']
163 f = m.flags(b'foo')
164 want = h + b'a'
165 # Merge code wants to set 21-byte fake hashes at times
166 m[b'foo'] = want
167 self.assertEqual(want, m[b'foo'])
168 self.assertEqual(
169 [(b'bar/baz/qux.py', BIN_HASH_2), (b'foo', BIN_HASH_1 + b'a')],
170 list(m.items()),
171 )
172 # Sometimes it even tries a 22-byte fake hash, but we can
173 # return 21 and it'll work out
174 m[b'foo'] = want + b'+'
175 self.assertEqual(want, m[b'foo'])
176 # make sure the suffix survives a copy
177 match = matchmod.match(util.localpath(b'/repo'), b'', [b're:foo'])
178 m2 = m._matches(match)
179 self.assertEqual(want, m2[b'foo'])
180 self.assertEqual(1, len(m2))
181 m2 = m.copy()
182 self.assertEqual(want, m2[b'foo'])
183 # suffix with iteration
184 self.assertEqual(
185 [(b'bar/baz/qux.py', BIN_HASH_2), (b'foo', want)], list(m.items())
186 )
187
188 # shows up in diff
189 self.assertEqual({b'foo': ((want, f), (h, b''))}, m.diff(clean))
190 self.assertEqual({b'foo': ((h, b''), (want, f))}, clean.diff(m))
191
192 159 def testMatchException(self):
193 160 m = self.parsemanifest(A_SHORT_MANIFEST)
194 161 match = matchmod.match(util.localpath(b'/repo'), b'', [b're:.*'])
195 162
196 163 def filt(path):
197 164 if path == b'foo':
198 165 assert False
199 166 return True
200 167
201 168 match.matchfn = filt
202 169 with self.assertRaises(AssertionError):
203 170 m._matches(match)
204 171
205 172 def testRemoveItem(self):
206 173 m = self.parsemanifest(A_SHORT_MANIFEST)
207 174 del m[b'foo']
208 175 with self.assertRaises(KeyError):
209 176 m[b'foo']
210 177 self.assertEqual(1, len(m))
211 178 self.assertEqual(1, len(list(m)))
212 179 # now restore and make sure everything works right
213 180 m[b'foo'] = b'a' * 20
214 181 self.assertEqual(2, len(m))
215 182 self.assertEqual(2, len(list(m)))
216 183
217 184 def testManifestDiff(self):
218 185 MISSING = (None, b'')
219 186 addl = b'z-only-in-left\0' + HASH_1 + b'\n'
220 187 addr = b'z-only-in-right\0' + HASH_2 + b'x\n'
221 188 left = self.parsemanifest(
222 189 A_SHORT_MANIFEST.replace(HASH_1, HASH_3 + b'x') + addl
223 190 )
224 191 right = self.parsemanifest(A_SHORT_MANIFEST + addr)
225 192 want = {
226 193 b'foo': ((BIN_HASH_3, b'x'), (BIN_HASH_1, b'')),
227 194 b'z-only-in-left': ((BIN_HASH_1, b''), MISSING),
228 195 b'z-only-in-right': (MISSING, (BIN_HASH_2, b'x')),
229 196 }
230 197 self.assertEqual(want, left.diff(right))
231 198
232 199 want = {
233 200 b'bar/baz/qux.py': (MISSING, (BIN_HASH_2, b'l')),
234 201 b'foo': (MISSING, (BIN_HASH_3, b'x')),
235 202 b'z-only-in-left': (MISSING, (BIN_HASH_1, b'')),
236 203 }
237 204 self.assertEqual(want, self.parsemanifest(EMTPY_MANIFEST).diff(left))
238 205
239 206 want = {
240 207 b'bar/baz/qux.py': ((BIN_HASH_2, b'l'), MISSING),
241 208 b'foo': ((BIN_HASH_3, b'x'), MISSING),
242 209 b'z-only-in-left': ((BIN_HASH_1, b''), MISSING),
243 210 }
244 211 self.assertEqual(want, left.diff(self.parsemanifest(EMTPY_MANIFEST)))
245 212 copy = right.copy()
246 213 del copy[b'z-only-in-right']
247 214 del right[b'foo']
248 215 want = {
249 216 b'foo': (MISSING, (BIN_HASH_1, b'')),
250 217 b'z-only-in-right': ((BIN_HASH_2, b'x'), MISSING),
251 218 }
252 219 self.assertEqual(want, right.diff(copy))
253 220
254 221 short = self.parsemanifest(A_SHORT_MANIFEST)
255 222 pruned = short.copy()
256 223 del pruned[b'foo']
257 224 want = {
258 225 b'foo': ((BIN_HASH_1, b''), MISSING),
259 226 }
260 227 self.assertEqual(want, short.diff(pruned))
261 228 want = {
262 229 b'foo': (MISSING, (BIN_HASH_1, b'')),
263 230 }
264 231 self.assertEqual(want, pruned.diff(short))
265 232 want = {
266 233 b'bar/baz/qux.py': None,
267 234 b'foo': (MISSING, (BIN_HASH_1, b'')),
268 235 }
269 236 self.assertEqual(want, pruned.diff(short, clean=True))
270 237
271 238 def testReversedLines(self):
272 239 backwards = b''.join(
273 240 l + b'\n' for l in reversed(A_SHORT_MANIFEST.split(b'\n')) if l
274 241 )
275 242 try:
276 243 self.parsemanifest(backwards)
277 244 self.fail('Should have raised ValueError')
278 245 except ValueError as v:
279 246 self.assertIn('Manifest lines not in sorted order.', str(v))
280 247
281 248 def testNoTerminalNewline(self):
282 249 try:
283 250 self.parsemanifest(A_SHORT_MANIFEST + b'wat')
284 251 self.fail('Should have raised ValueError')
285 252 except ValueError as v:
286 253 self.assertIn('Manifest did not end in a newline.', str(v))
287 254
288 255 def testNoNewLineAtAll(self):
289 256 try:
290 257 self.parsemanifest(b'wat')
291 258 self.fail('Should have raised ValueError')
292 259 except ValueError as v:
293 260 self.assertIn('Manifest did not end in a newline.', str(v))
294 261
295 262 def testHugeManifest(self):
296 263 m = self.parsemanifest(A_HUGE_MANIFEST)
297 264 self.assertEqual(HUGE_MANIFEST_ENTRIES, len(m))
298 265 self.assertEqual(len(m), len(list(m)))
299 266
300 267 def testMatchesMetadata(self):
301 268 '''Tests matches() for a few specific files to make sure that both
302 269 the set of files as well as their flags and nodeids are correct in
303 270 the resulting manifest.'''
304 271 m = self.parsemanifest(A_HUGE_MANIFEST)
305 272
306 273 match = matchmod.exact([b'file1', b'file200', b'file300'])
307 274 m2 = m._matches(match)
308 275
309 276 w = (b'file1\0%sx\n' b'file200\0%sl\n' b'file300\0%s\n') % (
310 277 HASH_2,
311 278 HASH_1,
312 279 HASH_1,
313 280 )
314 281 self.assertEqual(w, m2.text())
315 282
316 283 def testMatchesNonexistentFile(self):
317 284 '''Tests matches() for a small set of specific files, including one
318 285 nonexistent file to make sure in only matches against existing files.
319 286 '''
320 287 m = self.parsemanifest(A_DEEPER_MANIFEST)
321 288
322 289 match = matchmod.exact(
323 290 [b'a/b/c/bar.txt', b'a/b/d/qux.py', b'readme.txt', b'nonexistent']
324 291 )
325 292 m2 = m._matches(match)
326 293
327 294 self.assertEqual(
328 295 [b'a/b/c/bar.txt', b'a/b/d/qux.py', b'readme.txt'], m2.keys()
329 296 )
330 297
331 298 def testMatchesNonexistentDirectory(self):
332 299 '''Tests matches() for a relpath match on a directory that doesn't
333 300 actually exist.'''
334 301 m = self.parsemanifest(A_DEEPER_MANIFEST)
335 302
336 303 match = matchmod.match(
337 304 util.localpath(b'/repo'), b'', [b'a/f'], default=b'relpath'
338 305 )
339 306 m2 = m._matches(match)
340 307
341 308 self.assertEqual([], m2.keys())
342 309
343 310 def testMatchesExactLarge(self):
344 311 '''Tests matches() for files matching a large list of exact files.
345 312 '''
346 313 m = self.parsemanifest(A_HUGE_MANIFEST)
347 314
348 315 flist = m.keys()[80:300]
349 316 match = matchmod.exact(flist)
350 317 m2 = m._matches(match)
351 318
352 319 self.assertEqual(flist, m2.keys())
353 320
354 321 def testMatchesFull(self):
355 322 '''Tests matches() for what should be a full match.'''
356 323 m = self.parsemanifest(A_DEEPER_MANIFEST)
357 324
358 325 match = matchmod.match(util.localpath(b'/repo'), b'', [b''])
359 326 m2 = m._matches(match)
360 327
361 328 self.assertEqual(m.keys(), m2.keys())
362 329
363 330 def testMatchesDirectory(self):
364 331 '''Tests matches() on a relpath match on a directory, which should
365 332 match against all files within said directory.'''
366 333 m = self.parsemanifest(A_DEEPER_MANIFEST)
367 334
368 335 match = matchmod.match(
369 336 util.localpath(b'/repo'), b'', [b'a/b'], default=b'relpath'
370 337 )
371 338 m2 = m._matches(match)
372 339
373 340 self.assertEqual(
374 341 [
375 342 b'a/b/c/bar.py',
376 343 b'a/b/c/bar.txt',
377 344 b'a/b/c/foo.py',
378 345 b'a/b/c/foo.txt',
379 346 b'a/b/d/baz.py',
380 347 b'a/b/d/qux.py',
381 348 b'a/b/d/ten.txt',
382 349 b'a/b/dog.py',
383 350 b'a/b/fish.py',
384 351 ],
385 352 m2.keys(),
386 353 )
387 354
388 355 def testMatchesExactPath(self):
389 356 '''Tests matches() on an exact match on a directory, which should
390 357 result in an empty manifest because you can't perform an exact match
391 358 against a directory.'''
392 359 m = self.parsemanifest(A_DEEPER_MANIFEST)
393 360
394 361 match = matchmod.exact([b'a/b'])
395 362 m2 = m._matches(match)
396 363
397 364 self.assertEqual([], m2.keys())
398 365
399 366 def testMatchesCwd(self):
400 367 '''Tests matches() on a relpath match with the current directory ('.')
401 368 when not in the root directory.'''
402 369 m = self.parsemanifest(A_DEEPER_MANIFEST)
403 370
404 371 match = matchmod.match(
405 372 util.localpath(b'/repo'), b'a/b', [b'.'], default=b'relpath'
406 373 )
407 374 m2 = m._matches(match)
408 375
409 376 self.assertEqual(
410 377 [
411 378 b'a/b/c/bar.py',
412 379 b'a/b/c/bar.txt',
413 380 b'a/b/c/foo.py',
414 381 b'a/b/c/foo.txt',
415 382 b'a/b/d/baz.py',
416 383 b'a/b/d/qux.py',
417 384 b'a/b/d/ten.txt',
418 385 b'a/b/dog.py',
419 386 b'a/b/fish.py',
420 387 ],
421 388 m2.keys(),
422 389 )
423 390
424 391 def testMatchesWithPattern(self):
425 392 '''Tests matches() for files matching a pattern that reside
426 393 deeper than the specified directory.'''
427 394 m = self.parsemanifest(A_DEEPER_MANIFEST)
428 395
429 396 match = matchmod.match(util.localpath(b'/repo'), b'', [b'a/b/*/*.txt'])
430 397 m2 = m._matches(match)
431 398
432 399 self.assertEqual(
433 400 [b'a/b/c/bar.txt', b'a/b/c/foo.txt', b'a/b/d/ten.txt'], m2.keys()
434 401 )
435 402
436 403
class testmanifestdict(unittest.TestCase, basemanifesttests):
    # Runs the shared basemanifesttests suite against the flat
    # manifestdict implementation.

    def parsemanifest(self, text):
        """Build a flat manifestdict from raw manifest text."""
        return manifestmod.manifestdict(text)

    def testObviouslyBogusManifest(self):
        # This is a 163k manifest that came from oss-fuzz. It was a
        # timeout there, but when run normally it doesn't seem to
        # present any particular slowness.
        blob = zlib.decompress(
            b'x\x9c\xed\xce;\n\x83\x00\x10\x04\xd0\x8deNa\x93~\xf1\x03\xc9q\xf4'
            b'\x14\xeaU\xbdB\xda\xd4\xe6Cj\xc1FA\xde+\x86\xe9f\xa2\xfci\xbb\xfb'
            b'\xa3\xef\xea\xba\xca\x7fk\x86q\x9a\xc6\xc8\xcc&\xb3\xcf\xf8\xb8|#'
            b'\x8a9\x00\xd8\xe6v\xf4\x01N\xe1\n\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\xc0\x8aey\x1d}\x01\xd8\xe0\xb9\xf3\xde\x1b\xcf\x17'
            b'\xac\xbe'
        )
        # the parser must reject it cleanly rather than hang or crash
        with self.assertRaises(ValueError):
            self.parsemanifest(blob)
464 431
465 432
class testtreemanifest(unittest.TestCase, basemanifesttests):
    # Runs the shared basemanifesttests suite against the tree-structured
    # treemanifest implementation.

    def parsemanifest(self, text):
        """Build a treemanifest rooted at the empty directory."""
        return manifestmod.treemanifest(b'', text)

    def testWalkSubtrees(self):
        """walksubtrees() yields one subtree per directory, and a matcher
        restricts the walk to the selected portion of the tree."""
        tree = self.parsemanifest(A_DEEPER_MANIFEST)

        seen = sorted(s._dir for s in tree.walksubtrees())
        self.assertEqual(
            sorted(
                [b'', b'a/', b'a/c/', b'a/d/', b'a/b/', b'a/b/c/', b'a/b/d/']
            ),
            seen,
        )

        selector = matchmod.match(
            util.localpath(b'/repo'), b'', [b'path:a/b/']
        )
        seen = sorted(s._dir for s in tree.walksubtrees(matcher=selector))
        self.assertEqual(sorted([b'a/b/', b'a/b/c/', b'a/b/d/']), seen)
484 451
485 452
if __name__ == '__main__':
    # silenttestrunner discovers and runs this module's TestCase classes
    silenttestrunner.main(__name__)
General Comments 0
You need to be logged in to leave comments. Login now