##// END OF EJS Templates
manifest: rewrite filesnotin to not make superfluous manifest copies...
Augie Fackler -
r44768:bbecb6d8 default
parent child Browse files
Show More
@@ -1,2264 +1,2259
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from .pycompat import getattr
23 23 from . import (
24 24 encoding,
25 25 error,
26 match as matchmod,
26 27 mdiff,
27 28 pathutil,
28 29 policy,
29 30 pycompat,
30 31 revlog,
31 32 util,
32 33 )
33 34 from .interfaces import (
34 35 repository,
35 36 util as interfaceutil,
36 37 )
37 38
38 39 parsers = policy.importmod('parsers')
39 40 propertycache = util.propertycache
40 41
41 42 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
42 43 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
43 44
44 45
45 46 def _parse(data):
46 47 # This method does a little bit of excessive-looking
47 48 # precondition checking. This is so that the behavior of this
48 49 # class exactly matches its C counterpart to try and help
49 50 # prevent surprise breakage for anyone that develops against
50 51 # the pure version.
51 52 if data and data[-1:] != b'\n':
52 53 raise ValueError(b'Manifest did not end in a newline.')
53 54 prev = None
54 55 for l in data.splitlines():
55 56 if prev is not None and prev > l:
56 57 raise ValueError(b'Manifest lines not in sorted order.')
57 58 prev = l
58 59 f, n = l.split(b'\0')
59 60 if len(n) > 40:
60 61 yield f, bin(n[:40]), n[40:]
61 62 else:
62 63 yield f, bin(n), b''
63 64
64 65
65 66 def _text(it):
66 67 files = []
67 68 lines = []
68 69 for f, n, fl in it:
69 70 files.append(f)
70 71 # if this is changed to support newlines in filenames,
71 72 # be sure to check the templates/ dir again (especially *-raw.tmpl)
72 73 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
73 74
74 75 _checkforbidden(files)
75 76 return b''.join(lines)
76 77
77 78
78 79 class lazymanifestiter(object):
79 80 def __init__(self, lm):
80 81 self.pos = 0
81 82 self.lm = lm
82 83
83 84 def __iter__(self):
84 85 return self
85 86
86 87 def next(self):
87 88 try:
88 89 data, pos = self.lm._get(self.pos)
89 90 except IndexError:
90 91 raise StopIteration
91 92 if pos == -1:
92 93 self.pos += 1
93 94 return data[0]
94 95 self.pos += 1
95 96 zeropos = data.find(b'\x00', pos)
96 97 return data[pos:zeropos]
97 98
98 99 __next__ = next
99 100
100 101
101 102 class lazymanifestiterentries(object):
102 103 def __init__(self, lm):
103 104 self.lm = lm
104 105 self.pos = 0
105 106
106 107 def __iter__(self):
107 108 return self
108 109
109 110 def next(self):
110 111 try:
111 112 data, pos = self.lm._get(self.pos)
112 113 except IndexError:
113 114 raise StopIteration
114 115 if pos == -1:
115 116 self.pos += 1
116 117 return data
117 118 zeropos = data.find(b'\x00', pos)
118 119 hashval = unhexlify(data, self.lm.extrainfo[self.pos], zeropos + 1, 40)
119 120 flags = self.lm._getflags(data, self.pos, zeropos)
120 121 self.pos += 1
121 122 return (data[pos:zeropos], hashval, flags)
122 123
123 124 __next__ = next
124 125
125 126
126 127 def unhexlify(data, extra, pos, length):
127 128 s = bin(data[pos : pos + length])
128 129 if extra:
129 130 s += chr(extra & 0xFF)
130 131 return s
131 132
132 133
133 134 def _cmp(a, b):
134 135 return (a > b) - (a < b)
135 136
136 137
137 138 class _lazymanifest(object):
138 139 """A pure python manifest backed by a byte string. It is supplimented with
139 140 internal lists as it is modified, until it is compacted back to a pure byte
140 141 string.
141 142
142 143 ``data`` is the initial manifest data.
143 144
144 145 ``positions`` is a list of offsets, one per manifest entry. Positive
145 146 values are offsets into ``data``, negative values are offsets into the
146 147 ``extradata`` list. When an entry is removed, its entry is dropped from
147 148 ``positions``. The values are encoded such that when walking the list and
148 149 indexing into ``data`` or ``extradata`` as appropriate, the entries are
149 150 sorted by filename.
150 151
151 152 ``extradata`` is a list of (key, hash, flags) for entries that were added or
152 153 modified since the manifest was created or compacted.
153 154 """
154 155
155 156 def __init__(
156 157 self,
157 158 data,
158 159 positions=None,
159 160 extrainfo=None,
160 161 extradata=None,
161 162 hasremovals=False,
162 163 ):
163 164 if positions is None:
164 165 self.positions = self.findlines(data)
165 166 self.extrainfo = [0] * len(self.positions)
166 167 self.data = data
167 168 self.extradata = []
168 169 self.hasremovals = False
169 170 else:
170 171 self.positions = positions[:]
171 172 self.extrainfo = extrainfo[:]
172 173 self.extradata = extradata[:]
173 174 self.data = data
174 175 self.hasremovals = hasremovals
175 176
176 177 def findlines(self, data):
177 178 if not data:
178 179 return []
179 180 pos = data.find(b"\n")
180 181 if pos == -1 or data[-1:] != b'\n':
181 182 raise ValueError(b"Manifest did not end in a newline.")
182 183 positions = [0]
183 184 prev = data[: data.find(b'\x00')]
184 185 while pos < len(data) - 1 and pos != -1:
185 186 positions.append(pos + 1)
186 187 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
187 188 if nexts < prev:
188 189 raise ValueError(b"Manifest lines not in sorted order.")
189 190 prev = nexts
190 191 pos = data.find(b"\n", pos + 1)
191 192 return positions
192 193
193 194 def _get(self, index):
194 195 # get the position encoded in pos:
195 196 # positive number is an index in 'data'
196 197 # negative number is in extrapieces
197 198 pos = self.positions[index]
198 199 if pos >= 0:
199 200 return self.data, pos
200 201 return self.extradata[-pos - 1], -1
201 202
202 203 def _getkey(self, pos):
203 204 if pos >= 0:
204 205 return self.data[pos : self.data.find(b'\x00', pos + 1)]
205 206 return self.extradata[-pos - 1][0]
206 207
207 208 def bsearch(self, key):
208 209 first = 0
209 210 last = len(self.positions) - 1
210 211
211 212 while first <= last:
212 213 midpoint = (first + last) // 2
213 214 nextpos = self.positions[midpoint]
214 215 candidate = self._getkey(nextpos)
215 216 r = _cmp(key, candidate)
216 217 if r == 0:
217 218 return midpoint
218 219 else:
219 220 if r < 0:
220 221 last = midpoint - 1
221 222 else:
222 223 first = midpoint + 1
223 224 return -1
224 225
225 226 def bsearch2(self, key):
226 227 # same as the above, but will always return the position
227 228 # done for performance reasons
228 229 first = 0
229 230 last = len(self.positions) - 1
230 231
231 232 while first <= last:
232 233 midpoint = (first + last) // 2
233 234 nextpos = self.positions[midpoint]
234 235 candidate = self._getkey(nextpos)
235 236 r = _cmp(key, candidate)
236 237 if r == 0:
237 238 return (midpoint, True)
238 239 else:
239 240 if r < 0:
240 241 last = midpoint - 1
241 242 else:
242 243 first = midpoint + 1
243 244 return (first, False)
244 245
245 246 def __contains__(self, key):
246 247 return self.bsearch(key) != -1
247 248
248 249 def _getflags(self, data, needle, pos):
249 250 start = pos + 41
250 251 end = data.find(b"\n", start)
251 252 if end == -1:
252 253 end = len(data) - 1
253 254 if start == end:
254 255 return b''
255 256 return self.data[start:end]
256 257
257 258 def __getitem__(self, key):
258 259 if not isinstance(key, bytes):
259 260 raise TypeError(b"getitem: manifest keys must be a bytes.")
260 261 needle = self.bsearch(key)
261 262 if needle == -1:
262 263 raise KeyError
263 264 data, pos = self._get(needle)
264 265 if pos == -1:
265 266 return (data[1], data[2])
266 267 zeropos = data.find(b'\x00', pos)
267 268 assert 0 <= needle <= len(self.positions)
268 269 assert len(self.extrainfo) == len(self.positions)
269 270 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
270 271 flags = self._getflags(data, needle, zeropos)
271 272 return (hashval, flags)
272 273
273 274 def __delitem__(self, key):
274 275 needle, found = self.bsearch2(key)
275 276 if not found:
276 277 raise KeyError
277 278 cur = self.positions[needle]
278 279 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
279 280 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
280 281 if cur >= 0:
281 282 # This does NOT unsort the list as far as the search functions are
282 283 # concerned, as they only examine lines mapped by self.positions.
283 284 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
284 285 self.hasremovals = True
285 286
286 287 def __setitem__(self, key, value):
287 288 if not isinstance(key, bytes):
288 289 raise TypeError(b"setitem: manifest keys must be a byte string.")
289 290 if not isinstance(value, tuple) or len(value) != 2:
290 291 raise TypeError(
291 292 b"Manifest values must be a tuple of (node, flags)."
292 293 )
293 294 hashval = value[0]
294 295 if not isinstance(hashval, bytes) or not 20 <= len(hashval) <= 22:
295 296 raise TypeError(b"node must be a 20-byte byte string")
296 297 flags = value[1]
297 298 if len(hashval) == 22:
298 299 hashval = hashval[:-1]
299 300 if not isinstance(flags, bytes) or len(flags) > 1:
300 301 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
301 302 needle, found = self.bsearch2(key)
302 303 if found:
303 304 # put the item
304 305 pos = self.positions[needle]
305 306 if pos < 0:
306 307 self.extradata[-pos - 1] = (key, hashval, value[1])
307 308 else:
308 309 # just don't bother
309 310 self.extradata.append((key, hashval, value[1]))
310 311 self.positions[needle] = -len(self.extradata)
311 312 else:
312 313 # not found, put it in with extra positions
313 314 self.extradata.append((key, hashval, value[1]))
314 315 self.positions = (
315 316 self.positions[:needle]
316 317 + [-len(self.extradata)]
317 318 + self.positions[needle:]
318 319 )
319 320 self.extrainfo = (
320 321 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
321 322 )
322 323
323 324 def copy(self):
324 325 # XXX call _compact like in C?
325 326 return _lazymanifest(
326 327 self.data,
327 328 self.positions,
328 329 self.extrainfo,
329 330 self.extradata,
330 331 self.hasremovals,
331 332 )
332 333
333 334 def _compact(self):
334 335 # hopefully not called TOO often
335 336 if len(self.extradata) == 0 and not self.hasremovals:
336 337 return
337 338 l = []
338 339 i = 0
339 340 offset = 0
340 341 self.extrainfo = [0] * len(self.positions)
341 342 while i < len(self.positions):
342 343 if self.positions[i] >= 0:
343 344 cur = self.positions[i]
344 345 last_cut = cur
345 346
346 347 # Collect all contiguous entries in the buffer at the current
347 348 # offset, breaking out only for added/modified items held in
348 349 # extradata, or a deleted line prior to the next position.
349 350 while True:
350 351 self.positions[i] = offset
351 352 i += 1
352 353 if i == len(self.positions) or self.positions[i] < 0:
353 354 break
354 355
355 356 # A removed file has no positions[] entry, but does have an
356 357 # overwritten first byte. Break out and find the end of the
357 358 # current good entry/entries if there is a removed file
358 359 # before the next position.
359 360 if (
360 361 self.hasremovals
361 362 and self.data.find(b'\n\x00', cur, self.positions[i])
362 363 != -1
363 364 ):
364 365 break
365 366
366 367 offset += self.positions[i] - cur
367 368 cur = self.positions[i]
368 369 end_cut = self.data.find(b'\n', cur)
369 370 if end_cut != -1:
370 371 end_cut += 1
371 372 offset += end_cut - cur
372 373 l.append(self.data[last_cut:end_cut])
373 374 else:
374 375 while i < len(self.positions) and self.positions[i] < 0:
375 376 cur = self.positions[i]
376 377 t = self.extradata[-cur - 1]
377 378 l.append(self._pack(t))
378 379 self.positions[i] = offset
379 380 if len(t[1]) > 20:
380 381 self.extrainfo[i] = ord(t[1][21])
381 382 offset += len(l[-1])
382 383 i += 1
383 384 self.data = b''.join(l)
384 385 self.hasremovals = False
385 386 self.extradata = []
386 387
387 388 def _pack(self, d):
388 389 return d[0] + b'\x00' + hex(d[1][:20]) + d[2] + b'\n'
389 390
390 391 def text(self):
391 392 self._compact()
392 393 return self.data
393 394
394 395 def diff(self, m2, clean=False):
395 396 '''Finds changes between the current manifest and m2.'''
396 397 # XXX think whether efficiency matters here
397 398 diff = {}
398 399
399 400 for fn, e1, flags in self.iterentries():
400 401 if fn not in m2:
401 402 diff[fn] = (e1, flags), (None, b'')
402 403 else:
403 404 e2 = m2[fn]
404 405 if (e1, flags) != e2:
405 406 diff[fn] = (e1, flags), e2
406 407 elif clean:
407 408 diff[fn] = None
408 409
409 410 for fn, e2, flags in m2.iterentries():
410 411 if fn not in self:
411 412 diff[fn] = (None, b''), (e2, flags)
412 413
413 414 return diff
414 415
415 416 def iterentries(self):
416 417 return lazymanifestiterentries(self)
417 418
418 419 def iterkeys(self):
419 420 return lazymanifestiter(self)
420 421
421 422 def __iter__(self):
422 423 return lazymanifestiter(self)
423 424
424 425 def __len__(self):
425 426 return len(self.positions)
426 427
427 428 def filtercopy(self, filterfn):
428 429 # XXX should be optimized
429 430 c = _lazymanifest(b'')
430 431 for f, n, fl in self.iterentries():
431 432 if filterfn(f):
432 433 c[f] = n, fl
433 434 return c
434 435
435 436
436 437 try:
437 438 _lazymanifest = parsers.lazymanifest
438 439 except AttributeError:
439 440 pass
440 441
441 442
442 443 @interfaceutil.implementer(repository.imanifestdict)
443 444 class manifestdict(object):
444 445 def __init__(self, data=b''):
445 446 self._lm = _lazymanifest(data)
446 447
447 448 def __getitem__(self, key):
448 449 return self._lm[key][0]
449 450
450 451 def find(self, key):
451 452 return self._lm[key]
452 453
453 454 def __len__(self):
454 455 return len(self._lm)
455 456
456 457 def __nonzero__(self):
457 458 # nonzero is covered by the __len__ function, but implementing it here
458 459 # makes it easier for extensions to override.
459 460 return len(self._lm) != 0
460 461
461 462 __bool__ = __nonzero__
462 463
463 464 def __setitem__(self, key, node):
464 465 self._lm[key] = node, self.flags(key)
465 466
466 467 def __contains__(self, key):
467 468 if key is None:
468 469 return False
469 470 return key in self._lm
470 471
471 472 def __delitem__(self, key):
472 473 del self._lm[key]
473 474
474 475 def __iter__(self):
475 476 return self._lm.__iter__()
476 477
477 478 def iterkeys(self):
478 479 return self._lm.iterkeys()
479 480
480 481 def keys(self):
481 482 return list(self.iterkeys())
482 483
483 484 def filesnotin(self, m2, match=None):
484 485 '''Set of files in this manifest that are not in the other'''
485 if match:
486 m1 = self.matches(match)
487 m2 = m2.matches(match)
488 return m1.filesnotin(m2)
489 diff = self.diff(m2)
490 files = set(
491 filepath
492 for filepath, hashflags in pycompat.iteritems(diff)
493 if hashflags[1][0] is None
494 )
495 return files
486 if match is not None:
487 match = matchmod.badmatch(match, lambda path, msg: None)
488 sm2 = set(m2.walk(match))
489 return {f for f in self.walk(match) if f not in sm2}
490 return {f for f in self if f not in m2}
496 491
497 492 @propertycache
498 493 def _dirs(self):
499 494 return pathutil.dirs(self)
500 495
501 496 def dirs(self):
502 497 return self._dirs
503 498
504 499 def hasdir(self, dir):
505 500 return dir in self._dirs
506 501
507 502 def _filesfastpath(self, match):
508 503 '''Checks whether we can correctly and quickly iterate over matcher
509 504 files instead of over manifest files.'''
510 505 files = match.files()
511 506 return len(files) < 100 and (
512 507 match.isexact()
513 508 or (match.prefix() and all(fn in self for fn in files))
514 509 )
515 510
516 511 def walk(self, match):
517 512 '''Generates matching file names.
518 513
519 514 Equivalent to manifest.matches(match).iterkeys(), but without creating
520 515 an entirely new manifest.
521 516
522 517 It also reports nonexistent files by marking them bad with match.bad().
523 518 '''
524 519 if match.always():
525 520 for f in iter(self):
526 521 yield f
527 522 return
528 523
529 524 fset = set(match.files())
530 525
531 526 # avoid the entire walk if we're only looking for specific files
532 527 if self._filesfastpath(match):
533 528 for fn in sorted(fset):
534 529 if fn in self:
535 530 yield fn
536 531 return
537 532
538 533 for fn in self:
539 534 if fn in fset:
540 535 # specified pattern is the exact name
541 536 fset.remove(fn)
542 537 if match(fn):
543 538 yield fn
544 539
545 540 # for dirstate.walk, files=[''] means "walk the whole tree".
546 541 # follow that here, too
547 542 fset.discard(b'')
548 543
549 544 for fn in sorted(fset):
550 545 if not self.hasdir(fn):
551 546 match.bad(fn, None)
552 547
553 548 def matches(self, match):
554 549 '''generate a new manifest filtered by the match argument'''
555 550 if match.always():
556 551 return self.copy()
557 552
558 553 if self._filesfastpath(match):
559 554 m = manifestdict()
560 555 lm = self._lm
561 556 for fn in match.files():
562 557 if fn in lm:
563 558 m._lm[fn] = lm[fn]
564 559 return m
565 560
566 561 m = manifestdict()
567 562 m._lm = self._lm.filtercopy(match)
568 563 return m
569 564
570 565 def diff(self, m2, match=None, clean=False):
571 566 '''Finds changes between the current manifest and m2.
572 567
573 568 Args:
574 569 m2: the manifest to which this manifest should be compared.
575 570 clean: if true, include files unchanged between these manifests
576 571 with a None value in the returned dictionary.
577 572
578 573 The result is returned as a dict with filename as key and
579 574 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
580 575 nodeid in the current/other manifest and fl1/fl2 is the flag
581 576 in the current/other manifest. Where the file does not exist,
582 577 the nodeid will be None and the flags will be the empty
583 578 string.
584 579 '''
585 580 if match:
586 581 m1 = self.matches(match)
587 582 m2 = m2.matches(match)
588 583 return m1.diff(m2, clean=clean)
589 584 return self._lm.diff(m2._lm, clean)
590 585
591 586 def setflag(self, key, flag):
592 587 self._lm[key] = self[key], flag
593 588
594 589 def get(self, key, default=None):
595 590 try:
596 591 return self._lm[key][0]
597 592 except KeyError:
598 593 return default
599 594
600 595 def flags(self, key):
601 596 try:
602 597 return self._lm[key][1]
603 598 except KeyError:
604 599 return b''
605 600
606 601 def copy(self):
607 602 c = manifestdict()
608 603 c._lm = self._lm.copy()
609 604 return c
610 605
611 606 def items(self):
612 607 return (x[:2] for x in self._lm.iterentries())
613 608
614 609 def iteritems(self):
615 610 return (x[:2] for x in self._lm.iterentries())
616 611
617 612 def iterentries(self):
618 613 return self._lm.iterentries()
619 614
620 615 def text(self):
621 616 # most likely uses native version
622 617 return self._lm.text()
623 618
624 619 def fastdelta(self, base, changes):
625 620 """Given a base manifest text as a bytearray and a list of changes
626 621 relative to that text, compute a delta that can be used by revlog.
627 622 """
628 623 delta = []
629 624 dstart = None
630 625 dend = None
631 626 dline = [b""]
632 627 start = 0
633 628 # zero copy representation of base as a buffer
634 629 addbuf = util.buffer(base)
635 630
636 631 changes = list(changes)
637 632 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
638 633 # start with a readonly loop that finds the offset of
639 634 # each line and creates the deltas
640 635 for f, todelete in changes:
641 636 # bs will either be the index of the item or the insert point
642 637 start, end = _msearch(addbuf, f, start)
643 638 if not todelete:
644 639 h, fl = self._lm[f]
645 640 l = b"%s\0%s%s\n" % (f, hex(h), fl)
646 641 else:
647 642 if start == end:
648 643 # item we want to delete was not found, error out
649 644 raise AssertionError(
650 645 _(b"failed to remove %s from manifest") % f
651 646 )
652 647 l = b""
653 648 if dstart is not None and dstart <= start and dend >= start:
654 649 if dend < end:
655 650 dend = end
656 651 if l:
657 652 dline.append(l)
658 653 else:
659 654 if dstart is not None:
660 655 delta.append([dstart, dend, b"".join(dline)])
661 656 dstart = start
662 657 dend = end
663 658 dline = [l]
664 659
665 660 if dstart is not None:
666 661 delta.append([dstart, dend, b"".join(dline)])
667 662 # apply the delta to the base, and get a delta for addrevision
668 663 deltatext, arraytext = _addlistdelta(base, delta)
669 664 else:
670 665 # For large changes, it's much cheaper to just build the text and
671 666 # diff it.
672 667 arraytext = bytearray(self.text())
673 668 deltatext = mdiff.textdiff(
674 669 util.buffer(base), util.buffer(arraytext)
675 670 )
676 671
677 672 return arraytext, deltatext
678 673
679 674
680 675 def _msearch(m, s, lo=0, hi=None):
681 676 '''return a tuple (start, end) that says where to find s within m.
682 677
683 678 If the string is found m[start:end] are the line containing
684 679 that string. If start == end the string was not found and
685 680 they indicate the proper sorted insertion point.
686 681
687 682 m should be a buffer, a memoryview or a byte string.
688 683 s is a byte string'''
689 684
690 685 def advance(i, c):
691 686 while i < lenm and m[i : i + 1] != c:
692 687 i += 1
693 688 return i
694 689
695 690 if not s:
696 691 return (lo, lo)
697 692 lenm = len(m)
698 693 if not hi:
699 694 hi = lenm
700 695 while lo < hi:
701 696 mid = (lo + hi) // 2
702 697 start = mid
703 698 while start > 0 and m[start - 1 : start] != b'\n':
704 699 start -= 1
705 700 end = advance(start, b'\0')
706 701 if bytes(m[start:end]) < s:
707 702 # we know that after the null there are 40 bytes of sha1
708 703 # this translates to the bisect lo = mid + 1
709 704 lo = advance(end + 40, b'\n') + 1
710 705 else:
711 706 # this translates to the bisect hi = mid
712 707 hi = start
713 708 end = advance(lo, b'\0')
714 709 found = m[lo:end]
715 710 if s == found:
716 711 # we know that after the null there are 40 bytes of sha1
717 712 end = advance(end + 40, b'\n')
718 713 return (lo, end + 1)
719 714 else:
720 715 return (lo, lo)
721 716
722 717
723 718 def _checkforbidden(l):
724 719 """Check filenames for illegal characters."""
725 720 for f in l:
726 721 if b'\n' in f or b'\r' in f:
727 722 raise error.StorageError(
728 723 _(b"'\\n' and '\\r' disallowed in filenames: %r")
729 724 % pycompat.bytestr(f)
730 725 )
731 726
732 727
733 728 # apply the changes collected during the bisect loop to our addlist
734 729 # return a delta suitable for addrevision
735 730 def _addlistdelta(addlist, x):
736 731 # for large addlist arrays, building a new array is cheaper
737 732 # than repeatedly modifying the existing one
738 733 currentposition = 0
739 734 newaddlist = bytearray()
740 735
741 736 for start, end, content in x:
742 737 newaddlist += addlist[currentposition:start]
743 738 if content:
744 739 newaddlist += bytearray(content)
745 740
746 741 currentposition = end
747 742
748 743 newaddlist += addlist[currentposition:]
749 744
750 745 deltatext = b"".join(
751 746 struct.pack(b">lll", start, end, len(content)) + content
752 747 for start, end, content in x
753 748 )
754 749 return deltatext, newaddlist
755 750
756 751
757 752 def _splittopdir(f):
758 753 if b'/' in f:
759 754 dir, subpath = f.split(b'/', 1)
760 755 return dir + b'/', subpath
761 756 else:
762 757 return b'', f
763 758
764 759
765 760 _noop = lambda s: None
766 761
767 762
768 763 class treemanifest(object):
769 764 def __init__(self, dir=b'', text=b''):
770 765 self._dir = dir
771 766 self._node = nullid
772 767 self._loadfunc = _noop
773 768 self._copyfunc = _noop
774 769 self._dirty = False
775 770 self._dirs = {}
776 771 self._lazydirs = {}
777 772 # Using _lazymanifest here is a little slower than plain old dicts
778 773 self._files = {}
779 774 self._flags = {}
780 775 if text:
781 776
782 777 def readsubtree(subdir, subm):
783 778 raise AssertionError(
784 779 b'treemanifest constructor only accepts flat manifests'
785 780 )
786 781
787 782 self.parse(text, readsubtree)
788 783 self._dirty = True # Mark flat manifest dirty after parsing
789 784
790 785 def _subpath(self, path):
791 786 return self._dir + path
792 787
793 788 def _loadalllazy(self):
794 789 selfdirs = self._dirs
795 790 for d, (path, node, readsubtree, docopy) in pycompat.iteritems(
796 791 self._lazydirs
797 792 ):
798 793 if docopy:
799 794 selfdirs[d] = readsubtree(path, node).copy()
800 795 else:
801 796 selfdirs[d] = readsubtree(path, node)
802 797 self._lazydirs = {}
803 798
804 799 def _loadlazy(self, d):
805 800 v = self._lazydirs.get(d)
806 801 if v:
807 802 path, node, readsubtree, docopy = v
808 803 if docopy:
809 804 self._dirs[d] = readsubtree(path, node).copy()
810 805 else:
811 806 self._dirs[d] = readsubtree(path, node)
812 807 del self._lazydirs[d]
813 808
814 809 def _loadchildrensetlazy(self, visit):
815 810 if not visit:
816 811 return None
817 812 if visit == b'all' or visit == b'this':
818 813 self._loadalllazy()
819 814 return None
820 815
821 816 loadlazy = self._loadlazy
822 817 for k in visit:
823 818 loadlazy(k + b'/')
824 819 return visit
825 820
826 821 def _loaddifflazy(self, t1, t2):
827 822 """load items in t1 and t2 if they're needed for diffing.
828 823
829 824 The criteria currently is:
830 825 - if it's not present in _lazydirs in either t1 or t2, load it in the
831 826 other (it may already be loaded or it may not exist, doesn't matter)
832 827 - if it's present in _lazydirs in both, compare the nodeid; if it
833 828 differs, load it in both
834 829 """
835 830 toloadlazy = []
836 831 for d, v1 in pycompat.iteritems(t1._lazydirs):
837 832 v2 = t2._lazydirs.get(d)
838 833 if not v2 or v2[1] != v1[1]:
839 834 toloadlazy.append(d)
840 835 for d, v1 in pycompat.iteritems(t2._lazydirs):
841 836 if d not in t1._lazydirs:
842 837 toloadlazy.append(d)
843 838
844 839 for d in toloadlazy:
845 840 t1._loadlazy(d)
846 841 t2._loadlazy(d)
847 842
848 843 def __len__(self):
849 844 self._load()
850 845 size = len(self._files)
851 846 self._loadalllazy()
852 847 for m in self._dirs.values():
853 848 size += m.__len__()
854 849 return size
855 850
856 851 def __nonzero__(self):
857 852 # Faster than "__len() != 0" since it avoids loading sub-manifests
858 853 return not self._isempty()
859 854
860 855 __bool__ = __nonzero__
861 856
862 857 def _isempty(self):
863 858 self._load() # for consistency; already loaded by all callers
864 859 # See if we can skip loading everything.
865 860 if self._files or (
866 861 self._dirs and any(not m._isempty() for m in self._dirs.values())
867 862 ):
868 863 return False
869 864 self._loadalllazy()
870 865 return not self._dirs or all(m._isempty() for m in self._dirs.values())
871 866
872 867 @encoding.strmethod
873 868 def __repr__(self):
874 869 return (
875 870 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
876 871 % (
877 872 self._dir,
878 873 hex(self._node),
879 874 bool(self._loadfunc is _noop),
880 875 self._dirty,
881 876 id(self),
882 877 )
883 878 )
884 879
885 880 def dir(self):
886 881 '''The directory that this tree manifest represents, including a
887 882 trailing '/'. Empty string for the repo root directory.'''
888 883 return self._dir
889 884
890 885 def node(self):
891 886 '''This node of this instance. nullid for unsaved instances. Should
892 887 be updated when the instance is read or written from a revlog.
893 888 '''
894 889 assert not self._dirty
895 890 return self._node
896 891
897 892 def setnode(self, node):
898 893 self._node = node
899 894 self._dirty = False
900 895
901 896 def iterentries(self):
902 897 self._load()
903 898 self._loadalllazy()
904 899 for p, n in sorted(
905 900 itertools.chain(self._dirs.items(), self._files.items())
906 901 ):
907 902 if p in self._files:
908 903 yield self._subpath(p), n, self._flags.get(p, b'')
909 904 else:
910 905 for x in n.iterentries():
911 906 yield x
912 907
913 908 def items(self):
914 909 self._load()
915 910 self._loadalllazy()
916 911 for p, n in sorted(
917 912 itertools.chain(self._dirs.items(), self._files.items())
918 913 ):
919 914 if p in self._files:
920 915 yield self._subpath(p), n
921 916 else:
922 917 for f, sn in pycompat.iteritems(n):
923 918 yield f, sn
924 919
925 920 iteritems = items
926 921
927 922 def iterkeys(self):
928 923 self._load()
929 924 self._loadalllazy()
930 925 for p in sorted(itertools.chain(self._dirs, self._files)):
931 926 if p in self._files:
932 927 yield self._subpath(p)
933 928 else:
934 929 for f in self._dirs[p]:
935 930 yield f
936 931
937 932 def keys(self):
938 933 return list(self.iterkeys())
939 934
940 935 def __iter__(self):
941 936 return self.iterkeys()
942 937
943 938 def __contains__(self, f):
944 939 if f is None:
945 940 return False
946 941 self._load()
947 942 dir, subpath = _splittopdir(f)
948 943 if dir:
949 944 self._loadlazy(dir)
950 945
951 946 if dir not in self._dirs:
952 947 return False
953 948
954 949 return self._dirs[dir].__contains__(subpath)
955 950 else:
956 951 return f in self._files
957 952
958 953 def get(self, f, default=None):
959 954 self._load()
960 955 dir, subpath = _splittopdir(f)
961 956 if dir:
962 957 self._loadlazy(dir)
963 958
964 959 if dir not in self._dirs:
965 960 return default
966 961 return self._dirs[dir].get(subpath, default)
967 962 else:
968 963 return self._files.get(f, default)
969 964
970 965 def __getitem__(self, f):
971 966 self._load()
972 967 dir, subpath = _splittopdir(f)
973 968 if dir:
974 969 self._loadlazy(dir)
975 970
976 971 return self._dirs[dir].__getitem__(subpath)
977 972 else:
978 973 return self._files[f]
979 974
980 975 def flags(self, f):
981 976 self._load()
982 977 dir, subpath = _splittopdir(f)
983 978 if dir:
984 979 self._loadlazy(dir)
985 980
986 981 if dir not in self._dirs:
987 982 return b''
988 983 return self._dirs[dir].flags(subpath)
989 984 else:
990 985 if f in self._lazydirs or f in self._dirs:
991 986 return b''
992 987 return self._flags.get(f, b'')
993 988
994 989 def find(self, f):
995 990 self._load()
996 991 dir, subpath = _splittopdir(f)
997 992 if dir:
998 993 self._loadlazy(dir)
999 994
1000 995 return self._dirs[dir].find(subpath)
1001 996 else:
1002 997 return self._files[f], self._flags.get(f, b'')
1003 998
1004 999 def __delitem__(self, f):
1005 1000 self._load()
1006 1001 dir, subpath = _splittopdir(f)
1007 1002 if dir:
1008 1003 self._loadlazy(dir)
1009 1004
1010 1005 self._dirs[dir].__delitem__(subpath)
1011 1006 # If the directory is now empty, remove it
1012 1007 if self._dirs[dir]._isempty():
1013 1008 del self._dirs[dir]
1014 1009 else:
1015 1010 del self._files[f]
1016 1011 if f in self._flags:
1017 1012 del self._flags[f]
1018 1013 self._dirty = True
1019 1014
1020 1015 def __setitem__(self, f, n):
1021 1016 assert n is not None
1022 1017 self._load()
1023 1018 dir, subpath = _splittopdir(f)
1024 1019 if dir:
1025 1020 self._loadlazy(dir)
1026 1021 if dir not in self._dirs:
1027 1022 self._dirs[dir] = treemanifest(self._subpath(dir))
1028 1023 self._dirs[dir].__setitem__(subpath, n)
1029 1024 else:
1030 1025 self._files[f] = n[:21] # to match manifestdict's behavior
1031 1026 self._dirty = True
1032 1027
1033 1028 def _load(self):
1034 1029 if self._loadfunc is not _noop:
1035 1030 lf, self._loadfunc = self._loadfunc, _noop
1036 1031 lf(self)
1037 1032 elif self._copyfunc is not _noop:
1038 1033 cf, self._copyfunc = self._copyfunc, _noop
1039 1034 cf(self)
1040 1035
1041 1036 def setflag(self, f, flags):
1042 1037 """Set the flags (symlink, executable) for path f."""
1043 1038 self._load()
1044 1039 dir, subpath = _splittopdir(f)
1045 1040 if dir:
1046 1041 self._loadlazy(dir)
1047 1042 if dir not in self._dirs:
1048 1043 self._dirs[dir] = treemanifest(self._subpath(dir))
1049 1044 self._dirs[dir].setflag(subpath, flags)
1050 1045 else:
1051 1046 self._flags[f] = flags
1052 1047 self._dirty = True
1053 1048
1054 1049 def copy(self):
1055 1050 copy = treemanifest(self._dir)
1056 1051 copy._node = self._node
1057 1052 copy._dirty = self._dirty
1058 1053 if self._copyfunc is _noop:
1059 1054
1060 1055 def _copyfunc(s):
1061 1056 self._load()
1062 1057 s._lazydirs = {
1063 1058 d: (p, n, r, True)
1064 1059 for d, (p, n, r, c) in pycompat.iteritems(self._lazydirs)
1065 1060 }
1066 1061 sdirs = s._dirs
1067 1062 for d, v in pycompat.iteritems(self._dirs):
1068 1063 sdirs[d] = v.copy()
1069 1064 s._files = dict.copy(self._files)
1070 1065 s._flags = dict.copy(self._flags)
1071 1066
1072 1067 if self._loadfunc is _noop:
1073 1068 _copyfunc(copy)
1074 1069 else:
1075 1070 copy._copyfunc = _copyfunc
1076 1071 else:
1077 1072 copy._copyfunc = self._copyfunc
1078 1073 return copy
1079 1074
1080 1075 def filesnotin(self, m2, match=None):
1081 1076 '''Set of files in this manifest that are not in the other'''
1082 1077 if match and not match.always():
1083 1078 m1 = self.matches(match)
1084 1079 m2 = m2.matches(match)
1085 1080 return m1.filesnotin(m2)
1086 1081
1087 1082 files = set()
1088 1083
1089 1084 def _filesnotin(t1, t2):
1090 1085 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1091 1086 return
1092 1087 t1._load()
1093 1088 t2._load()
1094 1089 self._loaddifflazy(t1, t2)
1095 1090 for d, m1 in pycompat.iteritems(t1._dirs):
1096 1091 if d in t2._dirs:
1097 1092 m2 = t2._dirs[d]
1098 1093 _filesnotin(m1, m2)
1099 1094 else:
1100 1095 files.update(m1.iterkeys())
1101 1096
1102 1097 for fn in t1._files:
1103 1098 if fn not in t2._files:
1104 1099 files.add(t1._subpath(fn))
1105 1100
1106 1101 _filesnotin(self, m2)
1107 1102 return files
1108 1103
1109 1104 @propertycache
1110 1105 def _alldirs(self):
1111 1106 return pathutil.dirs(self)
1112 1107
1113 1108 def dirs(self):
1114 1109 return self._alldirs
1115 1110
1116 1111 def hasdir(self, dir):
1117 1112 self._load()
1118 1113 topdir, subdir = _splittopdir(dir)
1119 1114 if topdir:
1120 1115 self._loadlazy(topdir)
1121 1116 if topdir in self._dirs:
1122 1117 return self._dirs[topdir].hasdir(subdir)
1123 1118 return False
1124 1119 dirslash = dir + b'/'
1125 1120 return dirslash in self._dirs or dirslash in self._lazydirs
1126 1121
1127 1122 def walk(self, match):
1128 1123 '''Generates matching file names.
1129 1124
1130 1125 Equivalent to manifest.matches(match).iterkeys(), but without creating
1131 1126 an entirely new manifest.
1132 1127
1133 1128 It also reports nonexistent files by marking them bad with match.bad().
1134 1129 '''
1135 1130 if match.always():
1136 1131 for f in iter(self):
1137 1132 yield f
1138 1133 return
1139 1134
1140 1135 fset = set(match.files())
1141 1136
1142 1137 for fn in self._walk(match):
1143 1138 if fn in fset:
1144 1139 # specified pattern is the exact name
1145 1140 fset.remove(fn)
1146 1141 yield fn
1147 1142
1148 1143 # for dirstate.walk, files=[''] means "walk the whole tree".
1149 1144 # follow that here, too
1150 1145 fset.discard(b'')
1151 1146
1152 1147 for fn in sorted(fset):
1153 1148 if not self.hasdir(fn):
1154 1149 match.bad(fn, None)
1155 1150
1156 1151 def _walk(self, match):
1157 1152 '''Recursively generates matching file names for walk().'''
1158 1153 visit = match.visitchildrenset(self._dir[:-1])
1159 1154 if not visit:
1160 1155 return
1161 1156
1162 1157 # yield this dir's files and walk its submanifests
1163 1158 self._load()
1164 1159 visit = self._loadchildrensetlazy(visit)
1165 1160 for p in sorted(list(self._dirs) + list(self._files)):
1166 1161 if p in self._files:
1167 1162 fullp = self._subpath(p)
1168 1163 if match(fullp):
1169 1164 yield fullp
1170 1165 else:
1171 1166 if not visit or p[:-1] in visit:
1172 1167 for f in self._dirs[p]._walk(match):
1173 1168 yield f
1174 1169
1175 1170 def matches(self, match):
1176 1171 '''generate a new manifest filtered by the match argument'''
1177 1172 if match.always():
1178 1173 return self.copy()
1179 1174
1180 1175 return self._matches(match)
1181 1176
1182 1177 def _matches(self, match):
1183 1178 '''recursively generate a new manifest filtered by the match argument.
1184 1179 '''
1185 1180
1186 1181 visit = match.visitchildrenset(self._dir[:-1])
1187 1182 if visit == b'all':
1188 1183 return self.copy()
1189 1184 ret = treemanifest(self._dir)
1190 1185 if not visit:
1191 1186 return ret
1192 1187
1193 1188 self._load()
1194 1189 for fn in self._files:
1195 1190 # While visitchildrenset *usually* lists only subdirs, this is
1196 1191 # actually up to the matcher and may have some files in the set().
1197 1192 # If visit == 'this', we should obviously look at the files in this
1198 1193 # directory; if visit is a set, and fn is in it, we should inspect
1199 1194 # fn (but no need to inspect things not in the set).
1200 1195 if visit != b'this' and fn not in visit:
1201 1196 continue
1202 1197 fullp = self._subpath(fn)
1203 1198 # visitchildrenset isn't perfect, we still need to call the regular
1204 1199 # matcher code to further filter results.
1205 1200 if not match(fullp):
1206 1201 continue
1207 1202 ret._files[fn] = self._files[fn]
1208 1203 if fn in self._flags:
1209 1204 ret._flags[fn] = self._flags[fn]
1210 1205
1211 1206 visit = self._loadchildrensetlazy(visit)
1212 1207 for dir, subm in pycompat.iteritems(self._dirs):
1213 1208 if visit and dir[:-1] not in visit:
1214 1209 continue
1215 1210 m = subm._matches(match)
1216 1211 if not m._isempty():
1217 1212 ret._dirs[dir] = m
1218 1213
1219 1214 if not ret._isempty():
1220 1215 ret._dirty = True
1221 1216 return ret
1222 1217
1223 1218 def diff(self, m2, match=None, clean=False):
1224 1219 '''Finds changes between the current manifest and m2.
1225 1220
1226 1221 Args:
1227 1222 m2: the manifest to which this manifest should be compared.
1228 1223 clean: if true, include files unchanged between these manifests
1229 1224 with a None value in the returned dictionary.
1230 1225
1231 1226 The result is returned as a dict with filename as key and
1232 1227 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1233 1228 nodeid in the current/other manifest and fl1/fl2 is the flag
1234 1229 in the current/other manifest. Where the file does not exist,
1235 1230 the nodeid will be None and the flags will be the empty
1236 1231 string.
1237 1232 '''
1238 1233 if match and not match.always():
1239 1234 m1 = self.matches(match)
1240 1235 m2 = m2.matches(match)
1241 1236 return m1.diff(m2, clean=clean)
1242 1237 result = {}
1243 1238 emptytree = treemanifest()
1244 1239
1245 1240 def _iterativediff(t1, t2, stack):
1246 1241 """compares two tree manifests and append new tree-manifests which
1247 1242 needs to be compared to stack"""
1248 1243 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1249 1244 return
1250 1245 t1._load()
1251 1246 t2._load()
1252 1247 self._loaddifflazy(t1, t2)
1253 1248
1254 1249 for d, m1 in pycompat.iteritems(t1._dirs):
1255 1250 m2 = t2._dirs.get(d, emptytree)
1256 1251 stack.append((m1, m2))
1257 1252
1258 1253 for d, m2 in pycompat.iteritems(t2._dirs):
1259 1254 if d not in t1._dirs:
1260 1255 stack.append((emptytree, m2))
1261 1256
1262 1257 for fn, n1 in pycompat.iteritems(t1._files):
1263 1258 fl1 = t1._flags.get(fn, b'')
1264 1259 n2 = t2._files.get(fn, None)
1265 1260 fl2 = t2._flags.get(fn, b'')
1266 1261 if n1 != n2 or fl1 != fl2:
1267 1262 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1268 1263 elif clean:
1269 1264 result[t1._subpath(fn)] = None
1270 1265
1271 1266 for fn, n2 in pycompat.iteritems(t2._files):
1272 1267 if fn not in t1._files:
1273 1268 fl2 = t2._flags.get(fn, b'')
1274 1269 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1275 1270
1276 1271 stackls = []
1277 1272 _iterativediff(self, m2, stackls)
1278 1273 while stackls:
1279 1274 t1, t2 = stackls.pop()
1280 1275 # stackls is populated in the function call
1281 1276 _iterativediff(t1, t2, stackls)
1282 1277 return result
1283 1278
1284 1279 def unmodifiedsince(self, m2):
1285 1280 return not self._dirty and not m2._dirty and self._node == m2._node
1286 1281
1287 1282 def parse(self, text, readsubtree):
1288 1283 selflazy = self._lazydirs
1289 1284 subpath = self._subpath
1290 1285 for f, n, fl in _parse(text):
1291 1286 if fl == b't':
1292 1287 f = f + b'/'
1293 1288 # False below means "doesn't need to be copied" and can use the
1294 1289 # cached value from readsubtree directly.
1295 1290 selflazy[f] = (subpath(f), n, readsubtree, False)
1296 1291 elif b'/' in f:
1297 1292 # This is a flat manifest, so use __setitem__ and setflag rather
1298 1293 # than assigning directly to _files and _flags, so we can
1299 1294 # assign a path in a subdirectory, and to mark dirty (compared
1300 1295 # to nullid).
1301 1296 self[f] = n
1302 1297 if fl:
1303 1298 self.setflag(f, fl)
1304 1299 else:
1305 1300 # Assigning to _files and _flags avoids marking as dirty,
1306 1301 # and should be a little faster.
1307 1302 self._files[f] = n
1308 1303 if fl:
1309 1304 self._flags[f] = fl
1310 1305
1311 1306 def text(self):
1312 1307 """Get the full data of this manifest as a bytestring."""
1313 1308 self._load()
1314 1309 return _text(self.iterentries())
1315 1310
1316 1311 def dirtext(self):
1317 1312 """Get the full data of this directory as a bytestring. Make sure that
1318 1313 any submanifests have been written first, so their nodeids are correct.
1319 1314 """
1320 1315 self._load()
1321 1316 flags = self.flags
1322 1317 lazydirs = [
1323 1318 (d[:-1], v[1], b't') for d, v in pycompat.iteritems(self._lazydirs)
1324 1319 ]
1325 1320 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1326 1321 files = [(f, self._files[f], flags(f)) for f in self._files]
1327 1322 return _text(sorted(dirs + files + lazydirs))
1328 1323
1329 1324 def read(self, gettext, readsubtree):
1330 1325 def _load_for_read(s):
1331 1326 s.parse(gettext(), readsubtree)
1332 1327 s._dirty = False
1333 1328
1334 1329 self._loadfunc = _load_for_read
1335 1330
1336 1331 def writesubtrees(self, m1, m2, writesubtree, match):
1337 1332 self._load() # for consistency; should never have any effect here
1338 1333 m1._load()
1339 1334 m2._load()
1340 1335 emptytree = treemanifest()
1341 1336
1342 1337 def getnode(m, d):
1343 1338 ld = m._lazydirs.get(d)
1344 1339 if ld:
1345 1340 return ld[1]
1346 1341 return m._dirs.get(d, emptytree)._node
1347 1342
1348 1343 # let's skip investigating things that `match` says we do not need.
1349 1344 visit = match.visitchildrenset(self._dir[:-1])
1350 1345 visit = self._loadchildrensetlazy(visit)
1351 1346 if visit == b'this' or visit == b'all':
1352 1347 visit = None
1353 1348 for d, subm in pycompat.iteritems(self._dirs):
1354 1349 if visit and d[:-1] not in visit:
1355 1350 continue
1356 1351 subp1 = getnode(m1, d)
1357 1352 subp2 = getnode(m2, d)
1358 1353 if subp1 == nullid:
1359 1354 subp1, subp2 = subp2, subp1
1360 1355 writesubtree(subm, subp1, subp2, match)
1361 1356
1362 1357 def walksubtrees(self, matcher=None):
1363 1358 """Returns an iterator of the subtrees of this manifest, including this
1364 1359 manifest itself.
1365 1360
1366 1361 If `matcher` is provided, it only returns subtrees that match.
1367 1362 """
1368 1363 if matcher and not matcher.visitdir(self._dir[:-1]):
1369 1364 return
1370 1365 if not matcher or matcher(self._dir[:-1]):
1371 1366 yield self
1372 1367
1373 1368 self._load()
1374 1369 # OPT: use visitchildrenset to avoid loading everything.
1375 1370 self._loadalllazy()
1376 1371 for d, subm in pycompat.iteritems(self._dirs):
1377 1372 for subtree in subm.walksubtrees(matcher=matcher):
1378 1373 yield subtree
1379 1374
1380 1375
1381 1376 class manifestfulltextcache(util.lrucachedict):
1382 1377 """File-backed LRU cache for the manifest cache
1383 1378
1384 1379 File consists of entries, up to EOF:
1385 1380
1386 1381 - 20 bytes node, 4 bytes length, <length> manifest data
1387 1382
1388 1383 These are written in reverse cache order (oldest to newest).
1389 1384
1390 1385 """
1391 1386
1392 1387 _file = b'manifestfulltextcache'
1393 1388
1394 1389 def __init__(self, max):
1395 1390 super(manifestfulltextcache, self).__init__(max)
1396 1391 self._dirty = False
1397 1392 self._read = False
1398 1393 self._opener = None
1399 1394
1400 1395 def read(self):
1401 1396 if self._read or self._opener is None:
1402 1397 return
1403 1398
1404 1399 try:
1405 1400 with self._opener(self._file) as fp:
1406 1401 set = super(manifestfulltextcache, self).__setitem__
1407 1402 # ignore trailing data, this is a cache, corruption is skipped
1408 1403 while True:
1409 1404 node = fp.read(20)
1410 1405 if len(node) < 20:
1411 1406 break
1412 1407 try:
1413 1408 size = struct.unpack(b'>L', fp.read(4))[0]
1414 1409 except struct.error:
1415 1410 break
1416 1411 value = bytearray(fp.read(size))
1417 1412 if len(value) != size:
1418 1413 break
1419 1414 set(node, value)
1420 1415 except IOError:
1421 1416 # the file is allowed to be missing
1422 1417 pass
1423 1418
1424 1419 self._read = True
1425 1420 self._dirty = False
1426 1421
1427 1422 def write(self):
1428 1423 if not self._dirty or self._opener is None:
1429 1424 return
1430 1425 # rotate backwards to the first used node
1431 1426 with self._opener(
1432 1427 self._file, b'w', atomictemp=True, checkambig=True
1433 1428 ) as fp:
1434 1429 node = self._head.prev
1435 1430 while True:
1436 1431 if node.key in self._cache:
1437 1432 fp.write(node.key)
1438 1433 fp.write(struct.pack(b'>L', len(node.value)))
1439 1434 fp.write(node.value)
1440 1435 if node is self._head:
1441 1436 break
1442 1437 node = node.prev
1443 1438
1444 1439 def __len__(self):
1445 1440 if not self._read:
1446 1441 self.read()
1447 1442 return super(manifestfulltextcache, self).__len__()
1448 1443
1449 1444 def __contains__(self, k):
1450 1445 if not self._read:
1451 1446 self.read()
1452 1447 return super(manifestfulltextcache, self).__contains__(k)
1453 1448
1454 1449 def __iter__(self):
1455 1450 if not self._read:
1456 1451 self.read()
1457 1452 return super(manifestfulltextcache, self).__iter__()
1458 1453
1459 1454 def __getitem__(self, k):
1460 1455 if not self._read:
1461 1456 self.read()
1462 1457 # the cache lru order can change on read
1463 1458 setdirty = self._cache.get(k) is not self._head
1464 1459 value = super(manifestfulltextcache, self).__getitem__(k)
1465 1460 if setdirty:
1466 1461 self._dirty = True
1467 1462 return value
1468 1463
1469 1464 def __setitem__(self, k, v):
1470 1465 if not self._read:
1471 1466 self.read()
1472 1467 super(manifestfulltextcache, self).__setitem__(k, v)
1473 1468 self._dirty = True
1474 1469
1475 1470 def __delitem__(self, k):
1476 1471 if not self._read:
1477 1472 self.read()
1478 1473 super(manifestfulltextcache, self).__delitem__(k)
1479 1474 self._dirty = True
1480 1475
1481 1476 def get(self, k, default=None):
1482 1477 if not self._read:
1483 1478 self.read()
1484 1479 return super(manifestfulltextcache, self).get(k, default=default)
1485 1480
1486 1481 def clear(self, clear_persisted_data=False):
1487 1482 super(manifestfulltextcache, self).clear()
1488 1483 if clear_persisted_data:
1489 1484 self._dirty = True
1490 1485 self.write()
1491 1486 self._read = False
1492 1487
1493 1488
1494 1489 # and upper bound of what we expect from compression
1495 1490 # (real live value seems to be "3")
1496 1491 MAXCOMPRESSION = 3
1497 1492
1498 1493
1499 1494 @interfaceutil.implementer(repository.imanifeststorage)
1500 1495 class manifestrevlog(object):
1501 1496 '''A revlog that stores manifest texts. This is responsible for caching the
1502 1497 full-text manifest contents.
1503 1498 '''
1504 1499
1505 1500 def __init__(
1506 1501 self,
1507 1502 opener,
1508 1503 tree=b'',
1509 1504 dirlogcache=None,
1510 1505 indexfile=None,
1511 1506 treemanifest=False,
1512 1507 ):
1513 1508 """Constructs a new manifest revlog
1514 1509
1515 1510 `indexfile` - used by extensions to have two manifests at once, like
1516 1511 when transitioning between flatmanifeset and treemanifests.
1517 1512
1518 1513 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1519 1514 options can also be used to make this a tree manifest revlog. The opener
1520 1515 option takes precedence, so if it is set to True, we ignore whatever
1521 1516 value is passed in to the constructor.
1522 1517 """
1523 1518 # During normal operations, we expect to deal with not more than four
1524 1519 # revs at a time (such as during commit --amend). When rebasing large
1525 1520 # stacks of commits, the number can go up, hence the config knob below.
1526 1521 cachesize = 4
1527 1522 optiontreemanifest = False
1528 1523 opts = getattr(opener, 'options', None)
1529 1524 if opts is not None:
1530 1525 cachesize = opts.get(b'manifestcachesize', cachesize)
1531 1526 optiontreemanifest = opts.get(b'treemanifest', False)
1532 1527
1533 1528 self._treeondisk = optiontreemanifest or treemanifest
1534 1529
1535 1530 self._fulltextcache = manifestfulltextcache(cachesize)
1536 1531
1537 1532 if tree:
1538 1533 assert self._treeondisk, b'opts is %r' % opts
1539 1534
1540 1535 if indexfile is None:
1541 1536 indexfile = b'00manifest.i'
1542 1537 if tree:
1543 1538 indexfile = b"meta/" + tree + indexfile
1544 1539
1545 1540 self.tree = tree
1546 1541
1547 1542 # The dirlogcache is kept on the root manifest log
1548 1543 if tree:
1549 1544 self._dirlogcache = dirlogcache
1550 1545 else:
1551 1546 self._dirlogcache = {b'': self}
1552 1547
1553 1548 self._revlog = revlog.revlog(
1554 1549 opener,
1555 1550 indexfile,
1556 1551 # only root indexfile is cached
1557 1552 checkambig=not bool(tree),
1558 1553 mmaplargeindex=True,
1559 1554 upperboundcomp=MAXCOMPRESSION,
1560 1555 )
1561 1556
1562 1557 self.index = self._revlog.index
1563 1558 self.version = self._revlog.version
1564 1559 self._generaldelta = self._revlog._generaldelta
1565 1560
1566 1561 def _setupmanifestcachehooks(self, repo):
1567 1562 """Persist the manifestfulltextcache on lock release"""
1568 1563 if not util.safehasattr(repo, b'_wlockref'):
1569 1564 return
1570 1565
1571 1566 self._fulltextcache._opener = repo.wcachevfs
1572 1567 if repo._currentlock(repo._wlockref) is None:
1573 1568 return
1574 1569
1575 1570 reporef = weakref.ref(repo)
1576 1571 manifestrevlogref = weakref.ref(self)
1577 1572
1578 1573 def persistmanifestcache(success):
1579 1574 # Repo is in an unknown state, do not persist.
1580 1575 if not success:
1581 1576 return
1582 1577
1583 1578 repo = reporef()
1584 1579 self = manifestrevlogref()
1585 1580 if repo is None or self is None:
1586 1581 return
1587 1582 if repo.manifestlog.getstorage(b'') is not self:
1588 1583 # there's a different manifest in play now, abort
1589 1584 return
1590 1585 self._fulltextcache.write()
1591 1586
1592 1587 repo._afterlock(persistmanifestcache)
1593 1588
1594 1589 @property
1595 1590 def fulltextcache(self):
1596 1591 return self._fulltextcache
1597 1592
1598 1593 def clearcaches(self, clear_persisted_data=False):
1599 1594 self._revlog.clearcaches()
1600 1595 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1601 1596 self._dirlogcache = {self.tree: self}
1602 1597
1603 1598 def dirlog(self, d):
1604 1599 if d:
1605 1600 assert self._treeondisk
1606 1601 if d not in self._dirlogcache:
1607 1602 mfrevlog = manifestrevlog(
1608 1603 self.opener, d, self._dirlogcache, treemanifest=self._treeondisk
1609 1604 )
1610 1605 self._dirlogcache[d] = mfrevlog
1611 1606 return self._dirlogcache[d]
1612 1607
1613 1608 def add(
1614 1609 self,
1615 1610 m,
1616 1611 transaction,
1617 1612 link,
1618 1613 p1,
1619 1614 p2,
1620 1615 added,
1621 1616 removed,
1622 1617 readtree=None,
1623 1618 match=None,
1624 1619 ):
1625 1620 if p1 in self.fulltextcache and util.safehasattr(m, b'fastdelta'):
1626 1621 # If our first parent is in the manifest cache, we can
1627 1622 # compute a delta here using properties we know about the
1628 1623 # manifest up-front, which may save time later for the
1629 1624 # revlog layer.
1630 1625
1631 1626 _checkforbidden(added)
1632 1627 # combine the changed lists into one sorted iterator
1633 1628 work = heapq.merge(
1634 1629 [(x, False) for x in sorted(added)],
1635 1630 [(x, True) for x in sorted(removed)],
1636 1631 )
1637 1632
1638 1633 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1639 1634 cachedelta = self._revlog.rev(p1), deltatext
1640 1635 text = util.buffer(arraytext)
1641 1636 n = self._revlog.addrevision(
1642 1637 text, transaction, link, p1, p2, cachedelta
1643 1638 )
1644 1639 else:
1645 1640 # The first parent manifest isn't already loaded, so we'll
1646 1641 # just encode a fulltext of the manifest and pass that
1647 1642 # through to the revlog layer, and let it handle the delta
1648 1643 # process.
1649 1644 if self._treeondisk:
1650 1645 assert readtree, b"readtree must be set for treemanifest writes"
1651 1646 assert match, b"match must be specified for treemanifest writes"
1652 1647 m1 = readtree(self.tree, p1)
1653 1648 m2 = readtree(self.tree, p2)
1654 1649 n = self._addtree(
1655 1650 m, transaction, link, m1, m2, readtree, match=match
1656 1651 )
1657 1652 arraytext = None
1658 1653 else:
1659 1654 text = m.text()
1660 1655 n = self._revlog.addrevision(text, transaction, link, p1, p2)
1661 1656 arraytext = bytearray(text)
1662 1657
1663 1658 if arraytext is not None:
1664 1659 self.fulltextcache[n] = arraytext
1665 1660
1666 1661 return n
1667 1662
1668 1663 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1669 1664 # If the manifest is unchanged compared to one parent,
1670 1665 # don't write a new revision
1671 1666 if self.tree != b'' and (
1672 1667 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1673 1668 ):
1674 1669 return m.node()
1675 1670
1676 1671 def writesubtree(subm, subp1, subp2, match):
1677 1672 sublog = self.dirlog(subm.dir())
1678 1673 sublog.add(
1679 1674 subm,
1680 1675 transaction,
1681 1676 link,
1682 1677 subp1,
1683 1678 subp2,
1684 1679 None,
1685 1680 None,
1686 1681 readtree=readtree,
1687 1682 match=match,
1688 1683 )
1689 1684
1690 1685 m.writesubtrees(m1, m2, writesubtree, match)
1691 1686 text = m.dirtext()
1692 1687 n = None
1693 1688 if self.tree != b'':
1694 1689 # Double-check whether contents are unchanged to one parent
1695 1690 if text == m1.dirtext():
1696 1691 n = m1.node()
1697 1692 elif text == m2.dirtext():
1698 1693 n = m2.node()
1699 1694
1700 1695 if not n:
1701 1696 n = self._revlog.addrevision(
1702 1697 text, transaction, link, m1.node(), m2.node()
1703 1698 )
1704 1699
1705 1700 # Save nodeid so parent manifest can calculate its nodeid
1706 1701 m.setnode(n)
1707 1702 return n
1708 1703
1709 1704 def __len__(self):
1710 1705 return len(self._revlog)
1711 1706
1712 1707 def __iter__(self):
1713 1708 return self._revlog.__iter__()
1714 1709
1715 1710 def rev(self, node):
1716 1711 return self._revlog.rev(node)
1717 1712
1718 1713 def node(self, rev):
1719 1714 return self._revlog.node(rev)
1720 1715
1721 1716 def lookup(self, value):
1722 1717 return self._revlog.lookup(value)
1723 1718
1724 1719 def parentrevs(self, rev):
1725 1720 return self._revlog.parentrevs(rev)
1726 1721
1727 1722 def parents(self, node):
1728 1723 return self._revlog.parents(node)
1729 1724
1730 1725 def linkrev(self, rev):
1731 1726 return self._revlog.linkrev(rev)
1732 1727
1733 1728 def checksize(self):
1734 1729 return self._revlog.checksize()
1735 1730
1736 1731 def revision(self, node, _df=None, raw=False):
1737 1732 return self._revlog.revision(node, _df=_df, raw=raw)
1738 1733
1739 1734 def rawdata(self, node, _df=None):
1740 1735 return self._revlog.rawdata(node, _df=_df)
1741 1736
1742 1737 def revdiff(self, rev1, rev2):
1743 1738 return self._revlog.revdiff(rev1, rev2)
1744 1739
1745 1740 def cmp(self, node, text):
1746 1741 return self._revlog.cmp(node, text)
1747 1742
1748 1743 def deltaparent(self, rev):
1749 1744 return self._revlog.deltaparent(rev)
1750 1745
1751 1746 def emitrevisions(
1752 1747 self,
1753 1748 nodes,
1754 1749 nodesorder=None,
1755 1750 revisiondata=False,
1756 1751 assumehaveparentrevisions=False,
1757 1752 deltamode=repository.CG_DELTAMODE_STD,
1758 1753 ):
1759 1754 return self._revlog.emitrevisions(
1760 1755 nodes,
1761 1756 nodesorder=nodesorder,
1762 1757 revisiondata=revisiondata,
1763 1758 assumehaveparentrevisions=assumehaveparentrevisions,
1764 1759 deltamode=deltamode,
1765 1760 )
1766 1761
1767 1762 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1768 1763 return self._revlog.addgroup(
1769 1764 deltas, linkmapper, transaction, addrevisioncb=addrevisioncb
1770 1765 )
1771 1766
1772 1767 def rawsize(self, rev):
1773 1768 return self._revlog.rawsize(rev)
1774 1769
1775 1770 def getstrippoint(self, minlink):
1776 1771 return self._revlog.getstrippoint(minlink)
1777 1772
1778 1773 def strip(self, minlink, transaction):
1779 1774 return self._revlog.strip(minlink, transaction)
1780 1775
1781 1776 def files(self):
1782 1777 return self._revlog.files()
1783 1778
1784 1779 def clone(self, tr, destrevlog, **kwargs):
1785 1780 if not isinstance(destrevlog, manifestrevlog):
1786 1781 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1787 1782
1788 1783 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1789 1784
1790 1785 def storageinfo(
1791 1786 self,
1792 1787 exclusivefiles=False,
1793 1788 sharedfiles=False,
1794 1789 revisionscount=False,
1795 1790 trackedsize=False,
1796 1791 storedsize=False,
1797 1792 ):
1798 1793 return self._revlog.storageinfo(
1799 1794 exclusivefiles=exclusivefiles,
1800 1795 sharedfiles=sharedfiles,
1801 1796 revisionscount=revisionscount,
1802 1797 trackedsize=trackedsize,
1803 1798 storedsize=storedsize,
1804 1799 )
1805 1800
1806 1801 @property
1807 1802 def indexfile(self):
1808 1803 return self._revlog.indexfile
1809 1804
1810 1805 @indexfile.setter
1811 1806 def indexfile(self, value):
1812 1807 self._revlog.indexfile = value
1813 1808
1814 1809 @property
1815 1810 def opener(self):
1816 1811 return self._revlog.opener
1817 1812
1818 1813 @opener.setter
1819 1814 def opener(self, value):
1820 1815 self._revlog.opener = value
1821 1816
1822 1817
1823 1818 @interfaceutil.implementer(repository.imanifestlog)
1824 1819 class manifestlog(object):
1825 1820 """A collection class representing the collection of manifest snapshots
1826 1821 referenced by commits in the repository.
1827 1822
1828 1823 In this situation, 'manifest' refers to the abstract concept of a snapshot
1829 1824 of the list of files in the given commit. Consumers of the output of this
1830 1825 class do not care about the implementation details of the actual manifests
1831 1826 they receive (i.e. tree or flat or lazily loaded, etc)."""
1832 1827
1833 1828 def __init__(self, opener, repo, rootstore, narrowmatch):
1834 1829 usetreemanifest = False
1835 1830 cachesize = 4
1836 1831
1837 1832 opts = getattr(opener, 'options', None)
1838 1833 if opts is not None:
1839 1834 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1840 1835 cachesize = opts.get(b'manifestcachesize', cachesize)
1841 1836
1842 1837 self._treemanifests = usetreemanifest
1843 1838
1844 1839 self._rootstore = rootstore
1845 1840 self._rootstore._setupmanifestcachehooks(repo)
1846 1841 self._narrowmatch = narrowmatch
1847 1842
1848 1843 # A cache of the manifestctx or treemanifestctx for each directory
1849 1844 self._dirmancache = {}
1850 1845 self._dirmancache[b''] = util.lrucachedict(cachesize)
1851 1846
1852 1847 self._cachesize = cachesize
1853 1848
1854 1849 def __getitem__(self, node):
1855 1850 """Retrieves the manifest instance for the given node. Throws a
1856 1851 LookupError if not found.
1857 1852 """
1858 1853 return self.get(b'', node)
1859 1854
1860 1855 def get(self, tree, node, verify=True):
1861 1856 """Retrieves the manifest instance for the given node. Throws a
1862 1857 LookupError if not found.
1863 1858
1864 1859 `verify` - if True an exception will be thrown if the node is not in
1865 1860 the revlog
1866 1861 """
1867 1862 if node in self._dirmancache.get(tree, ()):
1868 1863 return self._dirmancache[tree][node]
1869 1864
1870 1865 if not self._narrowmatch.always():
1871 1866 if not self._narrowmatch.visitdir(tree[:-1]):
1872 1867 return excludeddirmanifestctx(tree, node)
1873 1868 if tree:
1874 1869 if self._rootstore._treeondisk:
1875 1870 if verify:
1876 1871 # Side-effect is LookupError is raised if node doesn't
1877 1872 # exist.
1878 1873 self.getstorage(tree).rev(node)
1879 1874
1880 1875 m = treemanifestctx(self, tree, node)
1881 1876 else:
1882 1877 raise error.Abort(
1883 1878 _(
1884 1879 b"cannot ask for manifest directory '%s' in a flat "
1885 1880 b"manifest"
1886 1881 )
1887 1882 % tree
1888 1883 )
1889 1884 else:
1890 1885 if verify:
1891 1886 # Side-effect is LookupError is raised if node doesn't exist.
1892 1887 self._rootstore.rev(node)
1893 1888
1894 1889 if self._treemanifests:
1895 1890 m = treemanifestctx(self, b'', node)
1896 1891 else:
1897 1892 m = manifestctx(self, node)
1898 1893
1899 1894 if node != nullid:
1900 1895 mancache = self._dirmancache.get(tree)
1901 1896 if not mancache:
1902 1897 mancache = util.lrucachedict(self._cachesize)
1903 1898 self._dirmancache[tree] = mancache
1904 1899 mancache[node] = m
1905 1900 return m
1906 1901
1907 1902 def getstorage(self, tree):
1908 1903 return self._rootstore.dirlog(tree)
1909 1904
1910 1905 def clearcaches(self, clear_persisted_data=False):
1911 1906 self._dirmancache.clear()
1912 1907 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
1913 1908
1914 1909 def rev(self, node):
1915 1910 return self._rootstore.rev(node)
1916 1911
1917 1912
1918 1913 @interfaceutil.implementer(repository.imanifestrevisionwritable)
1919 1914 class memmanifestctx(object):
1920 1915 def __init__(self, manifestlog):
1921 1916 self._manifestlog = manifestlog
1922 1917 self._manifestdict = manifestdict()
1923 1918
1924 1919 def _storage(self):
1925 1920 return self._manifestlog.getstorage(b'')
1926 1921
1927 1922 def copy(self):
1928 1923 memmf = memmanifestctx(self._manifestlog)
1929 1924 memmf._manifestdict = self.read().copy()
1930 1925 return memmf
1931 1926
1932 1927 def read(self):
1933 1928 return self._manifestdict
1934 1929
1935 1930 def write(self, transaction, link, p1, p2, added, removed, match=None):
1936 1931 return self._storage().add(
1937 1932 self._manifestdict,
1938 1933 transaction,
1939 1934 link,
1940 1935 p1,
1941 1936 p2,
1942 1937 added,
1943 1938 removed,
1944 1939 match=match,
1945 1940 )
1946 1941
1947 1942
1948 1943 @interfaceutil.implementer(repository.imanifestrevisionstored)
1949 1944 class manifestctx(object):
1950 1945 """A class representing a single revision of a manifest, including its
1951 1946 contents, its parent revs, and its linkrev.
1952 1947 """
1953 1948
1954 1949 def __init__(self, manifestlog, node):
1955 1950 self._manifestlog = manifestlog
1956 1951 self._data = None
1957 1952
1958 1953 self._node = node
1959 1954
1960 1955 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1961 1956 # but let's add it later when something needs it and we can load it
1962 1957 # lazily.
1963 1958 # self.p1, self.p2 = store.parents(node)
1964 1959 # rev = store.rev(node)
1965 1960 # self.linkrev = store.linkrev(rev)
1966 1961
1967 1962 def _storage(self):
1968 1963 return self._manifestlog.getstorage(b'')
1969 1964
1970 1965 def node(self):
1971 1966 return self._node
1972 1967
1973 1968 def copy(self):
1974 1969 memmf = memmanifestctx(self._manifestlog)
1975 1970 memmf._manifestdict = self.read().copy()
1976 1971 return memmf
1977 1972
1978 1973 @propertycache
1979 1974 def parents(self):
1980 1975 return self._storage().parents(self._node)
1981 1976
1982 1977 def read(self):
1983 1978 if self._data is None:
1984 1979 if self._node == nullid:
1985 1980 self._data = manifestdict()
1986 1981 else:
1987 1982 store = self._storage()
1988 1983 if self._node in store.fulltextcache:
1989 1984 text = pycompat.bytestr(store.fulltextcache[self._node])
1990 1985 else:
1991 1986 text = store.revision(self._node)
1992 1987 arraytext = bytearray(text)
1993 1988 store.fulltextcache[self._node] = arraytext
1994 1989 self._data = manifestdict(text)
1995 1990 return self._data
1996 1991
1997 1992 def readfast(self, shallow=False):
1998 1993 '''Calls either readdelta or read, based on which would be less work.
1999 1994 readdelta is called if the delta is against the p1, and therefore can be
2000 1995 read quickly.
2001 1996
2002 1997 If `shallow` is True, nothing changes since this is a flat manifest.
2003 1998 '''
2004 1999 store = self._storage()
2005 2000 r = store.rev(self._node)
2006 2001 deltaparent = store.deltaparent(r)
2007 2002 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2008 2003 return self.readdelta()
2009 2004 return self.read()
2010 2005
2011 2006 def readdelta(self, shallow=False):
2012 2007 '''Returns a manifest containing just the entries that are present
2013 2008 in this manifest, but not in its p1 manifest. This is efficient to read
2014 2009 if the revlog delta is already p1.
2015 2010
2016 2011 Changing the value of `shallow` has no effect on flat manifests.
2017 2012 '''
2018 2013 store = self._storage()
2019 2014 r = store.rev(self._node)
2020 2015 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2021 2016 return manifestdict(d)
2022 2017
2023 2018 def find(self, key):
2024 2019 return self.read().find(key)
2025 2020
2026 2021
2027 2022 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2028 2023 class memtreemanifestctx(object):
2029 2024 def __init__(self, manifestlog, dir=b''):
2030 2025 self._manifestlog = manifestlog
2031 2026 self._dir = dir
2032 2027 self._treemanifest = treemanifest()
2033 2028
2034 2029 def _storage(self):
2035 2030 return self._manifestlog.getstorage(b'')
2036 2031
2037 2032 def copy(self):
2038 2033 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2039 2034 memmf._treemanifest = self._treemanifest.copy()
2040 2035 return memmf
2041 2036
2042 2037 def read(self):
2043 2038 return self._treemanifest
2044 2039
2045 2040 def write(self, transaction, link, p1, p2, added, removed, match=None):
2046 2041 def readtree(dir, node):
2047 2042 return self._manifestlog.get(dir, node).read()
2048 2043
2049 2044 return self._storage().add(
2050 2045 self._treemanifest,
2051 2046 transaction,
2052 2047 link,
2053 2048 p1,
2054 2049 p2,
2055 2050 added,
2056 2051 removed,
2057 2052 readtree=readtree,
2058 2053 match=match,
2059 2054 )
2060 2055
2061 2056
2062 2057 @interfaceutil.implementer(repository.imanifestrevisionstored)
2063 2058 class treemanifestctx(object):
2064 2059 def __init__(self, manifestlog, dir, node):
2065 2060 self._manifestlog = manifestlog
2066 2061 self._dir = dir
2067 2062 self._data = None
2068 2063
2069 2064 self._node = node
2070 2065
2071 2066 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2072 2067 # we can instantiate treemanifestctx objects for directories we don't
2073 2068 # have on disk.
2074 2069 # self.p1, self.p2 = store.parents(node)
2075 2070 # rev = store.rev(node)
2076 2071 # self.linkrev = store.linkrev(rev)
2077 2072
2078 2073 def _storage(self):
2079 2074 narrowmatch = self._manifestlog._narrowmatch
2080 2075 if not narrowmatch.always():
2081 2076 if not narrowmatch.visitdir(self._dir[:-1]):
2082 2077 return excludedmanifestrevlog(self._dir)
2083 2078 return self._manifestlog.getstorage(self._dir)
2084 2079
2085 2080 def read(self):
2086 2081 if self._data is None:
2087 2082 store = self._storage()
2088 2083 if self._node == nullid:
2089 2084 self._data = treemanifest()
2090 2085 # TODO accessing non-public API
2091 2086 elif store._treeondisk:
2092 2087 m = treemanifest(dir=self._dir)
2093 2088
2094 2089 def gettext():
2095 2090 return store.revision(self._node)
2096 2091
2097 2092 def readsubtree(dir, subm):
2098 2093 # Set verify to False since we need to be able to create
2099 2094 # subtrees for trees that don't exist on disk.
2100 2095 return self._manifestlog.get(dir, subm, verify=False).read()
2101 2096
2102 2097 m.read(gettext, readsubtree)
2103 2098 m.setnode(self._node)
2104 2099 self._data = m
2105 2100 else:
2106 2101 if self._node in store.fulltextcache:
2107 2102 text = pycompat.bytestr(store.fulltextcache[self._node])
2108 2103 else:
2109 2104 text = store.revision(self._node)
2110 2105 arraytext = bytearray(text)
2111 2106 store.fulltextcache[self._node] = arraytext
2112 2107 self._data = treemanifest(dir=self._dir, text=text)
2113 2108
2114 2109 return self._data
2115 2110
2116 2111 def node(self):
2117 2112 return self._node
2118 2113
2119 2114 def copy(self):
2120 2115 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2121 2116 memmf._treemanifest = self.read().copy()
2122 2117 return memmf
2123 2118
2124 2119 @propertycache
2125 2120 def parents(self):
2126 2121 return self._storage().parents(self._node)
2127 2122
2128 2123 def readdelta(self, shallow=False):
2129 2124 '''Returns a manifest containing just the entries that are present
2130 2125 in this manifest, but not in its p1 manifest. This is efficient to read
2131 2126 if the revlog delta is already p1.
2132 2127
2133 2128 If `shallow` is True, this will read the delta for this directory,
2134 2129 without recursively reading subdirectory manifests. Instead, any
2135 2130 subdirectory entry will be reported as it appears in the manifest, i.e.
2136 2131 the subdirectory will be reported among files and distinguished only by
2137 2132 its 't' flag.
2138 2133 '''
2139 2134 store = self._storage()
2140 2135 if shallow:
2141 2136 r = store.rev(self._node)
2142 2137 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2143 2138 return manifestdict(d)
2144 2139 else:
2145 2140 # Need to perform a slow delta
2146 2141 r0 = store.deltaparent(store.rev(self._node))
2147 2142 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2148 2143 m1 = self.read()
2149 2144 md = treemanifest(dir=self._dir)
2150 2145 for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
2151 2146 if n1:
2152 2147 md[f] = n1
2153 2148 if fl1:
2154 2149 md.setflag(f, fl1)
2155 2150 return md
2156 2151
2157 2152 def readfast(self, shallow=False):
2158 2153 '''Calls either readdelta or read, based on which would be less work.
2159 2154 readdelta is called if the delta is against the p1, and therefore can be
2160 2155 read quickly.
2161 2156
2162 2157 If `shallow` is True, it only returns the entries from this manifest,
2163 2158 and not any submanifests.
2164 2159 '''
2165 2160 store = self._storage()
2166 2161 r = store.rev(self._node)
2167 2162 deltaparent = store.deltaparent(r)
2168 2163 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2169 2164 return self.readdelta(shallow=shallow)
2170 2165
2171 2166 if shallow:
2172 2167 return manifestdict(store.revision(self._node))
2173 2168 else:
2174 2169 return self.read()
2175 2170
2176 2171 def find(self, key):
2177 2172 return self.read().find(key)
2178 2173
2179 2174
2180 2175 class excludeddir(treemanifest):
2181 2176 """Stand-in for a directory that is excluded from the repository.
2182 2177
2183 2178 With narrowing active on a repository that uses treemanifests,
2184 2179 some of the directory revlogs will be excluded from the resulting
2185 2180 clone. This is a huge storage win for clients, but means we need
2186 2181 some sort of pseudo-manifest to surface to internals so we can
2187 2182 detect a merge conflict outside the narrowspec. That's what this
2188 2183 class is: it stands in for a directory whose node is known, but
2189 2184 whose contents are unknown.
2190 2185 """
2191 2186
2192 2187 def __init__(self, dir, node):
2193 2188 super(excludeddir, self).__init__(dir)
2194 2189 self._node = node
2195 2190 # Add an empty file, which will be included by iterators and such,
2196 2191 # appearing as the directory itself (i.e. something like "dir/")
2197 2192 self._files[b''] = node
2198 2193 self._flags[b''] = b't'
2199 2194
2200 2195 # Manifests outside the narrowspec should never be modified, so avoid
2201 2196 # copying. This makes a noticeable difference when there are very many
2202 2197 # directories outside the narrowspec. Also, it makes sense for the copy to
2203 2198 # be of the same type as the original, which would not happen with the
2204 2199 # super type's copy().
2205 2200 def copy(self):
2206 2201 return self
2207 2202
2208 2203
2209 2204 class excludeddirmanifestctx(treemanifestctx):
2210 2205 """context wrapper for excludeddir - see that docstring for rationale"""
2211 2206
2212 2207 def __init__(self, dir, node):
2213 2208 self._dir = dir
2214 2209 self._node = node
2215 2210
2216 2211 def read(self):
2217 2212 return excludeddir(self._dir, self._node)
2218 2213
2219 2214 def write(self, *args):
2220 2215 raise error.ProgrammingError(
2221 2216 b'attempt to write manifest from excluded dir %s' % self._dir
2222 2217 )
2223 2218
2224 2219
2225 2220 class excludedmanifestrevlog(manifestrevlog):
2226 2221 """Stand-in for excluded treemanifest revlogs.
2227 2222
2228 2223 When narrowing is active on a treemanifest repository, we'll have
2229 2224 references to directories we can't see due to the revlog being
2230 2225 skipped. This class exists to conform to the manifestrevlog
2231 2226 interface for those directories and proactively prevent writes to
2232 2227 outside the narrowspec.
2233 2228 """
2234 2229
2235 2230 def __init__(self, dir):
2236 2231 self._dir = dir
2237 2232
2238 2233 def __len__(self):
2239 2234 raise error.ProgrammingError(
2240 2235 b'attempt to get length of excluded dir %s' % self._dir
2241 2236 )
2242 2237
2243 2238 def rev(self, node):
2244 2239 raise error.ProgrammingError(
2245 2240 b'attempt to get rev from excluded dir %s' % self._dir
2246 2241 )
2247 2242
2248 2243 def linkrev(self, node):
2249 2244 raise error.ProgrammingError(
2250 2245 b'attempt to get linkrev from excluded dir %s' % self._dir
2251 2246 )
2252 2247
2253 2248 def node(self, rev):
2254 2249 raise error.ProgrammingError(
2255 2250 b'attempt to get node from excluded dir %s' % self._dir
2256 2251 )
2257 2252
2258 2253 def add(self, *args, **kwargs):
2259 2254 # We should never write entries in dirlogs outside the narrow clone.
2260 2255 # However, the method still gets called from writesubtree() in
2261 2256 # _addtree(), so we need to handle it. We should possibly make that
2262 2257 # avoid calling add() with a clean manifest (_dirty is always False
2263 2258 # in excludeddir instances).
2264 2259 pass
General Comments 0
You need to be logged in to leave comments. Login now