manifest: drop the CamelCase name for `manifest.manifestdict`...
Matt Harbison
r52960:35400ce4 default
@@ -1,2798 +1,2792 @@
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import annotations
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import typing
14 14 import weakref
15 15
16 16 from typing import (
17 17 ByteString,
18 18 Callable,
19 19 Collection,
20 20 Dict,
21 21 Iterable,
22 22 Iterator,
23 23 List,
24 24 Optional,
25 25 Set,
26 26 Tuple,
27 27 Union,
28 28 cast,
29 29 )
30 30
31 31 from .i18n import _
32 32 from .node import (
33 33 bin,
34 34 hex,
35 35 nullrev,
36 36 )
37 37 from . import (
38 38 encoding,
39 39 error,
40 40 match as matchmod,
41 41 mdiff,
42 42 pathutil,
43 43 policy,
44 44 pycompat,
45 45 revlog,
46 46 util,
47 47 )
48 48 from .interfaces import (
49 49 repository,
50 50 util as interfaceutil,
51 51 )
52 52 from .revlogutils import (
53 53 constants as revlog_constants,
54 54 )
55 55
56 56 parsers = policy.importmod('parsers')
57 57 propertycache = util.propertycache
58 58
59 59 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
60 60 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
61 61
62 62
63 63 def _parse(nodelen, data: bytes):
64 64 # This method does a little bit of excessive-looking
65 65 # precondition checking. This is so that the behavior of this
66 66 # class exactly matches its C counterpart to try and help
67 67 # prevent surprise breakage for anyone that develops against
68 68 # the pure version.
69 69 if data and data[-1:] != b'\n':
70 70 raise ValueError(b'Manifest did not end in a newline.')
71 71 prev = None
72 72 for l in data.splitlines():
73 73 if prev is not None and prev > l:
74 74 raise ValueError(b'Manifest lines not in sorted order.')
75 75 prev = l
76 76 f, n = l.split(b'\0')
77 77 nl = len(n)
78 78 flags = n[-1:]
79 79 if flags in _manifestflags:
80 80 n = n[:-1]
81 81 nl -= 1
82 82 else:
83 83 flags = b''
84 84 if nl != 2 * nodelen:
85 85 raise ValueError(b'Invalid manifest line')
86 86
87 87 yield f, bin(n), flags
88 88
89 89
90 90 def _text(it):
91 91 files = []
92 92 lines = []
93 93 for f, n, fl in it:
94 94 files.append(f)
95 95 # if this is changed to support newlines in filenames,
96 96 # be sure to check the templates/ dir again (especially *-raw.tmpl)
97 97 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
98 98
99 99 _checkforbidden(files)
100 100 return b''.join(lines)
101 101
102 102
103 103 class lazymanifestiter:
104 104 def __init__(self, lm: '_LazyManifest') -> None:
105 105 self.pos = 0
106 106 self.lm = lm
107 107
108 108 def __iter__(self) -> 'lazymanifestiter':
109 109 return self
110 110
111 111 def next(self) -> bytes:
112 112 try:
113 113 data, pos = self.lm._get(self.pos)
114 114 except IndexError:
115 115 raise StopIteration
116 116 if pos == -1:
117 117 assert isinstance(data, tuple)
118 118 self.pos += 1
119 119 return data[0]
120 120 assert isinstance(data, bytes)
121 121 self.pos += 1
122 122 zeropos = data.find(b'\x00', pos)
123 123 return data[pos:zeropos]
124 124
125 125 __next__ = next
126 126
127 127
128 128 class lazymanifestiterentries:
129 129 def __init__(self, lm: '_LazyManifest') -> None:
130 130 self.lm = lm
131 131 self.pos = 0
132 132
133 133 def __iter__(self) -> 'lazymanifestiterentries':
134 134 return self
135 135
136 136 def next(self) -> Tuple[bytes, bytes, bytes]:
137 137 try:
138 138 data, pos = self.lm._get(self.pos)
139 139 except IndexError:
140 140 raise StopIteration
141 141 if pos == -1:
142 142 assert isinstance(data, tuple)
143 143 self.pos += 1
144 144 return data
145 145 assert isinstance(data, bytes)
146 146 zeropos = data.find(b'\x00', pos)
147 147 nlpos = data.find(b'\n', pos)
148 148 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
149 149 raise error.StorageError(b'Invalid manifest line')
150 150 flags = data[nlpos - 1 : nlpos]
151 151 if flags in _manifestflags:
152 152 hlen = nlpos - zeropos - 2
153 153 else:
154 154 hlen = nlpos - zeropos - 1
155 155 flags = b''
156 156 if hlen != 2 * self.lm._nodelen:
157 157 raise error.StorageError(b'Invalid manifest line')
158 158 hashval = unhexlify(
159 159 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
160 160 )
161 161 self.pos += 1
162 162 return (data[pos:zeropos], hashval, flags)
163 163
164 164 __next__ = next
165 165
166 166
167 167 def unhexlify(data: bytes, extra: int, pos, length: int):
168 168 s = bin(data[pos : pos + length])
169 169 if extra:
170 170 s += bytes([extra & 0xFF])
171 171 return s
172 172
173 173
174 174 def _cmp(a, b):
175 175 return (a > b) - (a < b)
176 176
177 177
178 178 _manifestflags = {b'', b'l', b't', b'x'}
179 179
180 180
181 181 class _LazyManifest:
182 182 """A pure python manifest backed by a byte string. It is supplemented with
183 183 internal lists as it is modified, until it is compacted back to a pure byte
184 184 string.
185 185
186 186 ``data`` is the initial manifest data.
187 187
188 188 ``positions`` is a list of offsets, one per manifest entry. Positive
189 189 values are offsets into ``data``, negative values are offsets into the
190 190 ``extradata`` list. When an entry is removed, its entry is dropped from
191 191 ``positions``. The values are encoded such that when walking the list and
192 192 indexing into ``data`` or ``extradata`` as appropriate, the entries are
193 193 sorted by filename.
194 194
195 195 ``extradata`` is a list of (key, hash, flags) for entries that were added or
196 196 modified since the manifest was created or compacted.
197 197 """
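# Illustrative sketch of the encoding described above, assuming a manifest
# whose ``data`` holds just the lines for b'bar' and b'foo': ``positions``
# starts as the byte offsets of those two lines within ``data``.  Setting
# b'baz' appends (b'baz', node, flags) to ``extradata`` and splices the value
# -1 into ``positions`` between the two offsets; a value of -k points at
# ``extradata[k - 1]``, so walking ``positions`` still yields entries sorted
# by filename.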
198 198
199 199 def __init__(
200 200 self,
201 201 nodelen: int,
202 202 data: bytes,
203 203 positions=None,
204 204 extrainfo=None,
205 205 extradata=None,
206 206 hasremovals: bool = False,
207 207 ):
208 208 self._nodelen = nodelen
209 209 if positions is None:
210 210 self.positions = self.findlines(data)
211 211 self.extrainfo = [0] * len(self.positions)
212 212 self.data = data
213 213 self.extradata = []
214 214 self.hasremovals = False
215 215 else:
216 216 self.positions = positions[:]
217 217 self.extrainfo = extrainfo[:]
218 218 self.extradata = extradata[:]
219 219 self.data = data
220 220 self.hasremovals = hasremovals
221 221
222 222 def findlines(self, data: bytes) -> List[int]:
223 223 if not data:
224 224 return []
225 225 pos = data.find(b"\n")
226 226 if pos == -1 or data[-1:] != b'\n':
227 227 raise ValueError(b"Manifest did not end in a newline.")
228 228 positions = [0]
229 229 prev = data[: data.find(b'\x00')]
230 230 while pos < len(data) - 1 and pos != -1:
231 231 positions.append(pos + 1)
232 232 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
233 233 if nexts < prev:
234 234 raise ValueError(b"Manifest lines not in sorted order.")
235 235 prev = nexts
236 236 pos = data.find(b"\n", pos + 1)
237 237 return positions
238 238
239 239 def _get(
240 240 self, index: int
241 241 ) -> Tuple[Union[bytes, Tuple[bytes, bytes, bytes]], int]:
242 242 # get the position encoded in pos:
243 243 # positive number is an index in 'data'
244 244 # negative number is in extrapieces
245 245 pos = self.positions[index]
246 246 if pos >= 0:
247 247 return self.data, pos
248 248 return self.extradata[-pos - 1], -1
249 249
250 250 def _getkey(self, pos) -> bytes:
251 251 if pos >= 0:
252 252 return self.data[pos : self.data.find(b'\x00', pos + 1)]
253 253 return self.extradata[-pos - 1][0]
254 254
255 255 def bsearch(self, key: bytes) -> int:
256 256 first = 0
257 257 last = len(self.positions) - 1
258 258
259 259 while first <= last:
260 260 midpoint = (first + last) // 2
261 261 nextpos = self.positions[midpoint]
262 262 candidate = self._getkey(nextpos)
263 263 r = _cmp(key, candidate)
264 264 if r == 0:
265 265 return midpoint
266 266 else:
267 267 if r < 0:
268 268 last = midpoint - 1
269 269 else:
270 270 first = midpoint + 1
271 271 return -1
272 272
273 273 def bsearch2(self, key: bytes) -> Tuple[int, bool]:
274 274 # same as the above, but will always return the position
275 275 # done for performance reasons
276 276 first = 0
277 277 last = len(self.positions) - 1
278 278
279 279 while first <= last:
280 280 midpoint = (first + last) // 2
281 281 nextpos = self.positions[midpoint]
282 282 candidate = self._getkey(nextpos)
283 283 r = _cmp(key, candidate)
284 284 if r == 0:
285 285 return (midpoint, True)
286 286 else:
287 287 if r < 0:
288 288 last = midpoint - 1
289 289 else:
290 290 first = midpoint + 1
291 291 return (first, False)
292 292
293 293 def __contains__(self, key: bytes) -> bool:
294 294 return self.bsearch(key) != -1
295 295
296 296 def __getitem__(self, key: bytes) -> Tuple[bytes, bytes]:
297 297 if not isinstance(key, bytes):
298 298 raise TypeError(b"getitem: manifest keys must be a bytes.")
299 299 needle = self.bsearch(key)
300 300 if needle == -1:
301 301 raise KeyError
302 302 data, pos = self._get(needle)
303 303 if pos == -1:
304 304 assert isinstance(data, tuple)
305 305 return (data[1], data[2])
306 306
307 307 assert isinstance(data, bytes)
308 308 zeropos = data.find(b'\x00', pos)
309 309 nlpos = data.find(b'\n', zeropos)
310 310 assert 0 <= needle <= len(self.positions)
311 311 assert len(self.extrainfo) == len(self.positions)
312 312 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
313 313 raise error.StorageError(b'Invalid manifest line')
314 314 hlen = nlpos - zeropos - 1
315 315 flags = data[nlpos - 1 : nlpos]
316 316 if flags in _manifestflags:
317 317 hlen -= 1
318 318 else:
319 319 flags = b''
320 320 if hlen != 2 * self._nodelen:
321 321 raise error.StorageError(b'Invalid manifest line')
322 322 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
323 323 return (hashval, flags)
324 324
325 325 def __delitem__(self, key: bytes) -> None:
326 326 needle, found = self.bsearch2(key)
327 327 if not found:
328 328 raise KeyError
329 329 cur = self.positions[needle]
330 330 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
331 331 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
332 332 if cur >= 0:
333 333 # This does NOT unsort the list as far as the search functions are
334 334 # concerned, as they only examine lines mapped by self.positions.
335 335 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
336 336 self.hasremovals = True
337 337
338 338 def __setitem__(self, key: bytes, value: Tuple[bytes, bytes]):
339 339 if not isinstance(key, bytes):
340 340 raise TypeError(b"setitem: manifest keys must be a byte string.")
341 341 if not isinstance(value, tuple) or len(value) != 2:
342 342 raise TypeError(
343 343 b"Manifest values must be a tuple of (node, flags)."
344 344 )
345 345 hashval = value[0]
346 346 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
347 347 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
348 348 flags = value[1]
349 349 if not isinstance(flags, bytes) or len(flags) > 1:
350 350 raise TypeError(b"flags must be a 0 or 1 byte string, got %r", flags)
351 351 needle, found = self.bsearch2(key)
352 352 if found:
353 353 # put the item
354 354 pos = self.positions[needle]
355 355 if pos < 0:
356 356 self.extradata[-pos - 1] = (key, hashval, value[1])
357 357 else:
358 358 # just don't bother
359 359 self.extradata.append((key, hashval, value[1]))
360 360 self.positions[needle] = -len(self.extradata)
361 361 else:
362 362 # not found, put it in with extra positions
363 363 self.extradata.append((key, hashval, value[1]))
364 364 self.positions = (
365 365 self.positions[:needle]
366 366 + [-len(self.extradata)]
367 367 + self.positions[needle:]
368 368 )
369 369 self.extrainfo = (
370 370 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
371 371 )
372 372
373 373 def copy(self) -> '_LazyManifest':
374 374 # XXX call _compact like in C?
375 375 return _lazymanifest(
376 376 self._nodelen,
377 377 self.data,
378 378 self.positions,
379 379 self.extrainfo,
380 380 self.extradata,
381 381 self.hasremovals,
382 382 )
383 383
384 384 def _compact(self) -> None:
385 385 # hopefully not called TOO often
386 386 if len(self.extradata) == 0 and not self.hasremovals:
387 387 return
388 388 l = []
389 389 i = 0
390 390 offset = 0
391 391 self.extrainfo = [0] * len(self.positions)
392 392 while i < len(self.positions):
393 393 if self.positions[i] >= 0:
394 394 cur = self.positions[i]
395 395 last_cut = cur
396 396
397 397 # Collect all contiguous entries in the buffer at the current
398 398 # offset, breaking out only for added/modified items held in
399 399 # extradata, or a deleted line prior to the next position.
400 400 while True:
401 401 self.positions[i] = offset
402 402 i += 1
403 403 if i == len(self.positions) or self.positions[i] < 0:
404 404 break
405 405
406 406 # A removed file has no positions[] entry, but does have an
407 407 # overwritten first byte. Break out and find the end of the
408 408 # current good entry/entries if there is a removed file
409 409 # before the next position.
410 410 if (
411 411 self.hasremovals
412 412 and self.data.find(b'\n\x00', cur, self.positions[i])
413 413 != -1
414 414 ):
415 415 break
416 416
417 417 offset += self.positions[i] - cur
418 418 cur = self.positions[i]
419 419 end_cut = self.data.find(b'\n', cur)
420 420 if end_cut != -1:
421 421 end_cut += 1
422 422 offset += end_cut - cur
423 423 l.append(self.data[last_cut:end_cut])
424 424 else:
425 425 while i < len(self.positions) and self.positions[i] < 0:
426 426 cur = self.positions[i]
427 427 t = self.extradata[-cur - 1]
428 428 l.append(self._pack(t))
429 429 self.positions[i] = offset
430 430 # Hashes are either 20 bytes (old sha1s) or 32
431 431 # bytes (new non-sha1).
432 432 hlen = 20
433 433 if len(t[1]) > 25:
434 434 hlen = 32
435 435 if len(t[1]) > hlen:
436 436 self.extrainfo[i] = ord(t[1][hlen + 1])
437 437 offset += len(l[-1])
438 438 i += 1
439 439 self.data = b''.join(l)
440 440 self.hasremovals = False
441 441 self.extradata = []
442 442
443 443 def _pack(self, d: Tuple[bytes, bytes, bytes]) -> bytes:
444 444 n = d[1]
445 445 assert len(n) in (20, 32)
446 446 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
447 447
448 448 def text(self) -> ByteString:
449 449 self._compact()
450 450 return self.data
451 451
452 452 def diff(
453 453 self, m2: '_LazyManifest', clean: bool = False
454 454 ) -> Dict[
455 455 bytes,
456 456 Optional[
457 457 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
458 458 ],
459 459 ]:
460 460 '''Finds changes between the current manifest and m2.'''
461 461 # XXX think whether efficiency matters here
462 462 diff = {}
463 463
464 464 for fn, e1, flags in self.iterentries():
465 465 if fn not in m2:
466 466 diff[fn] = (e1, flags), (None, b'')
467 467 else:
468 468 e2 = m2[fn]
469 469 if (e1, flags) != e2:
470 470 diff[fn] = (e1, flags), e2
471 471 elif clean:
472 472 diff[fn] = None
473 473
474 474 for fn, e2, flags in m2.iterentries():
475 475 if fn not in self:
476 476 diff[fn] = (None, b''), (e2, flags)
477 477
478 478 return diff
479 479
480 480 def iterentries(self) -> lazymanifestiterentries:
481 481 return lazymanifestiterentries(self)
482 482
483 483 def iterkeys(self) -> lazymanifestiter:
484 484 return lazymanifestiter(self)
485 485
486 486 def __iter__(self) -> lazymanifestiter:
487 487 return lazymanifestiter(self)
488 488
489 489 def __len__(self) -> int:
490 490 return len(self.positions)
491 491
492 492 def filtercopy(self, filterfn: Callable[[bytes], bool]) -> '_LazyManifest':
493 493 # XXX should be optimized
494 494 c = _lazymanifest(self._nodelen, b'')
495 495 for f, n, fl in self.iterentries():
496 496 if filterfn(f):
497 497 c[f] = n, fl
498 498 return c
499 499
500 500
501 501 try:
502 502 _lazymanifest = parsers.lazymanifest
503 503 except AttributeError:
504 504 _lazymanifest = _LazyManifest
505 505
506 506
507 class ManifestDict:
507 class manifestdict: # (repository.imanifestdict)
508 508 def __init__(self, nodelen: int, data: ByteString = b''):
509 509 self._nodelen = nodelen
510 510 self._lm = _lazymanifest(nodelen, data)
511 511
512 512 def __getitem__(self, key: bytes) -> bytes:
513 513 return self._lm[key][0]
514 514
515 515 def find(self, key: bytes) -> Tuple[bytes, bytes]:
516 516 return self._lm[key]
517 517
518 518 def __len__(self) -> int:
519 519 return len(self._lm)
520 520
521 521 def __nonzero__(self) -> bool:
522 522 # nonzero is covered by the __len__ function, but implementing it here
523 523 # makes it easier for extensions to override.
524 524 return len(self._lm) != 0
525 525
526 526 __bool__ = __nonzero__
527 527
528 528 def set(self, key: bytes, node: bytes, flags: bytes) -> None:
529 529 self._lm[key] = node, flags
530 530
531 531 def __setitem__(self, key: bytes, node: bytes) -> None:
532 532 self._lm[key] = node, self.flags(key)
533 533
534 534 def __contains__(self, key: bytes) -> bool:
535 535 if key is None:
536 536 return False
537 537 return key in self._lm
538 538
539 539 def __delitem__(self, key: bytes) -> None:
540 540 del self._lm[key]
541 541
542 542 def __iter__(self) -> Iterator[bytes]:
543 543 return self._lm.__iter__()
544 544
545 545 def iterkeys(self) -> Iterator[bytes]:
546 546 return self._lm.iterkeys()
547 547
548 548 def keys(self) -> List[bytes]:
549 549 return list(self.iterkeys())
550 550
551 551 def filesnotin(self, m2, match=None) -> Set[bytes]:
552 552 '''Set of files in this manifest that are not in the other'''
553 553 if match is not None:
554 554 match = matchmod.badmatch(match, lambda path, msg: None)
555 555 sm2 = set(m2.walk(match))
556 556 return {f for f in self.walk(match) if f not in sm2}
557 557 return {f for f in self if f not in m2}
558 558
559 559 @propertycache
560 560 def _dirs(self) -> pathutil.dirs:
561 561 return pathutil.dirs(self)
562 562
563 563 def dirs(self) -> pathutil.dirs:
564 564 return self._dirs
565 565
566 566 def hasdir(self, dir: bytes) -> bool:
567 567 return dir in self._dirs
568 568
569 569 def _filesfastpath(self, match: matchmod.basematcher) -> bool:
570 570 """Checks whether we can correctly and quickly iterate over matcher
571 571 files instead of over manifest files."""
572 572 files = match.files()
573 573 return len(files) < 100 and (
574 574 match.isexact()
575 575 or (match.prefix() and all(fn in self for fn in files))
576 576 )
577 577
578 578 def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
579 579 """Generates matching file names.
580 580
581 581 Equivalent to manifest.matches(match).iterkeys(), but without creating
582 582 an entirely new manifest.
583 583
584 584 It also reports nonexistent files by marking them bad with match.bad().
585 585 """
586 586 if match.always():
587 587 for f in iter(self):
588 588 yield f
589 589 return
590 590
591 591 fset = set(match.files())
592 592
593 593 # avoid the entire walk if we're only looking for specific files
594 594 if self._filesfastpath(match):
595 595 for fn in sorted(fset):
596 596 if fn in self:
597 597 yield fn
598 598 return
599 599
600 600 for fn in self:
601 601 if fn in fset:
602 602 # specified pattern is the exact name
603 603 fset.remove(fn)
604 604 if match(fn):
605 605 yield fn
606 606
607 607 # for dirstate.walk, files=[''] means "walk the whole tree".
608 608 # follow that here, too
609 609 fset.discard(b'')
610 610
611 611 for fn in sorted(fset):
612 612 if not self.hasdir(fn):
613 613 match.bad(fn, None)
614 614
615 def _matches(self, match: matchmod.basematcher) -> 'ManifestDict':
615 def _matches(self, match: matchmod.basematcher) -> 'manifestdict':
616 616 '''generate a new manifest filtered by the match argument'''
617 617 if match.always():
618 618 return self.copy()
619 619
620 620 if self._filesfastpath(match):
621 621 m = manifestdict(self._nodelen)
622 622 lm = self._lm
623 623 for fn in match.files():
624 624 if fn in lm:
625 625 m._lm[fn] = lm[fn]
626 626 return m
627 627
628 628 m = manifestdict(self._nodelen)
629 629 m._lm = self._lm.filtercopy(match)
630 630 return m
631 631
632 632 def diff(
633 633 self,
634 m2: 'ManifestDict',
634 m2: 'manifestdict',
635 635 match: Optional[matchmod.basematcher] = None,
636 636 clean: bool = False,
637 637 ) -> Dict[
638 638 bytes,
639 639 Optional[
640 640 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
641 641 ],
642 642 ]:
643 643 """Finds changes between the current manifest and m2.
644 644
645 645 Args:
646 646 m2: the manifest to which this manifest should be compared.
647 647 clean: if true, include files unchanged between these manifests
648 648 with a None value in the returned dictionary.
649 649
650 650 The result is returned as a dict with filename as key and
651 651 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
652 652 nodeid in the current/other manifest and fl1/fl2 is the flag
653 653 in the current/other manifest. Where the file does not exist,
654 654 the nodeid will be None and the flags will be the empty
655 655 string.
656 656 """
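# A hypothetical result, to illustrate the shape documented above (n1/n2
# stand in for 20- or 32-byte nodes): {b'changed.txt': ((n1, b''), (n2, b'x')),
# b'removed.txt': ((n1, b''), (None, b''))}, plus b'same.txt': None when
# ``clean`` is True.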
657 657 if match:
658 658 m1 = self._matches(match)
659 659 m2 = m2._matches(match)
660 660 return m1.diff(m2, clean=clean)
661 661 return self._lm.diff(m2._lm, clean)
662 662
663 663 def setflag(self, key: bytes, flag: bytes) -> None:
664 664 if flag not in _manifestflags:
665 665 raise TypeError(b"Invalid manifest flag set.")
666 666 self._lm[key] = self[key], flag
667 667
668 668 def get(self, key: bytes, default=None) -> Optional[bytes]:
669 669 try:
670 670 return self._lm[key][0]
671 671 except KeyError:
672 672 return default
673 673
674 674 def flags(self, key: bytes) -> bytes:
675 675 try:
676 676 return self._lm[key][1]
677 677 except KeyError:
678 678 return b''
679 679
680 def copy(self) -> 'ManifestDict':
680 def copy(self) -> 'manifestdict':
681 681 c = manifestdict(self._nodelen)
682 682 c._lm = self._lm.copy()
683 683 return c
684 684
685 685 def items(self) -> Iterator[Tuple[bytes, bytes]]:
686 686 return (x[:2] for x in self._lm.iterentries())
687 687
688 688 def iteritems(self) -> Iterator[Tuple[bytes, bytes]]:
689 689 return (x[:2] for x in self._lm.iterentries())
690 690
691 691 def iterentries(self) -> Iterator[Tuple[bytes, bytes, bytes]]:
692 692 return self._lm.iterentries()
693 693
694 694 def text(self) -> ByteString:
695 695 # most likely uses native version
696 696 return self._lm.text()
697 697
698 698 def fastdelta(
699 699 self, base: ByteString, changes: Iterable[Tuple[bytes, bool]]
700 700 ) -> Tuple[ByteString, ByteString]:
701 701 """Given a base manifest text as a bytearray and a list of changes
702 702 relative to that text, compute a delta that can be used by revlog.
703 703 """
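# Sketch of the flow below, stated as an assumption from reading this method:
# ``changes`` is an iterable of (filename, todelete) pairs; for small change
# sets each kept file is re-emitted as b"%s\0%s%s\n" and the collected
# (start, end, text) regions are packed by _addlistdelta() into the
# b">lll"-framed delta consumed by the revlog layer, while large change sets
# fall back to a full mdiff.textdiff() of the rebuilt text.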
704 704 delta = []
705 705 dstart = None
706 706 dend = None
707 707 dline = [b""]
708 708 start = 0
709 709 # zero copy representation of base as a buffer
710 710 addbuf = util.buffer(base)
711 711
712 712 changes = list(changes)
713 713 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
714 714 # start with a readonly loop that finds the offset of
715 715 # each line and creates the deltas
716 716 for f, todelete in changes:
717 717 # (start, end) will either span the existing item or mark the insert point
718 718 start, end = _msearch(addbuf, f, start)
719 719 if not todelete:
720 720 h, fl = self._lm[f]
721 721 l = b"%s\0%s%s\n" % (f, hex(h), fl)
722 722 else:
723 723 if start == end:
724 724 # item we want to delete was not found, error out
725 725 raise AssertionError(
726 726 _(b"failed to remove %s from manifest") % f
727 727 )
728 728 l = b""
729 729 if dstart is not None and dstart <= start and dend >= start:
730 730 if dend < end:
731 731 dend = end
732 732 if l:
733 733 dline.append(l)
734 734 else:
735 735 if dstart is not None:
736 736 delta.append((dstart, dend, b"".join(dline)))
737 737 dstart = start
738 738 dend = end
739 739 dline = [l]
740 740
741 741 if dstart is not None:
742 742 delta.append((dstart, dend, b"".join(dline)))
743 743 # apply the delta to the base, and get a delta for addrevision
744 744 deltatext, arraytext = _addlistdelta(base, delta)
745 745 else:
746 746 # For large changes, it's much cheaper to just build the text and
747 747 # diff it.
748 748 arraytext = bytearray(self.text())
749 749 deltatext = mdiff.textdiff(
750 750 util.buffer(base), util.buffer(arraytext)
751 751 )
752 752
753 753 return arraytext, deltatext
754 754
755 755
756 manifestdict = interfaceutil.implementer(repository.imanifestdict)(ManifestDict)
757
758 if typing.TYPE_CHECKING:
759 manifestdict = ManifestDict
760
761
762 756 def _msearch(
763 757 m: ByteString, s: bytes, lo: int = 0, hi: Optional[int] = None
764 758 ) -> Tuple[int, int]:
765 759 """return a tuple (start, end) that says where to find s within m.
766 760
767 761 If the string is found m[start:end] are the line containing
768 762 that string. If start == end the string was not found and
769 763 they indicate the proper sorted insertion point.
770 764 """
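# Illustrative example under the format assumed here (lines of
# b"<file>\0<40 hex chars><flags>\n"): searching for b'b' in a manifest that
# contains only b'a' and b'c' returns (start, start), where start is the
# offset of the b'c' line, i.e. the sorted insertion point; searching for
# b'c' returns (start, end) spanning that whole line including its newline.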
771 765
772 766 def advance(i: int, c: bytes):
773 767 while i < lenm and m[i : i + 1] != c:
774 768 i += 1
775 769 return i
776 770
777 771 if not s:
778 772 return (lo, lo)
779 773 lenm = len(m)
780 774 if not hi:
781 775 hi = lenm
782 776 while lo < hi:
783 777 mid = (lo + hi) // 2
784 778 start = mid
785 779 while start > 0 and m[start - 1 : start] != b'\n':
786 780 start -= 1
787 781 end = advance(start, b'\0')
788 782 if bytes(m[start:end]) < s:
789 783 # we know that after the null there are 40 bytes of sha1
790 784 # this translates to the bisect lo = mid + 1
791 785 lo = advance(end + 40, b'\n') + 1
792 786 else:
793 787 # this translates to the bisect hi = mid
794 788 hi = start
795 789 end = advance(lo, b'\0')
796 790 found = m[lo:end]
797 791 if s == found:
798 792 # we know that after the null there are 40 bytes of sha1
799 793 end = advance(end + 40, b'\n')
800 794 return (lo, end + 1)
801 795 else:
802 796 return (lo, lo)
803 797
804 798
805 799 def _checkforbidden(l: Iterable[bytes]) -> None:
806 800 """Check filenames for illegal characters."""
807 801 for f in l:
808 802 if b'\n' in f or b'\r' in f:
809 803 raise error.StorageError(
810 804 _(b"'\\n' and '\\r' disallowed in filenames: %r")
811 805 % pycompat.bytestr(f)
812 806 )
813 807
814 808
815 809 # apply the changes collected during the bisect loop to our addlist
816 810 # return a delta suitable for addrevision
817 811 def _addlistdelta(
818 812 addlist: ByteString,
819 813 x: Iterable[Tuple[int, int, bytes]],
820 814 ) -> Tuple[bytes, ByteString]:
821 815 # for large addlist arrays, building a new array is cheaper
822 816 # than repeatedly modifying the existing one
823 817 currentposition = 0
824 818 newaddlist = bytearray()
825 819
826 820 for start, end, content in x:
827 821 newaddlist += addlist[currentposition:start]
828 822 if content:
829 823 newaddlist += bytearray(content)
830 824
831 825 currentposition = end
832 826
833 827 newaddlist += addlist[currentposition:]
834 828
835 829 deltatext = b"".join(
836 830 struct.pack(b">lll", start, end, len(content)) + content
837 831 for start, end, content in x
838 832 )
839 833 return deltatext, newaddlist
840 834
841 835
842 836 def _splittopdir(f: bytes) -> Tuple[bytes, bytes]:
843 837 if b'/' in f:
844 838 dir, subpath = f.split(b'/', 1)
845 839 return dir + b'/', subpath
846 840 else:
847 841 return b'', f
848 842
849 843
850 844 _noop = lambda s: None
851 845
852 846
853 847 class TreeManifest:
854 848 _dir: bytes
855 849 _dirs: Dict[bytes, 'TreeManifest']
856 850 _dirty: bool
857 851 _files: Dict[bytes, bytes]
858 852 _flags: Dict[bytes, bytes]
859 853
860 854 def __init__(self, nodeconstants, dir: bytes = b'', text: bytes = b''):
861 855 self._dir = dir
862 856 self.nodeconstants = nodeconstants
863 857 self._node = self.nodeconstants.nullid
864 858 self._nodelen = self.nodeconstants.nodelen
865 859 self._loadfunc = _noop
866 860 self._copyfunc = _noop
867 861 self._dirty = False
868 862 self._dirs = {}
869 863 self._lazydirs: Dict[
870 864 bytes,
871 865 Tuple[bytes, Callable[[bytes, bytes], 'TreeManifest'], bool],
872 866 ] = {}
873 867 # Using _lazymanifest here is a little slower than plain old dicts
874 868 self._files = {}
875 869 self._flags = {}
876 870 if text:
877 871
878 872 def readsubtree(subdir, subm):
879 873 raise AssertionError(
880 874 b'treemanifest constructor only accepts flat manifests'
881 875 )
882 876
883 877 self.parse(text, readsubtree)
884 878 self._dirty = True # Mark flat manifest dirty after parsing
885 879
886 880 def _subpath(self, path: bytes) -> bytes:
887 881 return self._dir + path
888 882
889 883 def _loadalllazy(self) -> None:
890 884 selfdirs = self._dirs
891 885 subpath = self._subpath
892 886 for d, (node, readsubtree, docopy) in self._lazydirs.items():
893 887 if docopy:
894 888 selfdirs[d] = readsubtree(subpath(d), node).copy()
895 889 else:
896 890 selfdirs[d] = readsubtree(subpath(d), node)
897 891 self._lazydirs.clear()
898 892
899 893 def _loadlazy(self, d: bytes) -> None:
900 894 v = self._lazydirs.get(d)
901 895 if v is not None:
902 896 node, readsubtree, docopy = v
903 897 if docopy:
904 898 self._dirs[d] = readsubtree(self._subpath(d), node).copy()
905 899 else:
906 900 self._dirs[d] = readsubtree(self._subpath(d), node)
907 901 del self._lazydirs[d]
908 902
909 903 def _loadchildrensetlazy(
910 904 self, visit: Union[Set[bytes], bytes]
911 905 ) -> Optional[Set[bytes]]:
912 906 if not visit:
913 907 return None
914 908 if visit == b'all' or visit == b'this':
915 909 self._loadalllazy()
916 910 return None
917 911
918 912 visit = cast(Set[bytes], visit)
919 913
920 914 loadlazy = self._loadlazy
921 915 for k in visit:
922 916 loadlazy(k + b'/')
923 917 return visit
924 918
925 919 def _loaddifflazy(self, t1: 'TreeManifest', t2: 'TreeManifest'):
926 920 """load items in t1 and t2 if they're needed for diffing.
927 921
928 922 The criteria currently is:
929 923 - if it's not present in _lazydirs in either t1 or t2, load it in the
930 924 other (it may already be loaded or it may not exist, doesn't matter)
931 925 - if it's present in _lazydirs in both, compare the nodeid; if it
932 926 differs, load it in both
933 927 """
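# For instance, assuming d is lazy in both trees: matching nodeids mean the
# subtree is unchanged and stays lazy, while differing nodeids force a load
# on both sides so the recursive diff can descend into it.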
934 928 toloadlazy = []
935 929 for d, v1 in t1._lazydirs.items():
936 930 v2 = t2._lazydirs.get(d)
937 931 if v2 is None or v2[0] != v1[0]:
938 932 toloadlazy.append(d)
939 933 for d, v1 in t2._lazydirs.items():
940 934 if d not in t1._lazydirs:
941 935 toloadlazy.append(d)
942 936
943 937 for d in toloadlazy:
944 938 t1._loadlazy(d)
945 939 t2._loadlazy(d)
946 940
947 941 def __len__(self) -> int:
948 942 self._load()
949 943 size = len(self._files)
950 944 self._loadalllazy()
951 945 for m in self._dirs.values():
952 946 size += m.__len__()
953 947 return size
954 948
955 949 def __nonzero__(self) -> bool:
956 950 # Faster than "__len__() != 0" since it avoids loading sub-manifests
957 951 return not self._isempty()
958 952
959 953 __bool__ = __nonzero__
960 954
961 955 def _isempty(self) -> bool:
962 956 self._load() # for consistency; already loaded by all callers
963 957 # See if we can skip loading everything.
964 958 if self._files or (
965 959 self._dirs and any(not m._isempty() for m in self._dirs.values())
966 960 ):
967 961 return False
968 962 self._loadalllazy()
969 963 return not self._dirs or all(m._isempty() for m in self._dirs.values())
970 964
971 965 @encoding.strmethod
972 966 def __repr__(self) -> bytes:
973 967 return (
974 968 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
975 969 % (
976 970 self._dir,
977 971 hex(self._node),
978 972 bool(self._loadfunc is _noop),
979 973 self._dirty,
980 974 id(self),
981 975 )
982 976 )
983 977
984 978 def dir(self) -> bytes:
985 979 """The directory that this tree manifest represents, including a
986 980 trailing '/'. Empty string for the repo root directory."""
987 981 return self._dir
988 982
989 983 def node(self) -> bytes:
990 984 """The node of this instance. nullid for unsaved instances. Should
991 985 be updated when the instance is read from or written to a revlog.
992 986 """
993 987 assert not self._dirty
994 988 return self._node
995 989
996 990 def setnode(self, node: bytes) -> None:
997 991 self._node = node
998 992 self._dirty = False
999 993
1000 994 def iterentries(
1001 995 self,
1002 996 ) -> Iterator[Tuple[bytes, Union[bytes, 'TreeManifest'], bytes]]:
1003 997 self._load()
1004 998 self._loadalllazy()
1005 999 for p, n in sorted(
1006 1000 itertools.chain(self._dirs.items(), self._files.items())
1007 1001 ):
1008 1002 if p in self._files:
1009 1003 yield self._subpath(p), n, self._flags.get(p, b'')
1010 1004 else:
1011 1005 for x in n.iterentries():
1012 1006 yield x
1013 1007
1014 1008 def items(self) -> Iterator[Tuple[bytes, Union[bytes, 'TreeManifest']]]:
1015 1009 self._load()
1016 1010 self._loadalllazy()
1017 1011 for p, n in sorted(
1018 1012 itertools.chain(self._dirs.items(), self._files.items())
1019 1013 ):
1020 1014 if p in self._files:
1021 1015 yield self._subpath(p), n
1022 1016 else:
1023 1017 for f, sn in n.items():
1024 1018 yield f, sn
1025 1019
1026 1020 iteritems = items
1027 1021
1028 1022 def iterkeys(self) -> Iterator[bytes]:
1029 1023 self._load()
1030 1024 self._loadalllazy()
1031 1025 for p in sorted(itertools.chain(self._dirs, self._files)):
1032 1026 if p in self._files:
1033 1027 yield self._subpath(p)
1034 1028 else:
1035 1029 for f in self._dirs[p]:
1036 1030 yield f
1037 1031
1038 1032 def keys(self) -> List[bytes]:
1039 1033 return list(self.iterkeys())
1040 1034
1041 1035 def __iter__(self) -> Iterator[bytes]:
1042 1036 return self.iterkeys()
1043 1037
1044 1038 def __contains__(self, f: bytes) -> bool:
1045 1039 if f is None:
1046 1040 return False
1047 1041 self._load()
1048 1042 dir, subpath = _splittopdir(f)
1049 1043 if dir:
1050 1044 self._loadlazy(dir)
1051 1045
1052 1046 if dir not in self._dirs:
1053 1047 return False
1054 1048
1055 1049 return self._dirs[dir].__contains__(subpath)
1056 1050 else:
1057 1051 return f in self._files
1058 1052
1059 1053 def get(self, f: bytes, default: Optional[bytes] = None) -> Optional[bytes]:
1060 1054 self._load()
1061 1055 dir, subpath = _splittopdir(f)
1062 1056 if dir:
1063 1057 self._loadlazy(dir)
1064 1058
1065 1059 if dir not in self._dirs:
1066 1060 return default
1067 1061 return self._dirs[dir].get(subpath, default)
1068 1062 else:
1069 1063 return self._files.get(f, default)
1070 1064
1071 1065 def __getitem__(self, f: bytes) -> bytes:
1072 1066 self._load()
1073 1067 dir, subpath = _splittopdir(f)
1074 1068 if dir:
1075 1069 self._loadlazy(dir)
1076 1070
1077 1071 return self._dirs[dir].__getitem__(subpath)
1078 1072 else:
1079 1073 return self._files[f]
1080 1074
1081 1075 def flags(self, f: bytes) -> bytes:
1082 1076 self._load()
1083 1077 dir, subpath = _splittopdir(f)
1084 1078 if dir:
1085 1079 self._loadlazy(dir)
1086 1080
1087 1081 if dir not in self._dirs:
1088 1082 return b''
1089 1083 return self._dirs[dir].flags(subpath)
1090 1084 else:
1091 1085 if f in self._lazydirs or f in self._dirs:
1092 1086 return b''
1093 1087 return self._flags.get(f, b'')
1094 1088
1095 1089 def find(self, f: bytes) -> Tuple[bytes, bytes]:
1096 1090 self._load()
1097 1091 dir, subpath = _splittopdir(f)
1098 1092 if dir:
1099 1093 self._loadlazy(dir)
1100 1094
1101 1095 return self._dirs[dir].find(subpath)
1102 1096 else:
1103 1097 return self._files[f], self._flags.get(f, b'')
1104 1098
1105 1099 def __delitem__(self, f: bytes) -> None:
1106 1100 self._load()
1107 1101 dir, subpath = _splittopdir(f)
1108 1102 if dir:
1109 1103 self._loadlazy(dir)
1110 1104
1111 1105 self._dirs[dir].__delitem__(subpath)
1112 1106 # If the directory is now empty, remove it
1113 1107 if self._dirs[dir]._isempty():
1114 1108 del self._dirs[dir]
1115 1109 else:
1116 1110 del self._files[f]
1117 1111 if f in self._flags:
1118 1112 del self._flags[f]
1119 1113 self._dirty = True
1120 1114
1121 1115 def set(self, f: bytes, node: bytes, flags: bytes) -> None:
1122 1116 """Set both the node and the flags for path f."""
1123 1117 assert node is not None
1124 1118 if flags not in _manifestflags:
1125 1119 raise TypeError(b"Invalid manifest flag set.")
1126 1120 self._load()
1127 1121 dir, subpath = _splittopdir(f)
1128 1122 if dir:
1129 1123 self._loadlazy(dir)
1130 1124 if dir not in self._dirs:
1131 1125 self._dirs[dir] = treemanifest(
1132 1126 self.nodeconstants, self._subpath(dir)
1133 1127 )
1134 1128 self._dirs[dir].set(subpath, node, flags)
1135 1129 else:
1136 1130 assert len(node) in (20, 32)
1137 1131 self._files[f] = node
1138 1132 self._flags[f] = flags
1139 1133 self._dirty = True
1140 1134
1141 1135 def __setitem__(self, f: bytes, n: bytes) -> None:
1142 1136 assert n is not None
1143 1137 self._load()
1144 1138 dir, subpath = _splittopdir(f)
1145 1139 if dir:
1146 1140 self._loadlazy(dir)
1147 1141 if dir not in self._dirs:
1148 1142 self._dirs[dir] = treemanifest(
1149 1143 self.nodeconstants, self._subpath(dir)
1150 1144 )
1151 1145 self._dirs[dir].__setitem__(subpath, n)
1152 1146 else:
1153 1147 # manifest nodes are either 20 bytes or 32 bytes,
1154 1148 # depending on the hash in use. Assert this as historically
1155 1149 # sometimes extra bytes were added.
1156 1150 assert len(n) in (20, 32)
1157 1151 self._files[f] = n
1158 1152 self._dirty = True
1159 1153
1160 1154 def _load(self) -> None:
1161 1155 if self._loadfunc is not _noop:
1162 1156 lf, self._loadfunc = self._loadfunc, _noop
1163 1157 lf(self)
1164 1158 elif self._copyfunc is not _noop:
1165 1159 cf, self._copyfunc = self._copyfunc, _noop
1166 1160 cf(self)
1167 1161
1168 1162 def setflag(self, f: bytes, flags: bytes) -> None:
1169 1163 """Set the flags (symlink, executable) for path f."""
1170 1164 if flags not in _manifestflags:
1171 1165 raise TypeError(b"Invalid manifest flag set.")
1172 1166 self._load()
1173 1167 dir, subpath = _splittopdir(f)
1174 1168 if dir:
1175 1169 self._loadlazy(dir)
1176 1170 if dir not in self._dirs:
1177 1171 self._dirs[dir] = treemanifest(
1178 1172 self.nodeconstants, self._subpath(dir)
1179 1173 )
1180 1174 self._dirs[dir].setflag(subpath, flags)
1181 1175 else:
1182 1176 self._flags[f] = flags
1183 1177 self._dirty = True
1184 1178
1185 1179 def copy(self) -> 'TreeManifest':
1186 1180 copy = treemanifest(self.nodeconstants, self._dir)
1187 1181 copy._node = self._node
1188 1182 copy._dirty = self._dirty
1189 1183 if self._copyfunc is _noop:
1190 1184
1191 1185 def _copyfunc(s):
1192 1186 self._load()
1193 1187 s._lazydirs = {
1194 1188 d: (n, r, True) for d, (n, r, c) in self._lazydirs.items()
1195 1189 }
1196 1190 sdirs = s._dirs
1197 1191 for d, v in self._dirs.items():
1198 1192 sdirs[d] = v.copy()
1199 1193 s._files = dict.copy(self._files)
1200 1194 s._flags = dict.copy(self._flags)
1201 1195
1202 1196 if self._loadfunc is _noop:
1203 1197 _copyfunc(copy)
1204 1198 else:
1205 1199 copy._copyfunc = _copyfunc
1206 1200 else:
1207 1201 copy._copyfunc = self._copyfunc
1208 1202 return copy
1209 1203
1210 1204 def filesnotin(
1211 1205 self, m2: 'TreeManifest', match: Optional[matchmod.basematcher] = None
1212 1206 ) -> Set[bytes]:
1213 1207 '''Set of files in this manifest that are not in the other'''
1214 1208 if match and not match.always():
1215 1209 m1 = self._matches(match)
1216 1210 m2 = m2._matches(match)
1217 1211 return m1.filesnotin(m2)
1218 1212
1219 1213 files = set()
1220 1214
1221 1215 def _filesnotin(t1, t2):
1222 1216 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1223 1217 return
1224 1218 t1._load()
1225 1219 t2._load()
1226 1220 self._loaddifflazy(t1, t2)
1227 1221 for d, m1 in t1._dirs.items():
1228 1222 if d in t2._dirs:
1229 1223 m2 = t2._dirs[d]
1230 1224 _filesnotin(m1, m2)
1231 1225 else:
1232 1226 files.update(m1.iterkeys())
1233 1227
1234 1228 for fn in t1._files:
1235 1229 if fn not in t2._files:
1236 1230 files.add(t1._subpath(fn))
1237 1231
1238 1232 _filesnotin(self, m2)
1239 1233 return files
1240 1234
1241 1235 @propertycache
1242 1236 def _alldirs(self) -> pathutil.dirs:
1243 1237 return pathutil.dirs(self)
1244 1238
1245 1239 def dirs(self) -> pathutil.dirs:
1246 1240 return self._alldirs
1247 1241
1248 1242 def hasdir(self, dir: bytes) -> bool:
1249 1243 self._load()
1250 1244 topdir, subdir = _splittopdir(dir)
1251 1245 if topdir:
1252 1246 self._loadlazy(topdir)
1253 1247 if topdir in self._dirs:
1254 1248 return self._dirs[topdir].hasdir(subdir)
1255 1249 return False
1256 1250 dirslash = dir + b'/'
1257 1251 return dirslash in self._dirs or dirslash in self._lazydirs
1258 1252
1259 1253 def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
1260 1254 """Generates matching file names.
1261 1255
1262 1256 It also reports nonexistent files by marking them bad with match.bad().
1263 1257 """
1264 1258 if match.always():
1265 1259 for f in iter(self):
1266 1260 yield f
1267 1261 return
1268 1262
1269 1263 fset = set(match.files())
1270 1264
1271 1265 for fn in self._walk(match):
1272 1266 if fn in fset:
1273 1267 # specified pattern is the exact name
1274 1268 fset.remove(fn)
1275 1269 yield fn
1276 1270
1277 1271 # for dirstate.walk, files=[''] means "walk the whole tree".
1278 1272 # follow that here, too
1279 1273 fset.discard(b'')
1280 1274
1281 1275 for fn in sorted(fset):
1282 1276 if not self.hasdir(fn):
1283 1277 match.bad(fn, None)
1284 1278
1285 1279 def _walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
1286 1280 '''Recursively generates matching file names for walk().'''
1287 1281 visit = match.visitchildrenset(self._dir[:-1])
1288 1282 if not visit:
1289 1283 return
1290 1284
1291 1285 # yield this dir's files and walk its submanifests
1292 1286 self._load()
1293 1287 visit = self._loadchildrensetlazy(visit)
1294 1288 for p in sorted(list(self._dirs) + list(self._files)):
1295 1289 if p in self._files:
1296 1290 fullp = self._subpath(p)
1297 1291 if match(fullp):
1298 1292 yield fullp
1299 1293 else:
1300 1294 if not visit or p[:-1] in visit:
1301 1295 for f in self._dirs[p]._walk(match):
1302 1296 yield f
1303 1297
1304 1298 def _matches(self, match: matchmod.basematcher) -> 'TreeManifest':
1305 1299 """recursively generate a new manifest filtered by the match argument."""
1306 1300 if match.always():
1307 1301 return self.copy()
1308 1302 return self._matches_inner(match)
1309 1303
1310 1304 def _matches_inner(self, match: matchmod.basematcher) -> 'TreeManifest':
1311 1305 if match.always():
1312 1306 return self.copy()
1313 1307
1314 1308 visit = match.visitchildrenset(self._dir[:-1])
1315 1309 if visit == b'all':
1316 1310 return self.copy()
1317 1311 ret = treemanifest(self.nodeconstants, self._dir)
1318 1312 if not visit:
1319 1313 return ret
1320 1314
1321 1315 self._load()
1322 1316 for fn in self._files:
1323 1317 # While visitchildrenset *usually* lists only subdirs, this is
1324 1318 # actually up to the matcher and may have some files in the set().
1325 1319 # If visit == 'this', we should obviously look at the files in this
1326 1320 # directory; if visit is a set, and fn is in it, we should inspect
1327 1321 # fn (but no need to inspect things not in the set).
1328 1322 if visit != b'this' and fn not in visit:
1329 1323 continue
1330 1324 fullp = self._subpath(fn)
1331 1325 # visitchildrenset isn't perfect, we still need to call the regular
1332 1326 # matcher code to further filter results.
1333 1327 if not match(fullp):
1334 1328 continue
1335 1329 ret._files[fn] = self._files[fn]
1336 1330 if fn in self._flags:
1337 1331 ret._flags[fn] = self._flags[fn]
1338 1332
1339 1333 visit = self._loadchildrensetlazy(visit)
1340 1334 for dir, subm in self._dirs.items():
1341 1335 if visit and dir[:-1] not in visit:
1342 1336 continue
1343 1337 m = subm._matches_inner(match)
1344 1338 if not m._isempty():
1345 1339 ret._dirs[dir] = m
1346 1340
1347 1341 if not ret._isempty():
1348 1342 ret._dirty = True
1349 1343 return ret
1350 1344
1351 1345 def fastdelta(
1352 1346 self, base: ByteString, changes: Iterable[Tuple[bytes, bool]]
1353 1347 ) -> ByteString:
1354 1348 raise FastdeltaUnavailable()
1355 1349
1356 1350 def diff(
1357 1351 self,
1358 1352 m2: 'TreeManifest',
1359 1353 match: Optional[matchmod.basematcher] = None,
1360 1354 clean: bool = False,
1361 1355 ) -> Dict[
1362 1356 bytes,
1363 1357 Optional[
1364 1358 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
1365 1359 ],
1366 1360 ]:
1367 1361 """Finds changes between the current manifest and m2.
1368 1362
1369 1363 Args:
1370 1364 m2: the manifest to which this manifest should be compared.
1371 1365 clean: if true, include files unchanged between these manifests
1372 1366 with a None value in the returned dictionary.
1373 1367
1374 1368 The result is returned as a dict with filename as key and
1375 1369 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1376 1370 nodeid in the current/other manifest and fl1/fl2 is the flag
1377 1371 in the current/other manifest. Where the file does not exist,
1378 1372 the nodeid will be None and the flags will be the empty
1379 1373 string.
1380 1374 """
1381 1375 if match and not match.always():
1382 1376 m1 = self._matches(match)
1383 1377 m2 = m2._matches(match)
1384 1378 return m1.diff(m2, clean=clean)
1385 1379 result = {}
1386 1380 emptytree = treemanifest(self.nodeconstants)
1387 1381
1388 1382 def _iterativediff(t1, t2, stack):
1389 1383 """compares two tree manifests and appends the new tree manifests that
1390 1384 need to be compared to ``stack``"""
1391 1385 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1392 1386 return
1393 1387 t1._load()
1394 1388 t2._load()
1395 1389 self._loaddifflazy(t1, t2)
1396 1390
1397 1391 for d, m1 in t1._dirs.items():
1398 1392 m2 = t2._dirs.get(d, emptytree)
1399 1393 stack.append((m1, m2))
1400 1394
1401 1395 for d, m2 in t2._dirs.items():
1402 1396 if d not in t1._dirs:
1403 1397 stack.append((emptytree, m2))
1404 1398
1405 1399 for fn, n1 in t1._files.items():
1406 1400 fl1 = t1._flags.get(fn, b'')
1407 1401 n2 = t2._files.get(fn, None)
1408 1402 fl2 = t2._flags.get(fn, b'')
1409 1403 if n1 != n2 or fl1 != fl2:
1410 1404 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1411 1405 elif clean:
1412 1406 result[t1._subpath(fn)] = None
1413 1407
1414 1408 for fn, n2 in t2._files.items():
1415 1409 if fn not in t1._files:
1416 1410 fl2 = t2._flags.get(fn, b'')
1417 1411 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1418 1412
1419 1413 stackls = []
1420 1414 _iterativediff(self, m2, stackls)
1421 1415 while stackls:
1422 1416 t1, t2 = stackls.pop()
1423 1417 # stackls is populated in the function call
1424 1418 _iterativediff(t1, t2, stackls)
1425 1419 return result
1426 1420
1427 1421 def unmodifiedsince(self, m2: 'TreeManifest') -> bool:
1428 1422 return not self._dirty and not m2._dirty and self._node == m2._node
1429 1423
1430 1424 def parse(
1431 1425 self,
1432 1426 text: bytes,
1433 1427 readsubtree: Callable[[bytes, bytes], 'TreeManifest'],
1434 1428 ) -> None:
1435 1429 selflazy = self._lazydirs
1436 1430 for f, n, fl in _parse(self._nodelen, text):
1437 1431 if fl == b't':
1438 1432 f = f + b'/'
1439 1433 # False below means "doesn't need to be copied" and can use the
1440 1434 # cached value from readsubtree directly.
1441 1435 selflazy[f] = (n, readsubtree, False)
1442 1436 elif b'/' in f:
1443 1437 # This is a flat manifest, so use __setitem__ and setflag rather
1444 1438 # than assigning directly to _files and _flags, so we can
1445 1439 # assign a path in a subdirectory, and to mark dirty (compared
1446 1440 # to nullid).
1447 1441 self[f] = n
1448 1442 if fl:
1449 1443 self.setflag(f, fl)
1450 1444 else:
1451 1445 # Assigning to _files and _flags avoids marking as dirty,
1452 1446 # and should be a little faster.
1453 1447 self._files[f] = n
1454 1448 if fl:
1455 1449 self._flags[f] = fl
1456 1450
1457 1451 def text(self) -> ByteString:
1458 1452 """Get the full data of this manifest as a bytestring."""
1459 1453 self._load()
1460 1454 return _text(self.iterentries())
1461 1455
1462 1456 def dirtext(self) -> ByteString:
1463 1457 """Get the full data of this directory as a bytestring. Make sure that
1464 1458 any submanifests have been written first, so their nodeids are correct.
1465 1459 """
1466 1460 self._load()
1467 1461 flags = self.flags
1468 1462 lazydirs = [(d[:-1], v[0], b't') for d, v in self._lazydirs.items()]
1469 1463 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1470 1464 files = [(f, self._files[f], flags(f)) for f in self._files]
1471 1465 return _text(sorted(dirs + files + lazydirs))
1472 1466
1473 1467 def read(
1474 1468 self,
1475 1469 gettext: Callable[[], ByteString],
1476 1470 readsubtree: Callable[[bytes, bytes], 'TreeManifest'],
1477 1471 ) -> None:
1478 1472 def _load_for_read(s):
1479 1473 s.parse(gettext(), readsubtree)
1480 1474 s._dirty = False
1481 1475
1482 1476 self._loadfunc = _load_for_read
1483 1477
1484 1478 def writesubtrees(
1485 1479 self,
1486 1480 m1: 'TreeManifest',
1487 1481 m2: 'TreeManifest',
1488 1482 writesubtree: Callable[
1489 1483 [
1490 1484 Callable[['TreeManifest'], None],
1491 1485 bytes,
1492 1486 bytes,
1493 1487 matchmod.basematcher,
1494 1488 ],
1495 1489 None,
1496 1490 ],
1497 1491 match: matchmod.basematcher,
1498 1492 ) -> None:
1499 1493 self._load() # for consistency; should never have any effect here
1500 1494 m1._load()
1501 1495 m2._load()
1502 1496 emptytree = treemanifest(self.nodeconstants)
1503 1497
1504 1498 def getnode(m, d):
1505 1499 ld = m._lazydirs.get(d)
1506 1500 if ld:
1507 1501 return ld[0]
1508 1502 tree = m._dirs.get(d, emptytree)
1509 1503 assert tree is not None # helps pytype
1510 1504 return tree._node
1511 1505
1512 1506 # let's skip investigating things that `match` says we do not need.
1513 1507 visit = match.visitchildrenset(self._dir[:-1])
1514 1508 visit = self._loadchildrensetlazy(visit)
1515 1509 if visit == b'this' or visit == b'all':
1516 1510 visit = None
1517 1511 for d, subm in self._dirs.items():
1518 1512 if visit and d[:-1] not in visit:
1519 1513 continue
1520 1514 subp1 = getnode(m1, d)
1521 1515 subp2 = getnode(m2, d)
1522 1516 if subp1 == self.nodeconstants.nullid:
1523 1517 subp1, subp2 = subp2, subp1
1524 1518 writesubtree(subm, subp1, subp2, match)
1525 1519
1526 1520 def walksubtrees(
1527 1521 self, matcher: Optional[matchmod.basematcher] = None
1528 1522 ) -> Iterator['TreeManifest']:
1529 1523 """Returns an iterator of the subtrees of this manifest, including this
1530 1524 manifest itself.
1531 1525
1532 1526 If `matcher` is provided, it only returns subtrees that match.
1533 1527 """
1534 1528 if matcher and not matcher.visitdir(self._dir[:-1]):
1535 1529 return
1536 1530 if not matcher or matcher(self._dir[:-1]):
1537 1531 yield self
1538 1532
1539 1533 self._load()
1540 1534 # OPT: use visitchildrenset to avoid loading everything.
1541 1535 self._loadalllazy()
1542 1536 for d, subm in self._dirs.items():
1543 1537 for subtree in subm.walksubtrees(matcher=matcher):
1544 1538 yield subtree
1545 1539
1546 1540
1547 1541 treemanifest = interfaceutil.implementer(repository.imanifestdict)(TreeManifest)
1548 1542
1549 1543 if typing.TYPE_CHECKING:
1550 1544 treemanifest = TreeManifest
1551 1545
1552 1546
1553 1547 class manifestfulltextcache(util.lrucachedict):
1554 1548 """File-backed LRU cache for the manifest cache
1555 1549
1556 1550 File consists of entries, up to EOF:
1557 1551
1558 1552 - 20 bytes node, 4 bytes length, <length> manifest data
1559 1553
1560 1554 These are written in reverse cache order (oldest to newest).
1561 1555
1562 1556 """
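# Sketch of one on-disk entry as described above (hypothetical values):
#
#   entry = node + struct.pack(b'>L', len(text)) + text
#
# i.e. a fixed 20-byte node, a 4-byte big-endian length, then the raw
# manifest text; read() below simply consumes such records until EOF.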
1563 1557
1564 1558 _file = b'manifestfulltextcache'
1565 1559
1566 1560 def __init__(self, max):
1567 1561 super(manifestfulltextcache, self).__init__(max)
1568 1562 self._dirty = False
1569 1563 self._read = False
1570 1564 self._opener = None
1571 1565
1572 1566 def read(self):
1573 1567 if self._read or self._opener is None:
1574 1568 return
1575 1569
1576 1570 try:
1577 1571 with self._opener(self._file) as fp:
1578 1572 set = super(manifestfulltextcache, self).__setitem__
1579 1573 # ignore trailing data, this is a cache, corruption is skipped
1580 1574 while True:
1581 1575 # TODO do we need to do work here for sha1 portability?
1582 1576 node = fp.read(20)
1583 1577 if len(node) < 20:
1584 1578 break
1585 1579 try:
1586 1580 size = struct.unpack(b'>L', fp.read(4))[0]
1587 1581 except struct.error:
1588 1582 break
1589 1583 value = bytearray(fp.read(size))
1590 1584 if len(value) != size:
1591 1585 break
1592 1586 set(node, value)
1593 1587 except IOError:
1594 1588 # the file is allowed to be missing
1595 1589 pass
1596 1590
1597 1591 self._read = True
1598 1592 self._dirty = False
1599 1593
1600 1594 def write(self):
1601 1595 if not self._dirty or self._opener is None:
1602 1596 return
1603 1597 # rotate backwards to the first used node
1604 1598 try:
1605 1599 with self._opener(
1606 1600 self._file, b'w', atomictemp=True, checkambig=True
1607 1601 ) as fp:
1608 1602 node = self._head.prev
1609 1603 while True:
1610 1604 if node.key in self._cache:
1611 1605 fp.write(node.key)
1612 1606 fp.write(struct.pack(b'>L', len(node.value)))
1613 1607 fp.write(node.value)
1614 1608 if node is self._head:
1615 1609 break
1616 1610 node = node.prev
1617 1611 except IOError:
1618 1612 # We could not write the cache (e.g. a permission error), so
1619 1613 # the cached content may simply be missing.
1620 1614 #
1621 1615 # We could try harder and see if we could recreate a wcache
1622 1616 # directory where we could write to.
1623 1617 #
1624 1618 # XXX the error passes silently; having some way to issue an
1625 1619 # error log via `ui.log` would be nice.
1626 1620 pass
1627 1621
1628 1622 def __len__(self):
1629 1623 if not self._read:
1630 1624 self.read()
1631 1625 return super(manifestfulltextcache, self).__len__()
1632 1626
1633 1627 def __contains__(self, k):
1634 1628 if not self._read:
1635 1629 self.read()
1636 1630 return super(manifestfulltextcache, self).__contains__(k)
1637 1631
1638 1632 def __iter__(self):
1639 1633 if not self._read:
1640 1634 self.read()
1641 1635 return super(manifestfulltextcache, self).__iter__()
1642 1636
1643 1637 def __getitem__(self, k):
1644 1638 if not self._read:
1645 1639 self.read()
1646 1640 # the cache lru order can change on read
1647 1641 setdirty = self._cache.get(k) is not self._head
1648 1642 value = super(manifestfulltextcache, self).__getitem__(k)
1649 1643 if setdirty:
1650 1644 self._dirty = True
1651 1645 return value
1652 1646
1653 1647 def __setitem__(self, k, v):
1654 1648 if not self._read:
1655 1649 self.read()
1656 1650 super(manifestfulltextcache, self).__setitem__(k, v)
1657 1651 self._dirty = True
1658 1652
1659 1653 def __delitem__(self, k):
1660 1654 if not self._read:
1661 1655 self.read()
1662 1656 super(manifestfulltextcache, self).__delitem__(k)
1663 1657 self._dirty = True
1664 1658
1665 1659 def get(self, k, default=None):
1666 1660 if not self._read:
1667 1661 self.read()
1668 1662 return super(manifestfulltextcache, self).get(k, default=default)
1669 1663
1670 1664 def clear(self, clear_persisted_data=False):
1671 1665 super(manifestfulltextcache, self).clear()
1672 1666 if clear_persisted_data:
1673 1667 self._dirty = True
1674 1668 self.write()
1675 1669 self._read = False
1676 1670
1677 1671
1678 1672 # an upper bound of what we expect from compression
1679 1673 # (the real-life value seems to be "3")
1680 1674 MAXCOMPRESSION = 3
1681 1675
1682 1676
1683 1677 class FastdeltaUnavailable(Exception):
1684 1678 """Exception raised when fastdelta isn't usable on a manifest."""
1685 1679
1686 1680
1687 1681 class ManifestRevlog:
1688 1682 """A revlog that stores manifest texts. This is responsible for caching the
1689 1683 full-text manifest contents.
1690 1684 """
1691 1685
1692 1686 def __init__(
1693 1687 self,
1694 1688 nodeconstants,
1695 1689 opener,
1696 1690 tree=b'',
1697 1691 dirlogcache=None,
1698 1692 treemanifest=False,
1699 1693 ):
1700 1694 """Constructs a new manifest revlog
1701 1695
1702 1696 `indexfile` - used by extensions to have two manifests at once, like
1703 1697 when transitioning between flat manifests and treemanifests.
1704 1698
1705 1699 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1706 1700 options can also be used to make this a tree manifest revlog. The opener
1707 1701 option takes precedence, so if it is set to True, we ignore whatever
1708 1702 value is passed in to the constructor.
1709 1703 """
1710 1704 self.nodeconstants = nodeconstants
1711 1705 # During normal operations, we expect to deal with not more than four
1712 1706 # revs at a time (such as during commit --amend). When rebasing large
1713 1707 # stacks of commits, the number can go up, hence the config knob below.
1714 1708 cachesize = 4
1715 1709 optiontreemanifest = False
1716 1710 persistentnodemap = False
1717 1711 opts = getattr(opener, 'options', None)
1718 1712 if opts is not None:
1719 1713 cachesize = opts.get(b'manifestcachesize', cachesize)
1720 1714 optiontreemanifest = opts.get(b'treemanifest', False)
1721 1715 persistentnodemap = opts.get(b'persistent-nodemap', False)
1722 1716
1723 1717 self._treeondisk = optiontreemanifest or treemanifest
1724 1718
1725 1719 self._fulltextcache = manifestfulltextcache(cachesize)
1726 1720
1727 1721 if tree:
1728 1722 assert self._treeondisk, (tree, b'opts is %r' % opts)
1729 1723
1730 1724 radix = b'00manifest'
1731 1725 if tree:
1732 1726 radix = b"meta/" + tree + radix
1733 1727
1734 1728 self.tree = tree
1735 1729
1736 1730 # The dirlogcache is kept on the root manifest log
1737 1731 if tree:
1738 1732 self._dirlogcache = dirlogcache
1739 1733 else:
1740 1734 self._dirlogcache = {b'': self}
1741 1735
1742 1736 self._revlog = revlog.revlog(
1743 1737 opener,
1744 1738 target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
1745 1739 radix=radix,
1746 1740 # only root indexfile is cached
1747 1741 checkambig=not bool(tree),
1748 1742 mmaplargeindex=True,
1749 1743 upperboundcomp=MAXCOMPRESSION,
1750 1744 persistentnodemap=persistentnodemap,
1751 1745 )
1752 1746
1753 1747 self.index = self._revlog.index
1754 1748
1755 1749 def get_revlog(self):
1756 1750 """return an actual revlog instance if any
1757 1751
1758 1752 This exists because a lot of code leverages the fact that the
1759 1753 underlying storage is a revlog for optimization, so giving a simple
1760 1754 way to access the revlog instance helps such code.
1761 1755 """
1762 1756 return self._revlog
1763 1757
1764 1758 def _setupmanifestcachehooks(self, repo):
1765 1759 """Persist the manifestfulltextcache on lock release"""
1766 1760 if not hasattr(repo, '_wlockref'):
1767 1761 return
1768 1762
1769 1763 self._fulltextcache._opener = repo.wcachevfs
1770 1764 if repo._currentlock(repo._wlockref) is None:
1771 1765 return
1772 1766
1773 1767 reporef = weakref.ref(repo)
1774 1768 manifestrevlogref = weakref.ref(self)
1775 1769
1776 1770 def persistmanifestcache(success):
1777 1771 # Repo is in an unknown state, do not persist.
1778 1772 if not success:
1779 1773 return
1780 1774
1781 1775 repo = reporef()
1782 1776 self = manifestrevlogref()
1783 1777 if repo is None or self is None:
1784 1778 return
1785 1779 if repo.manifestlog.getstorage(b'') is not self:
1786 1780 # there's a different manifest in play now, abort
1787 1781 return
1788 1782 self._fulltextcache.write()
1789 1783
1790 1784 repo._afterlock(persistmanifestcache)
1791 1785
1792 1786 @property
1793 1787 def fulltextcache(self):
1794 1788 return self._fulltextcache
1795 1789
1796 1790 def clearcaches(self, clear_persisted_data: bool = False) -> None:
1797 1791 self._revlog.clearcaches()
1798 1792 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1799 1793 self._dirlogcache = {self.tree: self}
1800 1794
1801 1795 def dirlog(self, d):
1802 1796 if d:
1803 1797 assert self._treeondisk
1804 1798 if d not in self._dirlogcache:
1805 1799 mfrevlog = manifestrevlog(
1806 1800 self.nodeconstants,
1807 1801 self.opener,
1808 1802 d,
1809 1803 self._dirlogcache,
1810 1804 treemanifest=self._treeondisk,
1811 1805 )
1812 1806 self._dirlogcache[d] = mfrevlog
1813 1807 return self._dirlogcache[d]
1814 1808
1815 1809 def add(
1816 1810 self,
1817 1811 m,
1818 1812 transaction,
1819 1813 link,
1820 1814 p1,
1821 1815 p2,
1822 1816 added: Iterable[bytes],
1823 1817 removed: Iterable[bytes],
1824 1818 readtree=None,
1825 1819 match=None,
1826 1820 ):
1827 1821 """add a manifest entry into the manifest log
1828 1822
1829 1823 input:
1830 1824
1831 1825 m: the manifest dict we want to store
1832 1826 transaction: the open transaction
1833 1827 p1: manifest-node of p1
1834 1828 p2: manifest-node of p2
1835 1829 added: files added/changed compared to parent
1836 1830 removed: files removed compared to parent
1837 1831
1838 1832 tree manifest input:
1839 1833
1840 1834 readtree: a function to read a subtree
1841 1835 match: a filematcher for the subpart of the tree manifest
1842 1836 """
1843 1837 try:
1844 1838 if p1 not in self.fulltextcache:
1845 1839 raise FastdeltaUnavailable()
1846 1840 # If our first parent is in the manifest cache, we can
1847 1841 # compute a delta here using properties we know about the
1848 1842 # manifest up-front, which may save time later for the
1849 1843 # revlog layer.
1850 1844
1851 1845 _checkforbidden(added)
1852 1846 # combine the changed lists into one sorted iterator
1853 1847 work = heapq.merge(
1854 1848 [(x, False) for x in sorted(added)],
1855 1849 [(x, True) for x in sorted(removed)],
1856 1850 )
1857 1851
1858 1852 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1859 1853 cachedelta = self._revlog.rev(p1), deltatext
1860 1854 text = util.buffer(arraytext)
1861 1855 rev = self._revlog.addrevision(
1862 1856 text, transaction, link, p1, p2, cachedelta
1863 1857 )
1864 1858 n = self._revlog.node(rev)
1865 1859 except FastdeltaUnavailable:
1866 1860 # The first parent manifest isn't already loaded or the
1867 1861 # manifest implementation doesn't support fastdelta, so
1868 1862 # we'll just encode a fulltext of the manifest and pass
1869 1863 # that through to the revlog layer, and let it handle the
1870 1864 # delta process.
1871 1865 if self._treeondisk:
1872 1866 assert readtree, b"readtree must be set for treemanifest writes"
1873 1867 assert match, b"match must be specified for treemanifest writes"
1874 1868 m1 = readtree(self.tree, p1)
1875 1869 m2 = readtree(self.tree, p2)
1876 1870 n = self._addtree(
1877 1871 m, transaction, link, m1, m2, readtree, match=match
1878 1872 )
1879 1873 arraytext = None
1880 1874 else:
1881 1875 text = m.text()
1882 1876 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1883 1877 n = self._revlog.node(rev)
1884 1878 arraytext = bytearray(text)
1885 1879
1886 1880 if arraytext is not None:
1887 1881 self.fulltextcache[n] = arraytext
1888 1882
1889 1883 return n
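# Illustration (hypothetical file names, not part of the original method):
# the "work" iterator built above interleaves additions and removals into a
# single path-sorted stream of (path, is_removal) pairs, for example:
#
#   >>> import heapq
#   >>> added = [b'src/b.py', b'src/a.py']
#   >>> removed = [b'docs/old.txt']
#   >>> list(heapq.merge(
#   ...     [(x, False) for x in sorted(added)],
#   ...     [(x, True) for x in sorted(removed)],
#   ... ))
#   [(b'docs/old.txt', True), (b'src/a.py', False), (b'src/b.py', False)]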
1890 1884
1891 1885 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1892 1886 # If the manifest is unchanged compared to one parent,
1893 1887 # don't write a new revision
1894 1888 if self.tree != b'' and (
1895 1889 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1896 1890 ):
1897 1891 return m.node()
1898 1892
1899 1893 def writesubtree(subm, subp1, subp2, match):
1900 1894 sublog = self.dirlog(subm.dir())
1901 1895 sublog.add(
1902 1896 subm,
1903 1897 transaction,
1904 1898 link,
1905 1899 subp1,
1906 1900 subp2,
1907 1901 None,
1908 1902 None,
1909 1903 readtree=readtree,
1910 1904 match=match,
1911 1905 )
1912 1906
1913 1907 m.writesubtrees(m1, m2, writesubtree, match)
1914 1908 text = m.dirtext()
1915 1909 n = None
1916 1910 if self.tree != b'':
1917 1911 # Double-check whether contents are unchanged compared to one parent
1918 1912 if text == m1.dirtext():
1919 1913 n = m1.node()
1920 1914 elif text == m2.dirtext():
1921 1915 n = m2.node()
1922 1916
1923 1917 if not n:
1924 1918 rev = self._revlog.addrevision(
1925 1919 text, transaction, link, m1.node(), m2.node()
1926 1920 )
1927 1921 n = self._revlog.node(rev)
1928 1922
1929 1923 # Save nodeid so parent manifest can calculate its nodeid
1930 1924 m.setnode(n)
1931 1925 return n
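# Illustration (hypothetical helper, simplified): ignoring the root-tree
# special case, _addtree() above boils down to "reuse a parent's node when
# the serialized directory text is unchanged, otherwise add a new
# revision", roughly:
#
#   def store_or_reuse(text, m1, m2, addrev):
#       if text == m1.dirtext():
#           return m1.node()
#       if text == m2.dirtext():
#           return m2.node()
#       return addrev(text, m1.node(), m2.node())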
1932 1926
1933 1927 def __len__(self):
1934 1928 return len(self._revlog)
1935 1929
1936 1930 def __iter__(self):
1937 1931 return self._revlog.__iter__()
1938 1932
1939 1933 def rev(self, node):
1940 1934 return self._revlog.rev(node)
1941 1935
1942 1936 def node(self, rev):
1943 1937 return self._revlog.node(rev)
1944 1938
1945 1939 def lookup(self, value):
1946 1940 return self._revlog.lookup(value)
1947 1941
1948 1942 def parentrevs(self, rev):
1949 1943 return self._revlog.parentrevs(rev)
1950 1944
1951 1945 def parents(self, node):
1952 1946 return self._revlog.parents(node)
1953 1947
1954 1948 def linkrev(self, rev):
1955 1949 return self._revlog.linkrev(rev)
1956 1950
1957 1951 def checksize(self):
1958 1952 return self._revlog.checksize()
1959 1953
1960 1954 def revision(self, node):
1961 1955 return self._revlog.revision(node)
1962 1956
1963 1957 def rawdata(self, node):
1964 1958 return self._revlog.rawdata(node)
1965 1959
1966 1960 def revdiff(self, rev1, rev2):
1967 1961 return self._revlog.revdiff(rev1, rev2)
1968 1962
1969 1963 def cmp(self, node, text):
1970 1964 return self._revlog.cmp(node, text)
1971 1965
1972 1966 def deltaparent(self, rev):
1973 1967 return self._revlog.deltaparent(rev)
1974 1968
1975 1969 def emitrevisions(
1976 1970 self,
1977 1971 nodes,
1978 1972 nodesorder=None,
1979 1973 revisiondata=False,
1980 1974 assumehaveparentrevisions=False,
1981 1975 deltamode=repository.CG_DELTAMODE_STD,
1982 1976 sidedata_helpers=None,
1983 1977 debug_info=None,
1984 1978 ):
1985 1979 return self._revlog.emitrevisions(
1986 1980 nodes,
1987 1981 nodesorder=nodesorder,
1988 1982 revisiondata=revisiondata,
1989 1983 assumehaveparentrevisions=assumehaveparentrevisions,
1990 1984 deltamode=deltamode,
1991 1985 sidedata_helpers=sidedata_helpers,
1992 1986 debug_info=debug_info,
1993 1987 )
1994 1988
1995 1989 def addgroup(
1996 1990 self,
1997 1991 deltas,
1998 1992 linkmapper,
1999 1993 transaction,
2000 1994 alwayscache=False,
2001 1995 addrevisioncb=None,
2002 1996 duplicaterevisioncb=None,
2003 1997 debug_info=None,
2004 1998 delta_base_reuse_policy=None,
2005 1999 ):
2006 2000 return self._revlog.addgroup(
2007 2001 deltas,
2008 2002 linkmapper,
2009 2003 transaction,
2010 2004 alwayscache=alwayscache,
2011 2005 addrevisioncb=addrevisioncb,
2012 2006 duplicaterevisioncb=duplicaterevisioncb,
2013 2007 debug_info=debug_info,
2014 2008 delta_base_reuse_policy=delta_base_reuse_policy,
2015 2009 )
2016 2010
2017 2011 def rawsize(self, rev):
2018 2012 return self._revlog.rawsize(rev)
2019 2013
2020 2014 def getstrippoint(self, minlink):
2021 2015 return self._revlog.getstrippoint(minlink)
2022 2016
2023 2017 def strip(self, minlink, transaction):
2024 2018 return self._revlog.strip(minlink, transaction)
2025 2019
2026 2020 def files(self):
2027 2021 return self._revlog.files()
2028 2022
2029 2023 def clone(self, tr, destrevlog, **kwargs):
2030 2024 if not isinstance(destrevlog, manifestrevlog):
2031 2025 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
2032 2026
2033 2027 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
2034 2028
2035 2029 def storageinfo(
2036 2030 self,
2037 2031 exclusivefiles=False,
2038 2032 sharedfiles=False,
2039 2033 revisionscount=False,
2040 2034 trackedsize=False,
2041 2035 storedsize=False,
2042 2036 ):
2043 2037 return self._revlog.storageinfo(
2044 2038 exclusivefiles=exclusivefiles,
2045 2039 sharedfiles=sharedfiles,
2046 2040 revisionscount=revisionscount,
2047 2041 trackedsize=trackedsize,
2048 2042 storedsize=storedsize,
2049 2043 )
2050 2044
2051 2045 @property
2052 2046 def opener(self):
2053 2047 return self._revlog.opener
2054 2048
2055 2049 @opener.setter
2056 2050 def opener(self, value):
2057 2051 self._revlog.opener = value
2058 2052
2059 2053
2060 2054 manifestrevlog = interfaceutil.implementer(repository.imanifeststorage)(
2061 2055 ManifestRevlog
2062 2056 )
2063 2057
2064 2058 if typing.TYPE_CHECKING:
2065 2059 manifestrevlog = ManifestRevlog
2066 2060
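# Illustrative sketch (hypothetical names, simplified stand-in for
# interfaceutil.implementer): the pattern above registers the class against
# an interface at runtime while rebinding the lowercase name to the plain
# class for type checkers, so static analysis sees the concrete methods.


def _example_implementer(iface):
    # stand-in decorator: mark the class at runtime, leave behaviour alone
    def decorate(cls):
        cls._example_provides = iface
        return cls

    return decorate


class _ExampleStorage:
    def node_count(self) -> int:
        return 0


_examplestorage = _example_implementer('istorage')(_ExampleStorage)

if typing.TYPE_CHECKING:
    # static checkers analyse the undecorated class directly
    _examplestorage = _ExampleStorage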
2067 2061 AnyManifestCtx = Union['ManifestCtx', 'TreeManifestCtx']
2068 AnyManifestDict = Union[ManifestDict, TreeManifest]
2062 AnyManifestDict = Union[manifestdict, TreeManifest]
2069 2063
2070 2064
2071 2065 class ManifestLog:
2072 2066 """A collection class representing the collection of manifest snapshots
2073 2067 referenced by commits in the repository.
2074 2068
2075 2069 In this situation, 'manifest' refers to the abstract concept of a snapshot
2076 2070 of the list of files in the given commit. Consumers of the output of this
2077 2071 class do not care about the implementation details of the actual manifests
2078 2072 they receive (i.e. tree or flat or lazily loaded, etc)."""
2079 2073
2080 2074 def __init__(self, opener, repo, rootstore, narrowmatch):
2081 2075 self.nodeconstants = repo.nodeconstants
2082 2076 usetreemanifest = False
2083 2077 cachesize = 4
2084 2078
2085 2079 opts = getattr(opener, 'options', None)
2086 2080 if opts is not None:
2087 2081 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
2088 2082 cachesize = opts.get(b'manifestcachesize', cachesize)
2089 2083
2090 2084 self._treemanifests = usetreemanifest
2091 2085
2092 2086 self._rootstore = rootstore
2093 2087 self._rootstore._setupmanifestcachehooks(repo)
2094 2088 self._narrowmatch = narrowmatch
2095 2089
2096 2090 # A cache of the manifestctx or treemanifestctx for each directory
2097 2091 self._dirmancache = {}
2098 2092 self._dirmancache[b''] = util.lrucachedict(cachesize)
2099 2093
2100 2094 self._cachesize = cachesize
2101 2095
2102 2096 def __getitem__(self, node):
2103 2097 """Retrieves the manifest instance for the given node. Throws a
2104 2098 LookupError if not found.
2105 2099 """
2106 2100 return self.get(b'', node)
2107 2101
2108 2102 @property
2109 2103 def narrowed(self):
2110 2104 return not (self._narrowmatch is None or self._narrowmatch.always())
2111 2105
2112 2106 def get(
2113 2107 self, tree: bytes, node: bytes, verify: bool = True
2114 2108 ) -> AnyManifestCtx:
2115 2109 """Retrieves the manifest instance for the given node. Throws a
2116 2110 LookupError if not found.
2117 2111
2118 2112 `verify` - if True an exception will be thrown if the node is not in
2119 2113 the revlog
2120 2114 """
2121 2115 if node in self._dirmancache.get(tree, ()):
2122 2116 return self._dirmancache[tree][node]
2123 2117
2124 2118 if not self._narrowmatch.always():
2125 2119 if not self._narrowmatch.visitdir(tree[:-1]):
2126 2120 return excludeddirmanifestctx(self.nodeconstants, tree, node)
2127 2121 if tree:
2128 2122 if self._rootstore._treeondisk:
2129 2123 if verify:
2130 2124 # Side effect: LookupError is raised if the node doesn't
2131 2125 # exist.
2132 2126 self.getstorage(tree).rev(node)
2133 2127
2134 2128 m = treemanifestctx(self, tree, node)
2135 2129 else:
2136 2130 raise error.Abort(
2137 2131 _(
2138 2132 b"cannot ask for manifest directory '%s' in a flat "
2139 2133 b"manifest"
2140 2134 )
2141 2135 % tree
2142 2136 )
2143 2137 else:
2144 2138 if verify:
2145 2139 # Side effect: LookupError is raised if the node doesn't exist.
2146 2140 self._rootstore.rev(node)
2147 2141
2148 2142 if self._treemanifests:
2149 2143 m = treemanifestctx(self, b'', node)
2150 2144 else:
2151 2145 m = manifestctx(self, node)
2152 2146
2153 2147 if node != self.nodeconstants.nullid:
2154 2148 mancache = self._dirmancache.get(tree)
2155 2149 if not mancache:
2156 2150 mancache = util.lrucachedict(self._cachesize)
2157 2151 self._dirmancache[tree] = mancache
2158 2152 mancache[node] = m
2159 2153 return m
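# Illustration (hypothetical usage; assumes an existing `repo` object and a
# manifest `node` other than nullid): repeated lookups of the same node are
# served from the per-directory LRU cache populated above:
#
#   >>> mfl = repo.manifestlog
#   >>> ctx_a = mfl.get(b'', node)    # first call reads from storage
#   >>> ctx_b = mfl.get(b'', node)    # second call hits _dirmancache
#   >>> ctx_a is ctx_b
#   True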
2160 2154
2161 2155 def getstorage(self, tree):
2162 2156 return self._rootstore.dirlog(tree)
2163 2157
2164 2158 def clearcaches(self, clear_persisted_data: bool = False) -> None:
2165 2159 self._dirmancache.clear()
2166 2160 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2167 2161
2168 2162 def rev(self, node) -> int:
2169 2163 return self._rootstore.rev(node)
2170 2164
2171 2165 def update_caches(self, transaction) -> None:
2172 2166 return self._rootstore._revlog.update_caches(transaction=transaction)
2173 2167
2174 2168
2175 2169 manifestlog = interfaceutil.implementer(repository.imanifestlog)(ManifestLog)
2176 2170
2177 2171 if typing.TYPE_CHECKING:
2178 2172 manifestlog = ManifestLog
2179 2173
2180 2174
2181 2175 class MemManifestCtx:
2182 _manifestdict: ManifestDict
2176 _manifestdict: manifestdict
2183 2177
2184 2178 def __init__(self, manifestlog):
2185 2179 self._manifestlog = manifestlog
2186 2180 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2187 2181
2188 2182 def _storage(self) -> ManifestRevlog:
2189 2183 return self._manifestlog.getstorage(b'')
2190 2184
2191 2185 def copy(self) -> 'MemManifestCtx':
2192 2186 memmf = memmanifestctx(self._manifestlog)
2193 2187 memmf._manifestdict = self.read().copy()
2194 2188 return memmf
2195 2189
2196 def read(self) -> 'ManifestDict':
2190 def read(self) -> 'manifestdict':
2197 2191 return self._manifestdict
2198 2192
2199 2193 def write(self, transaction, link, p1, p2, added, removed, match=None):
2200 2194 return self._storage().add(
2201 2195 self._manifestdict,
2202 2196 transaction,
2203 2197 link,
2204 2198 p1,
2205 2199 p2,
2206 2200 added,
2207 2201 removed,
2208 2202 match=match,
2209 2203 )
2210 2204
2211 2205
2212 2206 memmanifestctx = interfaceutil.implementer(
2213 2207 repository.imanifestrevisionwritable
2214 2208 )(MemManifestCtx)
2215 2209
2216 2210 if typing.TYPE_CHECKING:
2217 2211 memmanifestctx = MemManifestCtx
2218 2212
2219 2213
2220 2214 class ManifestCtx:
2221 2215 """A class representing a single revision of a manifest, including its
2222 2216 contents, its parent revs, and its linkrev.
2223 2217 """
2224 2218
2225 _data: Optional[ManifestDict]
2219 _data: Optional[manifestdict]
2226 2220
2227 2221 def __init__(self, manifestlog, node):
2228 2222 self._manifestlog = manifestlog
2229 2223 self._data = None
2230 2224
2231 2225 self._node = node
2232 2226
2233 2227 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2234 2228 # but let's add it later when something needs it and we can load it
2235 2229 # lazily.
2236 2230 # self.p1, self.p2 = store.parents(node)
2237 2231 # rev = store.rev(node)
2238 2232 # self.linkrev = store.linkrev(rev)
2239 2233
2240 2234 def _storage(self) -> 'ManifestRevlog':
2241 2235 return self._manifestlog.getstorage(b'')
2242 2236
2243 2237 def node(self) -> bytes:
2244 2238 return self._node
2245 2239
2246 2240 def copy(self) -> MemManifestCtx:
2247 2241 memmf = memmanifestctx(self._manifestlog)
2248 2242 memmf._manifestdict = self.read().copy()
2249 2243 return memmf
2250 2244
2251 2245 @propertycache
2252 2246 def parents(self) -> Tuple[bytes, bytes]:
2253 2247 return self._storage().parents(self._node)
2254 2248
2255 def read(self) -> 'ManifestDict':
2249 def read(self) -> 'manifestdict':
2256 2250 if self._data is None:
2257 2251 nc = self._manifestlog.nodeconstants
2258 2252 if self._node == nc.nullid:
2259 2253 self._data = manifestdict(nc.nodelen)
2260 2254 else:
2261 2255 store = self._storage()
2262 2256 if self._node in store.fulltextcache:
2263 2257 text = pycompat.bytestr(store.fulltextcache[self._node])
2264 2258 else:
2265 2259 text = store.revision(self._node)
2266 2260 arraytext = bytearray(text)
2267 2261 store.fulltextcache[self._node] = arraytext
2268 2262 self._data = manifestdict(nc.nodelen, text)
2269 2263 return self._data
2270 2264
2271 def readfast(self, shallow: bool = False) -> 'ManifestDict':
2265 def readfast(self, shallow: bool = False) -> 'manifestdict':
2272 2266 """Calls either readdelta or read, based on which would be less work.
2273 2267 readdelta is called if the delta is against the p1, and therefore can be
2274 2268 read quickly.
2275 2269
2276 2270 If `shallow` is True, nothing changes since this is a flat manifest.
2277 2271 """
2278 2272 util.nouideprecwarn(
2279 2273 b'"readfast" is deprecated, use "read_any_fast_delta" or "read_delta_parents"',
2280 2274 b"6.9",
2281 2275 stacklevel=2,
2282 2276 )
2283 2277 store = self._storage()
2284 2278 r = store.rev(self._node)
2285 2279 deltaparent = store.deltaparent(r)
2286 2280 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2287 2281 return self.readdelta()
2288 2282 return self.read()
2289 2283
2290 def readdelta(self, shallow: bool = False) -> 'ManifestDict':
2284 def readdelta(self, shallow: bool = False) -> 'manifestdict':
2291 2285 """Returns a manifest containing just the entries that are present
2292 2286 in this manifest, but not in its p1 manifest. This is efficient to read
2293 2287 if the revlog delta is already p1.
2294 2288
2295 2289 Changing the value of `shallow` has no effect on flat manifests.
2296 2290 """
2297 2291 util.nouideprecwarn(
2298 2292 b'"readdelta" is deprecated, use "read_any_fast_delta" or "read_delta_new_entries"',
2299 2293 b"6.9",
2300 2294 stacklevel=2,
2301 2295 )
2302 2296 store = self._storage()
2303 2297 r = store.rev(self._node)
2304 2298 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2305 2299 return manifestdict(store.nodeconstants.nodelen, d)
2306 2300
2307 2301 def read_any_fast_delta(
2308 2302 self,
2309 2303 valid_bases: Optional[Collection[int]] = None,
2310 2304 *,
2311 2305 shallow: bool = False,
2312 ) -> Tuple[Optional[int], ManifestDict]:
2306 ) -> Tuple[Optional[int], manifestdict]:
2313 2307 """see `imanifestrevisionstored` documentation"""
2314 2308 store = self._storage()
2315 2309 r = store.rev(self._node)
2316 2310 deltaparent = store.deltaparent(r)
2317 2311 if valid_bases is None:
2318 2312 # make sure the next check is True
2319 2313 valid_bases = (deltaparent,)
2320 2314 if deltaparent != nullrev and deltaparent in valid_bases:
2321 2315 d = mdiff.patchtext(store.revdiff(deltaparent, r))
2322 2316 return (
2323 2317 deltaparent,
2324 2318 manifestdict(store.nodeconstants.nodelen, d),
2325 2319 )
2326 2320 return (None, self.read())
2327 2321
2328 2322 def read_delta_parents(
2329 2323 self,
2330 2324 *,
2331 2325 shallow: bool = False,
2332 2326 exact: bool = True,
2333 ) -> ManifestDict:
2327 ) -> manifestdict:
2334 2328 """see `interface.imanifestrevisionbase` documentations"""
2335 2329 store = self._storage()
2336 2330 r = store.rev(self._node)
2337 2331 deltaparent = store.deltaparent(r)
2338 2332 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2339 2333 if not exact and deltaparent in parents:
2340 2334 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2341 2335 return manifestdict(store.nodeconstants.nodelen, d)
2342 2336 elif not exact or len(parents) == 0:
2343 2337 return self.read()
2344 2338 elif len(parents) == 1:
2345 2339 p = parents[0]
2346 2340 d = mdiff.patchtext(store.revdiff(p, r))
2347 2341 return manifestdict(store.nodeconstants.nodelen, d)
2348 2342 else:
2349 2343 p1, p2 = parents
2350 2344 d1 = mdiff.patchtext(store.revdiff(p1, r))
2351 2345 d2 = mdiff.patchtext(store.revdiff(p2, r))
2352 2346 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2353 2347 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2354 2348 md = manifestdict(store.nodeconstants.nodelen)
2355 2349 for f, new_node, new_flag in d1.iterentries():
2356 2350 if f not in d2:
2357 2351 continue
2358 2352 if new_node is not None:
2359 2353 md.set(f, new_node, new_flag)
2360 2354 return md
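# Illustration (hypothetical data): with two parents, an entry is kept only
# when it appears in the delta against *both* of them, i.e. the file really
# changed in this revision rather than being inherited from one side of the
# merge. In plain dict form the loop above is roughly:
#
#   d1 = {b'a': node_a, b'b': node_b}   # entries that differ from p1
#   d2 = {b'b': node_b, b'c': node_c}   # entries that differ from p2
#   new_here = {f: d1[f] for f in d1 if f in d2}
#   # -> {b'b': node_b}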
2361 2355
2362 def read_delta_new_entries(self, *, shallow=False) -> ManifestDict:
2356 def read_delta_new_entries(self, *, shallow=False) -> manifestdict:
2363 2357 """see `interface.imanifestrevisionbase` documentations"""
2364 2358 # If we are using narrow, returning a delta against an arbitrary
2365 2359 # changeset might return files outside the narrowspec. This can create
2366 2360 # issues when running validation server side with strict security, as a
2367 2361 # push from a low-privilege user might be seen as adding new revisions
2368 2362 # for files they cannot touch. So we are strict if narrow is involved.
2369 2363 if self._manifestlog.narrowed:
2370 2364 return self.read_delta_parents(shallow=shallow, exact=True)
2371 2365 store = self._storage()
2372 2366 r = store.rev(self._node)
2373 2367 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2374 2368 return manifestdict(store.nodeconstants.nodelen, d)
2375 2369
2376 2370 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2377 2371 return self.read().find(key)
2378 2372
2379 2373
2380 2374 manifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2381 2375 ManifestCtx
2382 2376 )
2383 2377
2384 2378 if typing.TYPE_CHECKING:
2385 2379 manifestctx = ManifestCtx
2386 2380
2387 2381
2388 2382 class MemTreeManifestCtx:
2389 2383 _treemanifest: TreeManifest
2390 2384
2391 2385 def __init__(self, manifestlog, dir=b''):
2392 2386 self._manifestlog = manifestlog
2393 2387 self._dir = dir
2394 2388 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2395 2389
2396 2390 def _storage(self) -> ManifestRevlog:
2397 2391 return self._manifestlog.getstorage(b'')
2398 2392
2399 2393 def copy(self) -> 'MemTreeManifestCtx':
2400 2394 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2401 2395 memmf._treemanifest = self._treemanifest.copy()
2402 2396 return memmf
2403 2397
2404 2398 def read(self) -> 'TreeManifest':
2405 2399 return self._treemanifest
2406 2400
2407 2401 def write(self, transaction, link, p1, p2, added, removed, match=None):
2408 2402 def readtree(dir, node):
2409 2403 return self._manifestlog.get(dir, node).read()
2410 2404
2411 2405 return self._storage().add(
2412 2406 self._treemanifest,
2413 2407 transaction,
2414 2408 link,
2415 2409 p1,
2416 2410 p2,
2417 2411 added,
2418 2412 removed,
2419 2413 readtree=readtree,
2420 2414 match=match,
2421 2415 )
2422 2416
2423 2417
2424 2418 memtreemanifestctx = interfaceutil.implementer(
2425 2419 repository.imanifestrevisionwritable
2426 2420 )(MemTreeManifestCtx)
2427 2421
2428 2422 if typing.TYPE_CHECKING:
2429 2423 memtreemanifestctx = MemTreeManifestCtx
2430 2424
2431 2425
2432 2426 class TreeManifestCtx:
2433 2427 _data: Optional[TreeManifest]
2434 2428
2435 2429 def __init__(self, manifestlog, dir, node):
2436 2430 self._manifestlog = manifestlog
2437 2431 self._dir = dir
2438 2432 self._data = None
2439 2433
2440 2434 self._node = node
2441 2435
2442 2436 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2443 2437 # we can instantiate treemanifestctx objects for directories we don't
2444 2438 # have on disk.
2445 2439 # self.p1, self.p2 = store.parents(node)
2446 2440 # rev = store.rev(node)
2447 2441 # self.linkrev = store.linkrev(rev)
2448 2442
2449 2443 def _storage(self) -> ManifestRevlog:
2450 2444 narrowmatch = self._manifestlog._narrowmatch
2451 2445 if not narrowmatch.always():
2452 2446 if not narrowmatch.visitdir(self._dir[:-1]):
2453 2447 return excludedmanifestrevlog(
2454 2448 self._manifestlog.nodeconstants, self._dir
2455 2449 )
2456 2450 return self._manifestlog.getstorage(self._dir)
2457 2451
2458 2452 def read(self) -> 'TreeManifest':
2459 2453 if self._data is None:
2460 2454 store = self._storage()
2461 2455 if self._node == self._manifestlog.nodeconstants.nullid:
2462 2456 self._data = treemanifest(self._manifestlog.nodeconstants)
2463 2457 # TODO accessing non-public API
2464 2458 elif store._treeondisk:
2465 2459 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2466 2460
2467 2461 def gettext():
2468 2462 return store.revision(self._node)
2469 2463
2470 2464 def readsubtree(dir, subm):
2471 2465 # Set verify to False since we need to be able to create
2472 2466 # subtrees for trees that don't exist on disk.
2473 2467 return self._manifestlog.get(dir, subm, verify=False).read()
2474 2468
2475 2469 m.read(gettext, readsubtree)
2476 2470 m.setnode(self._node)
2477 2471 self._data = m
2478 2472 else:
2479 2473 if self._node in store.fulltextcache:
2480 2474 text = pycompat.bytestr(store.fulltextcache[self._node])
2481 2475 else:
2482 2476 text = store.revision(self._node)
2483 2477 arraytext = bytearray(text)
2484 2478 store.fulltextcache[self._node] = arraytext
2485 2479 self._data = treemanifest(
2486 2480 self._manifestlog.nodeconstants, dir=self._dir, text=text
2487 2481 )
2488 2482
2489 2483 return self._data
2490 2484
2491 2485 def node(self) -> bytes:
2492 2486 return self._node
2493 2487
2494 2488 def copy(self) -> 'MemTreeManifestCtx':
2495 2489 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2496 2490 memmf._treemanifest = self.read().copy()
2497 2491 return memmf
2498 2492
2499 2493 @propertycache
2500 2494 def parents(self) -> Tuple[bytes, bytes]:
2501 2495 return self._storage().parents(self._node)
2502 2496
2503 2497 def readdelta(self, shallow: bool = False) -> AnyManifestDict:
2504 2498 """see `imanifestrevisionstored` documentation"""
2505 2499 util.nouideprecwarn(
2506 2500 b'"readdelta" is deprecated, use "read_any_fast_delta" or "read_delta_new_entries"',
2507 2501 b"6.9",
2508 2502 stacklevel=2,
2509 2503 )
2510 2504 store = self._storage()
2511 2505 if shallow:
2512 2506 r = store.rev(self._node)
2513 2507 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2514 2508 return manifestdict(store.nodeconstants.nodelen, d)
2515 2509 else:
2516 2510 # Need to perform a slow delta
2517 2511 r0 = store.deltaparent(store.rev(self._node))
2518 2512 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2519 2513 m1 = self.read()
2520 2514 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2521 2515 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2522 2516 if n1:
2523 2517 md[f] = n1
2524 2518 if fl1:
2525 2519 md.setflag(f, fl1)
2526 2520 return md
2527 2521
2528 2522 def read_any_fast_delta(
2529 2523 self,
2530 2524 valid_bases: Optional[Collection[int]] = None,
2531 2525 *,
2532 2526 shallow: bool = False,
2533 2527 ) -> Tuple[Optional[int], AnyManifestDict]:
2534 2528 """see `imanifestrevisionstored` documentation"""
2535 2529 store = self._storage()
2536 2530 r = store.rev(self._node)
2537 2531 deltaparent = store.deltaparent(r)
2538 2532
2539 2533 if valid_bases is None:
2540 2534 # make sure the next check is True
2541 2535 valid_bases = (deltaparent,)
2542 2536 can_use_delta = deltaparent != nullrev and deltaparent in valid_bases
2543 2537
2544 2538 if shallow:
2545 2539 if can_use_delta:
2546 2540 return (deltaparent, self._read_storage_delta_shallow())
2547 2541 else:
2548 2542 d = store.revision(self._node)
2549 2543 return (None, manifestdict(store.nodeconstants.nodelen, d))
2550 2544 else:
2551 2545 # note: the use of "slow_delta" here is cargo-culted from the previous
2552 2546 # implementation. I am not sure it makes sense since the goal here is to
2553 2547 # be fast, so why are we computing a delta? On the other hand, tree
2554 2548 # manifest deltas are fairly "cheap" and allow for skipping whole parts
2555 2549 # of the tree that a full read would access. So it might be a good idea.
2556 2550 #
2557 2551 # If we realize we don't need delta here, we should simply use:
2558 2552 #
2559 2553 # return (None, self.read())
2560 2554 if can_use_delta:
2561 2555 return (None, self._read_storage_slow_delta(base=deltaparent))
2562 2556 else:
2563 2557 parents = [
2564 2558 p
2565 2559 for p in store.parentrevs(r)
2566 2560 if p is not nullrev and p in valid_bases
2567 2561 ]
2568 2562 if parents:
2569 2563 best_base = max(parents)
2570 2564 else:
2571 2565 best_base = max(valid_bases)
2572 2566 return (None, self._read_storage_slow_delta(base=best_base))
2573 2567
2574 def _read_storage_delta_shallow(self) -> ManifestDict:
2568 def _read_storage_delta_shallow(self) -> manifestdict:
2575 2569 store = self._storage()
2576 2570 r = store.rev(self._node)
2577 2571 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2578 2572 return manifestdict(store.nodeconstants.nodelen, d)
2579 2573
2580 2574 def _read_storage_slow_delta(self, base) -> 'TreeManifest':
2581 2575 store = self._storage()
2582 2576 if base is None:
2583 2577 base = store.deltaparent(store.rev(self._node))
2584 2578 m0 = self._manifestlog.get(self._dir, store.node(base)).read()
2585 2579 m1 = self.read()
2586 2580 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2587 2581 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2588 2582 if n1:
2589 2583 md[f] = n1
2590 2584 if fl1:
2591 2585 md.setflag(f, fl1)
2592 2586 return md
2593 2587
2594 2588 def read_delta_parents(
2595 2589 self,
2596 2590 *,
2597 2591 shallow: bool = False,
2598 2592 exact: bool = True,
2599 2593 ) -> AnyManifestDict:
2600 2594 """see `interface.imanifestrevisionbase` documentations"""
2601 2595 store = self._storage()
2602 2596 r = store.rev(self._node)
2603 2597 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2604 2598 if not exact:
2605 2599 return self.read_any_fast_delta(parents, shallow=shallow)[1]
2606 2600 elif len(parents) == 0:
2607 2601 if shallow:
2608 2602 d = store.revision(self._node)
2609 2603 return manifestdict(store.nodeconstants.nodelen, d)
2610 2604 else:
2611 2605 return self.read()
2612 2606 elif len(parents) == 1:
2613 2607 p = parents[0]
2614 2608 if shallow:
2615 2609 d = mdiff.patchtext(store.revdiff(p, r))
2616 2610 return manifestdict(store.nodeconstants.nodelen, d)
2617 2611 else:
2618 2612 return self._read_storage_slow_delta(base=p)
2619 2613 else:
2620 2614 p1, p2 = parents
2621 2615 if shallow:
2622 2616 d1 = mdiff.patchtext(store.revdiff(p1, r))
2623 2617 d2 = mdiff.patchtext(store.revdiff(p2, r))
2624 2618 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2625 2619 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2626 2620 md = manifestdict(store.nodeconstants.nodelen)
2627 2621 for f, new_node, new_flag in d1.iterentries():
2628 2622 if f not in d2:
2629 2623 continue
2630 2624 if new_node is not None:
2631 2625 md.set(f, new_node, new_flag)
2632 2626 return md
2633 2627 else:
2634 2628 m1 = self._manifestlog.get(self._dir, store.node(p1)).read()
2635 2629 m2 = self._manifestlog.get(self._dir, store.node(p2)).read()
2636 2630 mc = self.read()
2637 2631 d1 = m1.diff(mc)
2638 2632 d2 = m2.diff(mc)
2639 2633 md = treemanifest(
2640 2634 self._manifestlog.nodeconstants,
2641 2635 dir=self._dir,
2642 2636 )
2643 2637 for f, new_node, new_flag in d1.iterentries():
2644 2638 if f not in d2:
2645 2639 continue
2646 2640 if new_node is not None:
2647 2641 md.set(f, new_node, new_flag)
2648 2642 return md
2649 2643
2650 2644 def read_delta_new_entries(
2651 2645 self, *, shallow: bool = False
2652 2646 ) -> AnyManifestDict:
2653 2647 """see `interface.imanifestrevisionbase` documentations"""
2654 2648 # If we are using narrow, returning a delta against an arbitrary
2655 2649 # changeset might return files outside the narrowspec. This can create
2656 2650 # issues when running validation server side with strict security, as a
2657 2651 # push from a low-privilege user might be seen as adding new revisions
2658 2652 # for files they cannot touch. So we are strict if narrow is involved.
2659 2653 if self._manifestlog.narrowed:
2660 2654 return self.read_delta_parents(shallow=shallow, exact=True)
2661 2655 # delegate to another existing method for simplicity
2662 2656 store = self._storage()
2663 2657 r = store.rev(self._node)
2664 2658 bases = (store.deltaparent(r),)
2665 2659 return self.read_any_fast_delta(bases, shallow=shallow)[1]
2666 2660
2667 2661 def readfast(self, shallow=False) -> AnyManifestDict:
2668 2662 """Calls either readdelta or read, based on which would be less work.
2669 2663 readdelta is called if the delta is against the p1, and therefore can be
2670 2664 read quickly.
2671 2665
2672 2666 If `shallow` is True, it only returns the entries from this manifest,
2673 2667 and not any submanifests.
2674 2668 """
2675 2669 util.nouideprecwarn(
2676 2670 b'"readfast" is deprecated, use "read_any_fast_delta" or "read_delta_parents"',
2677 2671 b"6.9",
2678 2672 stacklevel=2,
2679 2673 )
2680 2674 store = self._storage()
2681 2675 r = store.rev(self._node)
2682 2676 deltaparent = store.deltaparent(r)
2683 2677 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2684 2678 return self.readdelta(shallow=shallow)
2685 2679
2686 2680 if shallow:
2687 2681 return manifestdict(
2688 2682 store.nodeconstants.nodelen, store.revision(self._node)
2689 2683 )
2690 2684 else:
2691 2685 return self.read()
2692 2686
2693 2687 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2694 2688 return self.read().find(key)
2695 2689
2696 2690
2697 2691 treemanifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2698 2692 TreeManifestCtx
2699 2693 )
2700 2694
2701 2695 if typing.TYPE_CHECKING:
2702 2696 treemanifestctx = TreeManifestCtx
2703 2697
2704 2698
2705 2699 class excludeddir(treemanifest):
2706 2700 """Stand-in for a directory that is excluded from the repository.
2707 2701
2708 2702 With narrowing active on a repository that uses treemanifests,
2709 2703 some of the directory revlogs will be excluded from the resulting
2710 2704 clone. This is a huge storage win for clients, but means we need
2711 2705 some sort of pseudo-manifest to surface to internals so we can
2712 2706 detect a merge conflict outside the narrowspec. That's what this
2713 2707 class is: it stands in for a directory whose node is known, but
2714 2708 whose contents are unknown.
2715 2709 """
2716 2710
2717 2711 _files: Dict[bytes, bytes]
2718 2712 _flags: Dict[bytes, bytes]
2719 2713
2720 2714 def __init__(self, nodeconstants, dir, node):
2721 2715 super(excludeddir, self).__init__(nodeconstants, dir)
2722 2716 self._node = node
2723 2717 # Add an empty file, which will be included by iterators and such,
2724 2718 # appearing as the directory itself (i.e. something like "dir/")
2725 2719 self._files[b''] = node
2726 2720 self._flags[b''] = b't'
2727 2721
2728 2722 # Manifests outside the narrowspec should never be modified, so avoid
2729 2723 # copying. This makes a noticeable difference when there are very many
2730 2724 # directories outside the narrowspec. Also, it makes sense for the copy to
2731 2725 # be of the same type as the original, which would not happen with the
2732 2726 # super type's copy().
2733 2727 def copy(self):
2734 2728 return self
2735 2729
2736 2730
2737 2731 class excludeddirmanifestctx(treemanifestctx):
2738 2732 """context wrapper for excludeddir - see that docstring for rationale"""
2739 2733
2740 2734 def __init__(self, nodeconstants, dir, node):
2741 2735 self.nodeconstants = nodeconstants
2742 2736 self._dir = dir
2743 2737 self._node = node
2744 2738
2745 2739 def read(self):
2746 2740 return excludeddir(self.nodeconstants, self._dir, self._node)
2747 2741
2748 2742 def readfast(self, shallow=False):
2749 2743 # special version of readfast since we don't have underlying storage
2750 2744 return self.read()
2751 2745
2752 2746 def write(self, *args):
2753 2747 raise error.ProgrammingError(
2754 2748 b'attempt to write manifest from excluded dir %s' % self._dir
2755 2749 )
2756 2750
2757 2751
2758 2752 class excludedmanifestrevlog(manifestrevlog):
2759 2753 """Stand-in for excluded treemanifest revlogs.
2760 2754
2761 2755 When narrowing is active on a treemanifest repository, we'll have
2762 2756 references to directories we can't see due to the revlog being
2763 2757 skipped. This class exists to conform to the manifestrevlog
2764 2758 interface for those directories and proactively prevent writes to
2765 2759 outside the narrowspec.
2766 2760 """
2767 2761
2768 2762 def __init__(self, nodeconstants, dir):
2769 2763 self.nodeconstants = nodeconstants
2770 2764 self._dir = dir
2771 2765
2772 2766 def __len__(self):
2773 2767 raise error.ProgrammingError(
2774 2768 b'attempt to get length of excluded dir %s' % self._dir
2775 2769 )
2776 2770
2777 2771 def rev(self, node):
2778 2772 raise error.ProgrammingError(
2779 2773 b'attempt to get rev from excluded dir %s' % self._dir
2780 2774 )
2781 2775
2782 2776 def linkrev(self, node):
2783 2777 raise error.ProgrammingError(
2784 2778 b'attempt to get linkrev from excluded dir %s' % self._dir
2785 2779 )
2786 2780
2787 2781 def node(self, rev):
2788 2782 raise error.ProgrammingError(
2789 2783 b'attempt to get node from excluded dir %s' % self._dir
2790 2784 )
2791 2785
2792 2786 def add(self, *args, **kwargs):
2793 2787 # We should never write entries in dirlogs outside the narrow clone.
2794 2788 # However, the method still gets called from writesubtree() in
2795 2789 # _addtree(), so we need to handle it. We should possibly make that
2796 2790 # caller avoid calling add() with a clean manifest (_dirty is always
2797 2791 # False in excludeddir instances).
2798 2792 pass