manifest: subclass the new `repository.imanifestlog` Protocol class...
Matt Harbison
r53381:d371efa7 default
@@ -1,2747 +1,2747 @@
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import annotations
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import typing
14 14 import weakref
15 15
16 16 from typing import (
17 17 Callable,
18 18 Collection,
19 19 Dict,
20 20 Iterable,
21 21 Iterator,
22 22 List,
23 23 Optional,
24 24 Set,
25 25 Tuple,
26 26 Union,
27 27 cast,
28 28 )
29 29
30 30 from .i18n import _
31 31 from .node import (
32 32 bin,
33 33 hex,
34 34 nullrev,
35 35 )
36 36 from . import (
37 37 encoding,
38 38 error,
39 39 match as matchmod,
40 40 mdiff,
41 41 pathutil,
42 42 policy,
43 43 pycompat,
44 44 revlog,
45 45 util,
46 46 )
47 47 from .interfaces import (
48 48 repository,
49 49 )
50 50 from .revlogutils import (
51 51 constants as revlog_constants,
52 52 )
53 53
54 54 if typing.TYPE_CHECKING:
55 55 from typing import (
56 56 ByteString,
57 57 )
58 58
59 59
60 60 parsers = policy.importmod('parsers')
61 61 propertycache = util.propertycache
62 62
63 63 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
64 64 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
65 65
66 66
67 67 def _parse(nodelen, data: bytes):
68 68 # This method does a little bit of excessive-looking
69 69 # precondition checking. This is so that the behavior of this
70 70 # class exactly matches its C counterpart to try and help
71 71 # prevent surprise breakage for anyone that develops against
72 72 # the pure version.
73 73 if data and data[-1:] != b'\n':
74 74 raise ValueError(b'Manifest did not end in a newline.')
75 75 prev = None
76 76 for l in data.splitlines():
77 77 if prev is not None and prev > l:
78 78 raise ValueError(b'Manifest lines not in sorted order.')
79 79 prev = l
80 80 f, n = l.split(b'\0')
81 81 nl = len(n)
82 82 flags = n[-1:]
83 83 if flags in _manifestflags:
84 84 n = n[:-1]
85 85 nl -= 1
86 86 else:
87 87 flags = b''
88 88 if nl != 2 * nodelen:
89 89 raise ValueError(b'Invalid manifest line')
90 90
91 91 yield f, bin(n), flags
92 92
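# Illustrative note (added for clarity, not in the original source): each
# manifest line has the form b"<path>\x00<hex node>[flag]\n", where the
# optional flag is one of _manifestflags below (b'l' symlink, b'x'
# executable, b't' sub-manifest in tree manifests).  For a 20-byte node, an
# executable file entry would look like
#   b"bin/script\x00" + 40 hex digits + b"x\n"
# and _parse() above would yield (b'bin/script', <20-byte node>, b'x').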
93 93
94 94 def _text(it):
95 95 files = []
96 96 lines = []
97 97 for f, n, fl in it:
98 98 files.append(f)
99 99 # if this is changed to support newlines in filenames,
100 100 # be sure to check the templates/ dir again (especially *-raw.tmpl)
101 101 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
102 102
103 103 _checkforbidden(files)
104 104 return b''.join(lines)
105 105
106 106
107 107 class lazymanifestiter:
108 108 def __init__(self, lm: '_LazyManifest') -> None:
109 109 self.pos = 0
110 110 self.lm = lm
111 111
112 112 def __iter__(self) -> 'lazymanifestiter':
113 113 return self
114 114
115 115 def next(self) -> bytes:
116 116 try:
117 117 data, pos = self.lm._get(self.pos)
118 118 except IndexError:
119 119 raise StopIteration
120 120 if pos == -1:
121 121 assert isinstance(data, tuple)
122 122 self.pos += 1
123 123 return data[0]
124 124 assert isinstance(data, bytes)
125 125 self.pos += 1
126 126 zeropos = data.find(b'\x00', pos)
127 127 return data[pos:zeropos]
128 128
129 129 __next__ = next
130 130
131 131
132 132 class lazymanifestiterentries:
133 133 def __init__(self, lm: '_LazyManifest') -> None:
134 134 self.lm = lm
135 135 self.pos = 0
136 136
137 137 def __iter__(self) -> 'lazymanifestiterentries':
138 138 return self
139 139
140 140 def next(self) -> Tuple[bytes, bytes, bytes]:
141 141 try:
142 142 data, pos = self.lm._get(self.pos)
143 143 except IndexError:
144 144 raise StopIteration
145 145 if pos == -1:
146 146 assert isinstance(data, tuple)
147 147 self.pos += 1
148 148 return data
149 149 assert isinstance(data, bytes)
150 150 zeropos = data.find(b'\x00', pos)
151 151 nlpos = data.find(b'\n', pos)
152 152 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
153 153 raise error.StorageError(b'Invalid manifest line')
154 154 flags = data[nlpos - 1 : nlpos]
155 155 if flags in _manifestflags:
156 156 hlen = nlpos - zeropos - 2
157 157 else:
158 158 hlen = nlpos - zeropos - 1
159 159 flags = b''
160 160 if hlen != 2 * self.lm._nodelen:
161 161 raise error.StorageError(b'Invalid manifest line')
162 162 hashval = unhexlify(
163 163 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
164 164 )
165 165 self.pos += 1
166 166 return (data[pos:zeropos], hashval, flags)
167 167
168 168 __next__ = next
169 169
170 170
171 171 def unhexlify(data: bytes, extra: int, pos, length: int):
172 172 s = bin(data[pos : pos + length])
173 173 if extra:
174 174 s += bytes([extra & 0xFF])
175 175 return s
176 176
177 177
178 178 def _cmp(a, b):
179 179 return (a > b) - (a < b)
180 180
181 181
182 182 _manifestflags = {b'', b'l', b't', b'x'}
183 183
184 184
185 185 class _LazyManifest:
186 186 """A pure python manifest backed by a byte string. It is supplimented with
187 187 internal lists as it is modified, until it is compacted back to a pure byte
188 188 string.
189 189
190 190 ``data`` is the initial manifest data.
191 191
192 192 ``positions`` is a list of offsets, one per manifest entry. Positive
193 193 values are offsets into ``data``, negative values are offsets into the
194 194 ``extradata`` list. When an entry is removed, its entry is dropped from
195 195 ``positions``. The values are encoded such that when walking the list and
196 196 indexing into ``data`` or ``extradata`` as appropriate, the entries are
197 197 sorted by filename.
198 198
199 199 ``extradata`` is a list of (key, hash, flags) for entries that were added or
200 200 modified since the manifest was created or compacted.
201 201 """
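# Worked example (added for clarity, hypothetical values): with
#   data = b"a.txt\x00" + 40 * b"0" + b"\n" + b"z.txt\x00" + 40 * b"1" + b"\n"
# ``positions`` starts as [0, 47].  After ``self[b'm.txt'] = (node, b'')``
# the new entry is appended to ``extradata`` and its position is stored as
# ``-len(extradata)`` (here -1), so _get() returns the extradata tuple
# instead of a slice of ``data``; _compact() later folds everything back
# into a single byte string.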
202 202
203 203 def __init__(
204 204 self,
205 205 nodelen: int,
206 206 data: bytes,
207 207 positions=None,
208 208 extrainfo=None,
209 209 extradata=None,
210 210 hasremovals: bool = False,
211 211 ):
212 212 self._nodelen = nodelen
213 213 if positions is None:
214 214 self.positions = self.findlines(data)
215 215 self.extrainfo = [0] * len(self.positions)
216 216 self.data = data
217 217 self.extradata = []
218 218 self.hasremovals = False
219 219 else:
220 220 self.positions = positions[:]
221 221 self.extrainfo = extrainfo[:]
222 222 self.extradata = extradata[:]
223 223 self.data = data
224 224 self.hasremovals = hasremovals
225 225
226 226 def findlines(self, data: bytes) -> List[int]:
227 227 if not data:
228 228 return []
229 229 pos = data.find(b"\n")
230 230 if pos == -1 or data[-1:] != b'\n':
231 231 raise ValueError(b"Manifest did not end in a newline.")
232 232 positions = [0]
233 233 prev = data[: data.find(b'\x00')]
234 234 while pos < len(data) - 1 and pos != -1:
235 235 positions.append(pos + 1)
236 236 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
237 237 if nexts < prev:
238 238 raise ValueError(b"Manifest lines not in sorted order.")
239 239 prev = nexts
240 240 pos = data.find(b"\n", pos + 1)
241 241 return positions
242 242
243 243 def _get(
244 244 self, index: int
245 245 ) -> Tuple[Union[bytes, Tuple[bytes, bytes, bytes]], int]:
246 246 # get the position encoded in pos:
247 247 # positive number is an index in 'data'
248 248 # negative number is in extrapieces
249 249 pos = self.positions[index]
250 250 if pos >= 0:
251 251 return self.data, pos
252 252 return self.extradata[-pos - 1], -1
253 253
254 254 def _getkey(self, pos) -> bytes:
255 255 if pos >= 0:
256 256 return self.data[pos : self.data.find(b'\x00', pos + 1)]
257 257 return self.extradata[-pos - 1][0]
258 258
259 259 def bsearch(self, key: bytes) -> int:
260 260 first = 0
261 261 last = len(self.positions) - 1
262 262
263 263 while first <= last:
264 264 midpoint = (first + last) // 2
265 265 nextpos = self.positions[midpoint]
266 266 candidate = self._getkey(nextpos)
267 267 r = _cmp(key, candidate)
268 268 if r == 0:
269 269 return midpoint
270 270 else:
271 271 if r < 0:
272 272 last = midpoint - 1
273 273 else:
274 274 first = midpoint + 1
275 275 return -1
276 276
277 277 def bsearch2(self, key: bytes) -> Tuple[int, bool]:
278 278 # same as the above, but will always return the position
279 279 # done for performance reasons
280 280 first = 0
281 281 last = len(self.positions) - 1
282 282
283 283 while first <= last:
284 284 midpoint = (first + last) // 2
285 285 nextpos = self.positions[midpoint]
286 286 candidate = self._getkey(nextpos)
287 287 r = _cmp(key, candidate)
288 288 if r == 0:
289 289 return (midpoint, True)
290 290 else:
291 291 if r < 0:
292 292 last = midpoint - 1
293 293 else:
294 294 first = midpoint + 1
295 295 return (first, False)
296 296
297 297 def __contains__(self, key: bytes) -> bool:
298 298 return self.bsearch(key) != -1
299 299
300 300 def __getitem__(self, key: bytes) -> Tuple[bytes, bytes]:
301 301 if not isinstance(key, bytes):
302 302 raise TypeError(b"getitem: manifest keys must be a bytes.")
303 303 needle = self.bsearch(key)
304 304 if needle == -1:
305 305 raise KeyError
306 306 data, pos = self._get(needle)
307 307 if pos == -1:
308 308 assert isinstance(data, tuple)
309 309 return (data[1], data[2])
310 310
311 311 assert isinstance(data, bytes)
312 312 zeropos = data.find(b'\x00', pos)
313 313 nlpos = data.find(b'\n', zeropos)
314 314 assert 0 <= needle <= len(self.positions)
315 315 assert len(self.extrainfo) == len(self.positions)
316 316 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
317 317 raise error.StorageError(b'Invalid manifest line')
318 318 hlen = nlpos - zeropos - 1
319 319 flags = data[nlpos - 1 : nlpos]
320 320 if flags in _manifestflags:
321 321 hlen -= 1
322 322 else:
323 323 flags = b''
324 324 if hlen != 2 * self._nodelen:
325 325 raise error.StorageError(b'Invalid manifest line')
326 326 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
327 327 return (hashval, flags)
328 328
329 329 def __delitem__(self, key: bytes) -> None:
330 330 needle, found = self.bsearch2(key)
331 331 if not found:
332 332 raise KeyError
333 333 cur = self.positions[needle]
334 334 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
335 335 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
336 336 if cur >= 0:
337 337 # This does NOT unsort the list as far as the search functions are
338 338 # concerned, as they only examine lines mapped by self.positions.
339 339 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
340 340 self.hasremovals = True
341 341
342 342 def __setitem__(self, key: bytes, value: Tuple[bytes, bytes]):
343 343 if not isinstance(key, bytes):
344 344 raise TypeError(b"setitem: manifest keys must be a byte string.")
345 345 if not isinstance(value, tuple) or len(value) != 2:
346 346 raise TypeError(
347 347 b"Manifest values must be a tuple of (node, flags)."
348 348 )
349 349 hashval = value[0]
350 350 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
351 351 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
352 352 flags = value[1]
353 353 if not isinstance(flags, bytes) or len(flags) > 1:
354 354 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
355 355 needle, found = self.bsearch2(key)
356 356 if found:
357 357 # put the item
358 358 pos = self.positions[needle]
359 359 if pos < 0:
360 360 self.extradata[-pos - 1] = (key, hashval, value[1])
361 361 else:
362 362 # just don't bother
363 363 self.extradata.append((key, hashval, value[1]))
364 364 self.positions[needle] = -len(self.extradata)
365 365 else:
366 366 # not found, put it in with extra positions
367 367 self.extradata.append((key, hashval, value[1]))
368 368 self.positions = (
369 369 self.positions[:needle]
370 370 + [-len(self.extradata)]
371 371 + self.positions[needle:]
372 372 )
373 373 self.extrainfo = (
374 374 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
375 375 )
376 376
377 377 def copy(self) -> '_LazyManifest':
378 378 # XXX call _compact like in C?
379 379 return _lazymanifest(
380 380 self._nodelen,
381 381 self.data,
382 382 self.positions,
383 383 self.extrainfo,
384 384 self.extradata,
385 385 self.hasremovals,
386 386 )
387 387
388 388 def _compact(self) -> None:
389 389 # hopefully not called TOO often
390 390 if len(self.extradata) == 0 and not self.hasremovals:
391 391 return
392 392 l = []
393 393 i = 0
394 394 offset = 0
395 395 self.extrainfo = [0] * len(self.positions)
396 396 while i < len(self.positions):
397 397 if self.positions[i] >= 0:
398 398 cur = self.positions[i]
399 399 last_cut = cur
400 400
401 401 # Collect all contiguous entries in the buffer at the current
402 402 # offset, breaking out only for added/modified items held in
403 403 # extradata, or a deleted line prior to the next position.
404 404 while True:
405 405 self.positions[i] = offset
406 406 i += 1
407 407 if i == len(self.positions) or self.positions[i] < 0:
408 408 break
409 409
410 410 # A removed file has no positions[] entry, but does have an
411 411 # overwritten first byte. Break out and find the end of the
412 412 # current good entry/entries if there is a removed file
413 413 # before the next position.
414 414 if (
415 415 self.hasremovals
416 416 and self.data.find(b'\n\x00', cur, self.positions[i])
417 417 != -1
418 418 ):
419 419 break
420 420
421 421 offset += self.positions[i] - cur
422 422 cur = self.positions[i]
423 423 end_cut = self.data.find(b'\n', cur)
424 424 if end_cut != -1:
425 425 end_cut += 1
426 426 offset += end_cut - cur
427 427 l.append(self.data[last_cut:end_cut])
428 428 else:
429 429 while i < len(self.positions) and self.positions[i] < 0:
430 430 cur = self.positions[i]
431 431 t = self.extradata[-cur - 1]
432 432 l.append(self._pack(t))
433 433 self.positions[i] = offset
434 434 # Hashes are either 20 bytes (old sha1s) or 32
435 435 # bytes (new non-sha1).
436 436 hlen = 20
437 437 if len(t[1]) > 25:
438 438 hlen = 32
439 439 if len(t[1]) > hlen:
440 440 self.extrainfo[i] = ord(t[1][hlen + 1])
441 441 offset += len(l[-1])
442 442 i += 1
443 443 self.data = b''.join(l)
444 444 self.hasremovals = False
445 445 self.extradata = []
446 446
447 447 def _pack(self, d: Tuple[bytes, bytes, bytes]) -> bytes:
448 448 n = d[1]
449 449 assert len(n) in (20, 32)
450 450 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
451 451
452 452 def text(self) -> ByteString:
453 453 self._compact()
454 454 return self.data
455 455
456 456 def diff(
457 457 self, m2: '_LazyManifest', clean: bool = False
458 458 ) -> Dict[
459 459 bytes,
460 460 Optional[
461 461 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
462 462 ],
463 463 ]:
464 464 '''Finds changes between the current manifest and m2.'''
465 465 # XXX think whether efficiency matters here
466 466 diff = {}
467 467
468 468 for fn, e1, flags in self.iterentries():
469 469 if fn not in m2:
470 470 diff[fn] = (e1, flags), (None, b'')
471 471 else:
472 472 e2 = m2[fn]
473 473 if (e1, flags) != e2:
474 474 diff[fn] = (e1, flags), e2
475 475 elif clean:
476 476 diff[fn] = None
477 477
478 478 for fn, e2, flags in m2.iterentries():
479 479 if fn not in self:
480 480 diff[fn] = (None, b''), (e2, flags)
481 481
482 482 return diff
483 483
484 484 def iterentries(self) -> lazymanifestiterentries:
485 485 return lazymanifestiterentries(self)
486 486
487 487 def iterkeys(self) -> lazymanifestiter:
488 488 return lazymanifestiter(self)
489 489
490 490 def __iter__(self) -> lazymanifestiter:
491 491 return lazymanifestiter(self)
492 492
493 493 def __len__(self) -> int:
494 494 return len(self.positions)
495 495
496 496 def filtercopy(self, filterfn: Callable[[bytes], bool]) -> '_LazyManifest':
497 497 # XXX should be optimized
498 498 c = _lazymanifest(self._nodelen, b'')
499 499 for f, n, fl in self.iterentries():
500 500 if filterfn(f):
501 501 c[f] = n, fl
502 502 return c
503 503
504 504
505 505 try:
506 506 _lazymanifest = parsers.lazymanifest
507 507 except AttributeError:
508 508 _lazymanifest = _LazyManifest
509 509
510 510
511 511 class manifestdict: # (repository.imanifestdict)
512 512 def __init__(self, nodelen: int, data: ByteString = b''):
513 513 self._nodelen = nodelen
514 514 self._lm = _lazymanifest(nodelen, data)
515 515
516 516 def __getitem__(self, key: bytes) -> bytes:
517 517 return self._lm[key][0]
518 518
519 519 def find(self, key: bytes) -> Tuple[bytes, bytes]:
520 520 return self._lm[key]
521 521
522 522 def __len__(self) -> int:
523 523 return len(self._lm)
524 524
525 525 def __nonzero__(self) -> bool:
526 526 # nonzero is covered by the __len__ function, but implementing it here
527 527 # makes it easier for extensions to override.
528 528 return len(self._lm) != 0
529 529
530 530 __bool__ = __nonzero__
531 531
532 532 def set(self, key: bytes, node: bytes, flags: bytes) -> None:
533 533 self._lm[key] = node, flags
534 534
535 535 def __setitem__(self, key: bytes, node: bytes) -> None:
536 536 self._lm[key] = node, self.flags(key)
537 537
538 538 def __contains__(self, key: bytes) -> bool:
539 539 if key is None:
540 540 return False
541 541 return key in self._lm
542 542
543 543 def __delitem__(self, key: bytes) -> None:
544 544 del self._lm[key]
545 545
546 546 def __iter__(self) -> Iterator[bytes]:
547 547 return self._lm.__iter__()
548 548
549 549 def iterkeys(self) -> Iterator[bytes]:
550 550 return self._lm.iterkeys()
551 551
552 552 def keys(self) -> List[bytes]:
553 553 return list(self.iterkeys())
554 554
555 555 def filesnotin(self, m2, match=None) -> Set[bytes]:
556 556 '''Set of files in this manifest that are not in the other'''
557 557 if match is not None:
558 558 match = matchmod.badmatch(match, lambda path, msg: None)
559 559 sm2 = set(m2.walk(match))
560 560 return {f for f in self.walk(match) if f not in sm2}
561 561 return {f for f in self if f not in m2}
562 562
563 563 @propertycache
564 564 def _dirs(self) -> pathutil.dirs:
565 565 return pathutil.dirs(self)
566 566
567 567 def dirs(self) -> pathutil.dirs:
568 568 return self._dirs
569 569
570 570 def hasdir(self, dir: bytes) -> bool:
571 571 return dir in self._dirs
572 572
573 573 def _filesfastpath(self, match: matchmod.basematcher) -> bool:
574 574 """Checks whether we can correctly and quickly iterate over matcher
575 575 files instead of over manifest files."""
576 576 files = match.files()
577 577 return len(files) < 100 and (
578 578 match.isexact()
579 579 or (match.prefix() and all(fn in self for fn in files))
580 580 )
581 581
582 582 def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
583 583 """Generates matching file names.
584 584
585 585 Equivalent to manifest.matches(match).iterkeys(), but without creating
586 586 an entirely new manifest.
587 587
588 588 It also reports nonexistent files by marking them bad with match.bad().
589 589 """
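# Behaviour sketch (added for clarity, hypothetical paths): with an exact
# matcher for [b'a.txt', b'missing'], walk() yields b'a.txt' when it is in
# the manifest and calls match.bad(b'missing', None) because b'missing' is
# neither a file here nor a directory (hasdir() is False).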
590 590 if match.always():
591 591 for f in iter(self):
592 592 yield f
593 593 return
594 594
595 595 fset = set(match.files())
596 596
597 597 # avoid the entire walk if we're only looking for specific files
598 598 if self._filesfastpath(match):
599 599 for fn in sorted(fset):
600 600 if fn in self:
601 601 yield fn
602 602 return
603 603
604 604 for fn in self:
605 605 if fn in fset:
606 606 # specified pattern is the exact name
607 607 fset.remove(fn)
608 608 if match(fn):
609 609 yield fn
610 610
611 611 # for dirstate.walk, files=[''] means "walk the whole tree".
612 612 # follow that here, too
613 613 fset.discard(b'')
614 614
615 615 for fn in sorted(fset):
616 616 if not self.hasdir(fn):
617 617 match.bad(fn, None)
618 618
619 619 def _matches(self, match: matchmod.basematcher) -> 'manifestdict':
620 620 '''generate a new manifest filtered by the match argument'''
621 621 if match.always():
622 622 return self.copy()
623 623
624 624 if self._filesfastpath(match):
625 625 m = manifestdict(self._nodelen)
626 626 lm = self._lm
627 627 for fn in match.files():
628 628 if fn in lm:
629 629 m._lm[fn] = lm[fn]
630 630 return m
631 631
632 632 m = manifestdict(self._nodelen)
633 633 m._lm = self._lm.filtercopy(match)
634 634 return m
635 635
636 636 def diff(
637 637 self,
638 638 m2: 'manifestdict',
639 639 match: Optional[matchmod.basematcher] = None,
640 640 clean: bool = False,
641 641 ) -> Dict[
642 642 bytes,
643 643 Optional[
644 644 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
645 645 ],
646 646 ]:
647 647 """Finds changes between the current manifest and m2.
648 648
649 649 Args:
650 650 m2: the manifest to which this manifest should be compared.
651 651 clean: if true, include files unchanged between these manifests
652 652 with a None value in the returned dictionary.
653 653
654 654 The result is returned as a dict with filename as key and
655 655 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
656 656 nodeid in the current/other manifest and fl1/fl2 is the flag
657 657 in the current/other manifest. Where the file does not exist,
658 658 the nodeid will be None and the flags will be the empty
659 659 string.
660 660 """
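# Illustrative result shape (added for clarity, hypothetical nodes):
#   {b'a.txt': ((node1, b''), (node2, b'x'))}   # changed in m2, now executable
#   {b'new.txt': ((None, b''), (node3, b''))}   # only present in m2
# and, with clean=True, unchanged files map to None.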
661 661 if match:
662 662 m1 = self._matches(match)
663 663 m2 = m2._matches(match)
664 664 return m1.diff(m2, clean=clean)
665 665 return self._lm.diff(m2._lm, clean)
666 666
667 667 def setflag(self, key: bytes, flag: bytes) -> None:
668 668 if flag not in _manifestflags:
669 669 raise TypeError(b"Invalid manifest flag set.")
670 670 self._lm[key] = self[key], flag
671 671
672 672 def get(self, key: bytes, default=None) -> Optional[bytes]:
673 673 try:
674 674 return self._lm[key][0]
675 675 except KeyError:
676 676 return default
677 677
678 678 def flags(self, key: bytes) -> bytes:
679 679 try:
680 680 return self._lm[key][1]
681 681 except KeyError:
682 682 return b''
683 683
684 684 def copy(self) -> 'manifestdict':
685 685 c = manifestdict(self._nodelen)
686 686 c._lm = self._lm.copy()
687 687 return c
688 688
689 689 def items(self) -> Iterator[Tuple[bytes, bytes]]:
690 690 return (x[:2] for x in self._lm.iterentries())
691 691
692 692 def iteritems(self) -> Iterator[Tuple[bytes, bytes]]:
693 693 return (x[:2] for x in self._lm.iterentries())
694 694
695 695 def iterentries(self) -> Iterator[Tuple[bytes, bytes, bytes]]:
696 696 return self._lm.iterentries()
697 697
698 698 def text(self) -> ByteString:
699 699 # most likely uses native version
700 700 return self._lm.text()
701 701
702 702 def fastdelta(
703 703 self, base: ByteString, changes: Iterable[Tuple[bytes, bool]]
704 704 ) -> Tuple[ByteString, ByteString]:
705 705 """Given a base manifest text as a bytearray and a list of changes
706 706 relative to that text, compute a delta that can be used by revlog.
707 707 """
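# Usage sketch (added for clarity, hypothetical filenames): ``changes`` is
# an iterable of (filename, todelete) pairs, expected in sorted filename
# order since each _msearch() below starts from the previous match, e.g.
#   [(b'a.txt', False), (b'old.txt', True)]
# meaning a.txt was added/changed (its entry is read from self._lm) and
# old.txt must be removed from the base text.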
708 708 delta = []
709 709 dstart = None
710 710 dend = None
711 711 dline = [b""]
712 712 start = 0
713 713 # zero copy representation of base as a buffer
714 714 addbuf = util.buffer(base)
715 715
716 716 changes = list(changes)
717 717 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
718 718 # start with a readonly loop that finds the offset of
719 719 # each line and creates the deltas
720 720 for f, todelete in changes:
721 721 # bs will either be the index of the item or the insert point
722 722 start, end = _msearch(addbuf, f, start)
723 723 if not todelete:
724 724 h, fl = self._lm[f]
725 725 l = b"%s\0%s%s\n" % (f, hex(h), fl)
726 726 else:
727 727 if start == end:
728 728 # item we want to delete was not found, error out
729 729 raise AssertionError(
730 730 _(b"failed to remove %s from manifest") % f
731 731 )
732 732 l = b""
733 733 if dstart is not None and dstart <= start and dend >= start:
734 734 if dend < end:
735 735 dend = end
736 736 if l:
737 737 dline.append(l)
738 738 else:
739 739 if dstart is not None:
740 740 delta.append((dstart, dend, b"".join(dline)))
741 741 dstart = start
742 742 dend = end
743 743 dline = [l]
744 744
745 745 if dstart is not None:
746 746 delta.append((dstart, dend, b"".join(dline)))
747 747 # apply the delta to the base, and get a delta for addrevision
748 748 deltatext, arraytext = _addlistdelta(base, delta)
749 749 else:
750 750 # For large changes, it's much cheaper to just build the text and
751 751 # diff it.
752 752 arraytext = bytearray(self.text())
753 753 deltatext = mdiff.textdiff(
754 754 util.buffer(base), util.buffer(arraytext)
755 755 )
756 756
757 757 return arraytext, deltatext
758 758
759 759
760 760 def _msearch(
761 761 m: ByteString, s: bytes, lo: int = 0, hi: Optional[int] = None
762 762 ) -> Tuple[int, int]:
763 763 """return a tuple (start, end) that says where to find s within m.
764 764
765 765 If the string is found m[start:end] are the line containing
766 766 that string. If start == end the string was not found and
767 767 they indicate the proper sorted insertion point.
768 768 """
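# Example (added for clarity, hypothetical manifest text): searching for
# s = b"b.txt" in m = b"a.txt\x00...\nb.txt\x00...\n" returns (start, end)
# with m[start:end] == b"b.txt\x00...\n"; if b.txt were absent, start == end
# would be the offset where the line should be inserted to keep m sorted.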
769 769
770 770 def advance(i: int, c: bytes):
771 771 while i < lenm and m[i : i + 1] != c:
772 772 i += 1
773 773 return i
774 774
775 775 if not s:
776 776 return (lo, lo)
777 777 lenm = len(m)
778 778 if not hi:
779 779 hi = lenm
780 780 while lo < hi:
781 781 mid = (lo + hi) // 2
782 782 start = mid
783 783 while start > 0 and m[start - 1 : start] != b'\n':
784 784 start -= 1
785 785 end = advance(start, b'\0')
786 786 if bytes(m[start:end]) < s:
787 787 # we know that after the null there are 40 bytes of sha1
788 788 # this translates to the bisect lo = mid + 1
789 789 lo = advance(end + 40, b'\n') + 1
790 790 else:
791 791 # this translates to the bisect hi = mid
792 792 hi = start
793 793 end = advance(lo, b'\0')
794 794 found = m[lo:end]
795 795 if s == found:
796 796 # we know that after the null there are 40 bytes of sha1
797 797 end = advance(end + 40, b'\n')
798 798 return (lo, end + 1)
799 799 else:
800 800 return (lo, lo)
801 801
802 802
803 803 def _checkforbidden(l: Iterable[bytes]) -> None:
804 804 """Check filenames for illegal characters."""
805 805 for f in l:
806 806 if b'\n' in f or b'\r' in f:
807 807 raise error.StorageError(
808 808 _(b"'\\n' and '\\r' disallowed in filenames: %r")
809 809 % pycompat.bytestr(f)
810 810 )
811 811
812 812
813 813 # apply the changes collected during the bisect loop to our addlist
814 814 # return a delta suitable for addrevision
815 815 def _addlistdelta(
816 816 addlist: ByteString,
817 817 x: Iterable[Tuple[int, int, bytes]],
818 818 ) -> Tuple[bytes, ByteString]:
819 819 # for large addlist arrays, building a new array is cheaper
820 820 # than repeatedly modifying the existing one
821 821 currentposition = 0
822 822 newaddlist = bytearray()
823 823
824 824 for start, end, content in x:
825 825 newaddlist += addlist[currentposition:start]
826 826 if content:
827 827 newaddlist += bytearray(content)
828 828
829 829 currentposition = end
830 830
831 831 newaddlist += addlist[currentposition:]
832 832
833 833 deltatext = b"".join(
834 834 struct.pack(b">lll", start, end, len(content)) + content
835 835 for start, end, content in x
836 836 )
837 837 return deltatext, newaddlist
838 838
839 839
840 840 def _splittopdir(f: bytes) -> Tuple[bytes, bytes]:
841 841 if b'/' in f:
842 842 dir, subpath = f.split(b'/', 1)
843 843 return dir + b'/', subpath
844 844 else:
845 845 return b'', f
846 846
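# Examples (added for clarity):
#   _splittopdir(b'dir/sub/f.txt') == (b'dir/', b'sub/f.txt')
#   _splittopdir(b'f.txt') == (b'', b'f.txt')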
847 847
848 848 _noop = lambda s: None
849 849
850 850
851 851 class treemanifest: # (repository.imanifestdict)
852 852 _dir: bytes
853 853 _dirs: Dict[bytes, 'treemanifest']
854 854 _dirty: bool
855 855 _files: Dict[bytes, bytes]
856 856 _flags: Dict[bytes, bytes]
857 857
858 858 def __init__(self, nodeconstants, dir: bytes = b'', text: bytes = b''):
859 859 self._dir = dir
860 860 self.nodeconstants = nodeconstants
861 861 self._node = self.nodeconstants.nullid
862 862 self._nodelen = self.nodeconstants.nodelen
863 863 self._loadfunc = _noop
864 864 self._copyfunc = _noop
865 865 self._dirty = False
866 866 self._dirs = {}
867 867 self._lazydirs: Dict[
868 868 bytes,
869 869 Tuple[bytes, Callable[[bytes, bytes], 'treemanifest'], bool],
870 870 ] = {}
871 871 # Using _lazymanifest here is a little slower than plain old dicts
872 872 self._files = {}
873 873 self._flags = {}
874 874 if text:
875 875
876 876 def readsubtree(subdir, subm):
877 877 raise AssertionError(
878 878 b'treemanifest constructor only accepts flat manifests'
879 879 )
880 880
881 881 self.parse(text, readsubtree)
882 882 self._dirty = True # Mark flat manifest dirty after parsing
883 883
884 884 def _subpath(self, path: bytes) -> bytes:
885 885 return self._dir + path
886 886
887 887 def _loadalllazy(self) -> None:
888 888 selfdirs = self._dirs
889 889 subpath = self._subpath
890 890 for d, (node, readsubtree, docopy) in self._lazydirs.items():
891 891 if docopy:
892 892 selfdirs[d] = readsubtree(subpath(d), node).copy()
893 893 else:
894 894 selfdirs[d] = readsubtree(subpath(d), node)
895 895 self._lazydirs.clear()
896 896
897 897 def _loadlazy(self, d: bytes) -> None:
898 898 v = self._lazydirs.get(d)
899 899 if v is not None:
900 900 node, readsubtree, docopy = v
901 901 if docopy:
902 902 self._dirs[d] = readsubtree(self._subpath(d), node).copy()
903 903 else:
904 904 self._dirs[d] = readsubtree(self._subpath(d), node)
905 905 del self._lazydirs[d]
906 906
907 907 def _loadchildrensetlazy(
908 908 self, visit: Union[Set[bytes], bytes]
909 909 ) -> Optional[Set[bytes]]:
910 910 if not visit:
911 911 return None
912 912 if visit == b'all' or visit == b'this':
913 913 self._loadalllazy()
914 914 return None
915 915
916 916 visit = cast(Set[bytes], visit)
917 917
918 918 loadlazy = self._loadlazy
919 919 for k in visit:
920 920 loadlazy(k + b'/')
921 921 return visit
922 922
923 923 def _loaddifflazy(self, t1: 'treemanifest', t2: 'treemanifest'):
924 924 """load items in t1 and t2 if they're needed for diffing.
925 925
926 926 The criteria currently is:
927 927 - if it's not present in _lazydirs in either t1 or t2, load it in the
928 928 other (it may already be loaded or it may not exist, doesn't matter)
929 929 - if it's present in _lazydirs in both, compare the nodeid; if it
930 930 differs, load it in both
931 931 """
932 932 toloadlazy = []
933 933 for d, v1 in t1._lazydirs.items():
934 934 v2 = t2._lazydirs.get(d)
935 935 if v2 is None or v2[0] != v1[0]:
936 936 toloadlazy.append(d)
937 937 for d, v1 in t2._lazydirs.items():
938 938 if d not in t1._lazydirs:
939 939 toloadlazy.append(d)
940 940
941 941 for d in toloadlazy:
942 942 t1._loadlazy(d)
943 943 t2._loadlazy(d)
944 944
945 945 def __len__(self) -> int:
946 946 self._load()
947 947 size = len(self._files)
948 948 self._loadalllazy()
949 949 for m in self._dirs.values():
950 950 size += m.__len__()
951 951 return size
952 952
953 953 def __nonzero__(self) -> bool:
954 954 # Faster than "__len__() != 0" since it avoids loading sub-manifests
955 955 return not self._isempty()
956 956
957 957 __bool__ = __nonzero__
958 958
959 959 def _isempty(self) -> bool:
960 960 self._load() # for consistency; already loaded by all callers
961 961 # See if we can skip loading everything.
962 962 if self._files or (
963 963 self._dirs and any(not m._isempty() for m in self._dirs.values())
964 964 ):
965 965 return False
966 966 self._loadalllazy()
967 967 return not self._dirs or all(m._isempty() for m in self._dirs.values())
968 968
969 969 @encoding.strmethod
970 970 def __repr__(self) -> bytes:
971 971 return (
972 972 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
973 973 % (
974 974 self._dir,
975 975 hex(self._node),
976 976 bool(self._loadfunc is _noop),
977 977 self._dirty,
978 978 id(self),
979 979 )
980 980 )
981 981
982 982 def dir(self) -> bytes:
983 983 """The directory that this tree manifest represents, including a
984 984 trailing '/'. Empty string for the repo root directory."""
985 985 return self._dir
986 986
987 987 def node(self) -> bytes:
988 988 """This node of this instance. nullid for unsaved instances. Should
989 989 be updated when the instance is read or written from a revlog.
990 990 """
991 991 assert not self._dirty
992 992 return self._node
993 993
994 994 def setnode(self, node: bytes) -> None:
995 995 self._node = node
996 996 self._dirty = False
997 997
998 998 def iterentries(
999 999 self,
1000 1000 ) -> Iterator[Tuple[bytes, Union[bytes, 'treemanifest'], bytes]]:
1001 1001 self._load()
1002 1002 self._loadalllazy()
1003 1003 for p, n in sorted(
1004 1004 itertools.chain(self._dirs.items(), self._files.items())
1005 1005 ):
1006 1006 if p in self._files:
1007 1007 yield self._subpath(p), n, self._flags.get(p, b'')
1008 1008 else:
1009 1009 for x in n.iterentries():
1010 1010 yield x
1011 1011
1012 1012 def items(self) -> Iterator[Tuple[bytes, Union[bytes, 'treemanifest']]]:
1013 1013 self._load()
1014 1014 self._loadalllazy()
1015 1015 for p, n in sorted(
1016 1016 itertools.chain(self._dirs.items(), self._files.items())
1017 1017 ):
1018 1018 if p in self._files:
1019 1019 yield self._subpath(p), n
1020 1020 else:
1021 1021 for f, sn in n.items():
1022 1022 yield f, sn
1023 1023
1024 1024 iteritems = items
1025 1025
1026 1026 def iterkeys(self) -> Iterator[bytes]:
1027 1027 self._load()
1028 1028 self._loadalllazy()
1029 1029 for p in sorted(itertools.chain(self._dirs, self._files)):
1030 1030 if p in self._files:
1031 1031 yield self._subpath(p)
1032 1032 else:
1033 1033 for f in self._dirs[p]:
1034 1034 yield f
1035 1035
1036 1036 def keys(self) -> List[bytes]:
1037 1037 return list(self.iterkeys())
1038 1038
1039 1039 def __iter__(self) -> Iterator[bytes]:
1040 1040 return self.iterkeys()
1041 1041
1042 1042 def __contains__(self, f: bytes) -> bool:
1043 1043 if f is None:
1044 1044 return False
1045 1045 self._load()
1046 1046 dir, subpath = _splittopdir(f)
1047 1047 if dir:
1048 1048 self._loadlazy(dir)
1049 1049
1050 1050 if dir not in self._dirs:
1051 1051 return False
1052 1052
1053 1053 return self._dirs[dir].__contains__(subpath)
1054 1054 else:
1055 1055 return f in self._files
1056 1056
1057 1057 def get(self, f: bytes, default: Optional[bytes] = None) -> Optional[bytes]:
1058 1058 self._load()
1059 1059 dir, subpath = _splittopdir(f)
1060 1060 if dir:
1061 1061 self._loadlazy(dir)
1062 1062
1063 1063 if dir not in self._dirs:
1064 1064 return default
1065 1065 return self._dirs[dir].get(subpath, default)
1066 1066 else:
1067 1067 return self._files.get(f, default)
1068 1068
1069 1069 def __getitem__(self, f: bytes) -> bytes:
1070 1070 self._load()
1071 1071 dir, subpath = _splittopdir(f)
1072 1072 if dir:
1073 1073 self._loadlazy(dir)
1074 1074
1075 1075 return self._dirs[dir].__getitem__(subpath)
1076 1076 else:
1077 1077 return self._files[f]
1078 1078
1079 1079 def flags(self, f: bytes) -> bytes:
1080 1080 self._load()
1081 1081 dir, subpath = _splittopdir(f)
1082 1082 if dir:
1083 1083 self._loadlazy(dir)
1084 1084
1085 1085 if dir not in self._dirs:
1086 1086 return b''
1087 1087 return self._dirs[dir].flags(subpath)
1088 1088 else:
1089 1089 if f in self._lazydirs or f in self._dirs:
1090 1090 return b''
1091 1091 return self._flags.get(f, b'')
1092 1092
1093 1093 def find(self, f: bytes) -> Tuple[bytes, bytes]:
1094 1094 self._load()
1095 1095 dir, subpath = _splittopdir(f)
1096 1096 if dir:
1097 1097 self._loadlazy(dir)
1098 1098
1099 1099 return self._dirs[dir].find(subpath)
1100 1100 else:
1101 1101 return self._files[f], self._flags.get(f, b'')
1102 1102
1103 1103 def __delitem__(self, f: bytes) -> None:
1104 1104 self._load()
1105 1105 dir, subpath = _splittopdir(f)
1106 1106 if dir:
1107 1107 self._loadlazy(dir)
1108 1108
1109 1109 self._dirs[dir].__delitem__(subpath)
1110 1110 # If the directory is now empty, remove it
1111 1111 if self._dirs[dir]._isempty():
1112 1112 del self._dirs[dir]
1113 1113 else:
1114 1114 del self._files[f]
1115 1115 if f in self._flags:
1116 1116 del self._flags[f]
1117 1117 self._dirty = True
1118 1118
1119 1119 def set(self, f: bytes, node: bytes, flags: bytes) -> None:
1120 1120 """Set both the node and the flags for path f."""
1121 1121 assert node is not None
1122 1122 if flags not in _manifestflags:
1123 1123 raise TypeError(b"Invalid manifest flag set.")
1124 1124 self._load()
1125 1125 dir, subpath = _splittopdir(f)
1126 1126 if dir:
1127 1127 self._loadlazy(dir)
1128 1128 if dir not in self._dirs:
1129 1129 self._dirs[dir] = treemanifest(
1130 1130 self.nodeconstants, self._subpath(dir)
1131 1131 )
1132 1132 self._dirs[dir].set(subpath, node, flags)
1133 1133 else:
1134 1134 assert len(node) in (20, 32)
1135 1135 self._files[f] = node
1136 1136 self._flags[f] = flags
1137 1137 self._dirty = True
1138 1138
1139 1139 def __setitem__(self, f: bytes, n: bytes) -> None:
1140 1140 assert n is not None
1141 1141 self._load()
1142 1142 dir, subpath = _splittopdir(f)
1143 1143 if dir:
1144 1144 self._loadlazy(dir)
1145 1145 if dir not in self._dirs:
1146 1146 self._dirs[dir] = treemanifest(
1147 1147 self.nodeconstants, self._subpath(dir)
1148 1148 )
1149 1149 self._dirs[dir].__setitem__(subpath, n)
1150 1150 else:
1151 1151 # manifest nodes are either 20 bytes or 32 bytes,
1152 1152 # depending on the hash in use. Assert this as historically
1153 1153 # sometimes extra bytes were added.
1154 1154 assert len(n) in (20, 32)
1155 1155 self._files[f] = n
1156 1156 self._dirty = True
1157 1157
1158 1158 def _load(self) -> None:
1159 1159 if self._loadfunc is not _noop:
1160 1160 lf, self._loadfunc = self._loadfunc, _noop
1161 1161 lf(self)
1162 1162 elif self._copyfunc is not _noop:
1163 1163 cf, self._copyfunc = self._copyfunc, _noop
1164 1164 cf(self)
1165 1165
1166 1166 def setflag(self, f: bytes, flags: bytes) -> None:
1167 1167 """Set the flags (symlink, executable) for path f."""
1168 1168 if flags not in _manifestflags:
1169 1169 raise TypeError(b"Invalid manifest flag set.")
1170 1170 self._load()
1171 1171 dir, subpath = _splittopdir(f)
1172 1172 if dir:
1173 1173 self._loadlazy(dir)
1174 1174 if dir not in self._dirs:
1175 1175 self._dirs[dir] = treemanifest(
1176 1176 self.nodeconstants, self._subpath(dir)
1177 1177 )
1178 1178 self._dirs[dir].setflag(subpath, flags)
1179 1179 else:
1180 1180 self._flags[f] = flags
1181 1181 self._dirty = True
1182 1182
1183 1183 def copy(self) -> 'treemanifest':
1184 1184 copy = treemanifest(self.nodeconstants, self._dir)
1185 1185 copy._node = self._node
1186 1186 copy._dirty = self._dirty
1187 1187 if self._copyfunc is _noop:
1188 1188
1189 1189 def _copyfunc(s):
1190 1190 self._load()
1191 1191 s._lazydirs = {
1192 1192 d: (n, r, True) for d, (n, r, c) in self._lazydirs.items()
1193 1193 }
1194 1194 sdirs = s._dirs
1195 1195 for d, v in self._dirs.items():
1196 1196 sdirs[d] = v.copy()
1197 1197 s._files = dict.copy(self._files)
1198 1198 s._flags = dict.copy(self._flags)
1199 1199
1200 1200 if self._loadfunc is _noop:
1201 1201 _copyfunc(copy)
1202 1202 else:
1203 1203 copy._copyfunc = _copyfunc
1204 1204 else:
1205 1205 copy._copyfunc = self._copyfunc
1206 1206 return copy
1207 1207
1208 1208 def filesnotin(
1209 1209 self, m2: 'treemanifest', match: Optional[matchmod.basematcher] = None
1210 1210 ) -> Set[bytes]:
1211 1211 '''Set of files in this manifest that are not in the other'''
1212 1212 if match and not match.always():
1213 1213 m1 = self._matches(match)
1214 1214 m2 = m2._matches(match)
1215 1215 return m1.filesnotin(m2)
1216 1216
1217 1217 files = set()
1218 1218
1219 1219 def _filesnotin(t1, t2):
1220 1220 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1221 1221 return
1222 1222 t1._load()
1223 1223 t2._load()
1224 1224 self._loaddifflazy(t1, t2)
1225 1225 for d, m1 in t1._dirs.items():
1226 1226 if d in t2._dirs:
1227 1227 m2 = t2._dirs[d]
1228 1228 _filesnotin(m1, m2)
1229 1229 else:
1230 1230 files.update(m1.iterkeys())
1231 1231
1232 1232 for fn in t1._files:
1233 1233 if fn not in t2._files:
1234 1234 files.add(t1._subpath(fn))
1235 1235
1236 1236 _filesnotin(self, m2)
1237 1237 return files
1238 1238
1239 1239 @propertycache
1240 1240 def _alldirs(self) -> pathutil.dirs:
1241 1241 return pathutil.dirs(self)
1242 1242
1243 1243 def dirs(self) -> pathutil.dirs:
1244 1244 return self._alldirs
1245 1245
1246 1246 def hasdir(self, dir: bytes) -> bool:
1247 1247 self._load()
1248 1248 topdir, subdir = _splittopdir(dir)
1249 1249 if topdir:
1250 1250 self._loadlazy(topdir)
1251 1251 if topdir in self._dirs:
1252 1252 return self._dirs[topdir].hasdir(subdir)
1253 1253 return False
1254 1254 dirslash = dir + b'/'
1255 1255 return dirslash in self._dirs or dirslash in self._lazydirs
1256 1256
1257 1257 def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
1258 1258 """Generates matching file names.
1259 1259
1260 1260 It also reports nonexistent files by marking them bad with match.bad().
1261 1261 """
1262 1262 if match.always():
1263 1263 for f in iter(self):
1264 1264 yield f
1265 1265 return
1266 1266
1267 1267 fset = set(match.files())
1268 1268
1269 1269 for fn in self._walk(match):
1270 1270 if fn in fset:
1271 1271 # specified pattern is the exact name
1272 1272 fset.remove(fn)
1273 1273 yield fn
1274 1274
1275 1275 # for dirstate.walk, files=[''] means "walk the whole tree".
1276 1276 # follow that here, too
1277 1277 fset.discard(b'')
1278 1278
1279 1279 for fn in sorted(fset):
1280 1280 if not self.hasdir(fn):
1281 1281 match.bad(fn, None)
1282 1282
1283 1283 def _walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
1284 1284 '''Recursively generates matching file names for walk().'''
1285 1285 visit = match.visitchildrenset(self._dir[:-1])
1286 1286 if not visit:
1287 1287 return
1288 1288
1289 1289 # yield this dir's files and walk its submanifests
1290 1290 self._load()
1291 1291 visit = self._loadchildrensetlazy(visit)
1292 1292 for p in sorted(list(self._dirs) + list(self._files)):
1293 1293 if p in self._files:
1294 1294 fullp = self._subpath(p)
1295 1295 if match(fullp):
1296 1296 yield fullp
1297 1297 else:
1298 1298 if not visit or p[:-1] in visit:
1299 1299 for f in self._dirs[p]._walk(match):
1300 1300 yield f
1301 1301
1302 1302 def _matches(self, match: matchmod.basematcher) -> 'treemanifest':
1303 1303 """recursively generate a new manifest filtered by the match argument."""
1304 1304 if match.always():
1305 1305 return self.copy()
1306 1306 return self._matches_inner(match)
1307 1307
1308 1308 def _matches_inner(self, match: matchmod.basematcher) -> 'treemanifest':
1309 1309 if match.always():
1310 1310 return self.copy()
1311 1311
1312 1312 visit = match.visitchildrenset(self._dir[:-1])
1313 1313 if visit == b'all':
1314 1314 return self.copy()
1315 1315 ret = treemanifest(self.nodeconstants, self._dir)
1316 1316 if not visit:
1317 1317 return ret
1318 1318
1319 1319 self._load()
1320 1320 for fn in self._files:
1321 1321 # While visitchildrenset *usually* lists only subdirs, this is
1322 1322 # actually up to the matcher and may have some files in the set().
1323 1323 # If visit == 'this', we should obviously look at the files in this
1324 1324 # directory; if visit is a set, and fn is in it, we should inspect
1325 1325 # fn (but no need to inspect things not in the set).
1326 1326 if visit != b'this' and fn not in visit:
1327 1327 continue
1328 1328 fullp = self._subpath(fn)
1329 1329 # visitchildrenset isn't perfect, we still need to call the regular
1330 1330 # matcher code to further filter results.
1331 1331 if not match(fullp):
1332 1332 continue
1333 1333 ret._files[fn] = self._files[fn]
1334 1334 if fn in self._flags:
1335 1335 ret._flags[fn] = self._flags[fn]
1336 1336
1337 1337 visit = self._loadchildrensetlazy(visit)
1338 1338 for dir, subm in self._dirs.items():
1339 1339 if visit and dir[:-1] not in visit:
1340 1340 continue
1341 1341 m = subm._matches_inner(match)
1342 1342 if not m._isempty():
1343 1343 ret._dirs[dir] = m
1344 1344
1345 1345 if not ret._isempty():
1346 1346 ret._dirty = True
1347 1347 return ret
1348 1348
1349 1349 def fastdelta(
1350 1350 self, base: ByteString, changes: Iterable[Tuple[bytes, bool]]
1351 1351 ) -> ByteString:
1352 1352 raise FastdeltaUnavailable()
1353 1353
1354 1354 def diff(
1355 1355 self,
1356 1356 m2: 'treemanifest',
1357 1357 match: Optional[matchmod.basematcher] = None,
1358 1358 clean: bool = False,
1359 1359 ) -> Dict[
1360 1360 bytes,
1361 1361 Optional[
1362 1362 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
1363 1363 ],
1364 1364 ]:
1365 1365 """Finds changes between the current manifest and m2.
1366 1366
1367 1367 Args:
1368 1368 m2: the manifest to which this manifest should be compared.
1369 1369 clean: if true, include files unchanged between these manifests
1370 1370 with a None value in the returned dictionary.
1371 1371
1372 1372 The result is returned as a dict with filename as key and
1373 1373 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1374 1374 nodeid in the current/other manifest and fl1/fl2 is the flag
1375 1375 in the current/other manifest. Where the file does not exist,
1376 1376 the nodeid will be None and the flags will be the empty
1377 1377 string.
1378 1378 """
1379 1379 if match and not match.always():
1380 1380 m1 = self._matches(match)
1381 1381 m2 = m2._matches(match)
1382 1382 return m1.diff(m2, clean=clean)
1383 1383 result = {}
1384 1384 emptytree = treemanifest(self.nodeconstants)
1385 1385
1386 1386 def _iterativediff(t1, t2, stack):
1387 1387 """compares two tree manifests and append new tree-manifests which
1388 1388 needs to be compared to stack"""
1389 1389 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1390 1390 return
1391 1391 t1._load()
1392 1392 t2._load()
1393 1393 self._loaddifflazy(t1, t2)
1394 1394
1395 1395 for d, m1 in t1._dirs.items():
1396 1396 m2 = t2._dirs.get(d, emptytree)
1397 1397 stack.append((m1, m2))
1398 1398
1399 1399 for d, m2 in t2._dirs.items():
1400 1400 if d not in t1._dirs:
1401 1401 stack.append((emptytree, m2))
1402 1402
1403 1403 for fn, n1 in t1._files.items():
1404 1404 fl1 = t1._flags.get(fn, b'')
1405 1405 n2 = t2._files.get(fn, None)
1406 1406 fl2 = t2._flags.get(fn, b'')
1407 1407 if n1 != n2 or fl1 != fl2:
1408 1408 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1409 1409 elif clean:
1410 1410 result[t1._subpath(fn)] = None
1411 1411
1412 1412 for fn, n2 in t2._files.items():
1413 1413 if fn not in t1._files:
1414 1414 fl2 = t2._flags.get(fn, b'')
1415 1415 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1416 1416
1417 1417 stackls = []
1418 1418 _iterativediff(self, m2, stackls)
1419 1419 while stackls:
1420 1420 t1, t2 = stackls.pop()
1421 1421 # stackls is populated in the function call
1422 1422 _iterativediff(t1, t2, stackls)
1423 1423 return result
1424 1424
1425 1425 def unmodifiedsince(self, m2: 'treemanifest') -> bool:
1426 1426 return not self._dirty and not m2._dirty and self._node == m2._node
1427 1427
1428 1428 def parse(
1429 1429 self,
1430 1430 text: bytes,
1431 1431 readsubtree: Callable[[bytes, bytes], 'treemanifest'],
1432 1432 ) -> None:
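# Format note (added for clarity, hypothetical values): in a tree manifest
# text, a line flagged b't' such as
#   b"subdir\x00" + 40 hex digits + b"t\n"
# names a sub-manifest; it is stored in _lazydirs under b'subdir/' and only
# materialized through readsubtree() when that subtree is actually needed.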
1433 1433 selflazy = self._lazydirs
1434 1434 for f, n, fl in _parse(self._nodelen, text):
1435 1435 if fl == b't':
1436 1436 f = f + b'/'
1437 1437 # False below means "doesn't need to be copied" and can use the
1438 1438 # cached value from readsubtree directly.
1439 1439 selflazy[f] = (n, readsubtree, False)
1440 1440 elif b'/' in f:
1441 1441 # This is a flat manifest, so use __setitem__ and setflag rather
1442 1442 # than assigning directly to _files and _flags, so we can
1443 1443 # assign a path in a subdirectory, and to mark dirty (compared
1444 1444 # to nullid).
1445 1445 self[f] = n
1446 1446 if fl:
1447 1447 self.setflag(f, fl)
1448 1448 else:
1449 1449 # Assigning to _files and _flags avoids marking as dirty,
1450 1450 # and should be a little faster.
1451 1451 self._files[f] = n
1452 1452 if fl:
1453 1453 self._flags[f] = fl
1454 1454
1455 1455 def text(self) -> ByteString:
1456 1456 """Get the full data of this manifest as a bytestring."""
1457 1457 self._load()
1458 1458 return _text(self.iterentries())
1459 1459
1460 1460 def dirtext(self) -> ByteString:
1461 1461 """Get the full data of this directory as a bytestring. Make sure that
1462 1462 any submanifests have been written first, so their nodeids are correct.
1463 1463 """
1464 1464 self._load()
1465 1465 flags = self.flags
1466 1466 lazydirs = [(d[:-1], v[0], b't') for d, v in self._lazydirs.items()]
1467 1467 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1468 1468 files = [(f, self._files[f], flags(f)) for f in self._files]
1469 1469 return _text(sorted(dirs + files + lazydirs))
1470 1470
1471 1471 def read(
1472 1472 self,
1473 1473 gettext: Callable[[], ByteString],
1474 1474 readsubtree: Callable[[bytes, bytes], 'treemanifest'],
1475 1475 ) -> None:
1476 1476 def _load_for_read(s):
1477 1477 s.parse(gettext(), readsubtree)
1478 1478 s._dirty = False
1479 1479
1480 1480 self._loadfunc = _load_for_read
1481 1481
1482 1482 def writesubtrees(
1483 1483 self,
1484 1484 m1: 'treemanifest',
1485 1485 m2: 'treemanifest',
1486 1486 writesubtree: Callable[
1487 1487 [
1488 1488 Callable[['treemanifest'], None],
1489 1489 bytes,
1490 1490 bytes,
1491 1491 matchmod.basematcher,
1492 1492 ],
1493 1493 None,
1494 1494 ],
1495 1495 match: matchmod.basematcher,
1496 1496 ) -> None:
1497 1497 self._load() # for consistency; should never have any effect here
1498 1498 m1._load()
1499 1499 m2._load()
1500 1500 emptytree = treemanifest(self.nodeconstants)
1501 1501
1502 1502 def getnode(m, d):
1503 1503 ld = m._lazydirs.get(d)
1504 1504 if ld:
1505 1505 return ld[0]
1506 1506 tree = m._dirs.get(d, emptytree)
1507 1507 assert tree is not None # helps pytype
1508 1508 return tree._node
1509 1509
1510 1510 # let's skip investigating things that `match` says we do not need.
1511 1511 visit = match.visitchildrenset(self._dir[:-1])
1512 1512 visit = self._loadchildrensetlazy(visit)
1513 1513 if visit == b'this' or visit == b'all':
1514 1514 visit = None
1515 1515 for d, subm in self._dirs.items():
1516 1516 if visit and d[:-1] not in visit:
1517 1517 continue
1518 1518 subp1 = getnode(m1, d)
1519 1519 subp2 = getnode(m2, d)
1520 1520 if subp1 == self.nodeconstants.nullid:
1521 1521 subp1, subp2 = subp2, subp1
1522 1522 writesubtree(subm, subp1, subp2, match)
1523 1523
1524 1524 def walksubtrees(
1525 1525 self, matcher: Optional[matchmod.basematcher] = None
1526 1526 ) -> Iterator['treemanifest']:
1527 1527 """Returns an iterator of the subtrees of this manifest, including this
1528 1528 manifest itself.
1529 1529
1530 1530 If `matcher` is provided, it only returns subtrees that match.
1531 1531 """
1532 1532 if matcher and not matcher.visitdir(self._dir[:-1]):
1533 1533 return
1534 1534 if not matcher or matcher(self._dir[:-1]):
1535 1535 yield self
1536 1536
1537 1537 self._load()
1538 1538 # OPT: use visitchildrenset to avoid loading everything.
1539 1539 self._loadalllazy()
1540 1540 for d, subm in self._dirs.items():
1541 1541 for subtree in subm.walksubtrees(matcher=matcher):
1542 1542 yield subtree
1543 1543
1544 1544
1545 1545 class manifestfulltextcache(util.lrucachedict):
1546 1546 """File-backed LRU cache for the manifest cache
1547 1547
1548 1548 File consists of entries, up to EOF:
1549 1549
1550 1550 - 20 bytes node, 4 bytes length, <length> manifest data
1551 1551
1552 1552 These are written in reverse cache order (oldest to newest).
1553 1553
1554 1554 """
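# Record layout sketch (added for clarity, hypothetical node/data): each
# entry written by write() below is
#   node (20 bytes) + struct.pack(b'>L', len(data)) + data
# which is why read() stops on a short node read or on a struct.error.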
1555 1555
1556 1556 _file = b'manifestfulltextcache'
1557 1557
1558 1558 def __init__(self, max):
1559 1559 super(manifestfulltextcache, self).__init__(max)
1560 1560 self._dirty = False
1561 1561 self._read = False
1562 1562 self._opener = None
1563 1563
1564 1564 def read(self):
1565 1565 if self._read or self._opener is None:
1566 1566 return
1567 1567
1568 1568 try:
1569 1569 with self._opener(self._file) as fp:
1570 1570 set = super(manifestfulltextcache, self).__setitem__
1571 1571 # ignore trailing data, this is a cache, corruption is skipped
1572 1572 while True:
1573 1573 # TODO do we need to do work here for sha1 portability?
1574 1574 node = fp.read(20)
1575 1575 if len(node) < 20:
1576 1576 break
1577 1577 try:
1578 1578 size = struct.unpack(b'>L', fp.read(4))[0]
1579 1579 except struct.error:
1580 1580 break
1581 1581 value = bytearray(fp.read(size))
1582 1582 if len(value) != size:
1583 1583 break
1584 1584 set(node, value)
1585 1585 except IOError:
1586 1586 # the file is allowed to be missing
1587 1587 pass
1588 1588
1589 1589 self._read = True
1590 1590 self._dirty = False
1591 1591
1592 1592 def write(self):
1593 1593 if not self._dirty or self._opener is None:
1594 1594 return
1595 1595 # rotate backwards to the first used node
1596 1596 try:
1597 1597 with self._opener(
1598 1598 self._file, b'w', atomictemp=True, checkambig=True
1599 1599 ) as fp:
1600 1600 node = self._head.prev
1601 1601 while True:
1602 1602 if node.key in self._cache:
1603 1603 fp.write(node.key)
1604 1604 fp.write(struct.pack(b'>L', len(node.value)))
1605 1605 fp.write(node.value)
1606 1606 if node is self._head:
1607 1607 break
1608 1608 node = node.prev
1609 1609 except IOError:
1610 1610 # We could not write the cache (eg: permission error)
1611 1611 # the content can be missing.
1612 1612 #
1613 1613 # We could try harder and see if we could recreate a wcache
1614 1614 # directory where we could write to.
1615 1615 #
1616 1616 # XXX the error passes silently; having some way to issue an error
1617 1617 # log via `ui.log` would be nice.
1618 1618 pass
1619 1619
1620 1620 def __len__(self):
1621 1621 if not self._read:
1622 1622 self.read()
1623 1623 return super(manifestfulltextcache, self).__len__()
1624 1624
1625 1625 def __contains__(self, k):
1626 1626 if not self._read:
1627 1627 self.read()
1628 1628 return super(manifestfulltextcache, self).__contains__(k)
1629 1629
1630 1630 def __iter__(self):
1631 1631 if not self._read:
1632 1632 self.read()
1633 1633 return super(manifestfulltextcache, self).__iter__()
1634 1634
1635 1635 def __getitem__(self, k):
1636 1636 if not self._read:
1637 1637 self.read()
1638 1638 # the cache lru order can change on read
1639 1639 setdirty = self._cache.get(k) is not self._head
1640 1640 value = super(manifestfulltextcache, self).__getitem__(k)
1641 1641 if setdirty:
1642 1642 self._dirty = True
1643 1643 return value
1644 1644
1645 1645 def __setitem__(self, k, v):
1646 1646 if not self._read:
1647 1647 self.read()
1648 1648 super(manifestfulltextcache, self).__setitem__(k, v)
1649 1649 self._dirty = True
1650 1650
1651 1651 def __delitem__(self, k):
1652 1652 if not self._read:
1653 1653 self.read()
1654 1654 super(manifestfulltextcache, self).__delitem__(k)
1655 1655 self._dirty = True
1656 1656
1657 1657 def get(self, k, default=None):
1658 1658 if not self._read:
1659 1659 self.read()
1660 1660 return super(manifestfulltextcache, self).get(k, default=default)
1661 1661
1662 1662 def clear(self, clear_persisted_data=False):
1663 1663 super(manifestfulltextcache, self).clear()
1664 1664 if clear_persisted_data:
1665 1665 self._dirty = True
1666 1666 self.write()
1667 1667 self._read = False
1668 1668
1669 1669
1670 1670 # an upper bound of what we expect from compression
1671 1671 # (real live value seems to be "3")
1672 1672 MAXCOMPRESSION = 3
1673 1673
1674 1674
1675 1675 class FastdeltaUnavailable(Exception):
1676 1676 """Exception raised when fastdelta isn't usable on a manifest."""
1677 1677
1678 1678
1679 1679 class manifestrevlog: # (repository.imanifeststorage)
1680 1680 """A revlog that stores manifest texts. This is responsible for caching the
1681 1681 full-text manifest contents.
1682 1682 """
1683 1683
1684 1684 def __init__(
1685 1685 self,
1686 1686 nodeconstants,
1687 1687 opener,
1688 1688 tree=b'',
1689 1689 dirlogcache=None,
1690 1690 treemanifest=False,
1691 1691 ):
1692 1692 """Constructs a new manifest revlog
1693 1693
1694 1694 `indexfile` - used by extensions to have two manifests at once, like
1695 1695 when transitioning between flat manifests and treemanifests.
1696 1696
1697 1697 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1698 1698 options can also be used to make this a tree manifest revlog. The opener
1699 1699 option takes precedence, so if it is set to True, we ignore whatever
1700 1700 value is passed in to the constructor.
1701 1701 """
1702 1702 self.nodeconstants = nodeconstants
1703 1703 # During normal operations, we expect to deal with not more than four
1704 1704 # revs at a time (such as during commit --amend). When rebasing large
1705 1705 # stacks of commits, the number can go up, hence the config knob below.
1706 1706 cachesize = 4
1707 1707 optiontreemanifest = False
1708 1708 persistentnodemap = False
1709 1709 opts = getattr(opener, 'options', None)
1710 1710 if opts is not None:
1711 1711 cachesize = opts.get(b'manifestcachesize', cachesize)
1712 1712 optiontreemanifest = opts.get(b'treemanifest', False)
1713 1713 persistentnodemap = opts.get(b'persistent-nodemap', False)
1714 1714
1715 1715 self._treeondisk = optiontreemanifest or treemanifest
1716 1716
1717 1717 self._fulltextcache = manifestfulltextcache(cachesize)
1718 1718
1719 1719 if tree:
1720 1720 assert self._treeondisk, (tree, b'opts is %r' % opts)
1721 1721
1722 1722 radix = b'00manifest'
1723 1723 if tree:
1724 1724 radix = b"meta/" + tree + radix
1725 1725
1726 1726 self.tree = tree
1727 1727
1728 1728 # The dirlogcache is kept on the root manifest log
1729 1729 if tree:
1730 1730 self._dirlogcache = dirlogcache
1731 1731 else:
1732 1732 self._dirlogcache = {b'': self}
1733 1733
1734 1734 self._revlog = revlog.revlog(
1735 1735 opener,
1736 1736 target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
1737 1737 radix=radix,
1738 1738 # only root indexfile is cached
1739 1739 checkambig=not bool(tree),
1740 1740 mmaplargeindex=True,
1741 1741 upperboundcomp=MAXCOMPRESSION,
1742 1742 persistentnodemap=persistentnodemap,
1743 1743 )
1744 1744
1745 1745 self.index = self._revlog.index
1746 1746
1747 1747 def get_revlog(self):
1748 1748 """return an actual revlog instance if any
1749 1749
1750 1750 This exists because a lot of code leverages the fact that the underlying
1751 1751 storage is a revlog for optimization, so giving a simple way to access
1752 1752 the revlog instance helps such code.
1753 1753 """
1754 1754 return self._revlog
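# Illustrative sketch (assumes a ``repo`` object and a ``manifest_node``
# value, neither of which is defined here); callers that want revlog-level
# details can do something like:
#
#     rl = repo.manifestlog.getstorage(b'').get_revlog()
#     r = rl.rev(manifest_node)
#     base = rl.deltaparent(r)  # revision the stored delta is based on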
1755 1755
1756 1756 def _setupmanifestcachehooks(self, repo):
1757 1757 """Persist the manifestfulltextcache on lock release"""
1758 1758 if not hasattr(repo, '_wlockref'):
1759 1759 return
1760 1760
1761 1761 self._fulltextcache._opener = repo.wcachevfs
1762 1762 if repo._currentlock(repo._wlockref) is None:
1763 1763 return
1764 1764
1765 1765 reporef = weakref.ref(repo)
1766 1766 manifestrevlogref = weakref.ref(self)
1767 1767
1768 1768 def persistmanifestcache(success):
1769 1769 # Repo is in an unknown state, do not persist.
1770 1770 if not success:
1771 1771 return
1772 1772
1773 1773 repo = reporef()
1774 1774 self = manifestrevlogref()
1775 1775 if repo is None or self is None:
1776 1776 return
1777 1777 if repo.manifestlog.getstorage(b'') is not self:
1778 1778 # there's a different manifest in play now, abort
1779 1779 return
1780 1780 self._fulltextcache.write()
1781 1781
1782 1782 repo._afterlock(persistmanifestcache)
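# Note: the weak references above presumably keep the ``_afterlock``
# callback from holding the repo or this revlog alive; the callback simply
# bails out if either has been garbage collected or replaced in the
# meantime.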
1783 1783
1784 1784 @property
1785 1785 def fulltextcache(self):
1786 1786 return self._fulltextcache
1787 1787
1788 1788 def clearcaches(self, clear_persisted_data: bool = False) -> None:
1789 1789 self._revlog.clearcaches()
1790 1790 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1791 1791 self._dirlogcache = {self.tree: self}
1792 1792
1793 1793 def dirlog(self, d):
1794 1794 if d:
1795 1795 assert self._treeondisk
1796 1796 if d not in self._dirlogcache:
1797 1797 mfrevlog = manifestrevlog(
1798 1798 self.nodeconstants,
1799 1799 self.opener,
1800 1800 d,
1801 1801 self._dirlogcache,
1802 1802 treemanifest=self._treeondisk,
1803 1803 )
1804 1804 self._dirlogcache[d] = mfrevlog
1805 1805 return self._dirlogcache[d]
1806 1806
1807 1807 def add(
1808 1808 self,
1809 1809 m,
1810 1810 transaction,
1811 1811 link,
1812 1812 p1,
1813 1813 p2,
1814 1814 added: Iterable[bytes],
1815 1815 removed: Iterable[bytes],
1816 1816 readtree=None,
1817 1817 match=None,
1818 1818 ):
1819 1819 """add some manifest entries into the manifest log
1820 1820
1821 1821 input:
1822 1822
1823 1823 m: the manifest dict we want to store
1824 1824 transaction: the open transaction
1825 1825 p1: manifest-node of p1
1826 1826 p2: manifest-node of p2
1827 1827 added: files added/changed compared to the parent
1828 1828 removed: files removed compared to the parent
1829 1829
1830 1830 tree manifest input:
1831 1831
1832 1832 readtree: a function to read a subtree
1833 1833 match: a filematcher for the subpart of the tree manifest
1834 1834 """
1835 1835 try:
1836 1836 if p1 not in self.fulltextcache:
1837 1837 raise FastdeltaUnavailable()
1838 1838 # If our first parent is in the manifest cache, we can
1839 1839 # compute a delta here using properties we know about the
1840 1840 # manifest up-front, which may save time later for the
1841 1841 # revlog layer.
1842 1842
1843 1843 _checkforbidden(added)
1844 1844 # combine the changed lists into one sorted iterator
1845 1845 work = heapq.merge(
1846 1846 [(x, False) for x in sorted(added)],
1847 1847 [(x, True) for x in sorted(removed)],
1848 1848 )
1849 1849
1850 1850 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1851 1851 cachedelta = self._revlog.rev(p1), deltatext
1852 1852 text = util.buffer(arraytext)
1853 1853 rev = self._revlog.addrevision(
1854 1854 text, transaction, link, p1, p2, cachedelta
1855 1855 )
1856 1856 n = self._revlog.node(rev)
1857 1857 except FastdeltaUnavailable:
1858 1858 # The first parent manifest isn't already loaded or the
1859 1859 # manifest implementation doesn't support fastdelta, so
1860 1860 # we'll just encode a fulltext of the manifest and pass
1861 1861 # that through to the revlog layer, and let it handle the
1862 1862 # delta process.
1863 1863 if self._treeondisk:
1864 1864 assert readtree, b"readtree must be set for treemanifest writes"
1865 1865 assert match, b"match must be specified for treemanifest writes"
1866 1866 m1 = readtree(self.tree, p1)
1867 1867 m2 = readtree(self.tree, p2)
1868 1868 n = self._addtree(
1869 1869 m, transaction, link, m1, m2, readtree, match=match
1870 1870 )
1871 1871 arraytext = None
1872 1872 else:
1873 1873 text = m.text()
1874 1874 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1875 1875 n = self._revlog.node(rev)
1876 1876 arraytext = bytearray(text)
1877 1877
1878 1878 if arraytext is not None:
1879 1879 self.fulltextcache[n] = arraytext
1880 1880
1881 1881 return n
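# Illustrative sketch of a typical call (``mfrevlog`` is a manifestrevlog
# instance, ``m`` a manifestdict; ``tr``, ``linkrev``, ``p1`` and ``p2``
# are hypothetical values, none defined in this module):
#
#     node = mfrevlog.add(m, tr, linkrev, p1, p2,
#                         added=[b'added.txt'], removed=[b'removed.txt'])
#
# If ``p1`` is already in the fulltext cache, the fastdelta path above
# builds the delta in pure Python; otherwise the full manifest text is
# handed to the revlog layer, which computes its own delta.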
1882 1882
1883 1883 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1884 1884 # If the manifest is unchanged compared to one parent,
1885 1885 # don't write a new revision
1886 1886 if self.tree != b'' and (
1887 1887 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1888 1888 ):
1889 1889 return m.node()
1890 1890
1891 1891 def writesubtree(subm, subp1, subp2, match):
1892 1892 sublog = self.dirlog(subm.dir())
1893 1893 sublog.add(
1894 1894 subm,
1895 1895 transaction,
1896 1896 link,
1897 1897 subp1,
1898 1898 subp2,
1899 1899 None,
1900 1900 None,
1901 1901 readtree=readtree,
1902 1902 match=match,
1903 1903 )
1904 1904
1905 1905 m.writesubtrees(m1, m2, writesubtree, match)
1906 1906 text = m.dirtext()
1907 1907 n = None
1908 1908 if self.tree != b'':
1909 1909 # Double-check whether contents are unchanged compared to one parent
1910 1910 if text == m1.dirtext():
1911 1911 n = m1.node()
1912 1912 elif text == m2.dirtext():
1913 1913 n = m2.node()
1914 1914
1915 1915 if not n:
1916 1916 rev = self._revlog.addrevision(
1917 1917 text, transaction, link, m1.node(), m2.node()
1918 1918 )
1919 1919 n = self._revlog.node(rev)
1920 1920
1921 1921 # Save nodeid so parent manifest can calculate its nodeid
1922 1922 m.setnode(n)
1923 1923 return n
1924 1924
1925 1925 def __len__(self):
1926 1926 return len(self._revlog)
1927 1927
1928 1928 def __iter__(self):
1929 1929 return self._revlog.__iter__()
1930 1930
1931 1931 def rev(self, node):
1932 1932 return self._revlog.rev(node)
1933 1933
1934 1934 def node(self, rev):
1935 1935 return self._revlog.node(rev)
1936 1936
1937 1937 def lookup(self, value):
1938 1938 return self._revlog.lookup(value)
1939 1939
1940 1940 def parentrevs(self, rev):
1941 1941 return self._revlog.parentrevs(rev)
1942 1942
1943 1943 def parents(self, node):
1944 1944 return self._revlog.parents(node)
1945 1945
1946 1946 def linkrev(self, rev):
1947 1947 return self._revlog.linkrev(rev)
1948 1948
1949 1949 def checksize(self):
1950 1950 return self._revlog.checksize()
1951 1951
1952 1952 def revision(self, node):
1953 1953 return self._revlog.revision(node)
1954 1954
1955 1955 def rawdata(self, node):
1956 1956 return self._revlog.rawdata(node)
1957 1957
1958 1958 def revdiff(self, rev1, rev2):
1959 1959 return self._revlog.revdiff(rev1, rev2)
1960 1960
1961 1961 def cmp(self, node, text):
1962 1962 return self._revlog.cmp(node, text)
1963 1963
1964 1964 def deltaparent(self, rev):
1965 1965 return self._revlog.deltaparent(rev)
1966 1966
1967 1967 def emitrevisions(
1968 1968 self,
1969 1969 nodes,
1970 1970 nodesorder=None,
1971 1971 revisiondata=False,
1972 1972 assumehaveparentrevisions=False,
1973 1973 deltamode=repository.CG_DELTAMODE_STD,
1974 1974 sidedata_helpers=None,
1975 1975 debug_info=None,
1976 1976 ):
1977 1977 return self._revlog.emitrevisions(
1978 1978 nodes,
1979 1979 nodesorder=nodesorder,
1980 1980 revisiondata=revisiondata,
1981 1981 assumehaveparentrevisions=assumehaveparentrevisions,
1982 1982 deltamode=deltamode,
1983 1983 sidedata_helpers=sidedata_helpers,
1984 1984 debug_info=debug_info,
1985 1985 )
1986 1986
1987 1987 def addgroup(
1988 1988 self,
1989 1989 deltas,
1990 1990 linkmapper,
1991 1991 transaction,
1992 1992 alwayscache=False,
1993 1993 addrevisioncb=None,
1994 1994 duplicaterevisioncb=None,
1995 1995 debug_info=None,
1996 1996 delta_base_reuse_policy=None,
1997 1997 ):
1998 1998 return self._revlog.addgroup(
1999 1999 deltas,
2000 2000 linkmapper,
2001 2001 transaction,
2002 2002 alwayscache=alwayscache,
2003 2003 addrevisioncb=addrevisioncb,
2004 2004 duplicaterevisioncb=duplicaterevisioncb,
2005 2005 debug_info=debug_info,
2006 2006 delta_base_reuse_policy=delta_base_reuse_policy,
2007 2007 )
2008 2008
2009 2009 def rawsize(self, rev):
2010 2010 return self._revlog.rawsize(rev)
2011 2011
2012 2012 def getstrippoint(self, minlink):
2013 2013 return self._revlog.getstrippoint(minlink)
2014 2014
2015 2015 def strip(self, minlink, transaction):
2016 2016 return self._revlog.strip(minlink, transaction)
2017 2017
2018 2018 def files(self):
2019 2019 return self._revlog.files()
2020 2020
2021 2021 def clone(self, tr, destrevlog, **kwargs):
2022 2022 if not isinstance(destrevlog, manifestrevlog):
2023 2023 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
2024 2024
2025 2025 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
2026 2026
2027 2027 def storageinfo(
2028 2028 self,
2029 2029 exclusivefiles=False,
2030 2030 sharedfiles=False,
2031 2031 revisionscount=False,
2032 2032 trackedsize=False,
2033 2033 storedsize=False,
2034 2034 ):
2035 2035 return self._revlog.storageinfo(
2036 2036 exclusivefiles=exclusivefiles,
2037 2037 sharedfiles=sharedfiles,
2038 2038 revisionscount=revisionscount,
2039 2039 trackedsize=trackedsize,
2040 2040 storedsize=storedsize,
2041 2041 )
2042 2042
2043 2043 @property
2044 2044 def opener(self):
2045 2045 return self._revlog.opener
2046 2046
2047 2047 @opener.setter
2048 2048 def opener(self, value):
2049 2049 self._revlog.opener = value
2050 2050
2051 2051
2052 2052 # TODO: drop this in favor of repository.imanifestrevisionstored?
2053 2053 AnyManifestCtx = Union['manifestctx', 'treemanifestctx']
2054 2054 # TODO: drop this in favor of repository.imanifestdict
2055 2055 AnyManifestDict = Union[manifestdict, treemanifest]
2056 2056
2057 2057
2058 class manifestlog: # (repository.imanifestlog)
2058 class manifestlog(repository.imanifestlog):
2059 2059 """A collection class representing the collection of manifest snapshots
2060 2060 referenced by commits in the repository.
2061 2061
2062 2062 In this situation, 'manifest' refers to the abstract concept of a snapshot
2063 2063 of the list of files in the given commit. Consumers of the output of this
2064 2064 class do not care about the implementation details of the actual manifests
2065 2065 they receive (e.g. tree or flat or lazily loaded, etc.)."""
2066 2066
2067 2067 def __init__(self, opener, repo, rootstore, narrowmatch):
2068 2068 self.nodeconstants = repo.nodeconstants
2069 2069 usetreemanifest = False
2070 2070 cachesize = 4
2071 2071
2072 2072 opts = getattr(opener, 'options', None)
2073 2073 if opts is not None:
2074 2074 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
2075 2075 cachesize = opts.get(b'manifestcachesize', cachesize)
2076 2076
2077 2077 self._treemanifests = usetreemanifest
2078 2078
2079 2079 self._rootstore = rootstore
2080 2080 self._rootstore._setupmanifestcachehooks(repo)
2081 2081 self._narrowmatch = narrowmatch
2082 2082
2083 2083 # A cache of the manifestctx or treemanifestctx for each directory
2084 2084 self._dirmancache = {}
2085 2085 self._dirmancache[b''] = util.lrucachedict(cachesize)
2086 2086
2087 2087 self._cachesize = cachesize
2088 2088
2089 2089 def __getitem__(self, node):
2090 2090 """Retrieves the manifest instance for the given node. Throws a
2091 2091 LookupError if not found.
2092 2092 """
2093 2093 return self.get(b'', node)
2094 2094
2095 2095 @property
2096 2096 def narrowed(self):
2097 2097 return not (self._narrowmatch is None or self._narrowmatch.always())
2098 2098
2099 2099 def get(
2100 2100 self, tree: bytes, node: bytes, verify: bool = True
2101 2101 ) -> AnyManifestCtx:
2102 2102 """Retrieves the manifest instance for the given node. Throws a
2103 2103 LookupError if not found.
2104 2104
2105 2105 `verify` - if True an exception will be thrown if the node is not in
2106 2106 the revlog
2107 2107 """
2108 2108 if node in self._dirmancache.get(tree, ()):
2109 2109 return self._dirmancache[tree][node]
2110 2110
2111 2111 if not self._narrowmatch.always():
2112 2112 if not self._narrowmatch.visitdir(tree[:-1]):
2113 2113 return excludeddirmanifestctx(self.nodeconstants, tree, node)
2114 2114 if tree:
2115 2115 if self._rootstore._treeondisk:
2116 2116 if verify:
2117 2117 # Side-effect is that LookupError is raised if the node
2118 2118 # doesn't exist.
2119 2119 self.getstorage(tree).rev(node)
2120 2120
2121 2121 m = treemanifestctx(self, tree, node)
2122 2122 else:
2123 2123 raise error.Abort(
2124 2124 _(
2125 2125 b"cannot ask for manifest directory '%s' in a flat "
2126 2126 b"manifest"
2127 2127 )
2128 2128 % tree
2129 2129 )
2130 2130 else:
2131 2131 if verify:
2132 2132 # Side-effect is that LookupError is raised if the node doesn't exist.
2133 2133 self._rootstore.rev(node)
2134 2134
2135 2135 if self._treemanifests:
2136 2136 m = treemanifestctx(self, b'', node)
2137 2137 else:
2138 2138 m = manifestctx(self, node)
2139 2139
2140 2140 if node != self.nodeconstants.nullid:
2141 2141 mancache = self._dirmancache.get(tree)
2142 2142 if not mancache:
2143 2143 mancache = util.lrucachedict(self._cachesize)
2144 2144 self._dirmancache[tree] = mancache
2145 2145 mancache[node] = m
2146 2146 return m
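# Illustrative usage sketch (assumes a ``repo`` object and known manifest
# nodes ``node``/``subnode``, none of which are defined in this module):
#
#     mfl = repo.manifestlog
#     mctx = mfl[node]                                # same as mfl.get(b'', node)
#     sub = mfl.get(b'dir/', subnode, verify=False)   # tree manifests only
#
# Returned contexts are memoized per directory in ``_dirmancache``.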
2147 2147
2148 2148 def getstorage(self, tree):
2149 2149 return self._rootstore.dirlog(tree)
2150 2150
2151 2151 def clearcaches(self, clear_persisted_data: bool = False) -> None:
2152 2152 self._dirmancache.clear()
2153 2153 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2154 2154
2155 2155 def rev(self, node) -> int:
2156 2156 return self._rootstore.rev(node)
2157 2157
2158 2158 def update_caches(self, transaction) -> None:
2159 2159 return self._rootstore._revlog.update_caches(transaction=transaction)
2160 2160
2161 2161
2162 2162 class memmanifestctx: # (repository.imanifestrevisionwritable)
2163 2163 _manifestdict: manifestdict
2164 2164
2165 2165 def __init__(self, manifestlog):
2166 2166 self._manifestlog = manifestlog
2167 2167 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2168 2168
2169 2169 def _storage(self) -> manifestrevlog:
2170 2170 return self._manifestlog.getstorage(b'')
2171 2171
2172 2172 def copy(self) -> 'memmanifestctx':
2173 2173 memmf = memmanifestctx(self._manifestlog)
2174 2174 memmf._manifestdict = self.read().copy()
2175 2175 return memmf
2176 2176
2177 2177 def read(self) -> 'manifestdict':
2178 2178 return self._manifestdict
2179 2179
2180 2180 def write(self, transaction, link, p1, p2, added, removed, match=None):
2181 2181 return self._storage().add(
2182 2182 self._manifestdict,
2183 2183 transaction,
2184 2184 link,
2185 2185 p1,
2186 2186 p2,
2187 2187 added,
2188 2188 removed,
2189 2189 match=match,
2190 2190 )
2191 2191
2192 2192
2193 2193 class manifestctx(repository.imanifestrevisionstored):
2194 2194 """A class representing a single revision of a manifest, including its
2195 2195 contents, its parent revs, and its linkrev.
2196 2196 """
2197 2197
2198 2198 _data: Optional[manifestdict]
2199 2199
2200 2200 def __init__(self, manifestlog, node):
2201 2201 self._manifestlog = manifestlog
2202 2202 self._data = None
2203 2203
2204 2204 self._node = node
2205 2205
2206 2206 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2207 2207 # but let's add it later when something needs it and we can load it
2208 2208 # lazily.
2209 2209 # self.p1, self.p2 = store.parents(node)
2210 2210 # rev = store.rev(node)
2211 2211 # self.linkrev = store.linkrev(rev)
2212 2212
2213 2213 def _storage(self) -> 'manifestrevlog':
2214 2214 return self._manifestlog.getstorage(b'')
2215 2215
2216 2216 def node(self) -> bytes:
2217 2217 return self._node
2218 2218
2219 2219 def copy(self) -> memmanifestctx:
2220 2220 memmf = memmanifestctx(self._manifestlog)
2221 2221 memmf._manifestdict = self.read().copy()
2222 2222 return memmf
2223 2223
2224 2224 @propertycache
2225 2225 def parents(self) -> Tuple[bytes, bytes]:
2226 2226 return self._storage().parents(self._node)
2227 2227
2228 2228 def read(self) -> 'manifestdict':
2229 2229 if self._data is None:
2230 2230 nc = self._manifestlog.nodeconstants
2231 2231 if self._node == nc.nullid:
2232 2232 self._data = manifestdict(nc.nodelen)
2233 2233 else:
2234 2234 store = self._storage()
2235 2235 if self._node in store.fulltextcache:
2236 2236 text = pycompat.bytestr(store.fulltextcache[self._node])
2237 2237 else:
2238 2238 text = store.revision(self._node)
2239 2239 arraytext = bytearray(text)
2240 2240 store.fulltextcache[self._node] = arraytext
2241 2241 self._data = manifestdict(nc.nodelen, text)
2242 2242 return self._data
2243 2243
2244 2244 def readfast(self, shallow: bool = False) -> 'manifestdict':
2245 2245 """Calls either readdelta or read, based on which would be less work.
2246 2246 readdelta is called if the delta is against the p1, and therefore can be
2247 2247 read quickly.
2248 2248
2249 2249 If `shallow` is True, nothing changes since this is a flat manifest.
2250 2250 """
2251 2251 util.nouideprecwarn(
2252 2252 b'"readfast" is deprecated, use "read_any_fast_delta" or "read_delta_parents"',
2253 2253 b"6.9",
2254 2254 stacklevel=2,
2255 2255 )
2256 2256 store = self._storage()
2257 2257 r = store.rev(self._node)
2258 2258 deltaparent = store.deltaparent(r)
2259 2259 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2260 2260 return self.readdelta()
2261 2261 return self.read()
2262 2262
2263 2263 def readdelta(self, shallow: bool = False) -> 'manifestdict':
2264 2264 """Returns a manifest containing just the entries that are present
2265 2265 in this manifest, but not in its p1 manifest. This is efficient to read
2266 2266 if the revlog delta base is already p1.
2267 2267
2268 2268 Changing the value of `shallow` has no effect on flat manifests.
2269 2269 """
2270 2270 util.nouideprecwarn(
2271 2271 b'"readdelta" is deprecated, use "read_any_fast_delta" or "read_delta_new_entries"',
2272 2272 b"6.9",
2273 2273 stacklevel=2,
2274 2274 )
2275 2275 store = self._storage()
2276 2276 r = store.rev(self._node)
2277 2277 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2278 2278 return manifestdict(store.nodeconstants.nodelen, d)
2279 2279
2280 2280 def read_any_fast_delta(
2281 2281 self,
2282 2282 valid_bases: Optional[Collection[int]] = None,
2283 2283 *,
2284 2284 shallow: bool = False,
2285 2285 ) -> Tuple[Optional[int], manifestdict]:
2286 2286 """see `imanifestrevisionstored` documentation"""
2287 2287 store = self._storage()
2288 2288 r = store.rev(self._node)
2289 2289 deltaparent = store.deltaparent(r)
2290 2290 if valid_bases is None:
2291 2291 # make sure the next check is True
2292 2292 valid_bases = (deltaparent,)
2293 2293 if deltaparent != nullrev and deltaparent in valid_bases:
2294 2294 d = mdiff.patchtext(store.revdiff(deltaparent, r))
2295 2295 return (
2296 2296 deltaparent,
2297 2297 manifestdict(store.nodeconstants.nodelen, d),
2298 2298 )
2299 2299 return (None, self.read())
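# Illustrative caller sketch (``mctx`` is a manifestctx instance;
# ``p1rev`` and ``p2rev`` are hypothetical revision numbers supplied by
# the caller):
#
#     base, mf = mctx.read_any_fast_delta(valid_bases={p1rev, p2rev})
#     if base is None:
#         ...  # ``mf`` is a full manifestdict
#     else:
#         ...  # ``mf`` only holds entries changed relative to ``base``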
2300 2300
2301 2301 def read_delta_parents(
2302 2302 self,
2303 2303 *,
2304 2304 shallow: bool = False,
2305 2305 exact: bool = True,
2306 2306 ) -> manifestdict:
2307 2307 """see `interface.imanifestrevisionbase` documentations"""
2308 2308 store = self._storage()
2309 2309 r = store.rev(self._node)
2310 2310 deltaparent = store.deltaparent(r)
2311 2311 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2312 2312 if not exact and deltaparent in parents:
2313 2313 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2314 2314 return manifestdict(store.nodeconstants.nodelen, d)
2315 2315 elif not exact or len(parents) == 0:
2316 2316 return self.read()
2317 2317 elif len(parents) == 1:
2318 2318 p = parents[0]
2319 2319 d = mdiff.patchtext(store.revdiff(p, r))
2320 2320 return manifestdict(store.nodeconstants.nodelen, d)
2321 2321 else:
2322 2322 p1, p2 = parents
2323 2323 d1 = mdiff.patchtext(store.revdiff(p1, r))
2324 2324 d2 = mdiff.patchtext(store.revdiff(p2, r))
2325 2325 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2326 2326 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2327 2327 md = manifestdict(store.nodeconstants.nodelen)
2328 2328 for f, new_node, new_flag in d1.iterentries():
2329 2329 if f not in d2:
2330 2330 continue
2331 2331 if new_node is not None:
2332 2332 md.set(f, new_node, new_flag)
2333 2333 return md
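# Note: in the two-parent (merge) case above, the returned manifest holds
# the entries whose content differs from *both* parents, i.e. the
# intersection of the two per-parent deltas.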
2334 2334
2335 2335 def read_delta_new_entries(self, *, shallow: bool = False) -> manifestdict:
2336 2336 """see `interface.imanifestrevisionbase` documentations"""
2337 2337 # If we are using narrow, returning a delta against an arbitrary
2338 2338 # changeset might return files outside the narrowspec. This can create
2339 2339 # issues when running validation server side with strict security, as a
2340 2340 # push from a low-privilege user might be seen as adding new revisions
2341 2341 # for files they cannot touch. So we are strict if narrow is involved.
2342 2342 if self._manifestlog.narrowed:
2343 2343 return self.read_delta_parents(shallow=shallow, exact=True)
2344 2344 store = self._storage()
2345 2345 r = store.rev(self._node)
2346 2346 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2347 2347 return manifestdict(store.nodeconstants.nodelen, d)
2348 2348
2349 2349 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2350 2350 return self.read().find(key)
2351 2351
2352 2352
2353 2353 class memtreemanifestctx(repository.imanifestrevisionwritable):
2354 2354 _treemanifest: treemanifest
2355 2355
2356 2356 def __init__(self, manifestlog, dir=b''):
2357 2357 self._manifestlog = manifestlog
2358 2358 self._dir = dir
2359 2359 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2360 2360
2361 2361 def _storage(self) -> manifestrevlog:
2362 2362 return self._manifestlog.getstorage(b'')
2363 2363
2364 2364 def copy(self) -> 'memtreemanifestctx':
2365 2365 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2366 2366 memmf._treemanifest = self._treemanifest.copy()
2367 2367 return memmf
2368 2368
2369 2369 def read(self) -> 'treemanifest':
2370 2370 return self._treemanifest
2371 2371
2372 2372 def write(self, transaction, link, p1, p2, added, removed, match=None):
2373 2373 def readtree(dir, node):
2374 2374 return self._manifestlog.get(dir, node).read()
2375 2375
2376 2376 return self._storage().add(
2377 2377 self._treemanifest,
2378 2378 transaction,
2379 2379 link,
2380 2380 p1,
2381 2381 p2,
2382 2382 added,
2383 2383 removed,
2384 2384 readtree=readtree,
2385 2385 match=match,
2386 2386 )
2387 2387
2388 2388
2389 2389 class treemanifestctx(repository.imanifestrevisionstored):
2390 2390 _data: Optional[treemanifest]
2391 2391
2392 2392 def __init__(self, manifestlog, dir, node):
2393 2393 self._manifestlog = manifestlog
2394 2394 self._dir = dir
2395 2395 self._data = None
2396 2396
2397 2397 self._node = node
2398 2398
2399 2399 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2400 2400 # we can instantiate treemanifestctx objects for directories we don't
2401 2401 # have on disk.
2402 2402 # self.p1, self.p2 = store.parents(node)
2403 2403 # rev = store.rev(node)
2404 2404 # self.linkrev = store.linkrev(rev)
2405 2405
2406 2406 def _storage(self) -> manifestrevlog:
2407 2407 narrowmatch = self._manifestlog._narrowmatch
2408 2408 if not narrowmatch.always():
2409 2409 if not narrowmatch.visitdir(self._dir[:-1]):
2410 2410 return excludedmanifestrevlog(
2411 2411 self._manifestlog.nodeconstants, self._dir
2412 2412 )
2413 2413 return self._manifestlog.getstorage(self._dir)
2414 2414
2415 2415 def read(self) -> 'treemanifest':
2416 2416 if self._data is None:
2417 2417 store = self._storage()
2418 2418 if self._node == self._manifestlog.nodeconstants.nullid:
2419 2419 self._data = treemanifest(self._manifestlog.nodeconstants)
2420 2420 # TODO accessing non-public API
2421 2421 elif store._treeondisk:
2422 2422 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2423 2423
2424 2424 def gettext():
2425 2425 return store.revision(self._node)
2426 2426
2427 2427 def readsubtree(dir, subm):
2428 2428 # Set verify to False since we need to be able to create
2429 2429 # subtrees for trees that don't exist on disk.
2430 2430 return self._manifestlog.get(dir, subm, verify=False).read()
2431 2431
2432 2432 m.read(gettext, readsubtree)
2433 2433 m.setnode(self._node)
2434 2434 self._data = m
2435 2435 else:
2436 2436 if self._node in store.fulltextcache:
2437 2437 text = pycompat.bytestr(store.fulltextcache[self._node])
2438 2438 else:
2439 2439 text = store.revision(self._node)
2440 2440 arraytext = bytearray(text)
2441 2441 store.fulltextcache[self._node] = arraytext
2442 2442 self._data = treemanifest(
2443 2443 self._manifestlog.nodeconstants, dir=self._dir, text=text
2444 2444 )
2445 2445
2446 2446 return self._data
2447 2447
2448 2448 def node(self) -> bytes:
2449 2449 return self._node
2450 2450
2451 2451 def copy(self) -> 'memtreemanifestctx':
2452 2452 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2453 2453 memmf._treemanifest = self.read().copy()
2454 2454 return memmf
2455 2455
2456 2456 @propertycache
2457 2457 def parents(self) -> Tuple[bytes, bytes]:
2458 2458 return self._storage().parents(self._node)
2459 2459
2460 2460 def readdelta(self, shallow: bool = False) -> AnyManifestDict:
2461 2461 """see `imanifestrevisionstored` documentation"""
2462 2462 util.nouideprecwarn(
2463 2463 b'"readdelta" is deprecated, use "read_any_fast_delta" or "read_delta_new_entries"',
2464 2464 b"6.9",
2465 2465 stacklevel=2,
2466 2466 )
2467 2467 store = self._storage()
2468 2468 if shallow:
2469 2469 r = store.rev(self._node)
2470 2470 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2471 2471 return manifestdict(store.nodeconstants.nodelen, d)
2472 2472 else:
2473 2473 # Need to perform a slow delta
2474 2474 r0 = store.deltaparent(store.rev(self._node))
2475 2475 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2476 2476 m1 = self.read()
2477 2477 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2478 2478 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2479 2479 if n1:
2480 2480 md[f] = n1
2481 2481 if fl1:
2482 2482 md.setflag(f, fl1)
2483 2483 return md
2484 2484
2485 2485 def read_any_fast_delta(
2486 2486 self,
2487 2487 valid_bases: Optional[Collection[int]] = None,
2488 2488 *,
2489 2489 shallow: bool = False,
2490 2490 ) -> Tuple[Optional[int], AnyManifestDict]:
2491 2491 """see `imanifestrevisionstored` documentation"""
2492 2492 store = self._storage()
2493 2493 r = store.rev(self._node)
2494 2494 deltaparent = store.deltaparent(r)
2495 2495
2496 2496 if valid_bases is None:
2497 2497 # make sure the next check is True
2498 2498 valid_bases = (deltaparent,)
2499 2499 can_use_delta = deltaparent != nullrev and deltaparent in valid_bases
2500 2500
2501 2501 if shallow:
2502 2502 if can_use_delta:
2503 2503 return (deltaparent, self._read_storage_delta_shallow())
2504 2504 else:
2505 2505 d = store.revision(self._node)
2506 2506 return (None, manifestdict(store.nodeconstants.nodelen, d))
2507 2507 else:
2508 2508 # note: The use of "slow_delta" here is cargo-culted from the previous
2509 2509 # implementation. I am not sure it makes sense since the goal here is to
2510 2510 # be fast, so why are we computing a delta? On the other hand, tree
2511 2511 # manifest deltas are fairly "cheap" and allow for skipping whole parts of
2512 2512 # the tree that a full read would access. So it might be a good idea.
2513 2513 #
2514 2514 # If we realize we don't need delta here, we should simply use:
2515 2515 #
2516 2516 # return (None, self.read())
2517 2517 if can_use_delta:
2518 2518 return (None, self._read_storage_slow_delta(base=deltaparent))
2519 2519 else:
2520 2520 parents = [
2521 2521 p
2522 2522 for p in store.parentrevs(r)
2523 2523 if p is not nullrev and p in valid_bases
2524 2524 ]
2525 2525 if parents:
2526 2526 best_base = max(parents)
2527 2527 else:
2528 2528 best_base = max(valid_bases)
2529 2529 return (None, self._read_storage_slow_delta(base=best_base))
2530 2530
2531 2531 def _read_storage_delta_shallow(self) -> manifestdict:
2532 2532 store = self._storage()
2533 2533 r = store.rev(self._node)
2534 2534 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2535 2535 return manifestdict(store.nodeconstants.nodelen, d)
2536 2536
2537 2537 def _read_storage_slow_delta(self, base) -> 'treemanifest':
2538 2538 store = self._storage()
2539 2539 if base is None:
2540 2540 base = store.deltaparent(store.rev(self._node))
2541 2541 m0 = self._manifestlog.get(self._dir, store.node(base)).read()
2542 2542 m1 = self.read()
2543 2543 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2544 2544 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2545 2545 if n1:
2546 2546 md[f] = n1
2547 2547 if fl1:
2548 2548 md.setflag(f, fl1)
2549 2549 return md
2550 2550
2551 2551 def read_delta_parents(
2552 2552 self,
2553 2553 *,
2554 2554 shallow: bool = False,
2555 2555 exact: bool = True,
2556 2556 ) -> AnyManifestDict:
2557 2557 """see `interface.imanifestrevisionbase` documentations"""
2558 2558 store = self._storage()
2559 2559 r = store.rev(self._node)
2560 2560 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2561 2561 if not exact:
2562 2562 return self.read_any_fast_delta(parents, shallow=shallow)[1]
2563 2563 elif len(parents) == 0:
2564 2564 if shallow:
2565 2565 d = store.revision(self._node)
2566 2566 return manifestdict(store.nodeconstants.nodelen, d)
2567 2567 else:
2568 2568 return self.read()
2569 2569 elif len(parents) == 1:
2570 2570 p = parents[0]
2571 2571 if shallow:
2572 2572 d = mdiff.patchtext(store.revdiff(p, r))
2573 2573 return manifestdict(store.nodeconstants.nodelen, d)
2574 2574 else:
2575 2575 return self._read_storage_slow_delta(base=p)
2576 2576 else:
2577 2577 p1, p2 = parents
2578 2578 if shallow:
2579 2579 d1 = mdiff.patchtext(store.revdiff(p1, r))
2580 2580 d2 = mdiff.patchtext(store.revdiff(p2, r))
2581 2581 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2582 2582 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2583 2583 md = manifestdict(store.nodeconstants.nodelen)
2584 2584 for f, new_node, new_flag in d1.iterentries():
2585 2585 if f not in d2:
2586 2586 continue
2587 2587 if new_node is not None:
2588 2588 md.set(f, new_node, new_flag)
2589 2589 return md
2590 2590 else:
2591 2591 m1 = self._manifestlog.get(self._dir, store.node(p1)).read()
2592 2592 m2 = self._manifestlog.get(self._dir, store.node(p2)).read()
2593 2593 mc = self.read()
2594 2594 d1 = m1.diff(mc)
2595 2595 d2 = m2.diff(mc)
2596 2596 md = treemanifest(
2597 2597 self._manifestlog.nodeconstants,
2598 2598 dir=self._dir,
2599 2599 )
2600 2600 for f, new_node, new_flag in d1.iterentries():
2601 2601 if f not in d2:
2602 2602 continue
2603 2603 if new_node is not None:
2604 2604 md.set(f, new_node, new_flag)
2605 2605 return md
2606 2606
2607 2607 def read_delta_new_entries(
2608 2608 self, *, shallow: bool = False
2609 2609 ) -> AnyManifestDict:
2610 2610 """see `interface.imanifestrevisionbase` documentations"""
2611 2611 # If we are using narrow, returning a delta against an arbitrary
2612 2612 # changeset might return files outside the narrowspec. This can create
2613 2613 # issues when running validation server side with strict security, as a
2614 2614 # push from a low-privilege user might be seen as adding new revisions
2615 2615 # for files they cannot touch. So we are strict if narrow is involved.
2616 2616 if self._manifestlog.narrowed:
2617 2617 return self.read_delta_parents(shallow=shallow, exact=True)
2618 2618 # delegate to another existing method for simplicity
2619 2619 store = self._storage()
2620 2620 r = store.rev(self._node)
2621 2621 bases = (store.deltaparent(r),)
2622 2622 return self.read_any_fast_delta(bases, shallow=shallow)[1]
2623 2623
2624 2624 def readfast(self, shallow: bool = False) -> AnyManifestDict:
2625 2625 """Calls either readdelta or read, based on which would be less work.
2626 2626 readdelta is called if the delta is against the p1, and therefore can be
2627 2627 read quickly.
2628 2628
2629 2629 If `shallow` is True, it only returns the entries from this manifest,
2630 2630 and not any submanifests.
2631 2631 """
2632 2632 util.nouideprecwarn(
2633 2633 b'"readfast" is deprecated, use "read_any_fast_delta" or "read_delta_parents"',
2634 2634 b"6.9",
2635 2635 stacklevel=2,
2636 2636 )
2637 2637 store = self._storage()
2638 2638 r = store.rev(self._node)
2639 2639 deltaparent = store.deltaparent(r)
2640 2640 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2641 2641 return self.readdelta(shallow=shallow)
2642 2642
2643 2643 if shallow:
2644 2644 return manifestdict(
2645 2645 store.nodeconstants.nodelen, store.revision(self._node)
2646 2646 )
2647 2647 else:
2648 2648 return self.read()
2649 2649
2650 2650 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2651 2651 return self.read().find(key)
2652 2652
2653 2653
2654 2654 class excludeddir(treemanifest):
2655 2655 """Stand-in for a directory that is excluded from the repository.
2656 2656
2657 2657 With narrowing active on a repository that uses treemanifests,
2658 2658 some of the directory revlogs will be excluded from the resulting
2659 2659 clone. This is a huge storage win for clients, but means we need
2660 2660 some sort of pseudo-manifest to surface to internals so we can
2661 2661 detect a merge conflict outside the narrowspec. That's what this
2662 2662 class is: it stands in for a directory whose node is known, but
2663 2663 whose contents are unknown.
2664 2664 """
2665 2665
2666 2666 _files: Dict[bytes, bytes]
2667 2667 _flags: Dict[bytes, bytes]
2668 2668
2669 2669 def __init__(self, nodeconstants, dir, node):
2670 2670 super(excludeddir, self).__init__(nodeconstants, dir)
2671 2671 self._node = node
2672 2672 # Add an empty file, which will be included by iterators and such,
2673 2673 # appearing as the directory itself (i.e. something like "dir/")
2674 2674 self._files[b''] = node
2675 2675 self._flags[b''] = b't'
2676 2676
2677 2677 # Manifests outside the narrowspec should never be modified, so avoid
2678 2678 # copying. This makes a noticeable difference when there are very many
2679 2679 # directories outside the narrowspec. Also, it makes sense for the copy to
2680 2680 # be of the same type as the original, which would not happen with the
2681 2681 # super type's copy().
2682 2682 def copy(self):
2683 2683 return self
2684 2684
2685 2685
2686 2686 class excludeddirmanifestctx(treemanifestctx):
2687 2687 """context wrapper for excludeddir - see that docstring for rationale"""
2688 2688
2689 2689 def __init__(self, nodeconstants, dir, node):
2690 2690 self.nodeconstants = nodeconstants
2691 2691 self._dir = dir
2692 2692 self._node = node
2693 2693
2694 2694 def read(self):
2695 2695 return excludeddir(self.nodeconstants, self._dir, self._node)
2696 2696
2697 2697 def readfast(self, shallow: bool = False):
2698 2698 # special version of readfast since we don't have underlying storage
2699 2699 return self.read()
2700 2700
2701 2701 def write(self, *args):
2702 2702 raise error.ProgrammingError(
2703 2703 b'attempt to write manifest from excluded dir %s' % self._dir
2704 2704 )
2705 2705
2706 2706
2707 2707 class excludedmanifestrevlog(manifestrevlog):
2708 2708 """Stand-in for excluded treemanifest revlogs.
2709 2709
2710 2710 When narrowing is active on a treemanifest repository, we'll have
2711 2711 references to directories we can't see due to the revlog being
2712 2712 skipped. This class exists to conform to the manifestrevlog
2713 2713 interface for those directories and proactively prevent writes to
2714 2714 outside the narrowspec.
2715 2715 """
2716 2716
2717 2717 def __init__(self, nodeconstants, dir):
2718 2718 self.nodeconstants = nodeconstants
2719 2719 self._dir = dir
2720 2720
2721 2721 def __len__(self):
2722 2722 raise error.ProgrammingError(
2723 2723 b'attempt to get length of excluded dir %s' % self._dir
2724 2724 )
2725 2725
2726 2726 def rev(self, node):
2727 2727 raise error.ProgrammingError(
2728 2728 b'attempt to get rev from excluded dir %s' % self._dir
2729 2729 )
2730 2730
2731 2731 def linkrev(self, node):
2732 2732 raise error.ProgrammingError(
2733 2733 b'attempt to get linkrev from excluded dir %s' % self._dir
2734 2734 )
2735 2735
2736 2736 def node(self, rev):
2737 2737 raise error.ProgrammingError(
2738 2738 b'attempt to get node from excluded dir %s' % self._dir
2739 2739 )
2740 2740
2741 2741 def add(self, *args, **kwargs):
2742 2742 # We should never write entries in dirlogs outside the narrow clone.
2743 2743 # However, the method still gets called from writesubtree() in
2744 2744 # _addtree(), so we need to handle it. We should possibly make that
2745 2745 # code path avoid calling add() with a clean manifest (_dirty is always
2746 2746 # False in excludeddir instances).
2747 2747 pass