manifest: deprecated readdelta and readfast...
marmoute
r52681:9af1acc1 default
@@ -1,2734 +1,2754 @@
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import heapq
10 10 import itertools
11 11 import struct
12 12 import weakref
13 13
14 14 from typing import (
15 15 ByteString,
16 16 Callable,
17 17 Collection,
18 18 Dict,
19 19 Iterable,
20 20 Iterator,
21 21 List,
22 22 Optional,
23 23 Set,
24 24 Tuple,
25 25 Union,
26 26 cast,
27 27 )
28 28
29 29 from .i18n import _
30 30 from .node import (
31 31 bin,
32 32 hex,
33 33 nullrev,
34 34 )
35 35 from . import (
36 36 encoding,
37 37 error,
38 38 match as matchmod,
39 39 mdiff,
40 40 pathutil,
41 41 policy,
42 42 pycompat,
43 43 revlog,
44 44 util,
45 45 )
46 46 from .interfaces import (
47 47 repository,
48 48 util as interfaceutil,
49 49 )
50 50 from .revlogutils import (
51 51 constants as revlog_constants,
52 52 )
53 53
54 54 parsers = policy.importmod('parsers')
55 55 propertycache = util.propertycache
56 56
57 57 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
58 58 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
59 59
60 60
61 61 def _parse(nodelen, data: bytes):
62 62 # This method does a little bit of excessive-looking
63 63 # precondition checking. This is so that the behavior of this
64 64 # class exactly matches its C counterpart to try and help
65 65 # prevent surprise breakage for anyone that develops against
66 66 # the pure version.
67 67 if data and data[-1:] != b'\n':
68 68 raise ValueError(b'Manifest did not end in a newline.')
69 69 prev = None
70 70 for l in data.splitlines():
71 71 if prev is not None and prev > l:
72 72 raise ValueError(b'Manifest lines not in sorted order.')
73 73 prev = l
74 74 f, n = l.split(b'\0')
75 75 nl = len(n)
76 76 flags = n[-1:]
77 77 if flags in _manifestflags:
78 78 n = n[:-1]
79 79 nl -= 1
80 80 else:
81 81 flags = b''
82 82 if nl != 2 * nodelen:
83 83 raise ValueError(b'Invalid manifest line')
84 84
85 85 yield f, bin(n), flags
86 86
87 87
88 88 def _text(it):
89 89 files = []
90 90 lines = []
91 91 for f, n, fl in it:
92 92 files.append(f)
93 93 # if this is changed to support newlines in filenames,
94 94 # be sure to check the templates/ dir again (especially *-raw.tmpl)
95 95 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
96 96
97 97 _checkforbidden(files)
98 98 return b''.join(lines)
99 99
100 100
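# Editor's note (illustrative sketch, not part of the original change): the two
# helpers above define the flat manifest text format.  Each entry is a single
# line of the form "<filename>\0<hex node><flag>\n", sorted by filename.  A
# round trip through them looks like this (node_a and node_b stand in for
# 20-byte sha1 nodes):
#
#     text = _text([(b'a.txt', node_a, b''), (b'bin/run.sh', node_b, b'x')])
#     assert list(_parse(20, text)) == [
#         (b'a.txt', node_a, b''),
#         (b'bin/run.sh', node_b, b'x'),
#     ]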
101 101 class lazymanifestiter:
102 102 def __init__(self, lm: '_LazyManifest') -> None:
103 103 self.pos = 0
104 104 self.lm = lm
105 105
106 106 def __iter__(self) -> 'lazymanifestiter':
107 107 return self
108 108
109 109 def next(self) -> bytes:
110 110 try:
111 111 data, pos = self.lm._get(self.pos)
112 112 except IndexError:
113 113 raise StopIteration
114 114 if pos == -1:
115 115 assert isinstance(data, tuple)
116 116 self.pos += 1
117 117 return data[0]
118 118 assert isinstance(data, bytes)
119 119 self.pos += 1
120 120 zeropos = data.find(b'\x00', pos)
121 121 return data[pos:zeropos]
122 122
123 123 __next__ = next
124 124
125 125
126 126 class lazymanifestiterentries:
127 127 def __init__(self, lm: '_LazyManifest') -> None:
128 128 self.lm = lm
129 129 self.pos = 0
130 130
131 131 def __iter__(self) -> 'lazymanifestiterentries':
132 132 return self
133 133
134 134 def next(self) -> Tuple[bytes, bytes, bytes]:
135 135 try:
136 136 data, pos = self.lm._get(self.pos)
137 137 except IndexError:
138 138 raise StopIteration
139 139 if pos == -1:
140 140 assert isinstance(data, tuple)
141 141 self.pos += 1
142 142 return data
143 143 assert isinstance(data, bytes)
144 144 zeropos = data.find(b'\x00', pos)
145 145 nlpos = data.find(b'\n', pos)
146 146 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
147 147 raise error.StorageError(b'Invalid manifest line')
148 148 flags = data[nlpos - 1 : nlpos]
149 149 if flags in _manifestflags:
150 150 hlen = nlpos - zeropos - 2
151 151 else:
152 152 hlen = nlpos - zeropos - 1
153 153 flags = b''
154 154 if hlen != 2 * self.lm._nodelen:
155 155 raise error.StorageError(b'Invalid manifest line')
156 156 hashval = unhexlify(
157 157 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
158 158 )
159 159 self.pos += 1
160 160 return (data[pos:zeropos], hashval, flags)
161 161
162 162 __next__ = next
163 163
164 164
165 165 def unhexlify(data: bytes, extra: int, pos, length: int):
166 166 s = bin(data[pos : pos + length])
167 167 if extra:
168 168 s += bytes([extra & 0xFF])
169 169 return s
170 170
171 171
172 172 def _cmp(a, b):
173 173 return (a > b) - (a < b)
174 174
175 175
176 176 _manifestflags = {b'', b'l', b't', b'x'}
177 177
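# Editor's note: the valid per-entry flags are b'' (regular file), b'l'
# (symlink), b'x' (executable) and b't' (a sub-tree entry, only used by tree
# manifests).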
178 178
179 179 class _LazyManifest:
180 180 """A pure python manifest backed by a byte string. It is supplimented with
181 181 internal lists as it is modified, until it is compacted back to a pure byte
182 182 string.
183 183
184 184 ``data`` is the initial manifest data.
185 185
186 186 ``positions`` is a list of offsets, one per manifest entry. Positive
187 187 values are offsets into ``data``, negative values are offsets into the
188 188 ``extradata`` list. When an entry is removed, its entry is dropped from
189 189 ``positions``. The values are encoded such that when walking the list and
190 190 indexing into ``data`` or ``extradata`` as appropriate, the entries are
191 191 sorted by filename.
192 192
193 193 ``extradata`` is a list of (key, hash, flags) for entries that were added or
194 194 modified since the manifest was created or compacted.
195 195 """
196 196
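    # Editor's note (illustrative): a value >= 0 in ``positions`` is a byte
    # offset into ``data``, while a negative value -k refers to
    # ``extradata[k - 1]`` (see _get() below).  For example, positions ==
    # [0, -1, 42] means entries 0 and 2 live in ``data`` at offsets 0 and 42,
    # and entry 1 is the first tuple appended to ``extradata``.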
197 197 def __init__(
198 198 self,
199 199 nodelen: int,
200 200 data: bytes,
201 201 positions=None,
202 202 extrainfo=None,
203 203 extradata=None,
204 204 hasremovals: bool = False,
205 205 ):
206 206 self._nodelen = nodelen
207 207 if positions is None:
208 208 self.positions = self.findlines(data)
209 209 self.extrainfo = [0] * len(self.positions)
210 210 self.data = data
211 211 self.extradata = []
212 212 self.hasremovals = False
213 213 else:
214 214 self.positions = positions[:]
215 215 self.extrainfo = extrainfo[:]
216 216 self.extradata = extradata[:]
217 217 self.data = data
218 218 self.hasremovals = hasremovals
219 219
220 220 def findlines(self, data: bytes) -> List[int]:
221 221 if not data:
222 222 return []
223 223 pos = data.find(b"\n")
224 224 if pos == -1 or data[-1:] != b'\n':
225 225 raise ValueError(b"Manifest did not end in a newline.")
226 226 positions = [0]
227 227 prev = data[: data.find(b'\x00')]
228 228 while pos < len(data) - 1 and pos != -1:
229 229 positions.append(pos + 1)
230 230 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
231 231 if nexts < prev:
232 232 raise ValueError(b"Manifest lines not in sorted order.")
233 233 prev = nexts
234 234 pos = data.find(b"\n", pos + 1)
235 235 return positions
236 236
237 237 def _get(
238 238 self, index: int
239 239 ) -> Tuple[Union[bytes, Tuple[bytes, bytes, bytes]], int]:
240 240 # get the position encoded in pos:
241 241 # positive number is an index in 'data'
242 242 # negative number is in extrapieces
243 243 pos = self.positions[index]
244 244 if pos >= 0:
245 245 return self.data, pos
246 246 return self.extradata[-pos - 1], -1
247 247
248 248 def _getkey(self, pos) -> bytes:
249 249 if pos >= 0:
250 250 return self.data[pos : self.data.find(b'\x00', pos + 1)]
251 251 return self.extradata[-pos - 1][0]
252 252
253 253 def bsearch(self, key: bytes) -> int:
254 254 first = 0
255 255 last = len(self.positions) - 1
256 256
257 257 while first <= last:
258 258 midpoint = (first + last) // 2
259 259 nextpos = self.positions[midpoint]
260 260 candidate = self._getkey(nextpos)
261 261 r = _cmp(key, candidate)
262 262 if r == 0:
263 263 return midpoint
264 264 else:
265 265 if r < 0:
266 266 last = midpoint - 1
267 267 else:
268 268 first = midpoint + 1
269 269 return -1
270 270
271 271 def bsearch2(self, key: bytes) -> Tuple[int, bool]:
272 272 # same as the above, but will always return the position
273 273 # done for performance reasons
274 274 first = 0
275 275 last = len(self.positions) - 1
276 276
277 277 while first <= last:
278 278 midpoint = (first + last) // 2
279 279 nextpos = self.positions[midpoint]
280 280 candidate = self._getkey(nextpos)
281 281 r = _cmp(key, candidate)
282 282 if r == 0:
283 283 return (midpoint, True)
284 284 else:
285 285 if r < 0:
286 286 last = midpoint - 1
287 287 else:
288 288 first = midpoint + 1
289 289 return (first, False)
290 290
291 291 def __contains__(self, key: bytes) -> bool:
292 292 return self.bsearch(key) != -1
293 293
294 294 def __getitem__(self, key: bytes) -> Tuple[bytes, bytes]:
295 295 if not isinstance(key, bytes):
296 296 raise TypeError(b"getitem: manifest keys must be a bytes.")
297 297 needle = self.bsearch(key)
298 298 if needle == -1:
299 299 raise KeyError
300 300 data, pos = self._get(needle)
301 301 if pos == -1:
302 302 assert isinstance(data, tuple)
303 303 return (data[1], data[2])
304 304
305 305 assert isinstance(data, bytes)
306 306 zeropos = data.find(b'\x00', pos)
307 307 nlpos = data.find(b'\n', zeropos)
308 308 assert 0 <= needle <= len(self.positions)
309 309 assert len(self.extrainfo) == len(self.positions)
310 310 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
311 311 raise error.StorageError(b'Invalid manifest line')
312 312 hlen = nlpos - zeropos - 1
313 313 flags = data[nlpos - 1 : nlpos]
314 314 if flags in _manifestflags:
315 315 hlen -= 1
316 316 else:
317 317 flags = b''
318 318 if hlen != 2 * self._nodelen:
319 319 raise error.StorageError(b'Invalid manifest line')
320 320 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
321 321 return (hashval, flags)
322 322
323 323 def __delitem__(self, key: bytes) -> None:
324 324 needle, found = self.bsearch2(key)
325 325 if not found:
326 326 raise KeyError
327 327 cur = self.positions[needle]
328 328 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
329 329 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
330 330 if cur >= 0:
331 331 # This does NOT unsort the list as far as the search functions are
332 332 # concerned, as they only examine lines mapped by self.positions.
333 333 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
334 334 self.hasremovals = True
335 335
336 336 def __setitem__(self, key: bytes, value: Tuple[bytes, bytes]):
337 337 if not isinstance(key, bytes):
338 338 raise TypeError(b"setitem: manifest keys must be a byte string.")
339 339 if not isinstance(value, tuple) or len(value) != 2:
340 340 raise TypeError(
341 341 b"Manifest values must be a tuple of (node, flags)."
342 342 )
343 343 hashval = value[0]
344 344 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
345 345 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
346 346 flags = value[1]
347 347 if not isinstance(flags, bytes) or len(flags) > 1:
348 348 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
349 349 needle, found = self.bsearch2(key)
350 350 if found:
351 351 # put the item
352 352 pos = self.positions[needle]
353 353 if pos < 0:
354 354 self.extradata[-pos - 1] = (key, hashval, value[1])
355 355 else:
356 356 # just don't bother
357 357 self.extradata.append((key, hashval, value[1]))
358 358 self.positions[needle] = -len(self.extradata)
359 359 else:
360 360 # not found, put it in with extra positions
361 361 self.extradata.append((key, hashval, value[1]))
362 362 self.positions = (
363 363 self.positions[:needle]
364 364 + [-len(self.extradata)]
365 365 + self.positions[needle:]
366 366 )
367 367 self.extrainfo = (
368 368 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
369 369 )
370 370
371 371 def copy(self) -> '_LazyManifest':
372 372 # XXX call _compact like in C?
373 373 return _lazymanifest(
374 374 self._nodelen,
375 375 self.data,
376 376 self.positions,
377 377 self.extrainfo,
378 378 self.extradata,
379 379 self.hasremovals,
380 380 )
381 381
382 382 def _compact(self) -> None:
383 383 # hopefully not called TOO often
384 384 if len(self.extradata) == 0 and not self.hasremovals:
385 385 return
386 386 l = []
387 387 i = 0
388 388 offset = 0
389 389 self.extrainfo = [0] * len(self.positions)
390 390 while i < len(self.positions):
391 391 if self.positions[i] >= 0:
392 392 cur = self.positions[i]
393 393 last_cut = cur
394 394
395 395 # Collect all contiguous entries in the buffer at the current
396 396 # offset, breaking out only for added/modified items held in
397 397 # extradata, or a deleted line prior to the next position.
398 398 while True:
399 399 self.positions[i] = offset
400 400 i += 1
401 401 if i == len(self.positions) or self.positions[i] < 0:
402 402 break
403 403
404 404 # A removed file has no positions[] entry, but does have an
405 405 # overwritten first byte. Break out and find the end of the
406 406 # current good entry/entries if there is a removed file
407 407 # before the next position.
408 408 if (
409 409 self.hasremovals
410 410 and self.data.find(b'\n\x00', cur, self.positions[i])
411 411 != -1
412 412 ):
413 413 break
414 414
415 415 offset += self.positions[i] - cur
416 416 cur = self.positions[i]
417 417 end_cut = self.data.find(b'\n', cur)
418 418 if end_cut != -1:
419 419 end_cut += 1
420 420 offset += end_cut - cur
421 421 l.append(self.data[last_cut:end_cut])
422 422 else:
423 423 while i < len(self.positions) and self.positions[i] < 0:
424 424 cur = self.positions[i]
425 425 t = self.extradata[-cur - 1]
426 426 l.append(self._pack(t))
427 427 self.positions[i] = offset
428 428 # Hashes are either 20 bytes (old sha1s) or 32
429 429 # bytes (new non-sha1).
430 430 hlen = 20
431 431 if len(t[1]) > 25:
432 432 hlen = 32
433 433 if len(t[1]) > hlen:
434 434 self.extrainfo[i] = ord(t[1][hlen + 1])
435 435 offset += len(l[-1])
436 436 i += 1
437 437 self.data = b''.join(l)
438 438 self.hasremovals = False
439 439 self.extradata = []
440 440
441 441 def _pack(self, d: Tuple[bytes, bytes, bytes]) -> bytes:
442 442 n = d[1]
443 443 assert len(n) in (20, 32)
444 444 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
445 445
446 446 def text(self) -> ByteString:
447 447 self._compact()
448 448 return self.data
449 449
450 450 def diff(
451 451 self, m2: '_LazyManifest', clean: bool = False
452 452 ) -> Dict[
453 453 bytes,
454 454 Optional[
455 455 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
456 456 ],
457 457 ]:
458 458 '''Finds changes between the current manifest and m2.'''
459 459 # XXX think whether efficiency matters here
460 460 diff = {}
461 461
462 462 for fn, e1, flags in self.iterentries():
463 463 if fn not in m2:
464 464 diff[fn] = (e1, flags), (None, b'')
465 465 else:
466 466 e2 = m2[fn]
467 467 if (e1, flags) != e2:
468 468 diff[fn] = (e1, flags), e2
469 469 elif clean:
470 470 diff[fn] = None
471 471
472 472 for fn, e2, flags in m2.iterentries():
473 473 if fn not in self:
474 474 diff[fn] = (None, b''), (e2, flags)
475 475
476 476 return diff
477 477
478 478 def iterentries(self) -> lazymanifestiterentries:
479 479 return lazymanifestiterentries(self)
480 480
481 481 def iterkeys(self) -> lazymanifestiter:
482 482 return lazymanifestiter(self)
483 483
484 484 def __iter__(self) -> lazymanifestiter:
485 485 return lazymanifestiter(self)
486 486
487 487 def __len__(self) -> int:
488 488 return len(self.positions)
489 489
490 490 def filtercopy(self, filterfn: Callable[[bytes], bool]) -> '_LazyManifest':
491 491 # XXX should be optimized
492 492 c = _lazymanifest(self._nodelen, b'')
493 493 for f, n, fl in self.iterentries():
494 494 if filterfn(f):
495 495 c[f] = n, fl
496 496 return c
497 497
498 498
499 499 try:
500 500 _lazymanifest = parsers.lazymanifest
501 501 except AttributeError:
502 502 _lazymanifest = _LazyManifest
503 503
504 504
505 505 class ManifestDict:
506 506 def __init__(self, nodelen: int, data: ByteString = b''):
507 507 self._nodelen = nodelen
508 508 self._lm = _lazymanifest(nodelen, data)
509 509
510 510 def __getitem__(self, key: bytes) -> bytes:
511 511 return self._lm[key][0]
512 512
513 513 def find(self, key: bytes) -> Tuple[bytes, bytes]:
514 514 return self._lm[key]
515 515
516 516 def __len__(self) -> int:
517 517 return len(self._lm)
518 518
519 519 def __nonzero__(self) -> bool:
520 520 # nonzero is covered by the __len__ function, but implementing it here
521 521 # makes it easier for extensions to override.
522 522 return len(self._lm) != 0
523 523
524 524 __bool__ = __nonzero__
525 525
526 526 def set(self, key: bytes, node: bytes, flags: bytes) -> None:
527 527 self._lm[key] = node, flags
528 528
529 529 def __setitem__(self, key: bytes, node: bytes) -> None:
530 530 self._lm[key] = node, self.flags(key)
531 531
532 532 def __contains__(self, key: bytes) -> bool:
533 533 if key is None:
534 534 return False
535 535 return key in self._lm
536 536
537 537 def __delitem__(self, key: bytes) -> bool:
538 538 del self._lm[key]
539 539
540 540 def __iter__(self) -> Iterator[bytes]:
541 541 return self._lm.__iter__()
542 542
543 543 def iterkeys(self) -> Iterator[bytes]:
544 544 return self._lm.iterkeys()
545 545
546 546 def keys(self) -> List[bytes]:
547 547 return list(self.iterkeys())
548 548
549 549 def filesnotin(self, m2, match=None) -> Set[bytes]:
550 550 '''Set of files in this manifest that are not in the other'''
551 551 if match is not None:
552 552 match = matchmod.badmatch(match, lambda path, msg: None)
553 553 sm2 = set(m2.walk(match))
554 554 return {f for f in self.walk(match) if f not in sm2}
555 555 return {f for f in self if f not in m2}
556 556
557 557 @propertycache
558 558 def _dirs(self) -> pathutil.dirs:
559 559 return pathutil.dirs(self)
560 560
561 561 def dirs(self) -> pathutil.dirs:
562 562 return self._dirs
563 563
564 564 def hasdir(self, dir: bytes) -> bool:
565 565 return dir in self._dirs
566 566
567 567 def _filesfastpath(self, match: matchmod.basematcher) -> bool:
568 568 """Checks whether we can correctly and quickly iterate over matcher
569 569 files instead of over manifest files."""
570 570 files = match.files()
571 571 return len(files) < 100 and (
572 572 match.isexact()
573 573 or (match.prefix() and all(fn in self for fn in files))
574 574 )
575 575
576 576 def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
577 577 """Generates matching file names.
578 578
579 579 Equivalent to manifest.matches(match).iterkeys(), but without creating
580 580 an entirely new manifest.
581 581
582 582 It also reports nonexistent files by marking them bad with match.bad().
583 583 """
584 584 if match.always():
585 585 for f in iter(self):
586 586 yield f
587 587 return
588 588
589 589 fset = set(match.files())
590 590
591 591 # avoid the entire walk if we're only looking for specific files
592 592 if self._filesfastpath(match):
593 593 for fn in sorted(fset):
594 594 if fn in self:
595 595 yield fn
596 596 return
597 597
598 598 for fn in self:
599 599 if fn in fset:
600 600 # specified pattern is the exact name
601 601 fset.remove(fn)
602 602 if match(fn):
603 603 yield fn
604 604
605 605 # for dirstate.walk, files=[''] means "walk the whole tree".
606 606 # follow that here, too
607 607 fset.discard(b'')
608 608
609 609 for fn in sorted(fset):
610 610 if not self.hasdir(fn):
611 611 match.bad(fn, None)
612 612
613 613 def _matches(self, match: matchmod.basematcher) -> 'ManifestDict':
614 614 '''generate a new manifest filtered by the match argument'''
615 615 if match.always():
616 616 return self.copy()
617 617
618 618 if self._filesfastpath(match):
619 619 m = manifestdict(self._nodelen)
620 620 lm = self._lm
621 621 for fn in match.files():
622 622 if fn in lm:
623 623 m._lm[fn] = lm[fn]
624 624 return m
625 625
626 626 m = manifestdict(self._nodelen)
627 627 m._lm = self._lm.filtercopy(match)
628 628 return m
629 629
630 630 def diff(
631 631 self,
632 632 m2: 'ManifestDict',
633 633 match: Optional[matchmod.basematcher] = None,
634 634 clean: bool = False,
635 635 ) -> Dict[
636 636 bytes,
637 637 Optional[
638 638 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
639 639 ],
640 640 ]:
641 641 """Finds changes between the current manifest and m2.
642 642
643 643 Args:
644 644 m2: the manifest to which this manifest should be compared.
645 645 clean: if true, include files unchanged between these manifests
646 646 with a None value in the returned dictionary.
647 647
648 648 The result is returned as a dict with filename as key and
649 649 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
650 650 nodeid in the current/other manifest and fl1/fl2 is the flag
651 651 in the current/other manifest. Where the file does not exist,
652 652 the nodeid will be None and the flags will be the empty
653 653 string.
654 654 """
655 655 if match:
656 656 m1 = self._matches(match)
657 657 m2 = m2._matches(match)
658 658 return m1.diff(m2, clean=clean)
659 659 return self._lm.diff(m2._lm, clean)
660 660
661 661 def setflag(self, key: bytes, flag: bytes) -> None:
662 662 if flag not in _manifestflags:
663 663 raise TypeError(b"Invalid manifest flag set.")
664 664 self._lm[key] = self[key], flag
665 665
666 666 def get(self, key: bytes, default=None) -> Optional[bytes]:
667 667 try:
668 668 return self._lm[key][0]
669 669 except KeyError:
670 670 return default
671 671
672 672 def flags(self, key: bytes) -> bytes:
673 673 try:
674 674 return self._lm[key][1]
675 675 except KeyError:
676 676 return b''
677 677
678 678 def copy(self) -> 'ManifestDict':
679 679 c = manifestdict(self._nodelen)
680 680 c._lm = self._lm.copy()
681 681 return c
682 682
683 683 def items(self) -> Iterator[Tuple[bytes, bytes]]:
684 684 return (x[:2] for x in self._lm.iterentries())
685 685
686 686 def iteritems(self) -> Iterator[Tuple[bytes, bytes]]:
687 687 return (x[:2] for x in self._lm.iterentries())
688 688
689 689 def iterentries(self) -> Iterator[Tuple[bytes, bytes, bytes]]:
690 690 return self._lm.iterentries()
691 691
692 692 def text(self) -> ByteString:
693 693 # most likely uses native version
694 694 return self._lm.text()
695 695
696 696 def fastdelta(
697 697 self, base: ByteString, changes: Iterable[Tuple[bytes, bool]]
698 698 ) -> Tuple[ByteString, ByteString]:
699 699 """Given a base manifest text as a bytearray and a list of changes
700 700 relative to that text, compute a delta that can be used by revlog.
701 701 """
702 702 delta = []
703 703 dstart = None
704 704 dend = None
705 705 dline = [b""]
706 706 start = 0
707 707 # zero copy representation of base as a buffer
708 708 addbuf = util.buffer(base)
709 709
710 710 changes = list(changes)
711 711 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
712 712 # start with a readonly loop that finds the offset of
713 713 # each line and creates the deltas
714 714 for f, todelete in changes:
715 715 # bs will either be the index of the item or the insert point
716 716 start, end = _msearch(addbuf, f, start)
717 717 if not todelete:
718 718 h, fl = self._lm[f]
719 719 l = b"%s\0%s%s\n" % (f, hex(h), fl)
720 720 else:
721 721 if start == end:
722 722 # item we want to delete was not found, error out
723 723 raise AssertionError(
724 724 _(b"failed to remove %s from manifest") % f
725 725 )
726 726 l = b""
727 727 if dstart is not None and dstart <= start and dend >= start:
728 728 if dend < end:
729 729 dend = end
730 730 if l:
731 731 dline.append(l)
732 732 else:
733 733 if dstart is not None:
734 734 delta.append((dstart, dend, b"".join(dline)))
735 735 dstart = start
736 736 dend = end
737 737 dline = [l]
738 738
739 739 if dstart is not None:
740 740 delta.append((dstart, dend, b"".join(dline)))
741 741 # apply the delta to the base, and get a delta for addrevision
742 742 deltatext, arraytext = _addlistdelta(base, delta)
743 743 else:
744 744 # For large changes, it's much cheaper to just build the text and
745 745 # diff it.
746 746 arraytext = bytearray(self.text())
747 747 deltatext = mdiff.textdiff(
748 748 util.buffer(base), util.buffer(arraytext)
749 749 )
750 750
751 751 return arraytext, deltatext
752 752
753 753
754 754 manifestdict = interfaceutil.implementer(repository.imanifestdict)(ManifestDict)
755 755
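# Editor's note: a minimal usage sketch for manifestdict (node20 and
# other_node20 are hypothetical 20-byte node values):
#
#     m1 = manifestdict(20)
#     m1[b'foo.txt'] = node20            # set the node recorded for a path
#     m1.setflag(b'foo.txt', b'x')       # mark it executable
#     m2 = m1.copy()
#     m2[b'bar.txt'] = other_node20
#     m1.diff(m2) == {b'bar.txt': ((None, b''), (other_node20, b''))}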
756 756
757 757 def _msearch(
758 758 m: ByteString, s: bytes, lo: int = 0, hi: Optional[int] = None
759 759 ) -> Tuple[int, int]:
760 760 """return a tuple (start, end) that says where to find s within m.
761 761
762 762 If the string is found m[start:end] are the line containing
763 763 that string. If start == end the string was not found and
764 764 they indicate the proper sorted insertion point.
765 765 """
766 766
767 767 def advance(i: int, c: bytes):
768 768 while i < lenm and m[i : i + 1] != c:
769 769 i += 1
770 770 return i
771 771
772 772 if not s:
773 773 return (lo, lo)
774 774 lenm = len(m)
775 775 if not hi:
776 776 hi = lenm
777 777 while lo < hi:
778 778 mid = (lo + hi) // 2
779 779 start = mid
780 780 while start > 0 and m[start - 1 : start] != b'\n':
781 781 start -= 1
782 782 end = advance(start, b'\0')
783 783 if bytes(m[start:end]) < s:
784 784 # we know that after the null there are 40 bytes of sha1
785 785 # this translates to the bisect lo = mid + 1
786 786 lo = advance(end + 40, b'\n') + 1
787 787 else:
788 788 # this translates to the bisect hi = mid
789 789 hi = start
790 790 end = advance(lo, b'\0')
791 791 found = m[lo:end]
792 792 if s == found:
793 793 # we know that after the null there are 40 bytes of sha1
794 794 end = advance(end + 40, b'\n')
795 795 return (lo, end + 1)
796 796 else:
797 797 return (lo, lo)
798 798
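# Editor's note (illustrative): _msearch() binary-searches the raw manifest
# text.  Searching for an existing filename returns the (start, end) byte
# offsets of the full line that contains it; searching for an absent name
# returns (pos, pos), where pos is the sorted insertion point for that name.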
799 799
800 800 def _checkforbidden(l: Iterable[bytes]) -> None:
801 801 """Check filenames for illegal characters."""
802 802 for f in l:
803 803 if b'\n' in f or b'\r' in f:
804 804 raise error.StorageError(
805 805 _(b"'\\n' and '\\r' disallowed in filenames: %r")
806 806 % pycompat.bytestr(f)
807 807 )
808 808
809 809
810 810 # apply the changes collected during the bisect loop to our addlist
811 811 # return a delta suitable for addrevision
812 812 def _addlistdelta(
813 813 addlist: ByteString,
814 814 x: Iterable[Tuple[int, int, bytes]],
815 815 ) -> Tuple[bytes, ByteString]:
816 816 # for large addlist arrays, building a new array is cheaper
817 817 # than repeatedly modifying the existing one
818 818 currentposition = 0
819 819 newaddlist = bytearray()
820 820
821 821 for start, end, content in x:
822 822 newaddlist += addlist[currentposition:start]
823 823 if content:
824 824 newaddlist += bytearray(content)
825 825
826 826 currentposition = end
827 827
828 828 newaddlist += addlist[currentposition:]
829 829
830 830 deltatext = b"".join(
831 831 struct.pack(b">lll", start, end, len(content)) + content
832 832 for start, end, content in x
833 833 )
834 834 return deltatext, newaddlist
835 835
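# Editor's note (illustrative): the deltatext produced above is the raw patch
# format used for revlog deltas: a concatenation of records, each a big-endian
# ">lll" header (start offset, end offset, payload length) immediately
# followed by the payload bytes that replace base[start:end].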
836 836
837 837 def _splittopdir(f: bytes) -> Tuple[bytes, bytes]:
838 838 if b'/' in f:
839 839 dir, subpath = f.split(b'/', 1)
840 840 return dir + b'/', subpath
841 841 else:
842 842 return b'', f
843 843
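# Editor's note (illustrative):
#     _splittopdir(b'dir/sub/file.txt') == (b'dir/', b'sub/file.txt')
#     _splittopdir(b'file.txt') == (b'', b'file.txt')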
844 844
845 845 _noop = lambda s: None
846 846
847 847
848 848 class TreeManifest:
849 849 def __init__(self, nodeconstants, dir: bytes = b'', text: bytes = b''):
850 850 self._dir = dir
851 851 self.nodeconstants = nodeconstants
852 852 self._node = self.nodeconstants.nullid
853 853 self._nodelen = self.nodeconstants.nodelen
854 854 self._loadfunc = _noop
855 855 self._copyfunc = _noop
856 856 self._dirty = False
857 857 self._dirs: Dict[bytes, 'TreeManifest'] = {}
858 858 self._lazydirs: Dict[
859 859 bytes,
860 860 Tuple[bytes, Callable[[bytes, bytes], 'TreeManifest'], bool],
861 861 ] = {}
862 862 # Using _lazymanifest here is a little slower than plain old dicts
863 863 self._files: Dict[bytes, bytes] = {}
864 864 self._flags = {}
865 865 if text:
866 866
867 867 def readsubtree(subdir, subm):
868 868 raise AssertionError(
869 869 b'treemanifest constructor only accepts flat manifests'
870 870 )
871 871
872 872 self.parse(text, readsubtree)
873 873 self._dirty = True # Mark flat manifest dirty after parsing
874 874
875 875 def _subpath(self, path: bytes) -> bytes:
876 876 return self._dir + path
877 877
878 878 def _loadalllazy(self) -> None:
879 879 selfdirs = self._dirs
880 880 subpath = self._subpath
881 881 for d, (node, readsubtree, docopy) in self._lazydirs.items():
882 882 if docopy:
883 883 selfdirs[d] = readsubtree(subpath(d), node).copy()
884 884 else:
885 885 selfdirs[d] = readsubtree(subpath(d), node)
886 886 self._lazydirs.clear()
887 887
888 888 def _loadlazy(self, d: bytes) -> None:
889 889 v = self._lazydirs.get(d)
890 890 if v is not None:
891 891 node, readsubtree, docopy = v
892 892 if docopy:
893 893 self._dirs[d] = readsubtree(self._subpath(d), node).copy()
894 894 else:
895 895 self._dirs[d] = readsubtree(self._subpath(d), node)
896 896 del self._lazydirs[d]
897 897
898 898 def _loadchildrensetlazy(
899 899 self, visit: Union[Set[bytes], bytes]
900 900 ) -> Optional[Set[bytes]]:
901 901 if not visit:
902 902 return None
903 903 if visit == b'all' or visit == b'this':
904 904 self._loadalllazy()
905 905 return None
906 906
907 907 visit = cast(Set[bytes], visit)
908 908
909 909 loadlazy = self._loadlazy
910 910 for k in visit:
911 911 loadlazy(k + b'/')
912 912 return visit
913 913
914 914 def _loaddifflazy(self, t1: 'TreeManifest', t2: 'TreeManifest'):
915 915 """load items in t1 and t2 if they're needed for diffing.
916 916
917 917 The criteria currently are:
918 918 - if it's not present in _lazydirs in either t1 or t2, load it in the
919 919 other (it may already be loaded or it may not exist, doesn't matter)
920 920 - if it's present in _lazydirs in both, compare the nodeid; if it
921 921 differs, load it in both
922 922 """
923 923 toloadlazy = []
924 924 for d, v1 in t1._lazydirs.items():
925 925 v2 = t2._lazydirs.get(d)
926 926 if v2 is None or v2[0] != v1[0]:
927 927 toloadlazy.append(d)
928 928 for d, v1 in t2._lazydirs.items():
929 929 if d not in t1._lazydirs:
930 930 toloadlazy.append(d)
931 931
932 932 for d in toloadlazy:
933 933 t1._loadlazy(d)
934 934 t2._loadlazy(d)
935 935
936 936 def __len__(self) -> int:
937 937 self._load()
938 938 size = len(self._files)
939 939 self._loadalllazy()
940 940 for m in self._dirs.values():
941 941 size += m.__len__()
942 942 return size
943 943
944 944 def __nonzero__(self) -> bool:
945 945 # Faster than "__len__() != 0" since it avoids loading sub-manifests
946 946 return not self._isempty()
947 947
948 948 __bool__ = __nonzero__
949 949
950 950 def _isempty(self) -> bool:
951 951 self._load() # for consistency; already loaded by all callers
952 952 # See if we can skip loading everything.
953 953 if self._files or (
954 954 self._dirs and any(not m._isempty() for m in self._dirs.values())
955 955 ):
956 956 return False
957 957 self._loadalllazy()
958 958 return not self._dirs or all(m._isempty() for m in self._dirs.values())
959 959
960 960 @encoding.strmethod
961 961 def __repr__(self) -> bytes:
962 962 return (
963 963 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
964 964 % (
965 965 self._dir,
966 966 hex(self._node),
967 967 bool(self._loadfunc is _noop),
968 968 self._dirty,
969 969 id(self),
970 970 )
971 971 )
972 972
973 973 def dir(self) -> bytes:
974 974 """The directory that this tree manifest represents, including a
975 975 trailing '/'. Empty string for the repo root directory."""
976 976 return self._dir
977 977
978 978 def node(self) -> bytes:
979 979 """This node of this instance. nullid for unsaved instances. Should
980 980 be updated when the instance is read or written from a revlog.
981 981 """
982 982 assert not self._dirty
983 983 return self._node
984 984
985 985 def setnode(self, node: bytes) -> None:
986 986 self._node = node
987 987 self._dirty = False
988 988
989 989 def iterentries(
990 990 self,
991 991 ) -> Iterator[Tuple[bytes, Union[bytes, 'TreeManifest'], bytes]]:
992 992 self._load()
993 993 self._loadalllazy()
994 994 for p, n in sorted(
995 995 itertools.chain(self._dirs.items(), self._files.items())
996 996 ):
997 997 if p in self._files:
998 998 yield self._subpath(p), n, self._flags.get(p, b'')
999 999 else:
1000 1000 for x in n.iterentries():
1001 1001 yield x
1002 1002
1003 1003 def items(self) -> Iterator[Tuple[bytes, Union[bytes, 'TreeManifest']]]:
1004 1004 self._load()
1005 1005 self._loadalllazy()
1006 1006 for p, n in sorted(
1007 1007 itertools.chain(self._dirs.items(), self._files.items())
1008 1008 ):
1009 1009 if p in self._files:
1010 1010 yield self._subpath(p), n
1011 1011 else:
1012 1012 for f, sn in n.items():
1013 1013 yield f, sn
1014 1014
1015 1015 iteritems = items
1016 1016
1017 1017 def iterkeys(self) -> Iterator[bytes]:
1018 1018 self._load()
1019 1019 self._loadalllazy()
1020 1020 for p in sorted(itertools.chain(self._dirs, self._files)):
1021 1021 if p in self._files:
1022 1022 yield self._subpath(p)
1023 1023 else:
1024 1024 for f in self._dirs[p]:
1025 1025 yield f
1026 1026
1027 1027 def keys(self) -> List[bytes]:
1028 1028 return list(self.iterkeys())
1029 1029
1030 1030 def __iter__(self) -> Iterator[bytes]:
1031 1031 return self.iterkeys()
1032 1032
1033 1033 def __contains__(self, f: bytes) -> bool:
1034 1034 if f is None:
1035 1035 return False
1036 1036 self._load()
1037 1037 dir, subpath = _splittopdir(f)
1038 1038 if dir:
1039 1039 self._loadlazy(dir)
1040 1040
1041 1041 if dir not in self._dirs:
1042 1042 return False
1043 1043
1044 1044 return self._dirs[dir].__contains__(subpath)
1045 1045 else:
1046 1046 return f in self._files
1047 1047
1048 1048 def get(self, f: bytes, default: Optional[bytes] = None) -> Optional[bytes]:
1049 1049 self._load()
1050 1050 dir, subpath = _splittopdir(f)
1051 1051 if dir:
1052 1052 self._loadlazy(dir)
1053 1053
1054 1054 if dir not in self._dirs:
1055 1055 return default
1056 1056 return self._dirs[dir].get(subpath, default)
1057 1057 else:
1058 1058 return self._files.get(f, default)
1059 1059
1060 1060 def __getitem__(self, f: bytes) -> bytes:
1061 1061 self._load()
1062 1062 dir, subpath = _splittopdir(f)
1063 1063 if dir:
1064 1064 self._loadlazy(dir)
1065 1065
1066 1066 return self._dirs[dir].__getitem__(subpath)
1067 1067 else:
1068 1068 return self._files[f]
1069 1069
1070 1070 def flags(self, f: bytes) -> bytes:
1071 1071 self._load()
1072 1072 dir, subpath = _splittopdir(f)
1073 1073 if dir:
1074 1074 self._loadlazy(dir)
1075 1075
1076 1076 if dir not in self._dirs:
1077 1077 return b''
1078 1078 return self._dirs[dir].flags(subpath)
1079 1079 else:
1080 1080 if f in self._lazydirs or f in self._dirs:
1081 1081 return b''
1082 1082 return self._flags.get(f, b'')
1083 1083
1084 1084 def find(self, f: bytes) -> Tuple[bytes, bytes]:
1085 1085 self._load()
1086 1086 dir, subpath = _splittopdir(f)
1087 1087 if dir:
1088 1088 self._loadlazy(dir)
1089 1089
1090 1090 return self._dirs[dir].find(subpath)
1091 1091 else:
1092 1092 return self._files[f], self._flags.get(f, b'')
1093 1093
1094 1094 def __delitem__(self, f: bytes) -> None:
1095 1095 self._load()
1096 1096 dir, subpath = _splittopdir(f)
1097 1097 if dir:
1098 1098 self._loadlazy(dir)
1099 1099
1100 1100 self._dirs[dir].__delitem__(subpath)
1101 1101 # If the directory is now empty, remove it
1102 1102 if self._dirs[dir]._isempty():
1103 1103 del self._dirs[dir]
1104 1104 else:
1105 1105 del self._files[f]
1106 1106 if f in self._flags:
1107 1107 del self._flags[f]
1108 1108 self._dirty = True
1109 1109
1110 1110 def set(self, f: bytes, node: bytes, flags: bytes) -> None:
1111 1111 """Set both the node and the flags for path f."""
1112 1112 assert node is not None
1113 1113 if flags not in _manifestflags:
1114 1114 raise TypeError(b"Invalid manifest flag set.")
1115 1115 self._load()
1116 1116 dir, subpath = _splittopdir(f)
1117 1117 if dir:
1118 1118 self._loadlazy(dir)
1119 1119 if dir not in self._dirs:
1120 1120 self._dirs[dir] = treemanifest(
1121 1121 self.nodeconstants, self._subpath(dir)
1122 1122 )
1123 1123 self._dirs[dir].set(subpath, node, flags)
1124 1124 else:
1125 1125 assert len(node) in (20, 32)
1126 1126 self._files[f] = node
1127 1127 self._flags[f] = flags
1128 1128 self._dirty = True
1129 1129
1130 1130 def __setitem__(self, f: bytes, n: bytes) -> None:
1131 1131 assert n is not None
1132 1132 self._load()
1133 1133 dir, subpath = _splittopdir(f)
1134 1134 if dir:
1135 1135 self._loadlazy(dir)
1136 1136 if dir not in self._dirs:
1137 1137 self._dirs[dir] = treemanifest(
1138 1138 self.nodeconstants, self._subpath(dir)
1139 1139 )
1140 1140 self._dirs[dir].__setitem__(subpath, n)
1141 1141 else:
1142 1142 # manifest nodes are either 20 bytes or 32 bytes,
1143 1143 # depending on the hash in use. Assert this as historically
1144 1144 # sometimes extra bytes were added.
1145 1145 assert len(n) in (20, 32)
1146 1146 self._files[f] = n
1147 1147 self._dirty = True
1148 1148
1149 1149 def _load(self) -> None:
1150 1150 if self._loadfunc is not _noop:
1151 1151 lf, self._loadfunc = self._loadfunc, _noop
1152 1152 lf(self)
1153 1153 elif self._copyfunc is not _noop:
1154 1154 cf, self._copyfunc = self._copyfunc, _noop
1155 1155 cf(self)
1156 1156
1157 1157 def setflag(self, f: bytes, flags: bytes) -> None:
1158 1158 """Set the flags (symlink, executable) for path f."""
1159 1159 if flags not in _manifestflags:
1160 1160 raise TypeError(b"Invalid manifest flag set.")
1161 1161 self._load()
1162 1162 dir, subpath = _splittopdir(f)
1163 1163 if dir:
1164 1164 self._loadlazy(dir)
1165 1165 if dir not in self._dirs:
1166 1166 self._dirs[dir] = treemanifest(
1167 1167 self.nodeconstants, self._subpath(dir)
1168 1168 )
1169 1169 self._dirs[dir].setflag(subpath, flags)
1170 1170 else:
1171 1171 self._flags[f] = flags
1172 1172 self._dirty = True
1173 1173
1174 1174 def copy(self) -> 'TreeManifest':
1175 1175 copy = treemanifest(self.nodeconstants, self._dir)
1176 1176 copy._node = self._node
1177 1177 copy._dirty = self._dirty
1178 1178 if self._copyfunc is _noop:
1179 1179
1180 1180 def _copyfunc(s):
1181 1181 self._load()
1182 1182 s._lazydirs = {
1183 1183 d: (n, r, True) for d, (n, r, c) in self._lazydirs.items()
1184 1184 }
1185 1185 sdirs = s._dirs
1186 1186 for d, v in self._dirs.items():
1187 1187 sdirs[d] = v.copy()
1188 1188 s._files = dict.copy(self._files)
1189 1189 s._flags = dict.copy(self._flags)
1190 1190
1191 1191 if self._loadfunc is _noop:
1192 1192 _copyfunc(copy)
1193 1193 else:
1194 1194 copy._copyfunc = _copyfunc
1195 1195 else:
1196 1196 copy._copyfunc = self._copyfunc
1197 1197 return copy
1198 1198
1199 1199 def filesnotin(
1200 1200 self, m2: 'TreeManifest', match: Optional[matchmod.basematcher] = None
1201 1201 ) -> Set[bytes]:
1202 1202 '''Set of files in this manifest that are not in the other'''
1203 1203 if match and not match.always():
1204 1204 m1 = self._matches(match)
1205 1205 m2 = m2._matches(match)
1206 1206 return m1.filesnotin(m2)
1207 1207
1208 1208 files = set()
1209 1209
1210 1210 def _filesnotin(t1, t2):
1211 1211 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1212 1212 return
1213 1213 t1._load()
1214 1214 t2._load()
1215 1215 self._loaddifflazy(t1, t2)
1216 1216 for d, m1 in t1._dirs.items():
1217 1217 if d in t2._dirs:
1218 1218 m2 = t2._dirs[d]
1219 1219 _filesnotin(m1, m2)
1220 1220 else:
1221 1221 files.update(m1.iterkeys())
1222 1222
1223 1223 for fn in t1._files:
1224 1224 if fn not in t2._files:
1225 1225 files.add(t1._subpath(fn))
1226 1226
1227 1227 _filesnotin(self, m2)
1228 1228 return files
1229 1229
1230 1230 @propertycache
1231 1231 def _alldirs(self) -> pathutil.dirs:
1232 1232 return pathutil.dirs(self)
1233 1233
1234 1234 def dirs(self) -> pathutil.dirs:
1235 1235 return self._alldirs
1236 1236
1237 1237 def hasdir(self, dir: bytes) -> bool:
1238 1238 self._load()
1239 1239 topdir, subdir = _splittopdir(dir)
1240 1240 if topdir:
1241 1241 self._loadlazy(topdir)
1242 1242 if topdir in self._dirs:
1243 1243 return self._dirs[topdir].hasdir(subdir)
1244 1244 return False
1245 1245 dirslash = dir + b'/'
1246 1246 return dirslash in self._dirs or dirslash in self._lazydirs
1247 1247
1248 1248 def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
1249 1249 """Generates matching file names.
1250 1250
1251 1251 It also reports nonexistent files by marking them bad with match.bad().
1252 1252 """
1253 1253 if match.always():
1254 1254 for f in iter(self):
1255 1255 yield f
1256 1256 return
1257 1257
1258 1258 fset = set(match.files())
1259 1259
1260 1260 for fn in self._walk(match):
1261 1261 if fn in fset:
1262 1262 # specified pattern is the exact name
1263 1263 fset.remove(fn)
1264 1264 yield fn
1265 1265
1266 1266 # for dirstate.walk, files=[''] means "walk the whole tree".
1267 1267 # follow that here, too
1268 1268 fset.discard(b'')
1269 1269
1270 1270 for fn in sorted(fset):
1271 1271 if not self.hasdir(fn):
1272 1272 match.bad(fn, None)
1273 1273
1274 1274 def _walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
1275 1275 '''Recursively generates matching file names for walk().'''
1276 1276 visit = match.visitchildrenset(self._dir[:-1])
1277 1277 if not visit:
1278 1278 return
1279 1279
1280 1280 # yield this dir's files and walk its submanifests
1281 1281 self._load()
1282 1282 visit = self._loadchildrensetlazy(visit)
1283 1283 for p in sorted(list(self._dirs) + list(self._files)):
1284 1284 if p in self._files:
1285 1285 fullp = self._subpath(p)
1286 1286 if match(fullp):
1287 1287 yield fullp
1288 1288 else:
1289 1289 if not visit or p[:-1] in visit:
1290 1290 for f in self._dirs[p]._walk(match):
1291 1291 yield f
1292 1292
1293 1293 def _matches(self, match: matchmod.basematcher) -> 'TreeManifest':
1294 1294 """recursively generate a new manifest filtered by the match argument."""
1295 1295 if match.always():
1296 1296 return self.copy()
1297 1297 return self._matches_inner(match)
1298 1298
1299 1299 def _matches_inner(self, match: matchmod.basematcher) -> 'TreeManifest':
1300 1300 if match.always():
1301 1301 return self.copy()
1302 1302
1303 1303 visit = match.visitchildrenset(self._dir[:-1])
1304 1304 if visit == b'all':
1305 1305 return self.copy()
1306 1306 ret = treemanifest(self.nodeconstants, self._dir)
1307 1307 if not visit:
1308 1308 return ret
1309 1309
1310 1310 self._load()
1311 1311 for fn in self._files:
1312 1312 # While visitchildrenset *usually* lists only subdirs, this is
1313 1313 # actually up to the matcher and may have some files in the set().
1314 1314 # If visit == 'this', we should obviously look at the files in this
1315 1315 # directory; if visit is a set, and fn is in it, we should inspect
1316 1316 # fn (but no need to inspect things not in the set).
1317 1317 if visit != b'this' and fn not in visit:
1318 1318 continue
1319 1319 fullp = self._subpath(fn)
1320 1320 # visitchildrenset isn't perfect, we still need to call the regular
1321 1321 # matcher code to further filter results.
1322 1322 if not match(fullp):
1323 1323 continue
1324 1324 ret._files[fn] = self._files[fn]
1325 1325 if fn in self._flags:
1326 1326 ret._flags[fn] = self._flags[fn]
1327 1327
1328 1328 visit = self._loadchildrensetlazy(visit)
1329 1329 for dir, subm in self._dirs.items():
1330 1330 if visit and dir[:-1] not in visit:
1331 1331 continue
1332 1332 m = subm._matches_inner(match)
1333 1333 if not m._isempty():
1334 1334 ret._dirs[dir] = m
1335 1335
1336 1336 if not ret._isempty():
1337 1337 ret._dirty = True
1338 1338 return ret
1339 1339
1340 1340 def fastdelta(
1341 1341 self, base: ByteString, changes: Iterable[Tuple[bytes, bool]]
1342 1342 ) -> ByteString:
1343 1343 raise FastdeltaUnavailable()
1344 1344
1345 1345 def diff(
1346 1346 self,
1347 1347 m2: 'TreeManifest',
1348 1348 match: Optional[matchmod.basematcher] = None,
1349 1349 clean: bool = False,
1350 1350 ) -> Dict[
1351 1351 bytes,
1352 1352 Optional[
1353 1353 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
1354 1354 ],
1355 1355 ]:
1356 1356 """Finds changes between the current manifest and m2.
1357 1357
1358 1358 Args:
1359 1359 m2: the manifest to which this manifest should be compared.
1360 1360 clean: if true, include files unchanged between these manifests
1361 1361 with a None value in the returned dictionary.
1362 1362
1363 1363 The result is returned as a dict with filename as key and
1364 1364 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1365 1365 nodeid in the current/other manifest and fl1/fl2 is the flag
1366 1366 in the current/other manifest. Where the file does not exist,
1367 1367 the nodeid will be None and the flags will be the empty
1368 1368 string.
1369 1369 """
1370 1370 if match and not match.always():
1371 1371 m1 = self._matches(match)
1372 1372 m2 = m2._matches(match)
1373 1373 return m1.diff(m2, clean=clean)
1374 1374 result = {}
1375 1375 emptytree = treemanifest(self.nodeconstants)
1376 1376
1377 1377 def _iterativediff(t1, t2, stack):
1378 1378 """compares two tree manifests and append new tree-manifests which
1379 1379 needs to be compared to stack"""
1380 1380 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1381 1381 return
1382 1382 t1._load()
1383 1383 t2._load()
1384 1384 self._loaddifflazy(t1, t2)
1385 1385
1386 1386 for d, m1 in t1._dirs.items():
1387 1387 m2 = t2._dirs.get(d, emptytree)
1388 1388 stack.append((m1, m2))
1389 1389
1390 1390 for d, m2 in t2._dirs.items():
1391 1391 if d not in t1._dirs:
1392 1392 stack.append((emptytree, m2))
1393 1393
1394 1394 for fn, n1 in t1._files.items():
1395 1395 fl1 = t1._flags.get(fn, b'')
1396 1396 n2 = t2._files.get(fn, None)
1397 1397 fl2 = t2._flags.get(fn, b'')
1398 1398 if n1 != n2 or fl1 != fl2:
1399 1399 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1400 1400 elif clean:
1401 1401 result[t1._subpath(fn)] = None
1402 1402
1403 1403 for fn, n2 in t2._files.items():
1404 1404 if fn not in t1._files:
1405 1405 fl2 = t2._flags.get(fn, b'')
1406 1406 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1407 1407
1408 1408 stackls = []
1409 1409 _iterativediff(self, m2, stackls)
1410 1410 while stackls:
1411 1411 t1, t2 = stackls.pop()
1412 1412 # stackls is populated in the function call
1413 1413 _iterativediff(t1, t2, stackls)
1414 1414 return result
1415 1415
1416 1416 def unmodifiedsince(self, m2: 'TreeManifest') -> bool:
1417 1417 return not self._dirty and not m2._dirty and self._node == m2._node
1418 1418
1419 1419 def parse(
1420 1420 self,
1421 1421 text: bytes,
1422 1422 readsubtree: Callable[[bytes, bytes], 'TreeManifest'],
1423 1423 ) -> None:
1424 1424 selflazy = self._lazydirs
1425 1425 for f, n, fl in _parse(self._nodelen, text):
1426 1426 if fl == b't':
1427 1427 f = f + b'/'
1428 1428 # False below means "doesn't need to be copied" and can use the
1429 1429 # cached value from readsubtree directly.
1430 1430 selflazy[f] = (n, readsubtree, False)
1431 1431 elif b'/' in f:
1432 1432 # This is a flat manifest, so use __setitem__ and setflag rather
1433 1433 # than assigning directly to _files and _flags, so we can
1434 1434 # assign a path in a subdirectory, and to mark dirty (compared
1435 1435 # to nullid).
1436 1436 self[f] = n
1437 1437 if fl:
1438 1438 self.setflag(f, fl)
1439 1439 else:
1440 1440 # Assigning to _files and _flags avoids marking as dirty,
1441 1441 # and should be a little faster.
1442 1442 self._files[f] = n
1443 1443 if fl:
1444 1444 self._flags[f] = fl
1445 1445
1446 1446 def text(self) -> ByteString:
1447 1447 """Get the full data of this manifest as a bytestring."""
1448 1448 self._load()
1449 1449 return _text(self.iterentries())
1450 1450
1451 1451 def dirtext(self) -> ByteString:
1452 1452 """Get the full data of this directory as a bytestring. Make sure that
1453 1453 any submanifests have been written first, so their nodeids are correct.
1454 1454 """
1455 1455 self._load()
1456 1456 flags = self.flags
1457 1457 lazydirs = [(d[:-1], v[0], b't') for d, v in self._lazydirs.items()]
1458 1458 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1459 1459 files = [(f, self._files[f], flags(f)) for f in self._files]
1460 1460 return _text(sorted(dirs + files + lazydirs))
1461 1461
1462 1462 def read(
1463 1463 self,
1464 1464 gettext: Callable[[], ByteString],
1465 1465 readsubtree: Callable[[bytes, bytes], 'TreeManifest'],
1466 1466 ) -> None:
1467 1467 def _load_for_read(s):
1468 1468 s.parse(gettext(), readsubtree)
1469 1469 s._dirty = False
1470 1470
1471 1471 self._loadfunc = _load_for_read
1472 1472
1473 1473 def writesubtrees(
1474 1474 self,
1475 1475 m1: 'TreeManifest',
1476 1476 m2: 'TreeManifest',
1477 1477 writesubtree: Callable[
1478 1478 [
1479 1479 Callable[['TreeManifest'], None],
1480 1480 bytes,
1481 1481 bytes,
1482 1482 matchmod.basematcher,
1483 1483 ],
1484 1484 None,
1485 1485 ],
1486 1486 match: matchmod.basematcher,
1487 1487 ) -> None:
1488 1488 self._load() # for consistency; should never have any effect here
1489 1489 m1._load()
1490 1490 m2._load()
1491 1491 emptytree = treemanifest(self.nodeconstants)
1492 1492
1493 1493 def getnode(m, d):
1494 1494 ld = m._lazydirs.get(d)
1495 1495 if ld:
1496 1496 return ld[0]
1497 1497 tree = m._dirs.get(d, emptytree)
1498 1498 assert tree is not None # helps pytype
1499 1499 return tree._node
1500 1500
1501 1501 # let's skip investigating things that `match` says we do not need.
1502 1502 visit = match.visitchildrenset(self._dir[:-1])
1503 1503 visit = self._loadchildrensetlazy(visit)
1504 1504 if visit == b'this' or visit == b'all':
1505 1505 visit = None
1506 1506 for d, subm in self._dirs.items():
1507 1507 if visit and d[:-1] not in visit:
1508 1508 continue
1509 1509 subp1 = getnode(m1, d)
1510 1510 subp2 = getnode(m2, d)
1511 1511 if subp1 == self.nodeconstants.nullid:
1512 1512 subp1, subp2 = subp2, subp1
1513 1513 writesubtree(subm, subp1, subp2, match)
1514 1514
1515 1515 def walksubtrees(
1516 1516 self, matcher: Optional[matchmod.basematcher] = None
1517 1517 ) -> Iterator['TreeManifest']:
1518 1518 """Returns an iterator of the subtrees of this manifest, including this
1519 1519 manifest itself.
1520 1520
1521 1521 If `matcher` is provided, it only returns subtrees that match.
1522 1522 """
1523 1523 if matcher and not matcher.visitdir(self._dir[:-1]):
1524 1524 return
1525 1525 if not matcher or matcher(self._dir[:-1]):
1526 1526 yield self
1527 1527
1528 1528 self._load()
1529 1529 # OPT: use visitchildrenset to avoid loading everything.
1530 1530 self._loadalllazy()
1531 1531 for d, subm in self._dirs.items():
1532 1532 for subtree in subm.walksubtrees(matcher=matcher):
1533 1533 yield subtree
1534 1534
1535 1535
1536 1536 treemanifest = interfaceutil.implementer(repository.imanifestdict)(TreeManifest)
1537 1537
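# Editor's note: a minimal usage sketch for treemanifest (nodeconstants and
# node20 are hypothetical stand-ins for real values):
#
#     t = treemanifest(nodeconstants)
#     t[b'dir/sub/file.txt'] = node20      # lazily creates the 'dir/' and
#                                          # 'dir/sub/' sub-manifests
#     t.setflag(b'dir/sub/file.txt', b'l') # mark it as a symlink
#     assert t.hasdir(b'dir/sub')
#     assert t.flags(b'dir/sub/file.txt') == b'l'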
1538 1538
1539 1539 class manifestfulltextcache(util.lrucachedict):
1540 1540 """File-backed LRU cache for the manifest cache
1541 1541
1542 1542 File consists of entries, up to EOF:
1543 1543
1544 1544 - 20 bytes node, 4 bytes length, <length> manifest data
1545 1545
1546 1546 These are written in reverse cache order (oldest to newest).
1547 1547
1548 1548 """
1549 1549
1550 1550 _file = b'manifestfulltextcache'
1551 1551
1552 1552 def __init__(self, max):
1553 1553 super(manifestfulltextcache, self).__init__(max)
1554 1554 self._dirty = False
1555 1555 self._read = False
1556 1556 self._opener = None
1557 1557
1558 1558 def read(self):
1559 1559 if self._read or self._opener is None:
1560 1560 return
1561 1561
1562 1562 try:
1563 1563 with self._opener(self._file) as fp:
1564 1564 set = super(manifestfulltextcache, self).__setitem__
1565 1565 # ignore trailing data, this is a cache, corruption is skipped
1566 1566 while True:
1567 1567 # TODO do we need to do work here for sha1 portability?
1568 1568 node = fp.read(20)
1569 1569 if len(node) < 20:
1570 1570 break
1571 1571 try:
1572 1572 size = struct.unpack(b'>L', fp.read(4))[0]
1573 1573 except struct.error:
1574 1574 break
1575 1575 value = bytearray(fp.read(size))
1576 1576 if len(value) != size:
1577 1577 break
1578 1578 set(node, value)
1579 1579 except IOError:
1580 1580 # the file is allowed to be missing
1581 1581 pass
1582 1582
1583 1583 self._read = True
1584 1584 self._dirty = False
1585 1585
1586 1586 def write(self):
1587 1587 if not self._dirty or self._opener is None:
1588 1588 return
1589 1589 # rotate backwards to the first used node
1590 1590 try:
1591 1591 with self._opener(
1592 1592 self._file, b'w', atomictemp=True, checkambig=True
1593 1593 ) as fp:
1594 1594 node = self._head.prev
1595 1595 while True:
1596 1596 if node.key in self._cache:
1597 1597 fp.write(node.key)
1598 1598 fp.write(struct.pack(b'>L', len(node.value)))
1599 1599 fp.write(node.value)
1600 1600 if node is self._head:
1601 1601 break
1602 1602 node = node.prev
1603 1603 except IOError:
1604 1604 # We could not write the cache (e.g. a permission error);
1605 1605 # the content can be missing.
1606 1606 #
1607 1607 # We could try harder and see if we could recreate a wcache
1608 1608 # directory where we could write to.
1609 1609 #
1610 1610 # XXX the error passes silently; having some way to issue an
1611 1611 # error log (`ui.log`) would be nice.
1612 1612 pass
1613 1613
1614 1614 def __len__(self):
1615 1615 if not self._read:
1616 1616 self.read()
1617 1617 return super(manifestfulltextcache, self).__len__()
1618 1618
1619 1619 def __contains__(self, k):
1620 1620 if not self._read:
1621 1621 self.read()
1622 1622 return super(manifestfulltextcache, self).__contains__(k)
1623 1623
1624 1624 def __iter__(self):
1625 1625 if not self._read:
1626 1626 self.read()
1627 1627 return super(manifestfulltextcache, self).__iter__()
1628 1628
1629 1629 def __getitem__(self, k):
1630 1630 if not self._read:
1631 1631 self.read()
1632 1632 # the cache lru order can change on read
1633 1633 setdirty = self._cache.get(k) is not self._head
1634 1634 value = super(manifestfulltextcache, self).__getitem__(k)
1635 1635 if setdirty:
1636 1636 self._dirty = True
1637 1637 return value
1638 1638
1639 1639 def __setitem__(self, k, v):
1640 1640 if not self._read:
1641 1641 self.read()
1642 1642 super(manifestfulltextcache, self).__setitem__(k, v)
1643 1643 self._dirty = True
1644 1644
1645 1645 def __delitem__(self, k):
1646 1646 if not self._read:
1647 1647 self.read()
1648 1648 super(manifestfulltextcache, self).__delitem__(k)
1649 1649 self._dirty = True
1650 1650
1651 1651 def get(self, k, default=None):
1652 1652 if not self._read:
1653 1653 self.read()
1654 1654 return super(manifestfulltextcache, self).get(k, default=default)
1655 1655
1656 1656 def clear(self, clear_persisted_data=False):
1657 1657 super(manifestfulltextcache, self).clear()
1658 1658 if clear_persisted_data:
1659 1659 self._dirty = True
1660 1660 self.write()
1661 1661 self._read = False
1662 1662
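# Editor's note (illustrative): each entry in the cache file handled above is
#     <20-byte node><4-byte big-endian length><length bytes of manifest text>
# stored oldest-first, so that re-reading the file rebuilds the LRU order.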
1663 1663
1664 1664 # an upper bound of what we expect from compression
1665 1665 # (the real-life value seems to be "3")
1666 1666 MAXCOMPRESSION = 3
1667 1667
1668 1668
1669 1669 class FastdeltaUnavailable(Exception):
1670 1670 """Exception raised when fastdelta isn't usable on a manifest."""
1671 1671
1672 1672
1673 1673 class ManifestRevlog:
1674 1674 """A revlog that stores manifest texts. This is responsible for caching the
1675 1675 full-text manifest contents.
1676 1676 """
1677 1677
1678 1678 def __init__(
1679 1679 self,
1680 1680 nodeconstants,
1681 1681 opener,
1682 1682 tree=b'',
1683 1683 dirlogcache=None,
1684 1684 treemanifest=False,
1685 1685 ):
1686 1686 """Constructs a new manifest revlog
1687 1687
1688 1688 `indexfile` - used by extensions to have two manifests at once, like
1689 1689 when transitioning between flat manifests and tree manifests.
1690 1690
1691 1691 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1692 1692 options can also be used to make this a tree manifest revlog. The opener
1693 1693 option takes precedence, so if it is set to True, we ignore whatever
1694 1694 value is passed in to the constructor.
1695 1695 """
1696 1696 self.nodeconstants = nodeconstants
1697 1697 # During normal operations, we expect to deal with not more than four
1698 1698 # revs at a time (such as during commit --amend). When rebasing large
1699 1699 # stacks of commits, the number can go up, hence the config knob below.
1700 1700 cachesize = 4
1701 1701 optiontreemanifest = False
1702 1702 persistentnodemap = False
1703 1703 opts = getattr(opener, 'options', None)
1704 1704 if opts is not None:
1705 1705 cachesize = opts.get(b'manifestcachesize', cachesize)
1706 1706 optiontreemanifest = opts.get(b'treemanifest', False)
1707 1707 persistentnodemap = opts.get(b'persistent-nodemap', False)
1708 1708
1709 1709 self._treeondisk = optiontreemanifest or treemanifest
1710 1710
1711 1711 self._fulltextcache = manifestfulltextcache(cachesize)
1712 1712
1713 1713 if tree:
1714 1714 assert self._treeondisk, (tree, b'opts is %r' % opts)
1715 1715
1716 1716 radix = b'00manifest'
1717 1717 if tree:
1718 1718 radix = b"meta/" + tree + radix
1719 1719
1720 1720 self.tree = tree
1721 1721
1722 1722 # The dirlogcache is kept on the root manifest log
1723 1723 if tree:
1724 1724 self._dirlogcache = dirlogcache
1725 1725 else:
1726 1726 self._dirlogcache = {b'': self}
1727 1727
1728 1728 self._revlog = revlog.revlog(
1729 1729 opener,
1730 1730 target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
1731 1731 radix=radix,
1732 1732 # only root indexfile is cached
1733 1733 checkambig=not bool(tree),
1734 1734 mmaplargeindex=True,
1735 1735 upperboundcomp=MAXCOMPRESSION,
1736 1736 persistentnodemap=persistentnodemap,
1737 1737 )
1738 1738
1739 1739 self.index = self._revlog.index
1740 1740
1741 1741 def get_revlog(self):
1742 1742 """return an actual revlog instance if any
1743 1743
1744 1744 This exists because a lot of code leverages the fact that the underlying
1745 1745 storage is a revlog for optimization, so giving a simple way to access
1746 1746 the revlog instance helps such code.
1747 1747 """
1748 1748 return self._revlog
1749 1749
1750 1750 def _setupmanifestcachehooks(self, repo):
1751 1751 """Persist the manifestfulltextcache on lock release"""
1752 1752 if not hasattr(repo, '_wlockref'):
1753 1753 return
1754 1754
1755 1755 self._fulltextcache._opener = repo.wcachevfs
1756 1756 if repo._currentlock(repo._wlockref) is None:
1757 1757 return
1758 1758
1759 1759 reporef = weakref.ref(repo)
1760 1760 manifestrevlogref = weakref.ref(self)
1761 1761
1762 1762 def persistmanifestcache(success):
1763 1763 # Repo is in an unknown state, do not persist.
1764 1764 if not success:
1765 1765 return
1766 1766
1767 1767 repo = reporef()
1768 1768 self = manifestrevlogref()
1769 1769 if repo is None or self is None:
1770 1770 return
1771 1771 if repo.manifestlog.getstorage(b'') is not self:
1772 1772 # there's a different manifest in play now, abort
1773 1773 return
1774 1774 self._fulltextcache.write()
1775 1775
1776 1776 repo._afterlock(persistmanifestcache)
1777 1777
1778 1778 @property
1779 1779 def fulltextcache(self):
1780 1780 return self._fulltextcache
1781 1781
1782 1782 def clearcaches(self, clear_persisted_data=False):
1783 1783 self._revlog.clearcaches()
1784 1784 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1785 1785 self._dirlogcache = {self.tree: self}
1786 1786
1787 1787 def dirlog(self, d):
1788 1788 if d:
1789 1789 assert self._treeondisk
1790 1790 if d not in self._dirlogcache:
1791 1791 mfrevlog = manifestrevlog(
1792 1792 self.nodeconstants,
1793 1793 self.opener,
1794 1794 d,
1795 1795 self._dirlogcache,
1796 1796 treemanifest=self._treeondisk,
1797 1797 )
1798 1798 self._dirlogcache[d] = mfrevlog
1799 1799 return self._dirlogcache[d]
1800 1800
1801 1801 def add(
1802 1802 self,
1803 1803 m,
1804 1804 transaction,
1805 1805 link,
1806 1806 p1,
1807 1807 p2,
1808 1808 added: Iterable[bytes],
1809 1809 removed: Iterable[bytes],
1810 1810 readtree=None,
1811 1811 match=None,
1812 1812 ):
1813 1813 """add some manifest entry in to the manifest log
1814 1814
1815 1815 input:
1816 1816
1817 1817 m: the manifest dict we want to store
1818 1818 transaction: the open transaction
1819 1819 p1: manifest-node of p1
1820 1820 p2: manifest-node of p2
1821 1821 added: files added/changed compared to the parent
1822 1822 removed: files removed compared to the parent
1823 1823
1824 1824 tree manifest input:
1825 1825
1826 1826 readtree: a function to read a subtree
1827 1827 match: a filematcher for the subpart of the tree manifest
1828 1828 """
1829 1829 try:
1830 1830 if p1 not in self.fulltextcache:
1831 1831 raise FastdeltaUnavailable()
1832 1832 # If our first parent is in the manifest cache, we can
1833 1833 # compute a delta here using properties we know about the
1834 1834 # manifest up-front, which may save time later for the
1835 1835 # revlog layer.
1836 1836
1837 1837 _checkforbidden(added)
1838 1838 # combine the changed lists into one sorted iterator
1839 1839 work = heapq.merge(
1840 1840 [(x, False) for x in sorted(added)],
1841 1841 [(x, True) for x in sorted(removed)],
1842 1842 )
1843 1843
1844 1844 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1845 1845 cachedelta = self._revlog.rev(p1), deltatext
1846 1846 text = util.buffer(arraytext)
1847 1847 rev = self._revlog.addrevision(
1848 1848 text, transaction, link, p1, p2, cachedelta
1849 1849 )
1850 1850 n = self._revlog.node(rev)
1851 1851 except FastdeltaUnavailable:
1852 1852 # The first parent manifest isn't already loaded or the
1853 1853 # manifest implementation doesn't support fastdelta, so
1854 1854 # we'll just encode a fulltext of the manifest and pass
1855 1855 # that through to the revlog layer, and let it handle the
1856 1856 # delta process.
1857 1857 if self._treeondisk:
1858 1858 assert readtree, b"readtree must be set for treemanifest writes"
1859 1859 assert match, b"match must be specified for treemanifest writes"
1860 1860 m1 = readtree(self.tree, p1)
1861 1861 m2 = readtree(self.tree, p2)
1862 1862 n = self._addtree(
1863 1863 m, transaction, link, m1, m2, readtree, match=match
1864 1864 )
1865 1865 arraytext = None
1866 1866 else:
1867 1867 text = m.text()
1868 1868 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1869 1869 n = self._revlog.node(rev)
1870 1870 arraytext = bytearray(text)
1871 1871
1872 1872 if arraytext is not None:
1873 1873 self.fulltextcache[n] = arraytext
1874 1874
1875 1875 return n
1876 1876
1877 1877 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1878 1878 # If the manifest is unchanged compared to one parent,
1879 1879 # don't write a new revision
1880 1880 if self.tree != b'' and (
1881 1881 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1882 1882 ):
1883 1883 return m.node()
1884 1884
1885 1885 def writesubtree(subm, subp1, subp2, match):
1886 1886 sublog = self.dirlog(subm.dir())
1887 1887 sublog.add(
1888 1888 subm,
1889 1889 transaction,
1890 1890 link,
1891 1891 subp1,
1892 1892 subp2,
1893 1893 None,
1894 1894 None,
1895 1895 readtree=readtree,
1896 1896 match=match,
1897 1897 )
1898 1898
1899 1899 m.writesubtrees(m1, m2, writesubtree, match)
1900 1900 text = m.dirtext()
1901 1901 n = None
1902 1902 if self.tree != b'':
1903 1903 # Double-check whether contents are unchanged compared to one parent
1904 1904 if text == m1.dirtext():
1905 1905 n = m1.node()
1906 1906 elif text == m2.dirtext():
1907 1907 n = m2.node()
1908 1908
1909 1909 if not n:
1910 1910 rev = self._revlog.addrevision(
1911 1911 text, transaction, link, m1.node(), m2.node()
1912 1912 )
1913 1913 n = self._revlog.node(rev)
1914 1914
1915 1915 # Save nodeid so parent manifest can calculate its nodeid
1916 1916 m.setnode(n)
1917 1917 return n
1918 1918
1919 1919 def __len__(self):
1920 1920 return len(self._revlog)
1921 1921
1922 1922 def __iter__(self):
1923 1923 return self._revlog.__iter__()
1924 1924
1925 1925 def rev(self, node):
1926 1926 return self._revlog.rev(node)
1927 1927
1928 1928 def node(self, rev):
1929 1929 return self._revlog.node(rev)
1930 1930
1931 1931 def lookup(self, value):
1932 1932 return self._revlog.lookup(value)
1933 1933
1934 1934 def parentrevs(self, rev):
1935 1935 return self._revlog.parentrevs(rev)
1936 1936
1937 1937 def parents(self, node):
1938 1938 return self._revlog.parents(node)
1939 1939
1940 1940 def linkrev(self, rev):
1941 1941 return self._revlog.linkrev(rev)
1942 1942
1943 1943 def checksize(self):
1944 1944 return self._revlog.checksize()
1945 1945
1946 1946 def revision(self, node):
1947 1947 return self._revlog.revision(node)
1948 1948
1949 1949 def rawdata(self, node):
1950 1950 return self._revlog.rawdata(node)
1951 1951
1952 1952 def revdiff(self, rev1, rev2):
1953 1953 return self._revlog.revdiff(rev1, rev2)
1954 1954
1955 1955 def cmp(self, node, text):
1956 1956 return self._revlog.cmp(node, text)
1957 1957
1958 1958 def deltaparent(self, rev):
1959 1959 return self._revlog.deltaparent(rev)
1960 1960
1961 1961 def emitrevisions(
1962 1962 self,
1963 1963 nodes,
1964 1964 nodesorder=None,
1965 1965 revisiondata=False,
1966 1966 assumehaveparentrevisions=False,
1967 1967 deltamode=repository.CG_DELTAMODE_STD,
1968 1968 sidedata_helpers=None,
1969 1969 debug_info=None,
1970 1970 ):
1971 1971 return self._revlog.emitrevisions(
1972 1972 nodes,
1973 1973 nodesorder=nodesorder,
1974 1974 revisiondata=revisiondata,
1975 1975 assumehaveparentrevisions=assumehaveparentrevisions,
1976 1976 deltamode=deltamode,
1977 1977 sidedata_helpers=sidedata_helpers,
1978 1978 debug_info=debug_info,
1979 1979 )
1980 1980
1981 1981 def addgroup(
1982 1982 self,
1983 1983 deltas,
1984 1984 linkmapper,
1985 1985 transaction,
1986 1986 alwayscache=False,
1987 1987 addrevisioncb=None,
1988 1988 duplicaterevisioncb=None,
1989 1989 debug_info=None,
1990 1990 delta_base_reuse_policy=None,
1991 1991 ):
1992 1992 return self._revlog.addgroup(
1993 1993 deltas,
1994 1994 linkmapper,
1995 1995 transaction,
1996 1996 alwayscache=alwayscache,
1997 1997 addrevisioncb=addrevisioncb,
1998 1998 duplicaterevisioncb=duplicaterevisioncb,
1999 1999 debug_info=debug_info,
2000 2000 delta_base_reuse_policy=delta_base_reuse_policy,
2001 2001 )
2002 2002
2003 2003 def rawsize(self, rev):
2004 2004 return self._revlog.rawsize(rev)
2005 2005
2006 2006 def getstrippoint(self, minlink):
2007 2007 return self._revlog.getstrippoint(minlink)
2008 2008
2009 2009 def strip(self, minlink, transaction):
2010 2010 return self._revlog.strip(minlink, transaction)
2011 2011
2012 2012 def files(self):
2013 2013 return self._revlog.files()
2014 2014
2015 2015 def clone(self, tr, destrevlog, **kwargs):
2016 2016 if not isinstance(destrevlog, manifestrevlog):
2017 2017 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
2018 2018
2019 2019 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
2020 2020
2021 2021 def storageinfo(
2022 2022 self,
2023 2023 exclusivefiles=False,
2024 2024 sharedfiles=False,
2025 2025 revisionscount=False,
2026 2026 trackedsize=False,
2027 2027 storedsize=False,
2028 2028 ):
2029 2029 return self._revlog.storageinfo(
2030 2030 exclusivefiles=exclusivefiles,
2031 2031 sharedfiles=sharedfiles,
2032 2032 revisionscount=revisionscount,
2033 2033 trackedsize=trackedsize,
2034 2034 storedsize=storedsize,
2035 2035 )
2036 2036
2037 2037 @property
2038 2038 def opener(self):
2039 2039 return self._revlog.opener
2040 2040
2041 2041 @opener.setter
2042 2042 def opener(self, value):
2043 2043 self._revlog.opener = value
2044 2044
2045 2045
2046 2046 manifestrevlog = interfaceutil.implementer(repository.imanifeststorage)(
2047 2047 ManifestRevlog
2048 2048 )
2049 2049
2050 2050
2051 2051 AnyManifestCtx = Union['ManifestCtx', 'TreeManifestCtx']
2052 2052 AnyManifestDict = Union[ManifestDict, TreeManifest]
2053 2053
2054 2054
2055 2055 @interfaceutil.implementer(repository.imanifestlog)
2056 2056 class manifestlog:
2057 2057 """A collection class representing the collection of manifest snapshots
2058 2058 referenced by commits in the repository.
2059 2059
2060 2060 In this situation, 'manifest' refers to the abstract concept of a snapshot
2061 2061 of the list of files in the given commit. Consumers of the output of this
2062 2062 class do not care about the implementation details of the actual manifests
2063 2063 they receive (i.e. tree or flat or lazily loaded, etc)."""
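# Typical read path (sketch, hypothetical names):
#
#     mfl = repo.manifestlog
#     mctx = mfl[manifest_node]             # a manifestctx or treemanifestctx
#     filenode, flags = mctx.read().find(b'path/to/file')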
2064 2064
2065 2065 def __init__(self, opener, repo, rootstore, narrowmatch):
2066 2066 self.nodeconstants = repo.nodeconstants
2067 2067 usetreemanifest = False
2068 2068 cachesize = 4
2069 2069
2070 2070 opts = getattr(opener, 'options', None)
2071 2071 if opts is not None:
2072 2072 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
2073 2073 cachesize = opts.get(b'manifestcachesize', cachesize)
2074 2074
2075 2075 self._treemanifests = usetreemanifest
2076 2076
2077 2077 self._rootstore = rootstore
2078 2078 self._rootstore._setupmanifestcachehooks(repo)
2079 2079 self._narrowmatch = narrowmatch
2080 2080
2081 2081 # A cache of the manifestctx or treemanifestctx for each directory
2082 2082 self._dirmancache = {}
2083 2083 self._dirmancache[b''] = util.lrucachedict(cachesize)
2084 2084
2085 2085 self._cachesize = cachesize
2086 2086
2087 2087 def __getitem__(self, node):
2088 2088 """Retrieves the manifest instance for the given node. Throws a
2089 2089 LookupError if not found.
2090 2090 """
2091 2091 return self.get(b'', node)
2092 2092
2093 2093 @property
2094 2094 def narrowed(self):
2095 2095 return not (self._narrowmatch is None or self._narrowmatch.always())
2096 2096
2097 2097 def get(
2098 2098 self, tree: bytes, node: bytes, verify: bool = True
2099 2099 ) -> AnyManifestCtx:
2100 2100 """Retrieves the manifest instance for the given node. Throws a
2101 2101 LookupError if not found.
2102 2102
2103 2103 `verify` - if True an exception will be thrown if the node is not in
2104 2104 the revlog
2105 2105 """
2106 2106 if node in self._dirmancache.get(tree, ()):
2107 2107 return self._dirmancache[tree][node]
2108 2108
2109 2109 if not self._narrowmatch.always():
2110 2110 if not self._narrowmatch.visitdir(tree[:-1]):
2111 2111 return excludeddirmanifestctx(self.nodeconstants, tree, node)
2112 2112 if tree:
2113 2113 if self._rootstore._treeondisk:
2114 2114 if verify:
2115 2115 # Side effect: LookupError is raised if the node doesn't
2116 2116 # exist.
2117 2117 self.getstorage(tree).rev(node)
2118 2118
2119 2119 m = treemanifestctx(self, tree, node)
2120 2120 else:
2121 2121 raise error.Abort(
2122 2122 _(
2123 2123 b"cannot ask for manifest directory '%s' in a flat "
2124 2124 b"manifest"
2125 2125 )
2126 2126 % tree
2127 2127 )
2128 2128 else:
2129 2129 if verify:
2130 2130 # Side effect: LookupError is raised if the node doesn't exist.
2131 2131 self._rootstore.rev(node)
2132 2132
2133 2133 if self._treemanifests:
2134 2134 m = treemanifestctx(self, b'', node)
2135 2135 else:
2136 2136 m = manifestctx(self, node)
2137 2137
2138 2138 if node != self.nodeconstants.nullid:
2139 2139 mancache = self._dirmancache.get(tree)
2140 2140 if not mancache:
2141 2141 mancache = util.lrucachedict(self._cachesize)
2142 2142 self._dirmancache[tree] = mancache
2143 2143 mancache[node] = m
2144 2144 return m
2145 2145
2146 2146 def getstorage(self, tree):
2147 2147 return self._rootstore.dirlog(tree)
2148 2148
2149 2149 def clearcaches(self, clear_persisted_data: bool = False) -> None:
2150 2150 self._dirmancache.clear()
2151 2151 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2152 2152
2153 2153 def rev(self, node) -> int:
2154 2154 return self._rootstore.rev(node)
2155 2155
2156 2156 def update_caches(self, transaction) -> None:
2157 2157 return self._rootstore._revlog.update_caches(transaction=transaction)
2158 2158
2159 2159
2160 2160 class MemManifestCtx:
2161 2161 def __init__(self, manifestlog):
2162 2162 self._manifestlog = manifestlog
2163 2163 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2164 2164
2165 2165 def _storage(self) -> ManifestRevlog:
2166 2166 return self._manifestlog.getstorage(b'')
2167 2167
2168 2168 def copy(self) -> 'MemManifestCtx':
2169 2169 memmf = memmanifestctx(self._manifestlog)
2170 2170 memmf._manifestdict = self.read().copy()
2171 2171 return memmf
2172 2172
2173 2173 def read(self) -> 'ManifestDict':
2174 2174 return self._manifestdict
2175 2175
2176 2176 def write(self, transaction, link, p1, p2, added, removed, match=None):
2177 2177 return self._storage().add(
2178 2178 self._manifestdict,
2179 2179 transaction,
2180 2180 link,
2181 2181 p1,
2182 2182 p2,
2183 2183 added,
2184 2184 removed,
2185 2185 match=match,
2186 2186 )
2187 2187
2188 2188
2189 2189 memmanifestctx = interfaceutil.implementer(
2190 2190 repository.imanifestrevisionwritable
2191 2191 )(MemManifestCtx)
2192 2192
2193 2193
2194 2194 class ManifestCtx:
2195 2195 """A class representing a single revision of a manifest, including its
2196 2196 contents, its parent revs, and its linkrev.
2197 2197 """
2198 2198
2199 2199 def __init__(self, manifestlog, node):
2200 2200 self._manifestlog = manifestlog
2201 2201 self._data = None
2202 2202
2203 2203 self._node = node
2204 2204
2205 2205 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2206 2206 # but let's add it later when something needs it and we can load it
2207 2207 # lazily.
2208 2208 # self.p1, self.p2 = store.parents(node)
2209 2209 # rev = store.rev(node)
2210 2210 # self.linkrev = store.linkrev(rev)
2211 2211
2212 2212 def _storage(self) -> 'ManifestRevlog':
2213 2213 return self._manifestlog.getstorage(b'')
2214 2214
2215 2215 def node(self) -> bytes:
2216 2216 return self._node
2217 2217
2218 2218 def copy(self) -> MemManifestCtx:
2219 2219 memmf = memmanifestctx(self._manifestlog)
2220 2220 memmf._manifestdict = self.read().copy()
2221 2221 return memmf
2222 2222
2223 2223 @propertycache
2224 2224 def parents(self) -> Tuple[bytes, bytes]:
2225 2225 return self._storage().parents(self._node)
2226 2226
2227 2227 def read(self) -> 'ManifestDict':
2228 2228 if self._data is None:
2229 2229 nc = self._manifestlog.nodeconstants
2230 2230 if self._node == nc.nullid:
2231 2231 self._data = manifestdict(nc.nodelen)
2232 2232 else:
2233 2233 store = self._storage()
2234 2234 if self._node in store.fulltextcache:
2235 2235 text = pycompat.bytestr(store.fulltextcache[self._node])
2236 2236 else:
2237 2237 text = store.revision(self._node)
2238 2238 arraytext = bytearray(text)
2239 2239 store.fulltextcache[self._node] = arraytext
2240 2240 self._data = manifestdict(nc.nodelen, text)
2241 2241 return self._data
2242 2242
2243 2243 def readfast(self, shallow: bool = False) -> 'ManifestDict':
2244 2244 """Calls either readdelta or read, based on which would be less work.
2245 2245 readdelta is called if the delta is against the p1, and therefore can be
2246 2246 read quickly.
2247 2247
2248 2248 If `shallow` is True, nothing changes since this is a flat manifest.
2249 2249 """
2250 util.nouideprecwarn(
2251 b'"readfast" is deprecated use "read_any_fast_delta" or "read_delta_parents"',
2252 b"6.9",
2253 stacklevel=2,
2254 )
2250 2255 store = self._storage()
2251 2256 r = store.rev(self._node)
2252 2257 deltaparent = store.deltaparent(r)
2253 2258 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2254 2259 return self.readdelta()
2255 2260 return self.read()
2256 2261
2257 2262 def readdelta(self, shallow: bool = False) -> 'ManifestDict':
2258 2263 """Returns a manifest containing just the entries that are present
2259 2264 in this manifest, but not in its p1 manifest. This is efficient to read
2260 2265 if the revlog delta is already p1.
2261 2266
2262 2267 Changing the value of `shallow` has no effect on flat manifests.
2263 2268 """
2269 util.nouideprecwarn(
2270 b'"readfast" is deprecated use "read_any_fast_delta" or "read_delta_new_entries"',
2271 b"6.9",
2272 stacklevel=2,
2273 )
2264 2274 store = self._storage()
2265 2275 r = store.rev(self._node)
2266 2276 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2267 2277 return manifestdict(store.nodeconstants.nodelen, d)
2268 2278
2269 2279 def read_any_fast_delta(
2270 2280 self,
2271 2281 valid_bases: Optional[Collection[int]] = None,
2272 2282 *,
2273 2283 shallow: bool = False,
2274 2284 ) -> Tuple[Optional[int], ManifestDict]:
2275 2285 """see `imanifestrevisionstored` documentation"""
2276 2286 store = self._storage()
2277 2287 r = store.rev(self._node)
2278 2288 deltaparent = store.deltaparent(r)
2279 2289 if valid_bases is None:
2280 2290 # make sure the next check is True
2281 2291 valid_bases = (deltaparent,)
2282 2292 if deltaparent != nullrev and deltaparent in valid_bases:
2283 2293 d = mdiff.patchtext(store.revdiff(deltaparent, r))
2284 2294 return (
2285 2295 deltaparent,
2286 2296 manifestdict(store.nodeconstants.nodelen, d),
2287 2297 )
2288 2298 return (None, self.read())
2289 2299
2290 2300 def read_delta_parents(
2291 2301 self,
2292 2302 *,
2293 2303 shallow: bool = False,
2294 2304 exact: bool = True,
2295 2305 ) -> ManifestDict:
2296 2306 """see `interface.imanifestrevisionbase` documentations"""
2297 2307 store = self._storage()
2298 2308 r = store.rev(self._node)
2299 2309 deltaparent = store.deltaparent(r)
2300 2310 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2301 2311 if not exact and deltaparent in parents:
2302 2312 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2303 2313 return manifestdict(store.nodeconstants.nodelen, d)
2304 2314 elif not exact or len(parents) == 0:
2305 2315 return self.read()
2306 2316 elif len(parents) == 1:
2307 2317 p = parents[0]
2308 2318 d = mdiff.patchtext(store.revdiff(p, r))
2309 2319 return manifestdict(store.nodeconstants.nodelen, d)
2310 2320 else:
2311 2321 p1, p2 = parents
2312 2322 d1 = mdiff.patchtext(store.revdiff(p1, r))
2313 2323 d2 = mdiff.patchtext(store.revdiff(p2, r))
2314 2324 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2315 2325 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2316 2326 md = manifestdict(store.nodeconstants.nodelen)
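# only keep entries that appear in the delta text against both parents;
# an entry absent from either delta already matches that parent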
2317 2327 for f, new_node, new_flag in d1.iterentries():
2318 2328 if f not in d2:
2319 2329 continue
2320 2330 if new_node is not None:
2321 2331 md.set(f, new_node, new_flag)
2322 2332 return md
2323 2333
2324 2334 def read_delta_new_entries(self, *, shallow=False) -> ManifestDict:
2325 2335 """see `interface.imanifestrevisionbase` documentations"""
2326 2336 # If we are using narrow, returning a delta against an arbitrary
2327 2337 # changeset might return file outside the narrowspec. This can create
2328 2338 # issue when running validation server side with strict security as
2329 2339 # push from low priviledge usage might be seen as adding new revision
2330 2340 # for files they cannot touch. So we are strict if narrow is involved.
2331 2341 if self._manifestlog.narrowed:
2332 2342 return self.read_delta_parents(shallow=shallow, exact=True)
2333 2343 store = self._storage()
2334 2344 r = store.rev(self._node)
2335 2345 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2336 2346 return manifestdict(store.nodeconstants.nodelen, d)
2337 2347
2338 2348 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2339 2349 return self.read().find(key)
2340 2350
2341 2351
2342 2352 manifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2343 2353 ManifestCtx
2344 2354 )
2345 2355
2346 2356
2347 2357 class MemTreeManifestCtx:
2348 2358 def __init__(self, manifestlog, dir=b''):
2349 2359 self._manifestlog = manifestlog
2350 2360 self._dir = dir
2351 2361 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2352 2362
2353 2363 def _storage(self) -> ManifestRevlog:
2354 2364 return self._manifestlog.getstorage(b'')
2355 2365
2356 2366 def copy(self) -> 'MemTreeManifestCtx':
2357 2367 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2358 2368 memmf._treemanifest = self._treemanifest.copy()
2359 2369 return memmf
2360 2370
2361 2371 def read(self) -> 'TreeManifest':
2362 2372 return self._treemanifest
2363 2373
2364 2374 def write(self, transaction, link, p1, p2, added, removed, match=None):
2365 2375 def readtree(dir, node):
2366 2376 return self._manifestlog.get(dir, node).read()
2367 2377
2368 2378 return self._storage().add(
2369 2379 self._treemanifest,
2370 2380 transaction,
2371 2381 link,
2372 2382 p1,
2373 2383 p2,
2374 2384 added,
2375 2385 removed,
2376 2386 readtree=readtree,
2377 2387 match=match,
2378 2388 )
2379 2389
2380 2390
2381 2391 memtreemanifestctx = interfaceutil.implementer(
2382 2392 repository.imanifestrevisionwritable
2383 2393 )(MemTreeManifestCtx)
2384 2394
2385 2395
2386 2396 class TreeManifestCtx:
2387 2397 def __init__(self, manifestlog, dir, node):
2388 2398 self._manifestlog = manifestlog
2389 2399 self._dir = dir
2390 2400 self._data = None
2391 2401
2392 2402 self._node = node
2393 2403
2394 2404 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2395 2405 # we can instantiate treemanifestctx objects for directories we don't
2396 2406 # have on disk.
2397 2407 # self.p1, self.p2 = store.parents(node)
2398 2408 # rev = store.rev(node)
2399 2409 # self.linkrev = store.linkrev(rev)
2400 2410
2401 2411 def _storage(self) -> ManifestRevlog:
2402 2412 narrowmatch = self._manifestlog._narrowmatch
2403 2413 if not narrowmatch.always():
2404 2414 if not narrowmatch.visitdir(self._dir[:-1]):
2405 2415 return excludedmanifestrevlog(
2406 2416 self._manifestlog.nodeconstants, self._dir
2407 2417 )
2408 2418 return self._manifestlog.getstorage(self._dir)
2409 2419
2410 2420 def read(self) -> 'TreeManifest':
2411 2421 if self._data is None:
2412 2422 store = self._storage()
2413 2423 if self._node == self._manifestlog.nodeconstants.nullid:
2414 2424 self._data = treemanifest(self._manifestlog.nodeconstants)
2415 2425 # TODO accessing non-public API
2416 2426 elif store._treeondisk:
2417 2427 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2418 2428
2419 2429 def gettext():
2420 2430 return store.revision(self._node)
2421 2431
2422 2432 def readsubtree(dir, subm):
2423 2433 # Set verify to False since we need to be able to create
2424 2434 # subtrees for trees that don't exist on disk.
2425 2435 return self._manifestlog.get(dir, subm, verify=False).read()
2426 2436
2427 2437 m.read(gettext, readsubtree)
2428 2438 m.setnode(self._node)
2429 2439 self._data = m
2430 2440 else:
2431 2441 if self._node in store.fulltextcache:
2432 2442 text = pycompat.bytestr(store.fulltextcache[self._node])
2433 2443 else:
2434 2444 text = store.revision(self._node)
2435 2445 arraytext = bytearray(text)
2436 2446 store.fulltextcache[self._node] = arraytext
2437 2447 self._data = treemanifest(
2438 2448 self._manifestlog.nodeconstants, dir=self._dir, text=text
2439 2449 )
2440 2450
2441 2451 return self._data
2442 2452
2443 2453 def node(self) -> bytes:
2444 2454 return self._node
2445 2455
2446 2456 def copy(self) -> 'MemTreeManifestCtx':
2447 2457 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2448 2458 memmf._treemanifest = self.read().copy()
2449 2459 return memmf
2450 2460
2451 2461 @propertycache
2452 2462 def parents(self) -> Tuple[bytes, bytes]:
2453 2463 return self._storage().parents(self._node)
2454 2464
2455 2465 def readdelta(self, shallow: bool = False) -> AnyManifestDict:
2456 2466 """see `imanifestrevisionstored` documentation"""
2467 util.nouideprecwarn(
2468 b'"readdelta" is deprecated use "read_any_fast_delta" or "read_delta_new_entries"',
2469 b"6.9",
2470 stacklevel=2,
2471 )
2457 2472 store = self._storage()
2458 2473 if shallow:
2459 2474 r = store.rev(self._node)
2460 2475 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2461 2476 return manifestdict(store.nodeconstants.nodelen, d)
2462 2477 else:
2463 2478 # Need to perform a slow delta
2464 2479 r0 = store.deltaparent(store.rev(self._node))
2465 2480 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2466 2481 m1 = self.read()
2467 2482 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2468 2483 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2469 2484 if n1:
2470 2485 md[f] = n1
2471 2486 if fl1:
2472 2487 md.setflag(f, fl1)
2473 2488 return md
2474 2489
2475 2490 def read_any_fast_delta(
2476 2491 self,
2477 2492 valid_bases: Optional[Collection[int]] = None,
2478 2493 *,
2479 2494 shallow: bool = False,
2480 2495 ) -> Tuple[Optional[int], AnyManifestDict]:
2481 2496 """see `imanifestrevisionstored` documentation"""
2482 2497 store = self._storage()
2483 2498 r = store.rev(self._node)
2484 2499 deltaparent = store.deltaparent(r)
2485 2500
2486 2501 if valid_bases is None:
2487 2502 # make sure the next check is True
2488 2503 valid_bases = (deltaparent,)
2489 2504 can_use_delta = deltaparent != nullrev and deltaparent in valid_bases
2490 2505
2491 2506 if shallow:
2492 2507 if can_use_delta:
2493 2508 return (deltaparent, self._read_storage_delta_shallow())
2494 2509 else:
2495 2510 d = store.revision(self._node)
2496 2511 return (None, manifestdict(store.nodeconstants.nodelen, d))
2497 2512 else:
2498 2513 # note: the use of "slow_delta" here is cargo-culted from the previous
2499 2514 # implementation. I am not sure it makes sense, since the goal here is to
2500 2515 # be fast, so why are we computing a delta? On the other hand, tree
2501 2516 # manifest deltas are fairly "cheap" and allow for skipping whole parts of
2502 2517 # the tree that a full read would access. So it might be a good idea.
2503 2518 #
2504 2519 # If we realize we don't need delta here, we should simply use:
2505 2520 #
2506 2521 # return (None, self.read())
2507 2522 if can_use_delta:
2508 2523 return (None, self._read_storage_slow_delta(base=deltaparent))
2509 2524 else:
2510 2525 parents = [
2511 2526 p
2512 2527 for p in store.parentrevs(r)
2513 2528 if p is not nullrev and p in valid_bases
2514 2529 ]
2515 2530 if parents:
2516 2531 best_base = max(parents)
2517 2532 else:
2518 2533 best_base = max(valid_bases)
2519 2534 return (None, self._read_storage_slow_delta(base=best_base))
2520 2535
2521 2536 def _read_storage_delta_shallow(self) -> ManifestDict:
2522 2537 store = self._storage()
2523 2538 r = store.rev(self._node)
2524 2539 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2525 2540 return manifestdict(store.nodeconstants.nodelen, d)
2526 2541
2527 2542 def _read_storage_slow_delta(self, base) -> 'TreeManifest':
2528 2543 store = self._storage()
2529 2544 if base is None:
2530 2545 base = store.deltaparent(store.rev(self._node))
2531 2546 m0 = self._manifestlog.get(self._dir, store.node(base)).read()
2532 2547 m1 = self.read()
2533 2548 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2534 2549 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2535 2550 if n1:
2536 2551 md[f] = n1
2537 2552 if fl1:
2538 2553 md.setflag(f, fl1)
2539 2554 return md
2540 2555
2541 2556 def read_delta_parents(
2542 2557 self,
2543 2558 *,
2544 2559 shallow: bool = False,
2545 2560 exact: bool = True,
2546 2561 ) -> AnyManifestDict:
2547 2562 """see `interface.imanifestrevisionbase` documentations"""
2548 2563 store = self._storage()
2549 2564 r = store.rev(self._node)
2550 2565 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2551 2566 if not exact:
2552 2567 return self.read_any_fast_delta(parents, shallow=shallow)[1]
2553 2568 elif len(parents) == 0:
2554 2569 if shallow:
2555 2570 d = store.revision(self._node)
2556 2571 return manifestdict(store.nodeconstants.nodelen, d)
2557 2572 else:
2558 2573 return self.read()
2559 2574 elif len(parents) == 1:
2560 2575 p = parents[0]
2561 2576 if shallow:
2562 2577 d = mdiff.patchtext(store.revdiff(p, r))
2563 2578 return manifestdict(store.nodeconstants.nodelen, d)
2564 2579 else:
2565 2580 return self._read_storage_slow_delta(base=p)
2566 2581 else:
2567 2582 p1, p2 = parents
2568 2583 if shallow:
2569 2584 d1 = mdiff.patchtext(store.revdiff(p1, r))
2570 2585 d2 = mdiff.patchtext(store.revdiff(p2, r))
2571 2586 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2572 2587 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2573 2588 md = manifestdict(store.nodeconstants.nodelen)
2574 2589 for f, new_node, new_flag in d1.iterentries():
2575 2590 if f not in d2:
2576 2591 continue
2577 2592 if new_node is not None:
2578 2593 md.set(f, new_node, new_flag)
2579 2594 return md
2580 2595 else:
2581 2596 m1 = self._manifestlog.get(self._dir, store.node(p1)).read()
2582 2597 m2 = self._manifestlog.get(self._dir, store.node(p2)).read()
2583 2598 mc = self.read()
2584 2599 d1 = m1.diff(mc)
2585 2600 d2 = m2.diff(mc)
2586 2601 md = treemanifest(
2587 2602 self._manifestlog.nodeconstants,
2588 2603 dir=self._dir,
2589 2604 )
2590 2605 for f, new_node, new_flag in d1.iterentries():
2591 2606 if f not in d2:
2592 2607 continue
2593 2608 if new_node is not None:
2594 2609 md.set(f, new_node, new_flag)
2595 2610 return md
2596 2611
2597 2612 def read_delta_new_entries(
2598 2613 self, *, shallow: bool = False
2599 2614 ) -> AnyManifestDict:
2600 2615 """see `interface.imanifestrevisionbase` documentations"""
2601 2616 # If we are using narrow, returning a delta against an arbitrary
2602 2617 # changeset might return file outside the narrowspec. This can create
2603 2618 # issue when running validation server side with strict security as
2604 2619 # push from low priviledge usage might be seen as adding new revision
2605 2620 # for files they cannot touch. So we are strict if narrow is involved.
2606 2621 if self._manifestlog.narrowed:
2607 2622 return self.read_delta_parents(shallow=shallow, exact=True)
2608 2623 # delegate to another existing method for simplicity
2609 2624 store = self._storage()
2610 2625 r = store.rev(self._node)
2611 2626 bases = (store.deltaparent(r),)
2612 2627 return self.read_any_fast_delta(bases, shallow=shallow)[1]
2613 2628
2614 2629 def readfast(self, shallow=False) -> AnyManifestDict:
2615 2630 """Calls either readdelta or read, based on which would be less work.
2616 2631 readdelta is called if the delta is against the p1, and therefore can be
2617 2632 read quickly.
2618 2633
2619 2634 If `shallow` is True, it only returns the entries from this manifest,
2620 2635 and not any submanifests.
2621 2636 """
2637 util.nouideprecwarn(
2638 b'"readdelta" is deprecated use "read_any_fast_delta" or "read_delta_parents"',
2639 b"6.9",
2640 stacklevel=2,
2641 )
2622 2642 store = self._storage()
2623 2643 r = store.rev(self._node)
2624 2644 deltaparent = store.deltaparent(r)
2625 2645 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2626 2646 return self.readdelta(shallow=shallow)
2627 2647
2628 2648 if shallow:
2629 2649 return manifestdict(
2630 2650 store.nodeconstants.nodelen, store.revision(self._node)
2631 2651 )
2632 2652 else:
2633 2653 return self.read()
2634 2654
2635 2655 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2636 2656 return self.read().find(key)
2637 2657
2638 2658
2639 2659 treemanifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2640 2660 TreeManifestCtx
2641 2661 )
2642 2662
2643 2663
2644 2664 class excludeddir(treemanifest):
2645 2665 """Stand-in for a directory that is excluded from the repository.
2646 2666
2647 2667 With narrowing active on a repository that uses treemanifests,
2648 2668 some of the directory revlogs will be excluded from the resulting
2649 2669 clone. This is a huge storage win for clients, but means we need
2650 2670 some sort of pseudo-manifest to surface to internals so we can
2651 2671 detect a merge conflict outside the narrowspec. That's what this
2652 2672 class is: it stands in for a directory whose node is known, but
2653 2673 whose contents are unknown.
2654 2674 """
2655 2675
2656 2676 def __init__(self, nodeconstants, dir, node):
2657 2677 super(excludeddir, self).__init__(nodeconstants, dir)
2658 2678 self._node = node
2659 2679 # Add an empty file, which will be included by iterators and such,
2660 2680 # appearing as the directory itself (i.e. something like "dir/")
2661 2681 self._files[b''] = node
2662 2682 self._flags[b''] = b't'
2663 2683
2664 2684 # Manifests outside the narrowspec should never be modified, so avoid
2665 2685 # copying. This makes a noticeable difference when there are very many
2666 2686 # directories outside the narrowspec. Also, it makes sense for the copy to
2667 2687 # be of the same type as the original, which would not happen with the
2668 2688 # super type's copy().
2669 2689 def copy(self):
2670 2690 return self
2671 2691
2672 2692
2673 2693 class excludeddirmanifestctx(treemanifestctx):
2674 2694 """context wrapper for excludeddir - see that docstring for rationale"""
2675 2695
2676 2696 def __init__(self, nodeconstants, dir, node):
2677 2697 self.nodeconstants = nodeconstants
2678 2698 self._dir = dir
2679 2699 self._node = node
2680 2700
2681 2701 def read(self):
2682 2702 return excludeddir(self.nodeconstants, self._dir, self._node)
2683 2703
2684 2704 def readfast(self, shallow=False):
2685 2705 # special version of readfast since we don't have underlying storage
2686 2706 return self.read()
2687 2707
2688 2708 def write(self, *args):
2689 2709 raise error.ProgrammingError(
2690 2710 b'attempt to write manifest from excluded dir %s' % self._dir
2691 2711 )
2692 2712
2693 2713
2694 2714 class excludedmanifestrevlog(manifestrevlog):
2695 2715 """Stand-in for excluded treemanifest revlogs.
2696 2716
2697 2717 When narrowing is active on a treemanifest repository, we'll have
2698 2718 references to directories we can't see due to the revlog being
2699 2719 skipped. This class exists to conform to the manifestrevlog
2700 2720 interface for those directories and proactively prevent writes to
2701 2721 outside the narrowspec.
2702 2722 """
2703 2723
2704 2724 def __init__(self, nodeconstants, dir):
2705 2725 self.nodeconstants = nodeconstants
2706 2726 self._dir = dir
2707 2727
2708 2728 def __len__(self):
2709 2729 raise error.ProgrammingError(
2710 2730 b'attempt to get length of excluded dir %s' % self._dir
2711 2731 )
2712 2732
2713 2733 def rev(self, node):
2714 2734 raise error.ProgrammingError(
2715 2735 b'attempt to get rev from excluded dir %s' % self._dir
2716 2736 )
2717 2737
2718 2738 def linkrev(self, node):
2719 2739 raise error.ProgrammingError(
2720 2740 b'attempt to get linkrev from excluded dir %s' % self._dir
2721 2741 )
2722 2742
2723 2743 def node(self, rev):
2724 2744 raise error.ProgrammingError(
2725 2745 b'attempt to get node from excluded dir %s' % self._dir
2726 2746 )
2727 2747
2728 2748 def add(self, *args, **kwargs):
2729 2749 # We should never write entries in dirlogs outside the narrow clone.
2730 2750 # However, the method still gets called from writesubtree() in
2731 2751 # _addtree(), so we need to handle it. We should possibly make that code
2732 2752 # avoid calling add() with a clean manifest (_dirty is always False
2733 2753 # in excludeddir instances).
2734 2754 pass