manifest: drop the CamelCase name for `manifest.manifestlog`...
Matt Harbison
r52963:6641a3ae default
@@ -1,2780 +1,2774
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import annotations
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import typing
14 14 import weakref
15 15
16 16 from typing import (
17 17 ByteString,
18 18 Callable,
19 19 Collection,
20 20 Dict,
21 21 Iterable,
22 22 Iterator,
23 23 List,
24 24 Optional,
25 25 Set,
26 26 Tuple,
27 27 Union,
28 28 cast,
29 29 )
30 30
31 31 from .i18n import _
32 32 from .node import (
33 33 bin,
34 34 hex,
35 35 nullrev,
36 36 )
37 37 from . import (
38 38 encoding,
39 39 error,
40 40 match as matchmod,
41 41 mdiff,
42 42 pathutil,
43 43 policy,
44 44 pycompat,
45 45 revlog,
46 46 util,
47 47 )
48 48 from .interfaces import (
49 49 repository,
50 50 util as interfaceutil,
51 51 )
52 52 from .revlogutils import (
53 53 constants as revlog_constants,
54 54 )
55 55
56 56 parsers = policy.importmod('parsers')
57 57 propertycache = util.propertycache
58 58
59 59 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
60 60 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
61 61
62 62
63 63 def _parse(nodelen, data: bytes):
64 64 # This function does a little bit of excessive-looking
65 65 # precondition checking. This is so that the behavior of this
66 66 # code exactly matches its C counterpart to try and help
67 67 # prevent surprise breakage for anyone that develops against
68 68 # the pure version.
69 69 if data and data[-1:] != b'\n':
70 70 raise ValueError(b'Manifest did not end in a newline.')
71 71 prev = None
72 72 for l in data.splitlines():
73 73 if prev is not None and prev > l:
74 74 raise ValueError(b'Manifest lines not in sorted order.')
75 75 prev = l
76 76 f, n = l.split(b'\0')
77 77 nl = len(n)
78 78 flags = n[-1:]
79 79 if flags in _manifestflags:
80 80 n = n[:-1]
81 81 nl -= 1
82 82 else:
83 83 flags = b''
84 84 if nl != 2 * nodelen:
85 85 raise ValueError(b'Invalid manifest line')
86 86
87 87 yield f, bin(n), flags
88 88
89 89
90 90 def _text(it):
91 91 files = []
92 92 lines = []
93 93 for f, n, fl in it:
94 94 files.append(f)
95 95 # if this is changed to support newlines in filenames,
96 96 # be sure to check the templates/ dir again (especially *-raw.tmpl)
97 97 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
98 98
99 99 _checkforbidden(files)
100 100 return b''.join(lines)
101 101
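# Illustrative sketch (hypothetical data, not part of the original file):
# each manifest line produced/consumed by _parse() and _text() above is
# "<path>\0<hex node><flags>\n", sorted by path. Assuming 20-byte nodes,
# the two functions round-trip:
#
#   data = b'a.txt\x00' + b'aa' * 20 + b'\n' + b'bin/sh\x00' + b'bb' * 20 + b'x\n'
#   entries = list(_parse(20, data))
#   # -> [(b'a.txt', b'\xaa' * 20, b''), (b'bin/sh', b'\xbb' * 20, b'x')]
#   assert _text(iter(entries)) == data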
102 102
103 103 class lazymanifestiter:
104 104 def __init__(self, lm: '_LazyManifest') -> None:
105 105 self.pos = 0
106 106 self.lm = lm
107 107
108 108 def __iter__(self) -> 'lazymanifestiter':
109 109 return self
110 110
111 111 def next(self) -> bytes:
112 112 try:
113 113 data, pos = self.lm._get(self.pos)
114 114 except IndexError:
115 115 raise StopIteration
116 116 if pos == -1:
117 117 assert isinstance(data, tuple)
118 118 self.pos += 1
119 119 return data[0]
120 120 assert isinstance(data, bytes)
121 121 self.pos += 1
122 122 zeropos = data.find(b'\x00', pos)
123 123 return data[pos:zeropos]
124 124
125 125 __next__ = next
126 126
127 127
128 128 class lazymanifestiterentries:
129 129 def __init__(self, lm: '_LazyManifest') -> None:
130 130 self.lm = lm
131 131 self.pos = 0
132 132
133 133 def __iter__(self) -> 'lazymanifestiterentries':
134 134 return self
135 135
136 136 def next(self) -> Tuple[bytes, bytes, bytes]:
137 137 try:
138 138 data, pos = self.lm._get(self.pos)
139 139 except IndexError:
140 140 raise StopIteration
141 141 if pos == -1:
142 142 assert isinstance(data, tuple)
143 143 self.pos += 1
144 144 return data
145 145 assert isinstance(data, bytes)
146 146 zeropos = data.find(b'\x00', pos)
147 147 nlpos = data.find(b'\n', pos)
148 148 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
149 149 raise error.StorageError(b'Invalid manifest line')
150 150 flags = data[nlpos - 1 : nlpos]
151 151 if flags in _manifestflags:
152 152 hlen = nlpos - zeropos - 2
153 153 else:
154 154 hlen = nlpos - zeropos - 1
155 155 flags = b''
156 156 if hlen != 2 * self.lm._nodelen:
157 157 raise error.StorageError(b'Invalid manifest line')
158 158 hashval = unhexlify(
159 159 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
160 160 )
161 161 self.pos += 1
162 162 return (data[pos:zeropos], hashval, flags)
163 163
164 164 __next__ = next
165 165
166 166
167 167 def unhexlify(data: bytes, extra: int, pos, length: int):
168 168 s = bin(data[pos : pos + length])
169 169 if extra:
170 170 s += bytes([extra & 0xFF])
171 171 return s
172 172
173 173
174 174 def _cmp(a, b):
175 175 return (a > b) - (a < b)
176 176
177 177
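# Known flag values used in _manifestflags below: b'l' marks a symlink,
# b'x' an executable file, b't' a sub-tree (directory) entry in tree
# manifests, and b'' a regular file.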
178 178 _manifestflags = {b'', b'l', b't', b'x'}
179 179
180 180
181 181 class _LazyManifest:
182 182 """A pure python manifest backed by a byte string. It is supplimented with
183 183 internal lists as it is modified, until it is compacted back to a pure byte
184 184 string.
185 185
186 186 ``data`` is the initial manifest data.
187 187
188 188 ``positions`` is a list of offsets, one per manifest entry. Positive
189 189 values are offsets into ``data``, negative values are offsets into the
190 190 ``extradata`` list. When an entry is removed, its entry is dropped from
191 191 ``positions``. The values are encoded such that when walking the list and
192 192 indexing into ``data`` or ``extradata`` as appropriate, the entries are
193 193 sorted by filename.
194 194
195 195 ``extradata`` is a list of (key, hash, flags) for entries that were added or
196 196 modified since the manifest was created or compacted.
197 197 """
198 198
199 199 def __init__(
200 200 self,
201 201 nodelen: int,
202 202 data: bytes,
203 203 positions=None,
204 204 extrainfo=None,
205 205 extradata=None,
206 206 hasremovals: bool = False,
207 207 ):
208 208 self._nodelen = nodelen
209 209 if positions is None:
210 210 self.positions = self.findlines(data)
211 211 self.extrainfo = [0] * len(self.positions)
212 212 self.data = data
213 213 self.extradata = []
214 214 self.hasremovals = False
215 215 else:
216 216 self.positions = positions[:]
217 217 self.extrainfo = extrainfo[:]
218 218 self.extradata = extradata[:]
219 219 self.data = data
220 220 self.hasremovals = hasremovals
221 221
222 222 def findlines(self, data: bytes) -> List[int]:
223 223 if not data:
224 224 return []
225 225 pos = data.find(b"\n")
226 226 if pos == -1 or data[-1:] != b'\n':
227 227 raise ValueError(b"Manifest did not end in a newline.")
228 228 positions = [0]
229 229 prev = data[: data.find(b'\x00')]
230 230 while pos < len(data) - 1 and pos != -1:
231 231 positions.append(pos + 1)
232 232 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
233 233 if nexts < prev:
234 234 raise ValueError(b"Manifest lines not in sorted order.")
235 235 prev = nexts
236 236 pos = data.find(b"\n", pos + 1)
237 237 return positions
238 238
239 239 def _get(
240 240 self, index: int
241 241 ) -> Tuple[Union[bytes, Tuple[bytes, bytes, bytes]], int]:
242 242 # get the position encoded in pos:
243 243 # positive number is an index in 'data'
244 244 # negative number is in extrapieces
245 245 pos = self.positions[index]
246 246 if pos >= 0:
247 247 return self.data, pos
248 248 return self.extradata[-pos - 1], -1
249 249
250 250 def _getkey(self, pos) -> bytes:
251 251 if pos >= 0:
252 252 return self.data[pos : self.data.find(b'\x00', pos + 1)]
253 253 return self.extradata[-pos - 1][0]
254 254
255 255 def bsearch(self, key: bytes) -> int:
256 256 first = 0
257 257 last = len(self.positions) - 1
258 258
259 259 while first <= last:
260 260 midpoint = (first + last) // 2
261 261 nextpos = self.positions[midpoint]
262 262 candidate = self._getkey(nextpos)
263 263 r = _cmp(key, candidate)
264 264 if r == 0:
265 265 return midpoint
266 266 else:
267 267 if r < 0:
268 268 last = midpoint - 1
269 269 else:
270 270 first = midpoint + 1
271 271 return -1
272 272
273 273 def bsearch2(self, key: bytes) -> Tuple[int, bool]:
274 274 # same as the above, but will always return the position
275 275 # done for performance reasons
276 276 first = 0
277 277 last = len(self.positions) - 1
278 278
279 279 while first <= last:
280 280 midpoint = (first + last) // 2
281 281 nextpos = self.positions[midpoint]
282 282 candidate = self._getkey(nextpos)
283 283 r = _cmp(key, candidate)
284 284 if r == 0:
285 285 return (midpoint, True)
286 286 else:
287 287 if r < 0:
288 288 last = midpoint - 1
289 289 else:
290 290 first = midpoint + 1
291 291 return (first, False)
292 292
293 293 def __contains__(self, key: bytes) -> bool:
294 294 return self.bsearch(key) != -1
295 295
296 296 def __getitem__(self, key: bytes) -> Tuple[bytes, bytes]:
297 297 if not isinstance(key, bytes):
298 298 raise TypeError(b"getitem: manifest keys must be a bytes.")
299 299 needle = self.bsearch(key)
300 300 if needle == -1:
301 301 raise KeyError
302 302 data, pos = self._get(needle)
303 303 if pos == -1:
304 304 assert isinstance(data, tuple)
305 305 return (data[1], data[2])
306 306
307 307 assert isinstance(data, bytes)
308 308 zeropos = data.find(b'\x00', pos)
309 309 nlpos = data.find(b'\n', zeropos)
310 310 assert 0 <= needle <= len(self.positions)
311 311 assert len(self.extrainfo) == len(self.positions)
312 312 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
313 313 raise error.StorageError(b'Invalid manifest line')
314 314 hlen = nlpos - zeropos - 1
315 315 flags = data[nlpos - 1 : nlpos]
316 316 if flags in _manifestflags:
317 317 hlen -= 1
318 318 else:
319 319 flags = b''
320 320 if hlen != 2 * self._nodelen:
321 321 raise error.StorageError(b'Invalid manifest line')
322 322 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
323 323 return (hashval, flags)
324 324
325 325 def __delitem__(self, key: bytes) -> None:
326 326 needle, found = self.bsearch2(key)
327 327 if not found:
328 328 raise KeyError
329 329 cur = self.positions[needle]
330 330 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
331 331 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
332 332 if cur >= 0:
333 333 # This does NOT unsort the list as far as the search functions are
334 334 # concerned, as they only examine lines mapped by self.positions.
335 335 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
336 336 self.hasremovals = True
337 337
338 338 def __setitem__(self, key: bytes, value: Tuple[bytes, bytes]):
339 339 if not isinstance(key, bytes):
340 340 raise TypeError(b"setitem: manifest keys must be a byte string.")
341 341 if not isinstance(value, tuple) or len(value) != 2:
342 342 raise TypeError(
343 343 b"Manifest values must be a tuple of (node, flags)."
344 344 )
345 345 hashval = value[0]
346 346 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
347 347 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
348 348 flags = value[1]
349 349 if not isinstance(flags, bytes) or len(flags) > 1:
350 350 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
351 351 needle, found = self.bsearch2(key)
352 352 if found:
353 353 # replace the existing entry
354 354 pos = self.positions[needle]
355 355 if pos < 0:
356 356 self.extradata[-pos - 1] = (key, hashval, value[1])
357 357 else:
358 358 # the entry lives in the byte string; appending a replacement
358 358 # to extradata is cheaper than rewriting 'data' in place
359 359 self.extradata.append((key, hashval, value[1]))
360 360 self.positions[needle] = -len(self.extradata)
361 361 else:
362 362 # not found, put it in with extra positions
363 363 self.extradata.append((key, hashval, value[1]))
364 364 self.positions = (
365 365 self.positions[:needle]
366 366 + [-len(self.extradata)]
367 367 + self.positions[needle:]
368 368 )
369 369 self.extrainfo = (
370 370 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
371 371 )
372 372
373 373 def copy(self) -> '_LazyManifest':
374 374 # XXX call _compact like in C?
375 375 return _lazymanifest(
376 376 self._nodelen,
377 377 self.data,
378 378 self.positions,
379 379 self.extrainfo,
380 380 self.extradata,
381 381 self.hasremovals,
382 382 )
383 383
384 384 def _compact(self) -> None:
385 385 # hopefully not called TOO often
386 386 if len(self.extradata) == 0 and not self.hasremovals:
387 387 return
388 388 l = []
389 389 i = 0
390 390 offset = 0
391 391 self.extrainfo = [0] * len(self.positions)
392 392 while i < len(self.positions):
393 393 if self.positions[i] >= 0:
394 394 cur = self.positions[i]
395 395 last_cut = cur
396 396
397 397 # Collect all contiguous entries in the buffer at the current
398 398 # offset, breaking out only for added/modified items held in
399 399 # extradata, or a deleted line prior to the next position.
400 400 while True:
401 401 self.positions[i] = offset
402 402 i += 1
403 403 if i == len(self.positions) or self.positions[i] < 0:
404 404 break
405 405
406 406 # A removed file has no positions[] entry, but does have an
407 407 # overwritten first byte. Break out and find the end of the
408 408 # current good entry/entries if there is a removed file
409 409 # before the next position.
410 410 if (
411 411 self.hasremovals
412 412 and self.data.find(b'\n\x00', cur, self.positions[i])
413 413 != -1
414 414 ):
415 415 break
416 416
417 417 offset += self.positions[i] - cur
418 418 cur = self.positions[i]
419 419 end_cut = self.data.find(b'\n', cur)
420 420 if end_cut != -1:
421 421 end_cut += 1
422 422 offset += end_cut - cur
423 423 l.append(self.data[last_cut:end_cut])
424 424 else:
425 425 while i < len(self.positions) and self.positions[i] < 0:
426 426 cur = self.positions[i]
427 427 t = self.extradata[-cur - 1]
428 428 l.append(self._pack(t))
429 429 self.positions[i] = offset
430 430 # Hashes are either 20 bytes (old sha1s) or 32
431 431 # bytes (new non-sha1).
432 432 hlen = 20
433 433 if len(t[1]) > 25:
434 434 hlen = 32
435 435 if len(t[1]) > hlen:
436 436 # the extra byte sits at index hlen; bytes indexing already
436 436 # yields an int, so no ord() is needed
436 436 self.extrainfo[i] = t[1][hlen]
437 437 offset += len(l[-1])
438 438 i += 1
439 439 self.data = b''.join(l)
440 440 self.hasremovals = False
441 441 self.extradata = []
442 442
443 443 def _pack(self, d: Tuple[bytes, bytes, bytes]) -> bytes:
444 444 n = d[1]
445 445 assert len(n) in (20, 32)
446 446 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
447 447
448 448 def text(self) -> ByteString:
449 449 self._compact()
450 450 return self.data
451 451
452 452 def diff(
453 453 self, m2: '_LazyManifest', clean: bool = False
454 454 ) -> Dict[
455 455 bytes,
456 456 Optional[
457 457 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
458 458 ],
459 459 ]:
460 460 '''Finds changes between the current manifest and m2.'''
461 461 # XXX think whether efficiency matters here
462 462 diff = {}
463 463
464 464 for fn, e1, flags in self.iterentries():
465 465 if fn not in m2:
466 466 diff[fn] = (e1, flags), (None, b'')
467 467 else:
468 468 e2 = m2[fn]
469 469 if (e1, flags) != e2:
470 470 diff[fn] = (e1, flags), e2
471 471 elif clean:
472 472 diff[fn] = None
473 473
474 474 for fn, e2, flags in m2.iterentries():
475 475 if fn not in self:
476 476 diff[fn] = (None, b''), (e2, flags)
477 477
478 478 return diff
479 479
480 480 def iterentries(self) -> lazymanifestiterentries:
481 481 return lazymanifestiterentries(self)
482 482
483 483 def iterkeys(self) -> lazymanifestiter:
484 484 return lazymanifestiter(self)
485 485
486 486 def __iter__(self) -> lazymanifestiter:
487 487 return lazymanifestiter(self)
488 488
489 489 def __len__(self) -> int:
490 490 return len(self.positions)
491 491
492 492 def filtercopy(self, filterfn: Callable[[bytes], bool]) -> '_LazyManifest':
493 493 # XXX should be optimized
494 494 c = _lazymanifest(self._nodelen, b'')
495 495 for f, n, fl in self.iterentries():
496 496 if filterfn(f):
497 497 c[f] = n, fl
498 498 return c
499 499
500 500
501 501 try:
502 502 _lazymanifest = parsers.lazymanifest
503 503 except AttributeError:
504 504 _lazymanifest = _LazyManifest
505 505
506 506
507 507 class manifestdict: # (repository.imanifestdict)
508 508 def __init__(self, nodelen: int, data: ByteString = b''):
509 509 self._nodelen = nodelen
510 510 self._lm = _lazymanifest(nodelen, data)
511 511
512 512 def __getitem__(self, key: bytes) -> bytes:
513 513 return self._lm[key][0]
514 514
515 515 def find(self, key: bytes) -> Tuple[bytes, bytes]:
516 516 return self._lm[key]
517 517
518 518 def __len__(self) -> int:
519 519 return len(self._lm)
520 520
521 521 def __nonzero__(self) -> bool:
522 522 # nonzero is covered by the __len__ function, but implementing it here
523 523 # makes it easier for extensions to override.
524 524 return len(self._lm) != 0
525 525
526 526 __bool__ = __nonzero__
527 527
528 528 def set(self, key: bytes, node: bytes, flags: bytes) -> None:
529 529 self._lm[key] = node, flags
530 530
531 531 def __setitem__(self, key: bytes, node: bytes) -> None:
532 532 self._lm[key] = node, self.flags(key)
533 533
534 534 def __contains__(self, key: bytes) -> bool:
535 535 if key is None:
536 536 return False
537 537 return key in self._lm
538 538
539 539 def __delitem__(self, key: bytes) -> None:
540 540 del self._lm[key]
541 541
542 542 def __iter__(self) -> Iterator[bytes]:
543 543 return self._lm.__iter__()
544 544
545 545 def iterkeys(self) -> Iterator[bytes]:
546 546 return self._lm.iterkeys()
547 547
548 548 def keys(self) -> List[bytes]:
549 549 return list(self.iterkeys())
550 550
551 551 def filesnotin(self, m2, match=None) -> Set[bytes]:
552 552 '''Set of files in this manifest that are not in the other'''
553 553 if match is not None:
554 554 match = matchmod.badmatch(match, lambda path, msg: None)
555 555 sm2 = set(m2.walk(match))
556 556 return {f for f in self.walk(match) if f not in sm2}
557 557 return {f for f in self if f not in m2}
558 558
559 559 @propertycache
560 560 def _dirs(self) -> pathutil.dirs:
561 561 return pathutil.dirs(self)
562 562
563 563 def dirs(self) -> pathutil.dirs:
564 564 return self._dirs
565 565
566 566 def hasdir(self, dir: bytes) -> bool:
567 567 return dir in self._dirs
568 568
569 569 def _filesfastpath(self, match: matchmod.basematcher) -> bool:
570 570 """Checks whether we can correctly and quickly iterate over matcher
571 571 files instead of over manifest files."""
572 572 files = match.files()
573 573 return len(files) < 100 and (
574 574 match.isexact()
575 575 or (match.prefix() and all(fn in self for fn in files))
576 576 )
577 577
578 578 def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
579 579 """Generates matching file names.
580 580
581 581 Equivalent to manifest.matches(match).iterkeys(), but without creating
582 582 an entirely new manifest.
583 583
584 584 It also reports nonexistent files by marking them bad with match.bad().
585 585 """
586 586 if match.always():
587 587 for f in iter(self):
588 588 yield f
589 589 return
590 590
591 591 fset = set(match.files())
592 592
593 593 # avoid the entire walk if we're only looking for specific files
594 594 if self._filesfastpath(match):
595 595 for fn in sorted(fset):
596 596 if fn in self:
597 597 yield fn
598 598 return
599 599
600 600 for fn in self:
601 601 if fn in fset:
602 602 # specified pattern is the exact name
603 603 fset.remove(fn)
604 604 if match(fn):
605 605 yield fn
606 606
607 607 # for dirstate.walk, files=[''] means "walk the whole tree".
608 608 # follow that here, too
609 609 fset.discard(b'')
610 610
611 611 for fn in sorted(fset):
612 612 if not self.hasdir(fn):
613 613 match.bad(fn, None)
614 614
615 615 def _matches(self, match: matchmod.basematcher) -> 'manifestdict':
616 616 '''generate a new manifest filtered by the match argument'''
617 617 if match.always():
618 618 return self.copy()
619 619
620 620 if self._filesfastpath(match):
621 621 m = manifestdict(self._nodelen)
622 622 lm = self._lm
623 623 for fn in match.files():
624 624 if fn in lm:
625 625 m._lm[fn] = lm[fn]
626 626 return m
627 627
628 628 m = manifestdict(self._nodelen)
629 629 m._lm = self._lm.filtercopy(match)
630 630 return m
631 631
632 632 def diff(
633 633 self,
634 634 m2: 'manifestdict',
635 635 match: Optional[matchmod.basematcher] = None,
636 636 clean: bool = False,
637 637 ) -> Dict[
638 638 bytes,
639 639 Optional[
640 640 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
641 641 ],
642 642 ]:
643 643 """Finds changes between the current manifest and m2.
644 644
645 645 Args:
646 646 m2: the manifest to which this manifest should be compared.
647 647 clean: if true, include files unchanged between these manifests
648 648 with a None value in the returned dictionary.
649 649
650 650 The result is returned as a dict with filename as key and
651 651 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
652 652 nodeid in the current/other manifest and fl1/fl2 is the flag
653 653 in the current/other manifest. Where the file does not exist,
654 654 the nodeid will be None and the flags will be the empty
655 655 string.
656 656 """
657 657 if match:
658 658 m1 = self._matches(match)
659 659 m2 = m2._matches(match)
660 660 return m1.diff(m2, clean=clean)
661 661 return self._lm.diff(m2._lm, clean)
662 662
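# Illustrative sketch of the mapping returned by diff() (hypothetical
# nodes n1/n2):
#
#   m1.diff(m2) == {
#       b'changed.txt': ((n1, b''), (n2, b'')),     # differs between the two
#       b'removed.txt': ((n1, b''), (None, b'')),   # only in m1
#       b'added.txt':   ((None, b''), (n2, b'x')),  # only in m2
#   }
#
# With clean=True, files identical in both manifests also appear, mapped
# to None.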
663 663 def setflag(self, key: bytes, flag: bytes) -> None:
664 664 if flag not in _manifestflags:
665 665 raise TypeError(b"Invalid manifest flag set.")
666 666 self._lm[key] = self[key], flag
667 667
668 668 def get(self, key: bytes, default=None) -> Optional[bytes]:
669 669 try:
670 670 return self._lm[key][0]
671 671 except KeyError:
672 672 return default
673 673
674 674 def flags(self, key: bytes) -> bytes:
675 675 try:
676 676 return self._lm[key][1]
677 677 except KeyError:
678 678 return b''
679 679
680 680 def copy(self) -> 'manifestdict':
681 681 c = manifestdict(self._nodelen)
682 682 c._lm = self._lm.copy()
683 683 return c
684 684
685 685 def items(self) -> Iterator[Tuple[bytes, bytes]]:
686 686 return (x[:2] for x in self._lm.iterentries())
687 687
688 688 def iteritems(self) -> Iterator[Tuple[bytes, bytes]]:
689 689 return (x[:2] for x in self._lm.iterentries())
690 690
691 691 def iterentries(self) -> Iterator[Tuple[bytes, bytes, bytes]]:
692 692 return self._lm.iterentries()
693 693
694 694 def text(self) -> ByteString:
695 695 # most likely uses native version
696 696 return self._lm.text()
697 697
698 698 def fastdelta(
699 699 self, base: ByteString, changes: Iterable[Tuple[bytes, bool]]
700 700 ) -> Tuple[ByteString, ByteString]:
701 701 """Given a base manifest text as a bytearray and a list of changes
702 702 relative to that text, compute a delta that can be used by revlog.
703 703 """
704 704 delta = []
705 705 dstart = None
706 706 dend = None
707 707 dline = [b""]
708 708 start = 0
709 709 # zero copy representation of base as a buffer
710 710 addbuf = util.buffer(base)
711 711
712 712 changes = list(changes)
713 713 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
714 714 # start with a readonly loop that finds the offset of
715 715 # each line and creates the deltas
716 716 for f, todelete in changes:
717 717 # bs will either be the index of the item or the insert point
718 718 start, end = _msearch(addbuf, f, start)
719 719 if not todelete:
720 720 h, fl = self._lm[f]
721 721 l = b"%s\0%s%s\n" % (f, hex(h), fl)
722 722 else:
723 723 if start == end:
724 724 # item we want to delete was not found, error out
725 725 raise AssertionError(
726 726 _(b"failed to remove %s from manifest") % f
727 727 )
728 728 l = b""
729 729 if dstart is not None and dstart <= start and dend >= start:
730 730 if dend < end:
731 731 dend = end
732 732 if l:
733 733 dline.append(l)
734 734 else:
735 735 if dstart is not None:
736 736 delta.append((dstart, dend, b"".join(dline)))
737 737 dstart = start
738 738 dend = end
739 739 dline = [l]
740 740
741 741 if dstart is not None:
742 742 delta.append((dstart, dend, b"".join(dline)))
743 743 # apply the delta to the base, and get a delta for addrevision
744 744 deltatext, arraytext = _addlistdelta(base, delta)
745 745 else:
746 746 # For large changes, it's much cheaper to just build the text and
747 747 # diff it.
748 748 arraytext = bytearray(self.text())
749 749 deltatext = mdiff.textdiff(
750 750 util.buffer(base), util.buffer(arraytext)
751 751 )
752 752
753 753 return arraytext, deltatext
754 754
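# Illustrative usage sketch for fastdelta() (hypothetical inputs):
# ``changes`` pairs a path with a "to delete" flag; the result is the new
# full text plus a delta suitable for revlog storage:
#
#   base = bytearray(parent_manifest_text)
#   arraytext, deltatext = m.fastdelta(
#       base, [(b'a.txt', False), (b'gone.txt', True)]
#   )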
755 755
756 756 def _msearch(
757 757 m: ByteString, s: bytes, lo: int = 0, hi: Optional[int] = None
758 758 ) -> Tuple[int, int]:
759 759 """return a tuple (start, end) that says where to find s within m.
760 760
761 761 If the string is found m[start:end] are the line containing
762 762 that string. If start == end the string was not found and
763 763 they indicate the proper sorted insertion point.
764 764 """
765 765
766 766 def advance(i: int, c: bytes):
767 767 while i < lenm and m[i : i + 1] != c:
768 768 i += 1
769 769 return i
770 770
771 771 if not s:
772 772 return (lo, lo)
773 773 lenm = len(m)
774 774 if not hi:
775 775 hi = lenm
776 776 while lo < hi:
777 777 mid = (lo + hi) // 2
778 778 start = mid
779 779 while start > 0 and m[start - 1 : start] != b'\n':
780 780 start -= 1
781 781 end = advance(start, b'\0')
782 782 if bytes(m[start:end]) < s:
783 783 # we know that after the null there are 40 bytes of sha1
784 784 # this translates to the bisect lo = mid + 1
785 785 lo = advance(end + 40, b'\n') + 1
786 786 else:
787 787 # this translates to the bisect hi = mid
788 788 hi = start
789 789 end = advance(lo, b'\0')
790 790 found = m[lo:end]
791 791 if s == found:
792 792 # we know that after the null there are 40 bytes of sha1
793 793 end = advance(end + 40, b'\n')
794 794 return (lo, end + 1)
795 795 else:
796 796 return (lo, lo)
797 797
798 798
799 799 def _checkforbidden(l: Iterable[bytes]) -> None:
800 800 """Check filenames for illegal characters."""
801 801 for f in l:
802 802 if b'\n' in f or b'\r' in f:
803 803 raise error.StorageError(
804 804 _(b"'\\n' and '\\r' disallowed in filenames: %r")
805 805 % pycompat.bytestr(f)
806 806 )
807 807
808 808
809 809 # apply the changes collected during the bisect loop to our addlist
810 810 # return a delta suitable for addrevision
811 811 def _addlistdelta(
812 812 addlist: ByteString,
813 813 x: Iterable[Tuple[int, int, bytes]],
814 814 ) -> Tuple[bytes, ByteString]:
815 815 # for large addlist arrays, building a new array is cheaper
816 816 # than repeatedly modifying the existing one
817 817 currentposition = 0
818 818 newaddlist = bytearray()
819 819
820 820 for start, end, content in x:
821 821 newaddlist += addlist[currentposition:start]
822 822 if content:
823 823 newaddlist += bytearray(content)
824 824
825 825 currentposition = end
826 826
827 827 newaddlist += addlist[currentposition:]
828 828
829 829 deltatext = b"".join(
830 830 struct.pack(b">lll", start, end, len(content)) + content
831 831 for start, end, content in x
832 832 )
833 833 return deltatext, newaddlist
834 834
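# Illustrative sketch of the delta encoding built above: each record is a
# 12-byte big-endian header (start, end, content length) followed by the
# replacement content. Decoding the first record:
#
#   start, end, length = struct.unpack(b'>lll', deltatext[:12])
#   content = deltatext[12:12 + length]   # replaces addlist[start:end]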
835 835
836 836 def _splittopdir(f: bytes) -> Tuple[bytes, bytes]:
837 837 if b'/' in f:
838 838 dir, subpath = f.split(b'/', 1)
839 839 return dir + b'/', subpath
840 840 else:
841 841 return b'', f
842 842
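# e.g. _splittopdir(b'dir/sub/f.txt') == (b'dir/', b'sub/f.txt')
#      _splittopdir(b'f.txt') == (b'', b'f.txt')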
843 843
844 844 _noop = lambda s: None
845 845
846 846
847 847 class treemanifest: # (repository.imanifestdict)
848 848 _dir: bytes
849 849 _dirs: Dict[bytes, 'treemanifest']
850 850 _dirty: bool
851 851 _files: Dict[bytes, bytes]
852 852 _flags: Dict[bytes, bytes]
853 853
854 854 def __init__(self, nodeconstants, dir: bytes = b'', text: bytes = b''):
855 855 self._dir = dir
856 856 self.nodeconstants = nodeconstants
857 857 self._node = self.nodeconstants.nullid
858 858 self._nodelen = self.nodeconstants.nodelen
859 859 self._loadfunc = _noop
860 860 self._copyfunc = _noop
861 861 self._dirty = False
862 862 self._dirs = {}
863 863 self._lazydirs: Dict[
864 864 bytes,
865 865 Tuple[bytes, Callable[[bytes, bytes], 'treemanifest'], bool],
866 866 ] = {}
867 867 # Using _lazymanifest here is a little slower than plain old dicts
868 868 self._files = {}
869 869 self._flags = {}
870 870 if text:
871 871
872 872 def readsubtree(subdir, subm):
873 873 raise AssertionError(
874 874 b'treemanifest constructor only accepts flat manifests'
875 875 )
876 876
877 877 self.parse(text, readsubtree)
878 878 self._dirty = True # Mark flat manifest dirty after parsing
879 879
880 880 def _subpath(self, path: bytes) -> bytes:
881 881 return self._dir + path
882 882
883 883 def _loadalllazy(self) -> None:
884 884 selfdirs = self._dirs
885 885 subpath = self._subpath
886 886 for d, (node, readsubtree, docopy) in self._lazydirs.items():
887 887 if docopy:
888 888 selfdirs[d] = readsubtree(subpath(d), node).copy()
889 889 else:
890 890 selfdirs[d] = readsubtree(subpath(d), node)
891 891 self._lazydirs.clear()
892 892
893 893 def _loadlazy(self, d: bytes) -> None:
894 894 v = self._lazydirs.get(d)
895 895 if v is not None:
896 896 node, readsubtree, docopy = v
897 897 if docopy:
898 898 self._dirs[d] = readsubtree(self._subpath(d), node).copy()
899 899 else:
900 900 self._dirs[d] = readsubtree(self._subpath(d), node)
901 901 del self._lazydirs[d]
902 902
903 903 def _loadchildrensetlazy(
904 904 self, visit: Union[Set[bytes], bytes]
905 905 ) -> Optional[Set[bytes]]:
906 906 if not visit:
907 907 return None
908 908 if visit == b'all' or visit == b'this':
909 909 self._loadalllazy()
910 910 return None
911 911
912 912 visit = cast(Set[bytes], visit)
913 913
914 914 loadlazy = self._loadlazy
915 915 for k in visit:
916 916 loadlazy(k + b'/')
917 917 return visit
918 918
919 919 def _loaddifflazy(self, t1: 'treemanifest', t2: 'treemanifest'):
920 920 """load items in t1 and t2 if they're needed for diffing.
921 921
922 922 The criteria currently is:
923 923 - if it's not present in _lazydirs in either t1 or t2, load it in the
924 924 other (it may already be loaded or it may not exist, doesn't matter)
925 925 - if it's present in _lazydirs in both, compare the nodeid; if it
926 926 differs, load it in both
927 927 """
928 928 toloadlazy = []
929 929 for d, v1 in t1._lazydirs.items():
930 930 v2 = t2._lazydirs.get(d)
931 931 if v2 is None or v2[0] != v1[0]:
932 932 toloadlazy.append(d)
933 933 for d, v1 in t2._lazydirs.items():
934 934 if d not in t1._lazydirs:
935 935 toloadlazy.append(d)
936 936
937 937 for d in toloadlazy:
938 938 t1._loadlazy(d)
939 939 t2._loadlazy(d)
940 940
941 941 def __len__(self) -> int:
942 942 self._load()
943 943 size = len(self._files)
944 944 self._loadalllazy()
945 945 for m in self._dirs.values():
946 946 size += m.__len__()
947 947 return size
948 948
949 949 def __nonzero__(self) -> bool:
950 950 # Faster than "__len__() != 0" since it avoids loading sub-manifests
951 951 return not self._isempty()
952 952
953 953 __bool__ = __nonzero__
954 954
955 955 def _isempty(self) -> bool:
956 956 self._load() # for consistency; already loaded by all callers
957 957 # See if we can skip loading everything.
958 958 if self._files or (
959 959 self._dirs and any(not m._isempty() for m in self._dirs.values())
960 960 ):
961 961 return False
962 962 self._loadalllazy()
963 963 return not self._dirs or all(m._isempty() for m in self._dirs.values())
964 964
965 965 @encoding.strmethod
966 966 def __repr__(self) -> bytes:
967 967 return (
968 968 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
969 969 % (
970 970 self._dir,
971 971 hex(self._node),
972 972 bool(self._loadfunc is _noop),
973 973 self._dirty,
974 974 id(self),
975 975 )
976 976 )
977 977
978 978 def dir(self) -> bytes:
979 979 """The directory that this tree manifest represents, including a
980 980 trailing '/'. Empty string for the repo root directory."""
981 981 return self._dir
982 982
983 983 def node(self) -> bytes:
984 984 """This node of this instance. nullid for unsaved instances. Should
985 985 be updated when the instance is read or written from a revlog.
986 986 """
987 987 assert not self._dirty
988 988 return self._node
989 989
990 990 def setnode(self, node: bytes) -> None:
991 991 self._node = node
992 992 self._dirty = False
993 993
994 994 def iterentries(
995 995 self,
996 996 ) -> Iterator[Tuple[bytes, Union[bytes, 'treemanifest'], bytes]]:
997 997 self._load()
998 998 self._loadalllazy()
999 999 for p, n in sorted(
1000 1000 itertools.chain(self._dirs.items(), self._files.items())
1001 1001 ):
1002 1002 if p in self._files:
1003 1003 yield self._subpath(p), n, self._flags.get(p, b'')
1004 1004 else:
1005 1005 for x in n.iterentries():
1006 1006 yield x
1007 1007
1008 1008 def items(self) -> Iterator[Tuple[bytes, Union[bytes, 'treemanifest']]]:
1009 1009 self._load()
1010 1010 self._loadalllazy()
1011 1011 for p, n in sorted(
1012 1012 itertools.chain(self._dirs.items(), self._files.items())
1013 1013 ):
1014 1014 if p in self._files:
1015 1015 yield self._subpath(p), n
1016 1016 else:
1017 1017 for f, sn in n.items():
1018 1018 yield f, sn
1019 1019
1020 1020 iteritems = items
1021 1021
1022 1022 def iterkeys(self) -> Iterator[bytes]:
1023 1023 self._load()
1024 1024 self._loadalllazy()
1025 1025 for p in sorted(itertools.chain(self._dirs, self._files)):
1026 1026 if p in self._files:
1027 1027 yield self._subpath(p)
1028 1028 else:
1029 1029 for f in self._dirs[p]:
1030 1030 yield f
1031 1031
1032 1032 def keys(self) -> List[bytes]:
1033 1033 return list(self.iterkeys())
1034 1034
1035 1035 def __iter__(self) -> Iterator[bytes]:
1036 1036 return self.iterkeys()
1037 1037
1038 1038 def __contains__(self, f: bytes) -> bool:
1039 1039 if f is None:
1040 1040 return False
1041 1041 self._load()
1042 1042 dir, subpath = _splittopdir(f)
1043 1043 if dir:
1044 1044 self._loadlazy(dir)
1045 1045
1046 1046 if dir not in self._dirs:
1047 1047 return False
1048 1048
1049 1049 return self._dirs[dir].__contains__(subpath)
1050 1050 else:
1051 1051 return f in self._files
1052 1052
1053 1053 def get(self, f: bytes, default: Optional[bytes] = None) -> Optional[bytes]:
1054 1054 self._load()
1055 1055 dir, subpath = _splittopdir(f)
1056 1056 if dir:
1057 1057 self._loadlazy(dir)
1058 1058
1059 1059 if dir not in self._dirs:
1060 1060 return default
1061 1061 return self._dirs[dir].get(subpath, default)
1062 1062 else:
1063 1063 return self._files.get(f, default)
1064 1064
1065 1065 def __getitem__(self, f: bytes) -> bytes:
1066 1066 self._load()
1067 1067 dir, subpath = _splittopdir(f)
1068 1068 if dir:
1069 1069 self._loadlazy(dir)
1070 1070
1071 1071 return self._dirs[dir].__getitem__(subpath)
1072 1072 else:
1073 1073 return self._files[f]
1074 1074
1075 1075 def flags(self, f: bytes) -> bytes:
1076 1076 self._load()
1077 1077 dir, subpath = _splittopdir(f)
1078 1078 if dir:
1079 1079 self._loadlazy(dir)
1080 1080
1081 1081 if dir not in self._dirs:
1082 1082 return b''
1083 1083 return self._dirs[dir].flags(subpath)
1084 1084 else:
1085 1085 if f in self._lazydirs or f in self._dirs:
1086 1086 return b''
1087 1087 return self._flags.get(f, b'')
1088 1088
1089 1089 def find(self, f: bytes) -> Tuple[bytes, bytes]:
1090 1090 self._load()
1091 1091 dir, subpath = _splittopdir(f)
1092 1092 if dir:
1093 1093 self._loadlazy(dir)
1094 1094
1095 1095 return self._dirs[dir].find(subpath)
1096 1096 else:
1097 1097 return self._files[f], self._flags.get(f, b'')
1098 1098
1099 1099 def __delitem__(self, f: bytes) -> None:
1100 1100 self._load()
1101 1101 dir, subpath = _splittopdir(f)
1102 1102 if dir:
1103 1103 self._loadlazy(dir)
1104 1104
1105 1105 self._dirs[dir].__delitem__(subpath)
1106 1106 # If the directory is now empty, remove it
1107 1107 if self._dirs[dir]._isempty():
1108 1108 del self._dirs[dir]
1109 1109 else:
1110 1110 del self._files[f]
1111 1111 if f in self._flags:
1112 1112 del self._flags[f]
1113 1113 self._dirty = True
1114 1114
1115 1115 def set(self, f: bytes, node: bytes, flags: bytes) -> None:
1116 1116 """Set both the node and the flags for path f."""
1117 1117 assert node is not None
1118 1118 if flags not in _manifestflags:
1119 1119 raise TypeError(b"Invalid manifest flag set.")
1120 1120 self._load()
1121 1121 dir, subpath = _splittopdir(f)
1122 1122 if dir:
1123 1123 self._loadlazy(dir)
1124 1124 if dir not in self._dirs:
1125 1125 self._dirs[dir] = treemanifest(
1126 1126 self.nodeconstants, self._subpath(dir)
1127 1127 )
1128 1128 self._dirs[dir].set(subpath, node, flags)
1129 1129 else:
1130 1130 assert len(node) in (20, 32)
1131 1131 self._files[f] = node
1132 1132 self._flags[f] = flags
1133 1133 self._dirty = True
1134 1134
1135 1135 def __setitem__(self, f: bytes, n: bytes) -> None:
1136 1136 assert n is not None
1137 1137 self._load()
1138 1138 dir, subpath = _splittopdir(f)
1139 1139 if dir:
1140 1140 self._loadlazy(dir)
1141 1141 if dir not in self._dirs:
1142 1142 self._dirs[dir] = treemanifest(
1143 1143 self.nodeconstants, self._subpath(dir)
1144 1144 )
1145 1145 self._dirs[dir].__setitem__(subpath, n)
1146 1146 else:
1147 1147 # manifest nodes are either 20 bytes or 32 bytes,
1148 1148 # depending on the hash in use. Assert this as historically
1149 1149 # sometimes extra bytes were added.
1150 1150 assert len(n) in (20, 32)
1151 1151 self._files[f] = n
1152 1152 self._dirty = True
1153 1153
1154 1154 def _load(self) -> None:
1155 1155 if self._loadfunc is not _noop:
1156 1156 lf, self._loadfunc = self._loadfunc, _noop
1157 1157 lf(self)
1158 1158 elif self._copyfunc is not _noop:
1159 1159 cf, self._copyfunc = self._copyfunc, _noop
1160 1160 cf(self)
1161 1161
1162 1162 def setflag(self, f: bytes, flags: bytes) -> None:
1163 1163 """Set the flags (symlink, executable) for path f."""
1164 1164 if flags not in _manifestflags:
1165 1165 raise TypeError(b"Invalid manifest flag set.")
1166 1166 self._load()
1167 1167 dir, subpath = _splittopdir(f)
1168 1168 if dir:
1169 1169 self._loadlazy(dir)
1170 1170 if dir not in self._dirs:
1171 1171 self._dirs[dir] = treemanifest(
1172 1172 self.nodeconstants, self._subpath(dir)
1173 1173 )
1174 1174 self._dirs[dir].setflag(subpath, flags)
1175 1175 else:
1176 1176 self._flags[f] = flags
1177 1177 self._dirty = True
1178 1178
1179 1179 def copy(self) -> 'treemanifest':
1180 1180 copy = treemanifest(self.nodeconstants, self._dir)
1181 1181 copy._node = self._node
1182 1182 copy._dirty = self._dirty
1183 1183 if self._copyfunc is _noop:
1184 1184
1185 1185 def _copyfunc(s):
1186 1186 self._load()
1187 1187 s._lazydirs = {
1188 1188 d: (n, r, True) for d, (n, r, c) in self._lazydirs.items()
1189 1189 }
1190 1190 sdirs = s._dirs
1191 1191 for d, v in self._dirs.items():
1192 1192 sdirs[d] = v.copy()
1193 1193 s._files = dict.copy(self._files)
1194 1194 s._flags = dict.copy(self._flags)
1195 1195
1196 1196 if self._loadfunc is _noop:
1197 1197 _copyfunc(copy)
1198 1198 else:
1199 1199 copy._copyfunc = _copyfunc
1200 1200 else:
1201 1201 copy._copyfunc = self._copyfunc
1202 1202 return copy
1203 1203
1204 1204 def filesnotin(
1205 1205 self, m2: 'treemanifest', match: Optional[matchmod.basematcher] = None
1206 1206 ) -> Set[bytes]:
1207 1207 '''Set of files in this manifest that are not in the other'''
1208 1208 if match and not match.always():
1209 1209 m1 = self._matches(match)
1210 1210 m2 = m2._matches(match)
1211 1211 return m1.filesnotin(m2)
1212 1212
1213 1213 files = set()
1214 1214
1215 1215 def _filesnotin(t1, t2):
1216 1216 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1217 1217 return
1218 1218 t1._load()
1219 1219 t2._load()
1220 1220 self._loaddifflazy(t1, t2)
1221 1221 for d, m1 in t1._dirs.items():
1222 1222 if d in t2._dirs:
1223 1223 m2 = t2._dirs[d]
1224 1224 _filesnotin(m1, m2)
1225 1225 else:
1226 1226 files.update(m1.iterkeys())
1227 1227
1228 1228 for fn in t1._files:
1229 1229 if fn not in t2._files:
1230 1230 files.add(t1._subpath(fn))
1231 1231
1232 1232 _filesnotin(self, m2)
1233 1233 return files
1234 1234
1235 1235 @propertycache
1236 1236 def _alldirs(self) -> pathutil.dirs:
1237 1237 return pathutil.dirs(self)
1238 1238
1239 1239 def dirs(self) -> pathutil.dirs:
1240 1240 return self._alldirs
1241 1241
1242 1242 def hasdir(self, dir: bytes) -> bool:
1243 1243 self._load()
1244 1244 topdir, subdir = _splittopdir(dir)
1245 1245 if topdir:
1246 1246 self._loadlazy(topdir)
1247 1247 if topdir in self._dirs:
1248 1248 return self._dirs[topdir].hasdir(subdir)
1249 1249 return False
1250 1250 dirslash = dir + b'/'
1251 1251 return dirslash in self._dirs or dirslash in self._lazydirs
1252 1252
1253 1253 def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
1254 1254 """Generates matching file names.
1255 1255
1256 1256 It also reports nonexistent files by marking them bad with match.bad().
1257 1257 """
1258 1258 if match.always():
1259 1259 for f in iter(self):
1260 1260 yield f
1261 1261 return
1262 1262
1263 1263 fset = set(match.files())
1264 1264
1265 1265 for fn in self._walk(match):
1266 1266 if fn in fset:
1267 1267 # specified pattern is the exact name
1268 1268 fset.remove(fn)
1269 1269 yield fn
1270 1270
1271 1271 # for dirstate.walk, files=[''] means "walk the whole tree".
1272 1272 # follow that here, too
1273 1273 fset.discard(b'')
1274 1274
1275 1275 for fn in sorted(fset):
1276 1276 if not self.hasdir(fn):
1277 1277 match.bad(fn, None)
1278 1278
1279 1279 def _walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
1280 1280 '''Recursively generates matching file names for walk().'''
1281 1281 visit = match.visitchildrenset(self._dir[:-1])
1282 1282 if not visit:
1283 1283 return
1284 1284
1285 1285 # yield this dir's files and walk its submanifests
1286 1286 self._load()
1287 1287 visit = self._loadchildrensetlazy(visit)
1288 1288 for p in sorted(list(self._dirs) + list(self._files)):
1289 1289 if p in self._files:
1290 1290 fullp = self._subpath(p)
1291 1291 if match(fullp):
1292 1292 yield fullp
1293 1293 else:
1294 1294 if not visit or p[:-1] in visit:
1295 1295 for f in self._dirs[p]._walk(match):
1296 1296 yield f
1297 1297
1298 1298 def _matches(self, match: matchmod.basematcher) -> 'treemanifest':
1299 1299 """recursively generate a new manifest filtered by the match argument."""
1300 1300 if match.always():
1301 1301 return self.copy()
1302 1302 return self._matches_inner(match)
1303 1303
1304 1304 def _matches_inner(self, match: matchmod.basematcher) -> 'treemanifest':
1305 1305 if match.always():
1306 1306 return self.copy()
1307 1307
1308 1308 visit = match.visitchildrenset(self._dir[:-1])
1309 1309 if visit == b'all':
1310 1310 return self.copy()
1311 1311 ret = treemanifest(self.nodeconstants, self._dir)
1312 1312 if not visit:
1313 1313 return ret
1314 1314
1315 1315 self._load()
1316 1316 for fn in self._files:
1317 1317 # While visitchildrenset *usually* lists only subdirs, this is
1318 1318 # actually up to the matcher and may have some files in the set().
1319 1319 # If visit == 'this', we should obviously look at the files in this
1320 1320 # directory; if visit is a set, and fn is in it, we should inspect
1321 1321 # fn (but no need to inspect things not in the set).
1322 1322 if visit != b'this' and fn not in visit:
1323 1323 continue
1324 1324 fullp = self._subpath(fn)
1325 1325 # visitchildrenset isn't perfect, we still need to call the regular
1326 1326 # matcher code to further filter results.
1327 1327 if not match(fullp):
1328 1328 continue
1329 1329 ret._files[fn] = self._files[fn]
1330 1330 if fn in self._flags:
1331 1331 ret._flags[fn] = self._flags[fn]
1332 1332
1333 1333 visit = self._loadchildrensetlazy(visit)
1334 1334 for dir, subm in self._dirs.items():
1335 1335 if visit and dir[:-1] not in visit:
1336 1336 continue
1337 1337 m = subm._matches_inner(match)
1338 1338 if not m._isempty():
1339 1339 ret._dirs[dir] = m
1340 1340
1341 1341 if not ret._isempty():
1342 1342 ret._dirty = True
1343 1343 return ret
1344 1344
1345 1345 def fastdelta(
1346 1346 self, base: ByteString, changes: Iterable[Tuple[bytes, bool]]
1347 1347 ) -> ByteString:
1348 1348 raise FastdeltaUnavailable()
1349 1349
1350 1350 def diff(
1351 1351 self,
1352 1352 m2: 'treemanifest',
1353 1353 match: Optional[matchmod.basematcher] = None,
1354 1354 clean: bool = False,
1355 1355 ) -> Dict[
1356 1356 bytes,
1357 1357 Optional[
1358 1358 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
1359 1359 ],
1360 1360 ]:
1361 1361 """Finds changes between the current manifest and m2.
1362 1362
1363 1363 Args:
1364 1364 m2: the manifest to which this manifest should be compared.
1365 1365 clean: if true, include files unchanged between these manifests
1366 1366 with a None value in the returned dictionary.
1367 1367
1368 1368 The result is returned as a dict with filename as key and
1369 1369 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1370 1370 nodeid in the current/other manifest and fl1/fl2 is the flag
1371 1371 in the current/other manifest. Where the file does not exist,
1372 1372 the nodeid will be None and the flags will be the empty
1373 1373 string.
1374 1374 """
1375 1375 if match and not match.always():
1376 1376 m1 = self._matches(match)
1377 1377 m2 = m2._matches(match)
1378 1378 return m1.diff(m2, clean=clean)
1379 1379 result = {}
1380 1380 emptytree = treemanifest(self.nodeconstants)
1381 1381
1382 1382 def _iterativediff(t1, t2, stack):
1383 1383 """compares two tree manifests and append new tree-manifests which
1384 1384 needs to be compared to stack"""
1385 1385 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1386 1386 return
1387 1387 t1._load()
1388 1388 t2._load()
1389 1389 self._loaddifflazy(t1, t2)
1390 1390
1391 1391 for d, m1 in t1._dirs.items():
1392 1392 m2 = t2._dirs.get(d, emptytree)
1393 1393 stack.append((m1, m2))
1394 1394
1395 1395 for d, m2 in t2._dirs.items():
1396 1396 if d not in t1._dirs:
1397 1397 stack.append((emptytree, m2))
1398 1398
1399 1399 for fn, n1 in t1._files.items():
1400 1400 fl1 = t1._flags.get(fn, b'')
1401 1401 n2 = t2._files.get(fn, None)
1402 1402 fl2 = t2._flags.get(fn, b'')
1403 1403 if n1 != n2 or fl1 != fl2:
1404 1404 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1405 1405 elif clean:
1406 1406 result[t1._subpath(fn)] = None
1407 1407
1408 1408 for fn, n2 in t2._files.items():
1409 1409 if fn not in t1._files:
1410 1410 fl2 = t2._flags.get(fn, b'')
1411 1411 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1412 1412
1413 1413 stackls = []
1414 1414 _iterativediff(self, m2, stackls)
1415 1415 while stackls:
1416 1416 t1, t2 = stackls.pop()
1417 1417 # stackls is populated in the function call
1418 1418 _iterativediff(t1, t2, stackls)
1419 1419 return result
1420 1420
1421 1421 def unmodifiedsince(self, m2: 'treemanifest') -> bool:
1422 1422 return not self._dirty and not m2._dirty and self._node == m2._node
1423 1423
1424 1424 def parse(
1425 1425 self,
1426 1426 text: bytes,
1427 1427 readsubtree: Callable[[bytes, bytes], 'treemanifest'],
1428 1428 ) -> None:
1429 1429 selflazy = self._lazydirs
1430 1430 for f, n, fl in _parse(self._nodelen, text):
1431 1431 if fl == b't':
1432 1432 f = f + b'/'
1433 1433 # False below means "doesn't need to be copied" and can use the
1434 1434 # cached value from readsubtree directly.
1435 1435 selflazy[f] = (n, readsubtree, False)
1436 1436 elif b'/' in f:
1437 1437 # This is a flat manifest, so use __setitem__ and setflag rather
1438 1438 # than assigning directly to _files and _flags, so we can
1439 1439 # assign a path in a subdirectory, and to mark dirty (compared
1440 1440 # to nullid).
1441 1441 self[f] = n
1442 1442 if fl:
1443 1443 self.setflag(f, fl)
1444 1444 else:
1445 1445 # Assigning to _files and _flags avoids marking as dirty,
1446 1446 # and should be a little faster.
1447 1447 self._files[f] = n
1448 1448 if fl:
1449 1449 self._flags[f] = fl
1450 1450
1451 1451 def text(self) -> ByteString:
1452 1452 """Get the full data of this manifest as a bytestring."""
1453 1453 self._load()
1454 1454 return _text(self.iterentries())
1455 1455
1456 1456 def dirtext(self) -> ByteString:
1457 1457 """Get the full data of this directory as a bytestring. Make sure that
1458 1458 any submanifests have been written first, so their nodeids are correct.
1459 1459 """
1460 1460 self._load()
1461 1461 flags = self.flags
1462 1462 lazydirs = [(d[:-1], v[0], b't') for d, v in self._lazydirs.items()]
1463 1463 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1464 1464 files = [(f, self._files[f], flags(f)) for f in self._files]
1465 1465 return _text(sorted(dirs + files + lazydirs))
1466 1466
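# Illustrative sketch: a subdirectory entry b'lib/' with node ``n`` is
# serialized by dirtext() above as the line b'lib' + b'\x00' + hex(n) + b't\n'.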
1467 1467 def read(
1468 1468 self,
1469 1469 gettext: Callable[[], ByteString],
1470 1470 readsubtree: Callable[[bytes, bytes], 'treemanifest'],
1471 1471 ) -> None:
1472 1472 def _load_for_read(s):
1473 1473 s.parse(gettext(), readsubtree)
1474 1474 s._dirty = False
1475 1475
1476 1476 self._loadfunc = _load_for_read
1477 1477
1478 1478 def writesubtrees(
1479 1479 self,
1480 1480 m1: 'treemanifest',
1481 1481 m2: 'treemanifest',
1482 1482 writesubtree: Callable[
1483 1483 [
1484 1484 Callable[['treemanifest'], None],
1485 1485 bytes,
1486 1486 bytes,
1487 1487 matchmod.basematcher,
1488 1488 ],
1489 1489 None,
1490 1490 ],
1491 1491 match: matchmod.basematcher,
1492 1492 ) -> None:
1493 1493 self._load() # for consistency; should never have any effect here
1494 1494 m1._load()
1495 1495 m2._load()
1496 1496 emptytree = treemanifest(self.nodeconstants)
1497 1497
1498 1498 def getnode(m, d):
1499 1499 ld = m._lazydirs.get(d)
1500 1500 if ld:
1501 1501 return ld[0]
1502 1502 tree = m._dirs.get(d, emptytree)
1503 1503 assert tree is not None # helps pytype
1504 1504 return tree._node
1505 1505
1506 1506 # let's skip investigating things that `match` says we do not need.
1507 1507 visit = match.visitchildrenset(self._dir[:-1])
1508 1508 visit = self._loadchildrensetlazy(visit)
1509 1509 if visit == b'this' or visit == b'all':
1510 1510 visit = None
1511 1511 for d, subm in self._dirs.items():
1512 1512 if visit and d[:-1] not in visit:
1513 1513 continue
1514 1514 subp1 = getnode(m1, d)
1515 1515 subp2 = getnode(m2, d)
1516 1516 if subp1 == self.nodeconstants.nullid:
1517 1517 subp1, subp2 = subp2, subp1
1518 1518 writesubtree(subm, subp1, subp2, match)
1519 1519
1520 1520 def walksubtrees(
1521 1521 self, matcher: Optional[matchmod.basematcher] = None
1522 1522 ) -> Iterator['treemanifest']:
1523 1523 """Returns an iterator of the subtrees of this manifest, including this
1524 1524 manifest itself.
1525 1525
1526 1526 If `matcher` is provided, it only returns subtrees that match.
1527 1527 """
1528 1528 if matcher and not matcher.visitdir(self._dir[:-1]):
1529 1529 return
1530 1530 if not matcher or matcher(self._dir[:-1]):
1531 1531 yield self
1532 1532
1533 1533 self._load()
1534 1534 # OPT: use visitchildrenset to avoid loading everything.
1535 1535 self._loadalllazy()
1536 1536 for d, subm in self._dirs.items():
1537 1537 for subtree in subm.walksubtrees(matcher=matcher):
1538 1538 yield subtree
1539 1539
1540 1540
1541 1541 class manifestfulltextcache(util.lrucachedict):
1542 1542 """File-backed LRU cache for the manifest cache
1543 1543
1544 1544 File consists of entries, up to EOF:
1545 1545
1546 1546 - 20 bytes node, 4 bytes length, <length> manifest data
1547 1547
1548 1548 These are written in reverse cache order (oldest to newest).
1549 1549
1550 1550 """
1551 1551
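# Illustrative sketch of decoding one record, per the format above
# (mirrors read() below):
#
#   node = fp.read(20)
#   (size,) = struct.unpack(b'>L', fp.read(4))
#   text = fp.read(size)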
1552 1552 _file = b'manifestfulltextcache'
1553 1553
1554 1554 def __init__(self, max):
1555 1555 super(manifestfulltextcache, self).__init__(max)
1556 1556 self._dirty = False
1557 1557 self._read = False
1558 1558 self._opener = None
1559 1559
1560 1560 def read(self):
1561 1561 if self._read or self._opener is None:
1562 1562 return
1563 1563
1564 1564 try:
1565 1565 with self._opener(self._file) as fp:
1566 1566 set = super(manifestfulltextcache, self).__setitem__
1567 1567 # ignore trailing data; this is a cache, so corrupt entries are skipped
1568 1568 while True:
1569 1569 # TODO do we need to do work here for sha1 portability?
1570 1570 node = fp.read(20)
1571 1571 if len(node) < 20:
1572 1572 break
1573 1573 try:
1574 1574 size = struct.unpack(b'>L', fp.read(4))[0]
1575 1575 except struct.error:
1576 1576 break
1577 1577 value = bytearray(fp.read(size))
1578 1578 if len(value) != size:
1579 1579 break
1580 1580 set(node, value)
1581 1581 except IOError:
1582 1582 # the file is allowed to be missing
1583 1583 pass
1584 1584
1585 1585 self._read = True
1586 1586 self._dirty = False
1587 1587
1588 1588 def write(self):
1589 1589 if not self._dirty or self._opener is None:
1590 1590 return
1591 1591 # rotate backwards to the first used node
1592 1592 try:
1593 1593 with self._opener(
1594 1594 self._file, b'w', atomictemp=True, checkambig=True
1595 1595 ) as fp:
1596 1596 node = self._head.prev
1597 1597 while True:
1598 1598 if node.key in self._cache:
1599 1599 fp.write(node.key)
1600 1600 fp.write(struct.pack(b'>L', len(node.value)))
1601 1601 fp.write(node.value)
1602 1602 if node is self._head:
1603 1603 break
1604 1604 node = node.prev
1605 1605 except IOError:
1606 1606 # We could not write the cache (e.g. permission error);
1607 1607 # the content is allowed to be missing.
1608 1608 #
1609 1609 # We could try harder and see if we could recreate a wcache
1610 1610 # directory where we could write to.
1611 1611 #
1612 1612 # XXX the error passes silently; having some way to issue an
1613 1613 # error log via `ui.log` would be nice.
1614 1614 pass
1615 1615
1616 1616 def __len__(self):
1617 1617 if not self._read:
1618 1618 self.read()
1619 1619 return super(manifestfulltextcache, self).__len__()
1620 1620
1621 1621 def __contains__(self, k):
1622 1622 if not self._read:
1623 1623 self.read()
1624 1624 return super(manifestfulltextcache, self).__contains__(k)
1625 1625
1626 1626 def __iter__(self):
1627 1627 if not self._read:
1628 1628 self.read()
1629 1629 return super(manifestfulltextcache, self).__iter__()
1630 1630
1631 1631 def __getitem__(self, k):
1632 1632 if not self._read:
1633 1633 self.read()
1634 1634 # the cache lru order can change on read
1635 1635 setdirty = self._cache.get(k) is not self._head
1636 1636 value = super(manifestfulltextcache, self).__getitem__(k)
1637 1637 if setdirty:
1638 1638 self._dirty = True
1639 1639 return value
1640 1640
1641 1641 def __setitem__(self, k, v):
1642 1642 if not self._read:
1643 1643 self.read()
1644 1644 super(manifestfulltextcache, self).__setitem__(k, v)
1645 1645 self._dirty = True
1646 1646
1647 1647 def __delitem__(self, k):
1648 1648 if not self._read:
1649 1649 self.read()
1650 1650 super(manifestfulltextcache, self).__delitem__(k)
1651 1651 self._dirty = True
1652 1652
1653 1653 def get(self, k, default=None):
1654 1654 if not self._read:
1655 1655 self.read()
1656 1656 return super(manifestfulltextcache, self).get(k, default=default)
1657 1657
1658 1658 def clear(self, clear_persisted_data=False):
1659 1659 super(manifestfulltextcache, self).clear()
1660 1660 if clear_persisted_data:
1661 1661 self._dirty = True
1662 1662 self.write()
1663 1663 self._read = False
1664 1664
1665 1665
1666 1666 # an upper bound of what we expect from compression
1667 1667 # (real-life value seems to be 3)
1668 1668 MAXCOMPRESSION = 3
1669 1669
1670 1670
1671 1671 class FastdeltaUnavailable(Exception):
1672 1672 """Exception raised when fastdelta isn't usable on a manifest."""
1673 1673
1674 1674
1675 1675 class manifestrevlog: # (repository.imanifeststorage)
1676 1676 """A revlog that stores manifest texts. This is responsible for caching the
1677 1677 full-text manifest contents.
1678 1678 """
1679 1679
1680 1680 def __init__(
1681 1681 self,
1682 1682 nodeconstants,
1683 1683 opener,
1684 1684 tree=b'',
1685 1685 dirlogcache=None,
1686 1686 treemanifest=False,
1687 1687 ):
1688 1688 """Constructs a new manifest revlog
1689 1689
1690 1690 `indexfile` - used by extensions to have two manifests at once, like
1691 1691 when transitioning between flat manifests and treemanifests.
1692 1692
1693 1693 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1694 1694 options can also be used to make this a tree manifest revlog. The opener
1695 1695 option takes precedence, so if it is set to True, we ignore whatever
1696 1696 value is passed in to the constructor.
1697 1697 """
1698 1698 self.nodeconstants = nodeconstants
1699 1699 # During normal operations, we expect to deal with not more than four
1700 1700 # revs at a time (such as during commit --amend). When rebasing large
1701 1701 # stacks of commits, the number can go up, hence the config knob below.
1702 1702 cachesize = 4
1703 1703 optiontreemanifest = False
1704 1704 persistentnodemap = False
1705 1705 opts = getattr(opener, 'options', None)
1706 1706 if opts is not None:
1707 1707 cachesize = opts.get(b'manifestcachesize', cachesize)
1708 1708 optiontreemanifest = opts.get(b'treemanifest', False)
1709 1709 persistentnodemap = opts.get(b'persistent-nodemap', False)
1710 1710
1711 1711 self._treeondisk = optiontreemanifest or treemanifest
1712 1712
1713 1713 self._fulltextcache = manifestfulltextcache(cachesize)
1714 1714
1715 1715 if tree:
1716 1716 assert self._treeondisk, (tree, b'opts is %r' % opts)
1717 1717
1718 1718 radix = b'00manifest'
1719 1719 if tree:
1720 1720 radix = b"meta/" + tree + radix
1721 1721
1722 1722 self.tree = tree
1723 1723
1724 1724 # The dirlogcache is kept on the root manifest log
1725 1725 if tree:
1726 1726 self._dirlogcache = dirlogcache
1727 1727 else:
1728 1728 self._dirlogcache = {b'': self}
1729 1729
1730 1730 self._revlog = revlog.revlog(
1731 1731 opener,
1732 1732 target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
1733 1733 radix=radix,
1734 1734 # only root indexfile is cached
1735 1735 checkambig=not bool(tree),
1736 1736 mmaplargeindex=True,
1737 1737 upperboundcomp=MAXCOMPRESSION,
1738 1738 persistentnodemap=persistentnodemap,
1739 1739 )
1740 1740
1741 1741 self.index = self._revlog.index
1742 1742
1743 1743 def get_revlog(self):
1744 1744 """return an actual revlog instance if any
1745 1745
1746 1746 This exists because a lot of code leverages the fact that the
1747 1747 underlying storage is a revlog for optimization, so giving a simple
1748 1748 way to access the revlog instance helps such code.
1749 1749 """
1750 1750 return self._revlog
1751 1751
1752 1752 def _setupmanifestcachehooks(self, repo):
1753 1753 """Persist the manifestfulltextcache on lock release"""
1754 1754 if not hasattr(repo, '_wlockref'):
1755 1755 return
1756 1756
1757 1757 self._fulltextcache._opener = repo.wcachevfs
1758 1758 if repo._currentlock(repo._wlockref) is None:
1759 1759 return
1760 1760
1761 1761 reporef = weakref.ref(repo)
1762 1762 manifestrevlogref = weakref.ref(self)
1763 1763
1764 1764 def persistmanifestcache(success):
1765 1765 # Repo is in an unknown state, do not persist.
1766 1766 if not success:
1767 1767 return
1768 1768
1769 1769 repo = reporef()
1770 1770 self = manifestrevlogref()
1771 1771 if repo is None or self is None:
1772 1772 return
1773 1773 if repo.manifestlog.getstorage(b'') is not self:
1774 1774 # there's a different manifest in play now, abort
1775 1775 return
1776 1776 self._fulltextcache.write()
1777 1777
1778 1778 repo._afterlock(persistmanifestcache)
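# A minimal sketch of the weak-reference guard used above, with hypothetical
# names: because the hook holds only weakrefs, registering it never keeps the
# repo or the revlog alive, and it degrades to a no-op once either is gone.
#
#     import weakref
#
#     class Cache:
#         def write(self):
#             print('persisted')
#
#     cache = Cache()
#     ref = weakref.ref(cache)
#
#     def after_lock(success):
#         target = ref()
#         if success and target is not None:
#             target.write()
#
#     after_lock(True)   # prints 'persisted'
#     del cache
#     after_lock(True)   # referent collected: nothing happens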
1779 1779
1780 1780 @property
1781 1781 def fulltextcache(self):
1782 1782 return self._fulltextcache
1783 1783
1784 1784 def clearcaches(self, clear_persisted_data: bool = False) -> None:
1785 1785 self._revlog.clearcaches()
1786 1786 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1787 1787 self._dirlogcache = {self.tree: self}
1788 1788
1789 1789 def dirlog(self, d):
1790 1790 if d:
1791 1791 assert self._treeondisk
1792 1792 if d not in self._dirlogcache:
1793 1793 mfrevlog = manifestrevlog(
1794 1794 self.nodeconstants,
1795 1795 self.opener,
1796 1796 d,
1797 1797 self._dirlogcache,
1798 1798 treemanifest=self._treeondisk,
1799 1799 )
1800 1800 self._dirlogcache[d] = mfrevlog
1801 1801 return self._dirlogcache[d]
1802 1802
1803 1803 def add(
1804 1804 self,
1805 1805 m,
1806 1806 transaction,
1807 1807 link,
1808 1808 p1,
1809 1809 p2,
1810 1810 added: Iterable[bytes],
1811 1811 removed: Iterable[bytes],
1812 1812 readtree=None,
1813 1813 match=None,
1814 1814 ):
1815 1815 """add some manifest entry in to the manifest log
1816 1816
1817 1817 input:
1818 1818
1819 1819 m: the manifest dict we want to store
1820 1820 transaction: the open transaction
1821 1821 p1: manifest-node of p1
1822 1822 p2: manifest-node of p2
1823 1823 added: files added/changed compared to the parent
1824 1824 removed: files removed compared to the parent
1825 1825
1826 1826 tree manifest input:
1827 1827
1828 1828 readtree: a function to read a subtree
1829 1829 match: a filematcher for the subpart of the tree manifest
1830 1830 """
1831 1831 try:
1832 1832 if p1 not in self.fulltextcache:
1833 1833 raise FastdeltaUnavailable()
1834 1834 # If our first parent is in the manifest cache, we can
1835 1835 # compute a delta here using properties we know about the
1836 1836 # manifest up-front, which may save time later for the
1837 1837 # revlog layer.
1838 1838
1839 1839 _checkforbidden(added)
1840 1840 # combine the changed lists into one sorted iterator
1841 1841 work = heapq.merge(
1842 1842 [(x, False) for x in sorted(added)],
1843 1843 [(x, True) for x in sorted(removed)],
1844 1844 )
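# For illustration: heapq.merge interleaves the two pre-sorted streams into a
# single sorted stream of (path, was_removed) pairs, e.g.
#
#     >>> import heapq
#     >>> list(heapq.merge([(b'a', False), (b'c', False)], [(b'b', True)]))
#     [(b'a', False), (b'b', True), (b'c', False)]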
1845 1845
1846 1846 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1847 1847 cachedelta = self._revlog.rev(p1), deltatext
1848 1848 text = util.buffer(arraytext)
1849 1849 rev = self._revlog.addrevision(
1850 1850 text, transaction, link, p1, p2, cachedelta
1851 1851 )
1852 1852 n = self._revlog.node(rev)
1853 1853 except FastdeltaUnavailable:
1854 1854 # The first parent manifest isn't already loaded or the
1855 1855 # manifest implementation doesn't support fastdelta, so
1856 1856 # we'll just encode a fulltext of the manifest and pass
1857 1857 # that through to the revlog layer, and let it handle the
1858 1858 # delta process.
1859 1859 if self._treeondisk:
1860 1860 assert readtree, b"readtree must be set for treemanifest writes"
1861 1861 assert match, b"match must be specified for treemanifest writes"
1862 1862 m1 = readtree(self.tree, p1)
1863 1863 m2 = readtree(self.tree, p2)
1864 1864 n = self._addtree(
1865 1865 m, transaction, link, m1, m2, readtree, match=match
1866 1866 )
1867 1867 arraytext = None
1868 1868 else:
1869 1869 text = m.text()
1870 1870 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1871 1871 n = self._revlog.node(rev)
1872 1872 arraytext = bytearray(text)
1873 1873
1874 1874 if arraytext is not None:
1875 1875 self.fulltextcache[n] = arraytext
1876 1876
1877 1877 return n
1878 1878
1879 1879 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1880 1880 # If the manifest is unchanged compared to one parent,
1881 1881 # don't write a new revision
1882 1882 if self.tree != b'' and (
1883 1883 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1884 1884 ):
1885 1885 return m.node()
1886 1886
1887 1887 def writesubtree(subm, subp1, subp2, match):
1888 1888 sublog = self.dirlog(subm.dir())
1889 1889 sublog.add(
1890 1890 subm,
1891 1891 transaction,
1892 1892 link,
1893 1893 subp1,
1894 1894 subp2,
1895 1895 None,
1896 1896 None,
1897 1897 readtree=readtree,
1898 1898 match=match,
1899 1899 )
1900 1900
1901 1901 m.writesubtrees(m1, m2, writesubtree, match)
1902 1902 text = m.dirtext()
1903 1903 n = None
1904 1904 if self.tree != b'':
1905 1905 # Double-check whether contents are unchanged compared to one parent
1906 1906 if text == m1.dirtext():
1907 1907 n = m1.node()
1908 1908 elif text == m2.dirtext():
1909 1909 n = m2.node()
1910 1910
1911 1911 if not n:
1912 1912 rev = self._revlog.addrevision(
1913 1913 text, transaction, link, m1.node(), m2.node()
1914 1914 )
1915 1915 n = self._revlog.node(rev)
1916 1916
1917 1917 # Save nodeid so parent manifest can calculate its nodeid
1918 1918 m.setnode(n)
1919 1919 return n
1920 1920
1921 1921 def __len__(self):
1922 1922 return len(self._revlog)
1923 1923
1924 1924 def __iter__(self):
1925 1925 return self._revlog.__iter__()
1926 1926
1927 1927 def rev(self, node):
1928 1928 return self._revlog.rev(node)
1929 1929
1930 1930 def node(self, rev):
1931 1931 return self._revlog.node(rev)
1932 1932
1933 1933 def lookup(self, value):
1934 1934 return self._revlog.lookup(value)
1935 1935
1936 1936 def parentrevs(self, rev):
1937 1937 return self._revlog.parentrevs(rev)
1938 1938
1939 1939 def parents(self, node):
1940 1940 return self._revlog.parents(node)
1941 1941
1942 1942 def linkrev(self, rev):
1943 1943 return self._revlog.linkrev(rev)
1944 1944
1945 1945 def checksize(self):
1946 1946 return self._revlog.checksize()
1947 1947
1948 1948 def revision(self, node):
1949 1949 return self._revlog.revision(node)
1950 1950
1951 1951 def rawdata(self, node):
1952 1952 return self._revlog.rawdata(node)
1953 1953
1954 1954 def revdiff(self, rev1, rev2):
1955 1955 return self._revlog.revdiff(rev1, rev2)
1956 1956
1957 1957 def cmp(self, node, text):
1958 1958 return self._revlog.cmp(node, text)
1959 1959
1960 1960 def deltaparent(self, rev):
1961 1961 return self._revlog.deltaparent(rev)
1962 1962
1963 1963 def emitrevisions(
1964 1964 self,
1965 1965 nodes,
1966 1966 nodesorder=None,
1967 1967 revisiondata=False,
1968 1968 assumehaveparentrevisions=False,
1969 1969 deltamode=repository.CG_DELTAMODE_STD,
1970 1970 sidedata_helpers=None,
1971 1971 debug_info=None,
1972 1972 ):
1973 1973 return self._revlog.emitrevisions(
1974 1974 nodes,
1975 1975 nodesorder=nodesorder,
1976 1976 revisiondata=revisiondata,
1977 1977 assumehaveparentrevisions=assumehaveparentrevisions,
1978 1978 deltamode=deltamode,
1979 1979 sidedata_helpers=sidedata_helpers,
1980 1980 debug_info=debug_info,
1981 1981 )
1982 1982
1983 1983 def addgroup(
1984 1984 self,
1985 1985 deltas,
1986 1986 linkmapper,
1987 1987 transaction,
1988 1988 alwayscache=False,
1989 1989 addrevisioncb=None,
1990 1990 duplicaterevisioncb=None,
1991 1991 debug_info=None,
1992 1992 delta_base_reuse_policy=None,
1993 1993 ):
1994 1994 return self._revlog.addgroup(
1995 1995 deltas,
1996 1996 linkmapper,
1997 1997 transaction,
1998 1998 alwayscache=alwayscache,
1999 1999 addrevisioncb=addrevisioncb,
2000 2000 duplicaterevisioncb=duplicaterevisioncb,
2001 2001 debug_info=debug_info,
2002 2002 delta_base_reuse_policy=delta_base_reuse_policy,
2003 2003 )
2004 2004
2005 2005 def rawsize(self, rev):
2006 2006 return self._revlog.rawsize(rev)
2007 2007
2008 2008 def getstrippoint(self, minlink):
2009 2009 return self._revlog.getstrippoint(minlink)
2010 2010
2011 2011 def strip(self, minlink, transaction):
2012 2012 return self._revlog.strip(minlink, transaction)
2013 2013
2014 2014 def files(self):
2015 2015 return self._revlog.files()
2016 2016
2017 2017 def clone(self, tr, destrevlog, **kwargs):
2018 2018 if not isinstance(destrevlog, manifestrevlog):
2019 2019 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
2020 2020
2021 2021 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
2022 2022
2023 2023 def storageinfo(
2024 2024 self,
2025 2025 exclusivefiles=False,
2026 2026 sharedfiles=False,
2027 2027 revisionscount=False,
2028 2028 trackedsize=False,
2029 2029 storedsize=False,
2030 2030 ):
2031 2031 return self._revlog.storageinfo(
2032 2032 exclusivefiles=exclusivefiles,
2033 2033 sharedfiles=sharedfiles,
2034 2034 revisionscount=revisionscount,
2035 2035 trackedsize=trackedsize,
2036 2036 storedsize=storedsize,
2037 2037 )
2038 2038
2039 2039 @property
2040 2040 def opener(self):
2041 2041 return self._revlog.opener
2042 2042
2043 2043 @opener.setter
2044 2044 def opener(self, value):
2045 2045 self._revlog.opener = value
2046 2046
2047 2047
2048 2048 AnyManifestCtx = Union['ManifestCtx', 'TreeManifestCtx']
2049 2049 # TODO: drop this in favor of repository.imanifestdict
2050 2050 AnyManifestDict = Union[manifestdict, treemanifest]
2051 2051
2052 2052
2053 class ManifestLog:
2053 class manifestlog: # (repository.imanifestlog)
2054 2054 """A collection class representing the collection of manifest snapshots
2055 2055 referenced by commits in the repository.
2056 2056
2057 2057 In this situation, 'manifest' refers to the abstract concept of a snapshot
2058 2058 of the list of files in the given commit. Consumers of the output of this
2059 2059 class do not care about the implementation details of the actual manifests
2060 2060 they receive (i.e. tree or flat or lazily loaded, etc)."""
2061 2061
2062 2062 def __init__(self, opener, repo, rootstore, narrowmatch):
2063 2063 self.nodeconstants = repo.nodeconstants
2064 2064 usetreemanifest = False
2065 2065 cachesize = 4
2066 2066
2067 2067 opts = getattr(opener, 'options', None)
2068 2068 if opts is not None:
2069 2069 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
2070 2070 cachesize = opts.get(b'manifestcachesize', cachesize)
2071 2071
2072 2072 self._treemanifests = usetreemanifest
2073 2073
2074 2074 self._rootstore = rootstore
2075 2075 self._rootstore._setupmanifestcachehooks(repo)
2076 2076 self._narrowmatch = narrowmatch
2077 2077
2078 2078 # A cache of the manifestctx or treemanifestctx for each directory
2079 2079 self._dirmancache = {}
2080 2080 self._dirmancache[b''] = util.lrucachedict(cachesize)
2081 2081
2082 2082 self._cachesize = cachesize
2083 2083
2084 2084 def __getitem__(self, node):
2085 2085 """Retrieves the manifest instance for the given node. Throws a
2086 2086 LookupError if not found.
2087 2087 """
2088 2088 return self.get(b'', node)
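# A hedged usage sketch (the repo and node names are illustrative only):
#
#     mfnode = repo[b'tip'].manifestnode()
#     mfctx = repo.manifestlog[mfnode]  # LookupError if the node is unknown
#     mf = mfctx.read()                 # a manifestdict (or treemanifest)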
2089 2089
2090 2090 @property
2091 2091 def narrowed(self):
2092 2092 return not (self._narrowmatch is None or self._narrowmatch.always())
2093 2093
2094 2094 def get(
2095 2095 self, tree: bytes, node: bytes, verify: bool = True
2096 2096 ) -> AnyManifestCtx:
2097 2097 """Retrieves the manifest instance for the given node. Throws a
2098 2098 LookupError if not found.
2099 2099
2100 2100 `verify` - if True an exception will be thrown if the node is not in
2101 2101 the revlog
2102 2102 """
2103 2103 if node in self._dirmancache.get(tree, ()):
2104 2104 return self._dirmancache[tree][node]
2105 2105
2106 2106 if not self._narrowmatch.always():
2107 2107 if not self._narrowmatch.visitdir(tree[:-1]):
2108 2108 return excludeddirmanifestctx(self.nodeconstants, tree, node)
2109 2109 if tree:
2110 2110 if self._rootstore._treeondisk:
2111 2111 if verify:
2112 2112 # Side-effect is LookupError is raised if node doesn't
2113 2113 # exist.
2114 2114 self.getstorage(tree).rev(node)
2115 2115
2116 2116 m = treemanifestctx(self, tree, node)
2117 2117 else:
2118 2118 raise error.Abort(
2119 2119 _(
2120 2120 b"cannot ask for manifest directory '%s' in a flat "
2121 2121 b"manifest"
2122 2122 )
2123 2123 % tree
2124 2124 )
2125 2125 else:
2126 2126 if verify:
2127 2127 # Side-effect is LookupError is raised if node doesn't exist.
2128 2128 self._rootstore.rev(node)
2129 2129
2130 2130 if self._treemanifests:
2131 2131 m = treemanifestctx(self, b'', node)
2132 2132 else:
2133 2133 m = manifestctx(self, node)
2134 2134
2135 2135 if node != self.nodeconstants.nullid:
2136 2136 mancache = self._dirmancache.get(tree)
2137 2137 if not mancache:
2138 2138 mancache = util.lrucachedict(self._cachesize)
2139 2139 self._dirmancache[tree] = mancache
2140 2140 mancache[node] = m
2141 2141 return m
2142 2142
2143 2143 def getstorage(self, tree):
2144 2144 return self._rootstore.dirlog(tree)
2145 2145
2146 2146 def clearcaches(self, clear_persisted_data: bool = False) -> None:
2147 2147 self._dirmancache.clear()
2148 2148 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2149 2149
2150 2150 def rev(self, node) -> int:
2151 2151 return self._rootstore.rev(node)
2152 2152
2153 2153 def update_caches(self, transaction) -> None:
2154 2154 return self._rootstore._revlog.update_caches(transaction=transaction)
2155 2155
2156 2156
2157 manifestlog = interfaceutil.implementer(repository.imanifestlog)(ManifestLog)
2158
2159 if typing.TYPE_CHECKING:
2160 manifestlog = ManifestLog
2161
2162
2163 2157 class MemManifestCtx:
2164 2158 _manifestdict: manifestdict
2165 2159
2166 2160 def __init__(self, manifestlog):
2167 2161 self._manifestlog = manifestlog
2168 2162 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2169 2163
2170 2164 def _storage(self) -> manifestrevlog:
2171 2165 return self._manifestlog.getstorage(b'')
2172 2166
2173 2167 def copy(self) -> 'MemManifestCtx':
2174 2168 memmf = memmanifestctx(self._manifestlog)
2175 2169 memmf._manifestdict = self.read().copy()
2176 2170 return memmf
2177 2171
2178 2172 def read(self) -> 'manifestdict':
2179 2173 return self._manifestdict
2180 2174
2181 2175 def write(self, transaction, link, p1, p2, added, removed, match=None):
2182 2176 return self._storage().add(
2183 2177 self._manifestdict,
2184 2178 transaction,
2185 2179 link,
2186 2180 p1,
2187 2181 p2,
2188 2182 added,
2189 2183 removed,
2190 2184 match=match,
2191 2185 )
2192 2186
2193 2187
2194 2188 memmanifestctx = interfaceutil.implementer(
2195 2189 repository.imanifestrevisionwritable
2196 2190 )(MemManifestCtx)
2197 2191
2198 2192 if typing.TYPE_CHECKING:
2199 2193 memmanifestctx = MemManifestCtx
2200 2194
2201 2195
2202 2196 class ManifestCtx:
2203 2197 """A class representing a single revision of a manifest, including its
2204 2198 contents, its parent revs, and its linkrev.
2205 2199 """
2206 2200
2207 2201 _data: Optional[manifestdict]
2208 2202
2209 2203 def __init__(self, manifestlog, node):
2210 2204 self._manifestlog = manifestlog
2211 2205 self._data = None
2212 2206
2213 2207 self._node = node
2214 2208
2215 2209 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2216 2210 # but let's add it later when something needs it and we can load it
2217 2211 # lazily.
2218 2212 # self.p1, self.p2 = store.parents(node)
2219 2213 # rev = store.rev(node)
2220 2214 # self.linkrev = store.linkrev(rev)
2221 2215
2222 2216 def _storage(self) -> 'manifestrevlog':
2223 2217 return self._manifestlog.getstorage(b'')
2224 2218
2225 2219 def node(self) -> bytes:
2226 2220 return self._node
2227 2221
2228 2222 def copy(self) -> MemManifestCtx:
2229 2223 memmf = memmanifestctx(self._manifestlog)
2230 2224 memmf._manifestdict = self.read().copy()
2231 2225 return memmf
2232 2226
2233 2227 @propertycache
2234 2228 def parents(self) -> Tuple[bytes, bytes]:
2235 2229 return self._storage().parents(self._node)
2236 2230
2237 2231 def read(self) -> 'manifestdict':
2238 2232 if self._data is None:
2239 2233 nc = self._manifestlog.nodeconstants
2240 2234 if self._node == nc.nullid:
2241 2235 self._data = manifestdict(nc.nodelen)
2242 2236 else:
2243 2237 store = self._storage()
2244 2238 if self._node in store.fulltextcache:
2245 2239 text = pycompat.bytestr(store.fulltextcache[self._node])
2246 2240 else:
2247 2241 text = store.revision(self._node)
2248 2242 arraytext = bytearray(text)
2249 2243 store.fulltextcache[self._node] = arraytext
2250 2244 self._data = manifestdict(nc.nodelen, text)
2251 2245 return self._data
2252 2246
2253 2247 def readfast(self, shallow: bool = False) -> 'manifestdict':
2254 2248 """Calls either readdelta or read, based on which would be less work.
2255 2249 readdelta is called if the delta is against p1, and therefore can be
2256 2250 read quickly.
2257 2251
2258 2252 If `shallow` is True, nothing changes since this is a flat manifest.
2259 2253 """
2260 2254 util.nouideprecwarn(
2261 2255 b'"readfast" is deprecated use "read_any_fast_delta" or "read_delta_parents"',
2262 2256 b"6.9",
2263 2257 stacklevel=2,
2264 2258 )
2265 2259 store = self._storage()
2266 2260 r = store.rev(self._node)
2267 2261 deltaparent = store.deltaparent(r)
2268 2262 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2269 2263 return self.readdelta()
2270 2264 return self.read()
2271 2265
2272 2266 def readdelta(self, shallow: bool = False) -> 'manifestdict':
2273 2267 """Returns a manifest containing just the entries that are present
2274 2268 in this manifest, but not in its p1 manifest. This is efficient to read
2275 2269 if the revlog delta is already against p1.
2276 2270
2277 2271 Changing the value of `shallow` has no effect on flat manifests.
2278 2272 """
2279 2273 util.nouideprecwarn(
2280 2274 b'"readfast" is deprecated use "read_any_fast_delta" or "read_delta_new_entries"',
2281 2275 b"6.9",
2282 2276 stacklevel=2,
2283 2277 )
2284 2278 store = self._storage()
2285 2279 r = store.rev(self._node)
2286 2280 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2287 2281 return manifestdict(store.nodeconstants.nodelen, d)
2288 2282
2289 2283 def read_any_fast_delta(
2290 2284 self,
2291 2285 valid_bases: Optional[Collection[int]] = None,
2292 2286 *,
2293 2287 shallow: bool = False,
2294 2288 ) -> Tuple[Optional[int], manifestdict]:
2295 2289 """see `imanifestrevisionstored` documentation"""
2296 2290 store = self._storage()
2297 2291 r = store.rev(self._node)
2298 2292 deltaparent = store.deltaparent(r)
2299 2293 if valid_bases is None:
2300 2294 # make sure the next check is True
2301 2295 valid_bases = (deltaparent,)
2302 2296 if deltaparent != nullrev and deltaparent in valid_bases:
2303 2297 d = mdiff.patchtext(store.revdiff(deltaparent, r))
2304 2298 return (
2305 2299 deltaparent,
2306 2300 manifestdict(store.nodeconstants.nodelen, d),
2307 2301 )
2308 2302 return (None, self.read())
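# A sketch of the return contract implemented above (variable names are
# hypothetical): the first element tells the caller whether it received a
# delta or a full read.
#
#     base, mf = mctx.read_any_fast_delta(valid_bases={p1_rev})
#     if base is None:
#         pass  # `mf` is the full manifest
#     else:
#         pass  # `mf` holds only entries changed relative to revision `base`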
2309 2303
2310 2304 def read_delta_parents(
2311 2305 self,
2312 2306 *,
2313 2307 shallow: bool = False,
2314 2308 exact: bool = True,
2315 2309 ) -> manifestdict:
2316 2310 """see `interface.imanifestrevisionbase` documentations"""
2317 2311 store = self._storage()
2318 2312 r = store.rev(self._node)
2319 2313 deltaparent = store.deltaparent(r)
2320 2314 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2321 2315 if not exact and deltaparent in parents:
2322 2316 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2323 2317 return manifestdict(store.nodeconstants.nodelen, d)
2324 2318 elif not exact or len(parents) == 0:
2325 2319 return self.read()
2326 2320 elif len(parents) == 1:
2327 2321 p = parents[0]
2328 2322 d = mdiff.patchtext(store.revdiff(p, r))
2329 2323 return manifestdict(store.nodeconstants.nodelen, d)
2330 2324 else:
2331 2325 p1, p2 = parents
2332 2326 d1 = mdiff.patchtext(store.revdiff(p1, r))
2333 2327 d2 = mdiff.patchtext(store.revdiff(p2, r))
2334 2328 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2335 2329 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2336 2330 md = manifestdict(store.nodeconstants.nodelen)
2337 2331 for f, new_node, new_flag in d1.iterentries():
2338 2332 if f not in d2:
2339 2333 continue
2340 2334 if new_node is not None:
2341 2335 md.set(f, new_node, new_flag)
2342 2336 return md
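# The two-parent branch above keeps only files that changed relative to
# *both* parents. A toy illustration of that intersection, with hypothetical
# file sets: if the delta against p1 touches {a, b} and the delta against p2
# touches {b, c}, only `b` differs from both parents, so the exact delta
# contains just `b`.
#
#     >>> d1, d2 = {b'a', b'b'}, {b'b', b'c'}
#     >>> sorted(d1 & d2)
#     [b'b']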
2343 2337
2344 2338 def read_delta_new_entries(self, *, shallow=False) -> manifestdict:
2345 2339 """see `interface.imanifestrevisionbase` documentations"""
2346 2340 # If we are using narrow, returning a delta against an arbitrary
2347 2341 # changeset might return files outside the narrowspec. This can create
2348 2342 # issues when running validation server-side with strict security, as a
2349 2343 # push from a low-privilege user might be seen as adding new revisions
2350 2344 # for files they cannot touch. So we are strict if narrow is involved.
2351 2345 if self._manifestlog.narrowed:
2352 2346 return self.read_delta_parents(shallow=shallow, exact=True)
2353 2347 store = self._storage()
2354 2348 r = store.rev(self._node)
2355 2349 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2356 2350 return manifestdict(store.nodeconstants.nodelen, d)
2357 2351
2358 2352 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2359 2353 return self.read().find(key)
2360 2354
2361 2355
2362 2356 manifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2363 2357 ManifestCtx
2364 2358 )
2365 2359
2366 2360 if typing.TYPE_CHECKING:
2367 2361 manifestctx = ManifestCtx
2368 2362
2369 2363
2370 2364 class MemTreeManifestCtx:
2371 2365 _treemanifest: treemanifest
2372 2366
2373 2367 def __init__(self, manifestlog, dir=b''):
2374 2368 self._manifestlog = manifestlog
2375 2369 self._dir = dir
2376 2370 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2377 2371
2378 2372 def _storage(self) -> manifestrevlog:
2379 2373 return self._manifestlog.getstorage(b'')
2380 2374
2381 2375 def copy(self) -> 'MemTreeManifestCtx':
2382 2376 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2383 2377 memmf._treemanifest = self._treemanifest.copy()
2384 2378 return memmf
2385 2379
2386 2380 def read(self) -> 'treemanifest':
2387 2381 return self._treemanifest
2388 2382
2389 2383 def write(self, transaction, link, p1, p2, added, removed, match=None):
2390 2384 def readtree(dir, node):
2391 2385 return self._manifestlog.get(dir, node).read()
2392 2386
2393 2387 return self._storage().add(
2394 2388 self._treemanifest,
2395 2389 transaction,
2396 2390 link,
2397 2391 p1,
2398 2392 p2,
2399 2393 added,
2400 2394 removed,
2401 2395 readtree=readtree,
2402 2396 match=match,
2403 2397 )
2404 2398
2405 2399
2406 2400 memtreemanifestctx = interfaceutil.implementer(
2407 2401 repository.imanifestrevisionwritable
2408 2402 )(MemTreeManifestCtx)
2409 2403
2410 2404 if typing.TYPE_CHECKING:
2411 2405 memtreemanifestctx = MemTreeManifestCtx
2412 2406
2413 2407
2414 2408 class TreeManifestCtx:
2415 2409 _data: Optional[treemanifest]
2416 2410
2417 2411 def __init__(self, manifestlog, dir, node):
2418 2412 self._manifestlog = manifestlog
2419 2413 self._dir = dir
2420 2414 self._data = None
2421 2415
2422 2416 self._node = node
2423 2417
2424 2418 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2425 2419 # we can instantiate treemanifestctx objects for directories we don't
2426 2420 # have on disk.
2427 2421 # self.p1, self.p2 = store.parents(node)
2428 2422 # rev = store.rev(node)
2429 2423 # self.linkrev = store.linkrev(rev)
2430 2424
2431 2425 def _storage(self) -> manifestrevlog:
2432 2426 narrowmatch = self._manifestlog._narrowmatch
2433 2427 if not narrowmatch.always():
2434 2428 if not narrowmatch.visitdir(self._dir[:-1]):
2435 2429 return excludedmanifestrevlog(
2436 2430 self._manifestlog.nodeconstants, self._dir
2437 2431 )
2438 2432 return self._manifestlog.getstorage(self._dir)
2439 2433
2440 2434 def read(self) -> 'treemanifest':
2441 2435 if self._data is None:
2442 2436 store = self._storage()
2443 2437 if self._node == self._manifestlog.nodeconstants.nullid:
2444 2438 self._data = treemanifest(self._manifestlog.nodeconstants)
2445 2439 # TODO accessing non-public API
2446 2440 elif store._treeondisk:
2447 2441 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2448 2442
2449 2443 def gettext():
2450 2444 return store.revision(self._node)
2451 2445
2452 2446 def readsubtree(dir, subm):
2453 2447 # Set verify to False since we need to be able to create
2454 2448 # subtrees for trees that don't exist on disk.
2455 2449 return self._manifestlog.get(dir, subm, verify=False).read()
2456 2450
2457 2451 m.read(gettext, readsubtree)
2458 2452 m.setnode(self._node)
2459 2453 self._data = m
2460 2454 else:
2461 2455 if self._node in store.fulltextcache:
2462 2456 text = pycompat.bytestr(store.fulltextcache[self._node])
2463 2457 else:
2464 2458 text = store.revision(self._node)
2465 2459 arraytext = bytearray(text)
2466 2460 store.fulltextcache[self._node] = arraytext
2467 2461 self._data = treemanifest(
2468 2462 self._manifestlog.nodeconstants, dir=self._dir, text=text
2469 2463 )
2470 2464
2471 2465 return self._data
2472 2466
2473 2467 def node(self) -> bytes:
2474 2468 return self._node
2475 2469
2476 2470 def copy(self) -> 'MemTreeManifestCtx':
2477 2471 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2478 2472 memmf._treemanifest = self.read().copy()
2479 2473 return memmf
2480 2474
2481 2475 @propertycache
2482 2476 def parents(self) -> Tuple[bytes, bytes]:
2483 2477 return self._storage().parents(self._node)
2484 2478
2485 2479 def readdelta(self, shallow: bool = False) -> AnyManifestDict:
2486 2480 """see `imanifestrevisionstored` documentation"""
2487 2481 util.nouideprecwarn(
2488 2482 b'"readdelta" is deprecated use "read_any_fast_delta" or "read_delta_new_entries"',
2489 2483 b"6.9",
2490 2484 stacklevel=2,
2491 2485 )
2492 2486 store = self._storage()
2493 2487 if shallow:
2494 2488 r = store.rev(self._node)
2495 2489 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2496 2490 return manifestdict(store.nodeconstants.nodelen, d)
2497 2491 else:
2498 2492 # Need to perform a slow delta
2499 2493 r0 = store.deltaparent(store.rev(self._node))
2500 2494 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2501 2495 m1 = self.read()
2502 2496 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2503 2497 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2504 2498 if n1:
2505 2499 md[f] = n1
2506 2500 if fl1:
2507 2501 md.setflag(f, fl1)
2508 2502 return md
2509 2503
2510 2504 def read_any_fast_delta(
2511 2505 self,
2512 2506 valid_bases: Optional[Collection[int]] = None,
2513 2507 *,
2514 2508 shallow: bool = False,
2515 2509 ) -> Tuple[Optional[int], AnyManifestDict]:
2516 2510 """see `imanifestrevisionstored` documentation"""
2517 2511 store = self._storage()
2518 2512 r = store.rev(self._node)
2519 2513 deltaparent = store.deltaparent(r)
2520 2514
2521 2515 if valid_bases is None:
2522 2516 # make sure the next check is True
2523 2517 valid_bases = (deltaparent,)
2524 2518 can_use_delta = deltaparent != nullrev and deltaparent in valid_bases
2525 2519
2526 2520 if shallow:
2527 2521 if can_use_delta:
2528 2522 return (deltaparent, self._read_storage_delta_shallow())
2529 2523 else:
2530 2524 d = store.revision(self._node)
2531 2525 return (None, manifestdict(store.nodeconstants.nodelen, d))
2532 2526 else:
2533 2527 # note: the use of "slow_delta" here is cargo-culted from the previous
2534 2528 # implementation. I am not sure it makes sense, since the goal here is
2535 2529 # to be fast, so why are we computing a delta? On the other hand, tree
2536 2530 # manifest deltas are fairly "cheap" and allow for skipping whole parts
2537 2531 # of the tree that a full read would access. So it might be a good idea.
2538 2532 #
2539 2533 # If we realize we don't need delta here, we should simply use:
2540 2534 #
2541 2535 # return (None, self.read())
2542 2536 if can_use_delta:
2543 2537 return (None, self._read_storage_slow_delta(base=deltaparent))
2544 2538 else:
2545 2539 parents = [
2546 2540 p
2547 2541 for p in store.parentrevs(r)
2548 2542 if p is not nullrev and p in valid_bases
2549 2543 ]
2550 2544 if parents:
2551 2545 best_base = max(parents)
2552 2546 else:
2553 2547 best_base = max(valid_bases)
2554 2548 return (None, self._read_storage_slow_delta(base=best_base))
2555 2549
2556 2550 def _read_storage_delta_shallow(self) -> manifestdict:
2557 2551 store = self._storage()
2558 2552 r = store.rev(self._node)
2559 2553 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2560 2554 return manifestdict(store.nodeconstants.nodelen, d)
2561 2555
2562 2556 def _read_storage_slow_delta(self, base) -> 'treemanifest':
2563 2557 store = self._storage()
2564 2558 if base is None:
2565 2559 base = store.deltaparent(store.rev(self._node))
2566 2560 m0 = self._manifestlog.get(self._dir, store.node(base)).read()
2567 2561 m1 = self.read()
2568 2562 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2569 2563 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2570 2564 if n1:
2571 2565 md[f] = n1
2572 2566 if fl1:
2573 2567 md.setflag(f, fl1)
2574 2568 return md
2575 2569
2576 2570 def read_delta_parents(
2577 2571 self,
2578 2572 *,
2579 2573 shallow: bool = False,
2580 2574 exact: bool = True,
2581 2575 ) -> AnyManifestDict:
2582 2576 """see `interface.imanifestrevisionbase` documentations"""
2583 2577 store = self._storage()
2584 2578 r = store.rev(self._node)
2585 2579 parents = [p for p in store.parentrevs(r) if p is not nullrev]
2586 2580 if not exact:
2587 2581 return self.read_any_fast_delta(parents, shallow=shallow)[1]
2588 2582 elif len(parents) == 0:
2589 2583 if shallow:
2590 2584 d = store.revision(self._node)
2591 2585 return manifestdict(store.nodeconstants.nodelen, d)
2592 2586 else:
2593 2587 return self.read()
2594 2588 elif len(parents) == 1:
2595 2589 p = parents[0]
2596 2590 if shallow:
2597 2591 d = mdiff.patchtext(store.revdiff(p, r))
2598 2592 return manifestdict(store.nodeconstants.nodelen, d)
2599 2593 else:
2600 2594 return self._read_storage_slow_delta(base=p)
2601 2595 else:
2602 2596 p1, p2 = parents
2603 2597 if shallow:
2604 2598 d1 = mdiff.patchtext(store.revdiff(p1, r))
2605 2599 d2 = mdiff.patchtext(store.revdiff(p2, r))
2606 2600 d1 = manifestdict(store.nodeconstants.nodelen, d1)
2607 2601 d2 = manifestdict(store.nodeconstants.nodelen, d2)
2608 2602 md = manifestdict(store.nodeconstants.nodelen)
2609 2603 for f, new_node, new_flag in d1.iterentries():
2610 2604 if f not in d2:
2611 2605 continue
2612 2606 if new_node is not None:
2613 2607 md.set(f, new_node, new_flag)
2614 2608 return md
2615 2609 else:
2616 2610 m1 = self._manifestlog.get(self._dir, store.node(p1)).read()
2617 2611 m2 = self._manifestlog.get(self._dir, store.node(p2)).read()
2618 2612 mc = self.read()
2619 2613 d1 = m1.diff(mc)
2620 2614 d2 = m2.diff(mc)
2621 2615 md = treemanifest(
2622 2616 self._manifestlog.nodeconstants,
2623 2617 dir=self._dir,
2624 2618 )
2625 2619 for f, new_node, new_flag in d1.iterentries():
2626 2620 if f not in d2:
2627 2621 continue
2628 2622 if new_node is not None:
2629 2623 md.set(f, new_node, new_flag)
2630 2624 return md
2631 2625
2632 2626 def read_delta_new_entries(
2633 2627 self, *, shallow: bool = False
2634 2628 ) -> AnyManifestDict:
2635 2629 """see `interface.imanifestrevisionbase` documentations"""
2636 2630 # If we are using narrow, returning a delta against an arbitrary
2637 2631 # changeset might return files outside the narrowspec. This can create
2638 2632 # issues when running validation server-side with strict security, as a
2639 2633 # push from a low-privilege user might be seen as adding new revisions
2640 2634 # for files they cannot touch. So we are strict if narrow is involved.
2641 2635 if self._manifestlog.narrowed:
2642 2636 return self.read_delta_parents(shallow=shallow, exact=True)
2643 2637 # delegate to another existing method for simplicity
2644 2638 store = self._storage()
2645 2639 r = store.rev(self._node)
2646 2640 bases = (store.deltaparent(r),)
2647 2641 return self.read_any_fast_delta(bases, shallow=shallow)[1]
2648 2642
2649 2643 def readfast(self, shallow=False) -> AnyManifestDict:
2650 2644 """Calls either readdelta or read, based on which would be less work.
2651 2645 readdelta is called if the delta is against p1, and therefore can be
2652 2646 read quickly.
2653 2647
2654 2648 If `shallow` is True, it only returns the entries from this manifest,
2655 2649 and not any submanifests.
2656 2650 """
2657 2651 util.nouideprecwarn(
2658 2652 b'"readdelta" is deprecated use "read_any_fast_delta" or "read_delta_parents"',
2659 2653 b"6.9",
2660 2654 stacklevel=2,
2661 2655 )
2662 2656 store = self._storage()
2663 2657 r = store.rev(self._node)
2664 2658 deltaparent = store.deltaparent(r)
2665 2659 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2666 2660 return self.readdelta(shallow=shallow)
2667 2661
2668 2662 if shallow:
2669 2663 return manifestdict(
2670 2664 store.nodeconstants.nodelen, store.revision(self._node)
2671 2665 )
2672 2666 else:
2673 2667 return self.read()
2674 2668
2675 2669 def find(self, key: bytes) -> Tuple[bytes, bytes]:
2676 2670 return self.read().find(key)
2677 2671
2678 2672
2679 2673 treemanifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2680 2674 TreeManifestCtx
2681 2675 )
2682 2676
2683 2677 if typing.TYPE_CHECKING:
2684 2678 treemanifestctx = TreeManifestCtx
2685 2679
2686 2680
2687 2681 class excludeddir(treemanifest):
2688 2682 """Stand-in for a directory that is excluded from the repository.
2689 2683
2690 2684 With narrowing active on a repository that uses treemanifests,
2691 2685 some of the directory revlogs will be excluded from the resulting
2692 2686 clone. This is a huge storage win for clients, but means we need
2693 2687 some sort of pseudo-manifest to surface to internals so we can
2694 2688 detect a merge conflict outside the narrowspec. That's what this
2695 2689 class is: it stands in for a directory whose node is known, but
2696 2690 whose contents are unknown.
2697 2691 """
2698 2692
2699 2693 _files: Dict[bytes, bytes]
2700 2694 _flags: Dict[bytes, bytes]
2701 2695
2702 2696 def __init__(self, nodeconstants, dir, node):
2703 2697 super(excludeddir, self).__init__(nodeconstants, dir)
2704 2698 self._node = node
2705 2699 # Add an empty file, which will be included by iterators and such,
2706 2700 # appearing as the directory itself (i.e. something like "dir/")
2707 2701 self._files[b''] = node
2708 2702 self._flags[b''] = b't'
2709 2703
2710 2704 # Manifests outside the narrowspec should never be modified, so avoid
2711 2705 # copying. This makes a noticeable difference when there are very many
2712 2706 # directories outside the narrowspec. Also, it makes sense for the copy to
2713 2707 # be of the same type as the original, which would not happen with the
2714 2708 # super type's copy().
2715 2709 def copy(self):
2716 2710 return self
2717 2711
2718 2712
2719 2713 class excludeddirmanifestctx(treemanifestctx):
2720 2714 """context wrapper for excludeddir - see that docstring for rationale"""
2721 2715
2722 2716 def __init__(self, nodeconstants, dir, node):
2723 2717 self.nodeconstants = nodeconstants
2724 2718 self._dir = dir
2725 2719 self._node = node
2726 2720
2727 2721 def read(self):
2728 2722 return excludeddir(self.nodeconstants, self._dir, self._node)
2729 2723
2730 2724 def readfast(self, shallow=False):
2731 2725 # special version of readfast since we don't have underlying storage
2732 2726 return self.read()
2733 2727
2734 2728 def write(self, *args):
2735 2729 raise error.ProgrammingError(
2736 2730 b'attempt to write manifest from excluded dir %s' % self._dir
2737 2731 )
2738 2732
2739 2733
2740 2734 class excludedmanifestrevlog(manifestrevlog):
2741 2735 """Stand-in for excluded treemanifest revlogs.
2742 2736
2743 2737 When narrowing is active on a treemanifest repository, we'll have
2744 2738 references to directories we can't see due to the revlog being
2745 2739 skipped. This class exists to conform to the manifestrevlog
2746 2740 interface for those directories and proactively prevent writes to
2747 2741 outside the narrowspec.
2748 2742 """
2749 2743
2750 2744 def __init__(self, nodeconstants, dir):
2751 2745 self.nodeconstants = nodeconstants
2752 2746 self._dir = dir
2753 2747
2754 2748 def __len__(self):
2755 2749 raise error.ProgrammingError(
2756 2750 b'attempt to get length of excluded dir %s' % self._dir
2757 2751 )
2758 2752
2759 2753 def rev(self, node):
2760 2754 raise error.ProgrammingError(
2761 2755 b'attempt to get rev from excluded dir %s' % self._dir
2762 2756 )
2763 2757
2764 2758 def linkrev(self, node):
2765 2759 raise error.ProgrammingError(
2766 2760 b'attempt to get linkrev from excluded dir %s' % self._dir
2767 2761 )
2768 2762
2769 2763 def node(self, rev):
2770 2764 raise error.ProgrammingError(
2771 2765 b'attempt to get node from excluded dir %s' % self._dir
2772 2766 )
2773 2767
2774 2768 def add(self, *args, **kwargs):
2775 2769 # We should never write entries in dirlogs outside the narrow clone.
2776 2770 # However, the method still gets called from writesubtree() in
2777 2771 # _addtree(), so we need to handle it. We should possibly make that code
2778 2772 # avoid calling add() with a clean manifest (_dirty is always False
2779 2773 # in excludeddir instances).
2780 2774 pass
@@ -1,281 +1,281
1 1 # statichttprepo.py - simple http repository class for mercurial
2 2 #
3 3 # This provides read-only repo access to repositories exported via static http
4 4 #
5 5 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import annotations
11 11
12 12 import errno
13 13
14 14 from .i18n import _
15 15 from .node import sha1nodeconstants
16 16 from . import (
17 17 branchmap,
18 18 changelog,
19 19 error,
20 20 localrepo,
21 21 manifest,
22 22 namespaces,
23 23 pathutil,
24 24 pycompat,
25 25 requirements as requirementsmod,
26 26 url,
27 27 util,
28 28 vfs as vfsmod,
29 29 )
30 30 from .utils import (
31 31 urlutil,
32 32 )
33 33
34 34 urlerr = util.urlerr
35 35 urlreq = util.urlreq
36 36
37 37
38 38 class httprangereader:
39 39 def __init__(self, url, opener):
40 40 # we assume opener has HTTPRangeHandler
41 41 self.url = url
42 42 self.pos = 0
43 43 self.opener = opener
44 44 self.name = url
45 45
46 46 def __enter__(self):
47 47 return self
48 48
49 49 def __exit__(self, exc_type, exc_value, traceback):
50 50 self.close()
51 51
52 52 def seek(self, pos):
53 53 self.pos = pos
54 54
55 55 def read(self, n: int = -1):
56 56 req = urlreq.request(pycompat.strurl(self.url))
57 57 end = ''
58 58
59 59 if n == 0:
60 60 return b''
61 61 elif n > 0:
62 62 end = "%d" % (self.pos + n - 1)
63 63 if self.pos or end:
64 64 req.add_header('Range', 'bytes=%d-%s' % (self.pos, end))
65 65
66 66 try:
67 67 f = self.opener.open(req)
68 68 data = f.read()
69 69 code = f.code
70 70 except urlerr.httperror as inst:
71 71 num = inst.code == 404 and errno.ENOENT or None
72 72 # Explicitly convert the exception to str as Py3 will try
73 73 # convert it to local encoding and with as the HTTPResponse
74 74 # instance doesn't support encode.
75 75 raise IOError(num, str(inst))
76 76 except urlerr.urlerror as inst:
77 77 raise IOError(None, inst.reason)
78 78
79 79 if code == 200:
80 80 # HTTPRangeHandler does nothing if remote does not support
81 81 # Range headers and returns the full entity. Let's slice it.
82 82 if n > 0 and (self.pos + n) < len(data):
83 83 data = data[self.pos : self.pos + n]
84 84 elif self.pos < len(data):
85 85 data = data[self.pos :]
86 86 else:
87 87 data = b''
88 88 elif 0 < n < len(data):
89 89 data = data[:n]
90 90 self.pos += len(data)
91 91 return data
92 92
93 93 def readlines(self):
94 94 return self.read().splitlines(True)
95 95
96 96 def __iter__(self):
97 97 return iter(self.readlines())
98 98
99 99 def close(self):
100 100 pass
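# A hedged usage sketch of the reader; the URL is hypothetical, and the
# opener must include _HTTPRangeHandler (defined below) for ranged reads to
# work:
#
#     opener = urlreq.buildopener(_HTTPRangeHandler())
#     rdr = httprangereader(b'http://example.com/.hg/store/00manifest.i',
#                           opener)
#     rdr.seek(32)
#     chunk = rdr.read(64)  # sends "Range: bytes=32-95" (32 + 64 - 1 = 95)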
101 101
102 102
103 103 # _RangeError and _HTTPRangeHandler were originally in byterange.py,
104 104 # which was itself extracted from urlgrabber. See the last version of
105 105 # byterange.py from history if you need more information.
106 106 class _RangeError(IOError):
107 107 """Error raised when an unsatisfiable range is requested."""
108 108
109 109
110 110 class _HTTPRangeHandler(urlreq.basehandler):
111 111 """Handler that enables HTTP Range headers.
112 112
113 113 This was extremely simple. The Range header is an HTTP feature to
114 114 begin with, so all this class does is tell urllib2 that the
115 115 "206 Partial Content" response from the HTTP server is what we
116 116 expected.
117 117 """
118 118
119 119 def http_error_206(self, req, fp, code, msg, hdrs):
120 120 # 206 Partial Content Response
121 121 r = urlreq.addinfourl(fp, hdrs, req.get_full_url())
122 122 r.code = code
123 123 r.msg = msg
124 124 return r
125 125
126 126 def http_error_416(self, req, fp, code, msg, hdrs):
127 127 # HTTP's Range Not Satisfiable error
128 128 raise _RangeError('Requested Range Not Satisfiable')
129 129
130 130
131 131 def build_opener(ui, authinfo):
132 132 # urllib cannot handle URLs with embedded user or passwd
133 133 urlopener = url.opener(ui, authinfo)
134 134 urlopener.add_handler(_HTTPRangeHandler())
135 135
136 136 class statichttpvfs(vfsmod.abstractvfs):
137 137 def __init__(self, base):
138 138 self.base = base
139 139 self.options = {}
140 140
141 141 def __call__(self, path, mode=b'r', *args, **kw):
142 142 if mode not in (b'r', b'rb'):
143 143 raise IOError('Permission denied')
144 144 f = b"/".join((self.base, urlreq.quote(path)))
145 145 return httprangereader(f, urlopener)
146 146
147 147 def _auditpath(self, path: bytes, mode: bytes) -> None:
148 148 raise NotImplementedError
149 149
150 150 def join(self, path, *insidef):
151 151 if path:
152 152 return pathutil.join(self.base, path, *insidef)
153 153 else:
154 154 return self.base
155 155
156 156 return statichttpvfs
157 157
158 158
159 159 class statichttppeer(localrepo.localpeer):
160 160 def local(self):
161 161 return None
162 162
163 163 def canpush(self):
164 164 return False
165 165
166 166
167 167 class statichttprepository(
168 168 localrepo.localrepository, localrepo.revlogfilestorage
169 169 ):
170 170 supported = localrepo.localrepository._basesupported
171 171
172 manifestlog: manifest.ManifestLog
172 manifestlog: manifest.manifestlog
173 173
174 174 def __init__(self, ui, path):
175 175 self._url = path
176 176 self.ui = ui
177 177
178 178 self.root = path
179 179 u = urlutil.url(path.rstrip(b'/') + b"/.hg")
180 180 self.path, authinfo = u.authinfo()
181 181
182 182 vfsclass = build_opener(ui, authinfo)
183 183 self.vfs = vfsclass(self.path)
184 184 self.cachevfs = vfsclass(self.vfs.join(b'cache'))
185 185 self._phasedefaults = []
186 186
187 187 self.names = namespaces.namespaces()
188 188 self.filtername = None
189 189 self._extrafilterid = None
190 190 self._wanted_sidedata = set()
191 191 self.features = set()
192 192
193 193 try:
194 194 requirements = set(self.vfs.read(b'requires').splitlines())
195 195 except FileNotFoundError:
196 196 requirements = set()
197 197
198 198 # check if it is a non-empty old-style repository
199 199 try:
200 200 with self.vfs(b"00changelog.i") as fp:
201 201 fp.read(1)
202 202 except FileNotFoundError:
203 203 # we do not care about empty old-style repositories here
204 204 msg = _(b"'%s' does not appear to be an hg repository") % path
205 205 raise error.RepoError(msg)
206 206 if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
207 207 storevfs = vfsclass(self.vfs.join(b'store'))
208 208 requirements |= set(storevfs.read(b'requires').splitlines())
209 209
210 210 supportedrequirements = localrepo.gathersupportedrequirements(ui)
211 211 localrepo.ensurerequirementsrecognized(
212 212 requirements, supportedrequirements
213 213 )
214 214 localrepo.ensurerequirementscompatible(ui, requirements)
215 215 self.nodeconstants = sha1nodeconstants
216 216 self.nullid = self.nodeconstants.nullid
217 217
218 218 # setup store
219 219 self.store = localrepo.makestore(requirements, self.path, vfsclass)
220 220 self.spath = self.store.path
221 221 self.svfs = self.store.opener
222 222 self.sjoin = self.store.join
223 223 self._filecache = {}
224 224 self.requirements = requirements
225 225
226 226 rootmanifest = manifest.manifestrevlog(self.nodeconstants, self.svfs)
227 227 self.manifestlog = manifest.manifestlog(
228 228 self.svfs, self, rootmanifest, self.narrowmatch()
229 229 )
230 230 self.changelog = changelog.changelog(self.svfs)
231 231 self._tags = None
232 232 self.nodetagscache = None
233 233 self._branchcaches = branchmap.BranchMapCache()
234 234 self._revbranchcache = None
235 235 self.encodepats = None
236 236 self.decodepats = None
237 237 self._transref = None
238 238 self._dirstate = None
239 239
240 240 def _restrictcapabilities(self, caps):
241 241 caps = super(statichttprepository, self)._restrictcapabilities(caps)
242 242 return caps.difference([b"pushkey"])
243 243
244 244 def url(self):
245 245 return self._url
246 246
247 247 def local(self):
248 248 return False
249 249
250 250 def peer(self, path=None, remotehidden=False):
251 251 return statichttppeer(self, path=path, remotehidden=remotehidden)
252 252
253 253 def wlock(self, wait=True):
254 254 raise error.LockUnavailable(
255 255 0,
256 256 pycompat.sysstr(_(b'lock not available')),
257 257 b'lock',
258 258 _(b'cannot lock static-http repository'),
259 259 )
260 260
261 261 def lock(self, wait=True):
262 262 raise error.LockUnavailable(
263 263 0,
264 264 pycompat.sysstr(_(b'lock not available')),
265 265 b'lock',
266 266 _(b'cannot lock static-http repository'),
267 267 )
268 268
269 269 def _writecaches(self):
270 270 pass # statichttprepository are read only
271 271
272 272
273 273 def make_peer(
274 274 ui, path, create, intents=None, createopts=None, remotehidden=False
275 275 ):
276 276 if create:
277 277 raise error.Abort(_(b'cannot create new static-http repository'))
278 278 url = path.loc[7:]
279 279 return statichttprepository(ui, url).peer(
280 280 path=path, remotehidden=remotehidden
281 281 )
@@ -1,1258 +1,1258
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import annotations
9 9
10 10 import collections
11 11 import functools
12 12 import os
13 13 import re
14 14 import stat
15 15 import typing
16 16
17 17 from typing import (
18 18 Generator,
19 19 List,
20 20 Optional,
21 21 )
22 22
23 23 from .i18n import _
24 24 from .thirdparty import attr
25 25
26 26 # Force pytype to use the non-vendored package
27 27 if typing.TYPE_CHECKING:
28 28 # noinspection PyPackageRequirements
29 29 import attr
30 30
31 31 from .node import hex
32 32 from .revlogutils.constants import (
33 33 INDEX_HEADER,
34 34 KIND_CHANGELOG,
35 35 KIND_FILELOG,
36 36 KIND_MANIFESTLOG,
37 37 )
38 38 from . import (
39 39 changelog,
40 40 error,
41 41 filelog,
42 42 manifest,
43 43 policy,
44 44 pycompat,
45 45 revlog as revlogmod,
46 46 util,
47 47 vfs as vfsmod,
48 48 )
49 49 from .utils import hashutil
50 50
51 51 parsers = policy.importmod('parsers')
52 52 # how many bytes should be read from fncache in one read
53 53 # this is done to prevent loading large fncache files into memory
54 54 fncache_chunksize = 10**6
55 55
56 56
57 57 def _match_tracked_entry(entry: "BaseStoreEntry", matcher):
58 58 """parses a fncache entry and returns whether the entry is tracking a path
59 59 matched by matcher or not.
60 60
61 61 If matcher is None, returns True"""
62 62
63 63 if matcher is None:
64 64 return True
65 65
66 66 # TODO: make this safe for other entry types. Currently, the various
67 67 # store.data_entry generators only yield RevlogStoreEntry, so the
68 68 # attributes do exist on `entry`.
69 69 # pytype: disable=attribute-error
70 70 if entry.is_filelog:
71 71 return matcher(entry.target_id)
72 72 elif entry.is_manifestlog:
73 73 return matcher.visitdir(entry.target_id.rstrip(b'/'))
74 74 # pytype: enable=attribute-error
75 75 raise error.ProgrammingError(b"cannot process entry %r" % entry)
76 76
77 77
78 78 # This avoids a collision between a file named foo and a dir named
79 79 # foo.i or foo.d
80 80 def _encodedir(path):
81 81 """
82 82 >>> _encodedir(b'data/foo.i')
83 83 'data/foo.i'
84 84 >>> _encodedir(b'data/foo.i/bla.i')
85 85 'data/foo.i.hg/bla.i'
86 86 >>> _encodedir(b'data/foo.i.hg/bla.i')
87 87 'data/foo.i.hg.hg/bla.i'
88 88 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
89 89 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
90 90 """
91 91 return (
92 92 path.replace(b".hg/", b".hg.hg/")
93 93 .replace(b".i/", b".i.hg/")
94 94 .replace(b".d/", b".d.hg/")
95 95 )
96 96
97 97
98 98 encodedir = getattr(parsers, 'encodedir', _encodedir)
99 99
100 100
101 101 def decodedir(path):
102 102 """
103 103 >>> decodedir(b'data/foo.i')
104 104 'data/foo.i'
105 105 >>> decodedir(b'data/foo.i.hg/bla.i')
106 106 'data/foo.i/bla.i'
107 107 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
108 108 'data/foo.i.hg/bla.i'
109 109 """
110 110 if b".hg/" not in path:
111 111 return path
112 112 return (
113 113 path.replace(b".d.hg/", b".d/")
114 114 .replace(b".i.hg/", b".i/")
115 115 .replace(b".hg.hg/", b".hg/")
116 116 )
117 117
118 118
119 119 def _reserved():
120 120 """characters that are problematic for filesystems
121 121
122 122 * ascii escapes (0..31)
123 123 * ascii hi (126..255)
124 124 * windows specials
125 125
126 126 these characters will be escaped by encodefunctions
127 127 """
128 128 winreserved = [ord(x) for x in u'\\:*?"<>|']
129 129 for x in range(32):
130 130 yield x
131 131 for x in range(126, 256):
132 132 yield x
133 133 for x in winreserved:
134 134 yield x
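# Worked count: 32 ascii escapes + 130 high bytes (126..255) + 8 Windows
# specials = 170 reserved byte values in total.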
135 135
136 136
137 137 def _buildencodefun():
138 138 """
139 139 >>> enc, dec = _buildencodefun()
140 140
141 141 >>> enc(b'nothing/special.txt')
142 142 'nothing/special.txt'
143 143 >>> dec(b'nothing/special.txt')
144 144 'nothing/special.txt'
145 145
146 146 >>> enc(b'HELLO')
147 147 '_h_e_l_l_o'
148 148 >>> dec(b'_h_e_l_l_o')
149 149 'HELLO'
150 150
151 151 >>> enc(b'hello:world?')
152 152 'hello~3aworld~3f'
153 153 >>> dec(b'hello~3aworld~3f')
154 154 'hello:world?'
155 155
156 156 >>> enc(b'the\\x07quick\\xADshot')
157 157 'the~07quick~adshot'
158 158 >>> dec(b'the~07quick~adshot')
159 159 'the\\x07quick\\xadshot'
160 160 """
161 161 e = b'_'
162 162 xchr = pycompat.bytechr
163 163 asciistr = list(map(xchr, range(127)))
164 164 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
165 165
166 166 cmap = {x: x for x in asciistr}
167 167 for x in _reserved():
168 168 cmap[xchr(x)] = b"~%02x" % x
169 169 for x in capitals + [ord(e)]:
170 170 cmap[xchr(x)] = e + xchr(x).lower()
171 171
172 172 dmap = {}
173 173 for k, v in cmap.items():
174 174 dmap[v] = k
175 175
176 176 def decode(s):
177 177 i = 0
178 178 while i < len(s):
179 179 for l in range(1, 4):
180 180 try:
181 181 yield dmap[s[i : i + l]]
182 182 i += l
183 183 break
184 184 except KeyError:
185 185 pass
186 186 else:
187 187 raise KeyError
188 188
189 189 return (
190 190 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
191 191 lambda s: b''.join(list(decode(s))),
192 192 )
193 193
194 194
195 195 _encodefname, _decodefname = _buildencodefun()
196 196
197 197
198 198 def encodefilename(s):
199 199 """
200 200 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
201 201 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
202 202 """
203 203 return _encodefname(encodedir(s))
204 204
205 205
206 206 def decodefilename(s):
207 207 """
208 208 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
209 209 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
210 210 """
211 211 return decodedir(_decodefname(s))
212 212
213 213
214 214 def _buildlowerencodefun():
215 215 """
216 216 >>> f = _buildlowerencodefun()
217 217 >>> f(b'nothing/special.txt')
218 218 'nothing/special.txt'
219 219 >>> f(b'HELLO')
220 220 'hello'
221 221 >>> f(b'hello:world?')
222 222 'hello~3aworld~3f'
223 223 >>> f(b'the\\x07quick\\xADshot')
224 224 'the~07quick~adshot'
225 225 """
226 226 xchr = pycompat.bytechr
227 227 cmap = {xchr(x): xchr(x) for x in range(127)}
228 228 for x in _reserved():
229 229 cmap[xchr(x)] = b"~%02x" % x
230 230 for x in range(ord(b"A"), ord(b"Z") + 1):
231 231 cmap[xchr(x)] = xchr(x).lower()
232 232
233 233 def lowerencode(s):
234 234 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
235 235
236 236 return lowerencode
237 237
238 238
239 239 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
240 240
241 241 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
242 242 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
243 243 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
244 244
245 245
246 246 def _auxencode(path, dotencode):
247 247 """
248 248 Encodes filenames containing names reserved by Windows or which end in
249 249 period or space. Does not touch other single reserved characters c.
250 250 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
251 251 Additionally encodes space or period at the beginning, if dotencode is
252 252 True. Parameter path is assumed to be all lowercase.
253 253 A segment only needs encoding if a reserved name appears as a
254 254 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
255 255 doesn't need encoding.
256 256
257 257 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
258 258 >>> _auxencode(s.split(b'/'), True)
259 259 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
260 260 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
261 261 >>> _auxencode(s.split(b'/'), False)
262 262 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
263 263 >>> _auxencode([b'foo. '], True)
264 264 ['foo.~20']
265 265 >>> _auxencode([b' .foo'], True)
266 266 ['~20.foo']
267 267 """
268 268 for i, n in enumerate(path):
269 269 if not n:
270 270 continue
271 271 if dotencode and n[0] in b'. ':
272 272 n = b"~%02x" % ord(n[0:1]) + n[1:]
273 273 path[i] = n
274 274 else:
275 275 l = n.find(b'.')
276 276 if l == -1:
277 277 l = len(n)
278 278 if (l == 3 and n[:3] in _winres3) or (
279 279 l == 4
280 280 and n[3:4] <= b'9'
281 281 and n[3:4] >= b'1'
282 282 and n[:3] in _winres4
283 283 ):
284 284 # encode third letter ('aux' -> 'au~78')
285 285 ec = b"~%02x" % ord(n[2:3])
286 286 n = n[0:2] + ec + n[3:]
287 287 path[i] = n
288 288 if n[-1] in b'. ':
289 289 # encode last period or space ('foo...' -> 'foo..~2e')
290 290 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
291 291 return path
292 292
293 293
294 294 _maxstorepathlen = 120
295 295 _dirprefixlen = 8
296 296 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
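# i.e. with the defaults above, 8 * (8 + 1) - 4 = 68 characters at most for
# the shortened directory prefix used by _hashencode below.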
297 297
298 298
299 299 def _hashencode(path, dotencode):
300 300 digest = hex(hashutil.sha1(path).digest())
301 301 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
302 302 parts = _auxencode(le, dotencode)
303 303 basename = parts[-1]
304 304 _root, ext = os.path.splitext(basename)
305 305 sdirs = []
306 306 sdirslen = 0
307 307 for p in parts[:-1]:
308 308 d = p[:_dirprefixlen]
309 309 if d[-1] in b'. ':
310 310 # Windows can't access dirs ending in period or space
311 311 d = d[:-1] + b'_'
312 312 if sdirslen == 0:
313 313 t = len(d)
314 314 else:
315 315 t = sdirslen + 1 + len(d)
316 316 if t > _maxshortdirslen:
317 317 break
318 318 sdirs.append(d)
319 319 sdirslen = t
320 320 dirs = b'/'.join(sdirs)
321 321 if len(dirs) > 0:
322 322 dirs += b'/'
323 323 res = b'dh/' + dirs + digest + ext
324 324 spaceleft = _maxstorepathlen - len(res)
325 325 if spaceleft > 0:
326 326 filler = basename[:spaceleft]
327 327 res = b'dh/' + dirs + filler + digest + ext
328 328 return res
329 329
330 330
331 331 def _hybridencode(path, dotencode):
332 332 """encodes path with a length limit
333 333
334 334 Encodes all paths that begin with 'data/', according to the following.
335 335
336 336 Default encoding (reversible):
337 337
338 338 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
339 339 characters are encoded as '~xx', where xx is the two digit hex code
340 340 of the character (see encodefilename).
341 341 Relevant path components consisting of Windows reserved filenames are
342 342 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
343 343
344 344 Hashed encoding (not reversible):
345 345
346 346 If the default-encoded path is longer than _maxstorepathlen, a
347 347 non-reversible hybrid hashing of the path is done instead.
348 348 This encoding uses up to _dirprefixlen characters of all directory
349 349 levels of the lowerencoded path, but not more levels than can fit into
350 350 _maxshortdirslen.
351 351 Then comes the filler, followed by the sha digest of the full path.
352 352 The filler is the beginning of the basename of the lowerencoded path
353 353 (the basename is everything after the last path separator). The filler
354 354 is as long as possible, filling in characters from the basename until
355 355 the encoded path has _maxstorepathlen characters (or all chars of the
356 356 basename have been taken).
357 357 The extension (e.g. '.i' or '.d') is preserved.
358 358
359 359 The string 'data/' at the beginning is replaced with 'dh/' if the hashed
360 360 encoding is used.
361 361 """
362 362 path = encodedir(path)
363 363 ef = _encodefname(path).split(b'/')
364 364 res = b'/'.join(_auxencode(ef, dotencode))
365 365 if len(res) > _maxstorepathlen:
366 366 res = _hashencode(path, dotencode)
367 367 return res
368 368
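# A doctest-style sketch of the two regimes of _hybridencode (the paths
# below are made up for illustration):
#
#   >>> _hybridencode(b'data/FOO.i', True)     # short: default encoding
#   'data/_f_o_o.i'
#   >>> long = _hybridencode(b'data/' + b'X' * 200 + b'.i', True)
#   >>> long.startswith(b'dh/') and len(long) <= _maxstorepathlen
#   True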
369 369
370 370 def _pathencode(path):
371 371 de = encodedir(path)
372 372 if len(path) > _maxstorepathlen:
373 373 return _hashencode(de, True)
374 374 ef = _encodefname(de).split(b'/')
375 375 res = b'/'.join(_auxencode(ef, True))
376 376 if len(res) > _maxstorepathlen:
377 377 return _hashencode(de, True)
378 378 return res
379 379
380 380
381 381 _pathencode = getattr(parsers, 'pathencode', _pathencode)
382 382
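# Both lowerencode (above) and _pathencode prefer a C implementation from
# the parsers module when one is available, falling back to the pure-Python
# versions, which are written to match the C behavior.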
383 383
384 384 def _plainhybridencode(f):
385 385 return _hybridencode(f, False)
386 386
387 387
388 388 def _calcmode(vfs):
389 389 try:
390 390 # files in .hg/ will be created using this mode
391 391 mode = vfs.stat().st_mode
392 392 # avoid some useless chmods
393 393 if (0o777 & ~util.umask) == (0o777 & mode):
394 394 mode = None
395 395 except OSError:
396 396 mode = None
397 397 return mode
398 398
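# Worked example (assuming a umask of 0o022): a .hg/ directory with stat
# mode 0o40755 gives 0o777 & ~0o022 == 0o755 == 0o777 & 0o40755, so mode
# becomes None and the useless chmod calls are skipped; any other
# combination keeps the stat mode so new files can be created to match it.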
399 399
400 400 _data = [
401 401 b'bookmarks',
402 402 b'narrowspec',
403 403 b'data',
404 404 b'meta',
405 405 b'00manifest.d',
406 406 b'00manifest.i',
407 407 b'00changelog.d',
408 408 b'00changelog.i',
409 409 b'phaseroots',
410 410 b'obsstore',
411 411 b'requires',
412 412 ]
413 413
414 414 REVLOG_FILES_EXT = (
415 415 b'.i',
416 416 b'.idx',
417 417 b'.d',
418 418 b'.dat',
419 419 b'.n',
420 420 b'.nd',
421 421 b'.sda',
422 422 )
423 423 # file extensions that also use a `-SOMELONGIDHASH.ext` form
424 424 REVLOG_FILES_LONG_EXT = (
425 425 b'.nd',
426 426 b'.idx',
427 427 b'.dat',
428 428 b'.sda',
429 429 )
430 430 # files that are "volatile" and might change between listing and streaming
431 431 #
432 432 # note: the ".nd" files are nodemap data and won't "change" but they might be
433 433 # deleted.
434 434 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
435 435
436 436 # some exceptions to the above matching
437 437 #
438 438 # XXX This is currently not in use because of issue6542
439 439 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
440 440
441 441
442 442 def is_revlog(f, kind, st):
443 443 if kind != stat.S_IFREG:
444 444 return False
445 445 if f.endswith(REVLOG_FILES_EXT):
446 446 return True
447 447 return False
448 448
449 449
450 450 def is_revlog_file(f):
451 451 if f.endswith(REVLOG_FILES_EXT):
452 452 return True
453 453 return False
454 454
455 455
456 456 @attr.s(slots=True)
457 457 class StoreFile:
458 458 """a file matching a store entry"""
459 459
460 460 unencoded_path = attr.ib()
461 461 _file_size = attr.ib(default=None)
462 462 is_volatile = attr.ib(default=False)
463 463
464 464 def file_size(self, vfs):
465 465 if self._file_size is None:
466 466 if vfs is None:
467 467 msg = b"calling vfs-less file_size without prior call: %s"
468 468 msg %= self.unencoded_path
469 469 raise error.ProgrammingError(msg)
470 470 try:
471 471 self._file_size = vfs.stat(self.unencoded_path).st_size
472 472 except FileNotFoundError:
473 473 self._file_size = 0
474 474 return self._file_size
475 475
476 476 @property
477 477 def has_size(self):
478 478 return self._file_size is not None
479 479
480 480 def get_stream(self, vfs, volatiles):
481 481 """return data "stream" information for this file
482 482
483 483 (unencoded_file_path, content_iterator, content_size)
484 484 """
485 485 size = self.file_size(None)
486 486
487 487 def get_stream():
488 488 path = vfs.join(self.unencoded_path)
489 489 with volatiles.open(path) as fp:
490 490 yield None # ready to stream
491 491 if size <= 65536:
492 492 yield fp.read(size)
493 493 else:
494 494 yield from util.filechunkiter(fp, limit=size)
495 495
496 496 s = get_stream()
497 497 next(s)
498 498 return (self.unencoded_path, s, size)
499 499
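# The inner generator above is deliberately "primed": the next(s) call
# runs it up to the first `yield None`, so the file is opened (under the
# volatiles manager) before the tuple is returned, while the actual bytes
# are still produced lazily. A minimal standalone sketch of the pattern:
#
#   def _primed(open_fn):
#       fp = open_fn()
#       yield None          # handle is open; caller may now rely on it
#       yield fp.read()     # data is read only when the stream is consumed
#
#   s = _primed(lambda: open('somefile', 'rb'))
#   next(s)                 # force the open to happen eagerly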
500 500
501 501 @attr.s(slots=True, init=False)
502 502 class BaseStoreEntry:
503 503 """An entry in the store
504 504
505 505 This is returned by `store.walk` and represents some data in the store."""
506 506
507 507 maybe_volatile = True
508 508
509 509 def files(self) -> List[StoreFile]:
510 510 raise NotImplementedError
511 511
512 512 def get_streams(
513 513 self,
514 514 repo=None,
515 515 vfs=None,
516 516 volatiles=None,
517 517 max_changeset=None,
518 518 preserve_file_count=False,
519 519 ):
520 520 """return a list of data stream associated to files for this entry
521 521
522 522 return [(unencoded_file_path, content_iterator, content_size), …]
523 523 """
524 524 assert vfs is not None
525 525 return [f.get_stream(vfs, volatiles) for f in self.files()]
526 526
527 527
528 528 @attr.s(slots=True, init=False)
529 529 class SimpleStoreEntry(BaseStoreEntry):
530 530 """A generic entry in the store"""
531 531
532 532 is_revlog = False
533 533
534 534 maybe_volatile = attr.ib()
535 535 _entry_path = attr.ib()
536 536 _is_volatile = attr.ib(default=False)
537 537 _file_size = attr.ib(default=None)
538 538 _files = attr.ib(default=None)
539 539
540 540 def __init__(
541 541 self,
542 542 entry_path,
543 543 is_volatile=False,
544 544 file_size=None,
545 545 ):
546 546 super().__init__()
547 547 self._entry_path = entry_path
548 548 self._is_volatile = is_volatile
549 549 self._file_size = file_size
550 550 self._files = None
551 551 self.maybe_volatile = is_volatile
552 552
553 553 def files(self) -> List[StoreFile]:
554 554 if self._files is None:
555 555 self._files = [
556 556 StoreFile(
557 557 unencoded_path=self._entry_path,
558 558 file_size=self._file_size,
559 559 is_volatile=self._is_volatile,
560 560 )
561 561 ]
562 562 return self._files
563 563
564 564
565 565 @attr.s(slots=True, init=False)
566 566 class RevlogStoreEntry(BaseStoreEntry):
567 567 """A revlog entry in the store"""
568 568
569 569 is_revlog = True
570 570
571 571 revlog_type = attr.ib(default=None)
572 572 target_id = attr.ib(default=None)
573 573 maybe_volatile = attr.ib(default=True)
574 574 _path_prefix = attr.ib(default=None)
575 575 _details = attr.ib(default=None)
576 576 _files = attr.ib(default=None)
577 577
578 578 def __init__(
579 579 self,
580 580 revlog_type,
581 581 path_prefix,
582 582 target_id,
583 583 details,
584 584 ):
585 585 super().__init__()
586 586 self.revlog_type = revlog_type
587 587 self.target_id = target_id
588 588 self._path_prefix = path_prefix
589 589 assert b'.i' in details, (path_prefix, details)
590 590 for ext in details:
591 591 if ext.endswith(REVLOG_FILES_VOLATILE_EXT):
592 592 self.maybe_volatile = True
593 593 break
594 594 else:
595 595 self.maybe_volatile = False
596 596 self._details = details
597 597 self._files = None
598 598
599 599 @property
600 600 def is_changelog(self):
601 601 return self.revlog_type == KIND_CHANGELOG
602 602
603 603 @property
604 604 def is_manifestlog(self):
605 605 return self.revlog_type == KIND_MANIFESTLOG
606 606
607 607 @property
608 608 def is_filelog(self):
609 609 return self.revlog_type == KIND_FILELOG
610 610
611 611 def main_file_path(self):
612 612 """unencoded path of the main revlog file"""
613 613 return self._path_prefix + b'.i'
614 614
615 615 def files(self) -> List[StoreFile]:
616 616 if self._files is None:
617 617 self._files = []
618 618 for ext in sorted(self._details, key=_ext_key):
619 619 path = self._path_prefix + ext
620 620 file_size = self._details[ext]
621 621 # files that are "volatile" and might change between
622 622 # listing and streaming
623 623 #
624 624 # note: the ".nd" files are nodemap data and won't "change"
625 625 # but they might be deleted.
626 626 volatile = ext.endswith(REVLOG_FILES_VOLATILE_EXT)
627 627 f = StoreFile(path, file_size, volatile)
628 628 self._files.append(f)
629 629 return self._files
630 630
631 631 def get_streams(
632 632 self,
633 633 repo=None,
634 634 vfs=None,
635 635 volatiles=None,
636 636 max_changeset=None,
637 637 preserve_file_count=False,
638 638 ):
639 639 pre_sized = all(f.has_size for f in self.files())
640 640 if pre_sized and (
641 641 repo is None
642 642 or max_changeset is None
643 643 # This uses revlog-v2; ignore for now
644 644 or any(k.endswith(b'.idx') for k in self._details.keys())
645 645 # This is not inline, no race expected
646 646 or b'.d' in self._details
647 647 ):
648 648 return super().get_streams(
649 649 repo=repo,
650 650 vfs=vfs,
651 651 volatiles=volatiles,
652 652 max_changeset=max_changeset,
653 653 preserve_file_count=preserve_file_count,
654 654 )
655 655 elif not preserve_file_count:
656 656 stream = [
657 657 f.get_stream(vfs, volatiles)
658 658 for f in self.files()
659 659 if not f.unencoded_path.endswith((b'.i', b'.d'))
660 660 ]
661 661 rl = self.get_revlog_instance(repo).get_revlog()
662 662 rl_stream = rl.get_streams(max_changeset)
663 663 stream.extend(rl_stream)
664 664 return stream
665 665
666 666 name_to_size = {}
667 667 for f in self.files():
668 668 name_to_size[f.unencoded_path] = f.file_size(None)
669 669
670 670 stream = [
671 671 f.get_stream(vfs, volatiles)
672 672 for f in self.files()
673 673 if not f.unencoded_path.endswith(b'.i')
674 674 ]
675 675
676 676 index_path = self._path_prefix + b'.i'
677 677
678 678 index_file = None
679 679 try:
680 680 index_file = vfs(index_path)
681 681 header = index_file.read(INDEX_HEADER.size)
682 682 if revlogmod.revlog.is_inline_index(header):
683 683 size = name_to_size[index_path]
684 684
685 685 # no split underneath, just return the stream
686 686 def get_stream():
687 687 fp = index_file
688 688 try:
689 689 fp.seek(0)
690 690 yield None
691 691 if size <= 65536:
692 692 yield fp.read(size)
693 693 else:
694 694 yield from util.filechunkiter(fp, limit=size)
695 695 finally:
696 696 fp.close()
697 697
698 698 s = get_stream()
699 699 next(s)
700 700 index_file = None
701 701 stream.append((index_path, s, size))
702 702 else:
703 703 rl = self.get_revlog_instance(repo).get_revlog()
704 704 rl_stream = rl.get_streams(max_changeset, force_inline=True)
705 705 for name, s, size in rl_stream:
706 706 if name_to_size.get(name, 0) != size:
707 707 msg = _(b"expected %d bytes but %d provided for %s")
708 708 msg %= name_to_size.get(name, 0), size, name
709 709 raise error.Abort(msg)
710 710 stream.extend(rl_stream)
711 711 finally:
712 712 if index_file is not None:
713 713 index_file.close()
714 714
715 715 files = self.files()
716 716 assert len(stream) == len(files), (
717 717 stream,
718 718 files,
719 719 self._path_prefix,
720 720 self.target_id,
721 721 )
722 722 return stream
723 723
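# A reading of the branches above: when every file size is pre-computed
# and no inline-index race is possible, plain per-file streams suffice.
# Otherwise the revlog instance re-emits its own data: without
# preserve_file_count a racy inline index may be streamed as separate
# .i/.d files, while with it the index is either streamed verbatim (if
# still inline) or re-inlined via force_inline=True with a size check.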
724 724 def get_revlog_instance(self, repo):
725 725 """Obtain a revlog instance from this store entry
726 726
727 727 An instance of the appropriate class is returned.
728 728 """
729 729 if self.is_changelog:
730 730 return changelog.changelog(repo.svfs)
731 731 elif self.is_manifestlog:
732 732 mandir = self.target_id
733 733 return manifest.manifestrevlog(
734 734 repo.nodeconstants, repo.svfs, tree=mandir
735 735 )
736 736 else:
737 737 return filelog.filelog(repo.svfs, self.target_id)
738 738
739 739
740 740 def _gather_revlog(files_data):
741 741 """group files per revlog prefix
742 742
743 743 This returns a two-level nested dict. The top-level key is the revlog
744 744 prefix without extension; the second level maps each file "suffix" seen
745 745 for this revlog to arbitrary file data as value.
746 746 """
747 747 revlogs = collections.defaultdict(dict)
748 748 for u, value in files_data:
749 749 name, ext = _split_revlog_ext(u)
750 750 revlogs[name][ext] = value
751 751 return sorted(revlogs.items())
752 752
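# For instance (hypothetical sizes), per-file data is grouped by revlog
# prefix:
#
#   >>> _gather_revlog([(b'data/foo.i', 64), (b'data/foo.d', 256)])
#   [('data/foo', {'.i': 64, '.d': 256})]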
753 753
754 754 def _split_revlog_ext(filename):
755 755 """split the revlog file prefix from the variable extension"""
756 756 if filename.endswith(REVLOG_FILES_LONG_EXT):
757 757 char = b'-'
758 758 else:
759 759 char = b'.'
760 760 idx = filename.rfind(char)
761 761 return filename[:idx], filename[idx:]
762 762
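# Doctest-style examples (illustrative): plain extensions split at the
# last dot, while the long `-SOMELONGIDHASH.ext` forms split at the dash:
#
#   >>> _split_revlog_ext(b'data/foo.i')
#   ('data/foo', '.i')
#   >>> _split_revlog_ext(b'data/foo-a1b2c3d4.nd')
#   ('data/foo', '-a1b2c3d4.nd')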
763 763
764 764 def _ext_key(ext):
765 765 """a key to order revlog suffix
766 766
767 767 important to issue .i after other entry."""
768 768 # the only important part of this order is to keep the `.i` last.
769 769 if ext.endswith(b'.n'):
770 770 return (0, ext)
771 771 elif ext.endswith(b'.nd'):
772 772 return (10, ext)
773 773 elif ext.endswith(b'.d'):
774 774 return (20, ext)
775 775 elif ext.endswith(b'.i'):
776 776 return (50, ext)
777 777 else:
778 778 return (40, ext)
779 779
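# The resulting order (illustrative):
#
#   >>> sorted([b'.i', b'.d', b'.nd', b'.n'], key=_ext_key)
#   ['.n', '.nd', '.d', '.i']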
780 780
781 781 class basicstore:
782 782 '''base class for local repository stores'''
783 783
784 784 def __init__(self, path, vfstype):
785 785 vfs = vfstype(path)
786 786 self.path = vfs.base
787 787 self.createmode = _calcmode(vfs)
788 788 vfs.createmode = self.createmode
789 789 self.rawvfs = vfs
790 790 self.vfs = vfsmod.filtervfs(vfs, encodedir)
791 791 self.opener = self.vfs
792 792
793 793 def join(self, f):
794 794 return self.path + b'/' + encodedir(f)
795 795
796 796 def _walk(self, relpath, recurse, undecodable=None):
797 797 '''returns a sorted list of (unencoded, size) pairs'''
798 798 path = self.path
799 799 if relpath:
800 800 path += b'/' + relpath
801 801 striplen = len(self.path) + 1
802 802 l = []
803 803 if self.rawvfs.isdir(path):
804 804 visit = [path]
805 805 readdir = self.rawvfs.readdir
806 806 while visit:
807 807 p = visit.pop()
808 808 for f, kind, st in readdir(p, stat=True):
809 809 fp = p + b'/' + f
810 810 if is_revlog(f, kind, st):
811 811 n = util.pconvert(fp[striplen:])
812 812 l.append((decodedir(n), st.st_size))
813 813 elif kind == stat.S_IFDIR and recurse:
814 814 visit.append(fp)
815 815
816 816 l.sort()
817 817 return l
818 818
819 819 def changelog(self, trypending, concurrencychecker=None):
820 820 return changelog.changelog(
821 821 self.vfs,
822 822 trypending=trypending,
823 823 concurrencychecker=concurrencychecker,
824 824 )
825 825
826 def manifestlog(self, repo, storenarrowmatch) -> manifest.ManifestLog:
826 def manifestlog(self, repo, storenarrowmatch) -> manifest.manifestlog:
827 827 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
828 828 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
829 829
830 830 def data_entries(
831 831 self, matcher=None, undecodable=None
832 832 ) -> Generator[BaseStoreEntry, None, None]:
833 833 """Like walk, but excluding the changelog and root manifest.
834 834
835 835 When [undecodable] is None, revlog names that can't be
836 836 decoded cause an exception. When it is provided, it should
837 837 be a list and the filenames that can't be decoded are added
838 838 to it instead. This is very rarely needed."""
839 839 dirs = [
840 840 (b'data', KIND_FILELOG, False),
841 841 (b'meta', KIND_MANIFESTLOG, True),
842 842 ]
843 843 for base_dir, rl_type, strip_filename in dirs:
844 844 files = self._walk(base_dir, True, undecodable=undecodable)
845 845 for revlog, details in _gather_revlog(files):
846 846 revlog_target_id = revlog.split(b'/', 1)[1]
847 847 if strip_filename and b'/' in revlog:
848 848 revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
849 849 revlog_target_id += b'/'
850 850 yield RevlogStoreEntry(
851 851 path_prefix=revlog,
852 852 revlog_type=rl_type,
853 853 target_id=revlog_target_id,
854 854 details=details,
855 855 )
856 856
857 857 def top_entries(
858 858 self, phase=False, obsolescence=False
859 859 ) -> Generator[BaseStoreEntry, None, None]:
860 860 if phase and self.vfs.exists(b'phaseroots'):
861 861 yield SimpleStoreEntry(
862 862 entry_path=b'phaseroots',
863 863 is_volatile=True,
864 864 )
865 865
866 866 if obsolescence and self.vfs.exists(b'obsstore'):
867 867 # XXX if we had the file size it could be non-volatile
868 868 yield SimpleStoreEntry(
869 869 entry_path=b'obsstore',
870 870 is_volatile=True,
871 871 )
872 872
873 873 files = reversed(self._walk(b'', False))
874 874
875 875 changelogs = collections.defaultdict(dict)
876 876 manifestlogs = collections.defaultdict(dict)
877 877
878 878 for u, s in files:
879 879 if u.startswith(b'00changelog'):
880 880 name, ext = _split_revlog_ext(u)
881 881 changelogs[name][ext] = s
882 882 elif u.startswith(b'00manifest'):
883 883 name, ext = _split_revlog_ext(u)
884 884 manifestlogs[name][ext] = s
885 885 else:
886 886 yield SimpleStoreEntry(
887 887 entry_path=u,
888 888 is_volatile=False,
889 889 file_size=s,
890 890 )
891 891 # yield manifest before changelog
892 892 top_rl = [
893 893 (manifestlogs, KIND_MANIFESTLOG),
894 894 (changelogs, KIND_CHANGELOG),
895 895 ]
896 896 assert len(manifestlogs) <= 1
897 897 assert len(changelogs) <= 1
898 898 for data, revlog_type in top_rl:
899 899 for revlog, details in sorted(data.items()):
900 900 yield RevlogStoreEntry(
901 901 path_prefix=revlog,
902 902 revlog_type=revlog_type,
903 903 target_id=b'',
904 904 details=details,
905 905 )
906 906
907 907 def walk(
908 908 self, matcher=None, phase=False, obsolescence=False
909 909 ) -> Generator[BaseStoreEntry, None, None]:
910 910 """return files related to data storage (ie: revlogs)
911 911
912 912 yields instance from BaseStoreEntry subclasses
913 913
914 914 if a matcher is passed, only storage files of tracked paths that
915 915 match the matcher are yielded
916 916 """
917 917 # yield data files first
918 918 for x in self.data_entries(matcher):
919 919 yield x
920 920 for x in self.top_entries(phase=phase, obsolescence=obsolescence):
921 921 yield x
922 922
923 923 def copylist(self):
924 924 return _data
925 925
926 926 def write(self, tr):
927 927 pass
928 928
929 929 def invalidatecaches(self):
930 930 pass
931 931
932 932 def markremoved(self, fn):
933 933 pass
934 934
935 935 def __contains__(self, path):
936 936 '''Checks if the store contains path'''
937 937 path = b"/".join((b"data", path))
938 938 # file?
939 939 if self.vfs.exists(path + b".i"):
940 940 return True
941 941 # dir?
942 942 if not path.endswith(b"/"):
943 943 path = path + b"/"
944 944 return self.vfs.exists(path)
945 945
946 946
947 947 class encodedstore(basicstore):
948 948 def __init__(self, path, vfstype):
949 949 vfs = vfstype(path + b'/store')
950 950 self.path = vfs.base
951 951 self.createmode = _calcmode(vfs)
952 952 vfs.createmode = self.createmode
953 953 self.rawvfs = vfs
954 954 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
955 955 self.opener = self.vfs
956 956
957 957 def _walk(self, relpath, recurse, undecodable=None):
958 958 old = super()._walk(relpath, recurse)
959 959 new = []
960 960 for f1, value in old:
961 961 try:
962 962 f2 = decodefilename(f1)
963 963 except KeyError:
964 964 if undecodable is None:
965 965 msg = _(b'undecodable revlog name %s') % f1
966 966 raise error.StorageError(msg)
967 967 else:
968 968 undecodable.append(f1)
969 969 continue
970 970 new.append((f2, value))
971 971 return new
972 972
973 973 def data_entries(
974 974 self, matcher=None, undecodable=None
975 975 ) -> Generator[BaseStoreEntry, None, None]:
976 976 entries = super(encodedstore, self).data_entries(
977 977 undecodable=undecodable
978 978 )
979 979 for entry in entries:
980 980 if _match_tracked_entry(entry, matcher):
981 981 yield entry
982 982
983 983 def join(self, f):
984 984 return self.path + b'/' + encodefilename(f)
985 985
986 986 def copylist(self):
987 987 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
988 988
989 989
990 990 class fncache:
991 991 # the filename used to be partially encoded
992 992 # hence the encodedir/decodedir dance
993 993 def __init__(self, vfs):
994 994 self.vfs = vfs
995 995 self._ignores = set()
996 996 self.entries = None
997 997 self._dirty = False
998 998 # set of new additions to fncache
999 999 self.addls = set()
1000 1000
1001 1001 def ensureloaded(self, warn=None):
1002 1002 """read the fncache file if not already read.
1003 1003
1004 1004 If the file on disk is corrupted, raise. If warn is provided,
1005 1005 warn and keep going instead."""
1006 1006 if self.entries is None:
1007 1007 self._load(warn)
1008 1008
1009 1009 def _load(self, warn=None):
1010 1010 '''fill the entries from the fncache file'''
1011 1011 self._dirty = False
1012 1012 try:
1013 1013 fp = self.vfs(b'fncache', mode=b'rb')
1014 1014 except IOError:
1015 1015 # skip nonexistent file
1016 1016 self.entries = set()
1017 1017 return
1018 1018
1019 1019 self.entries = set()
1020 1020 chunk = b''
1021 1021 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
1022 1022 chunk += c
1023 1023 try:
1024 1024 p = chunk.rindex(b'\n')
1025 1025 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
1026 1026 chunk = chunk[p + 1 :]
1027 1027 except ValueError:
1028 1028 # substring '\n' not found, maybe the entry is bigger than the
1029 1029 # chunksize, so let's keep iterating
1030 1030 pass
1031 1031
1032 1032 if chunk:
1033 1033 msg = _(b"fncache does not end with a newline")
1034 1034 if warn:
1035 1035 warn(msg + b'\n')
1036 1036 else:
1037 1037 raise error.Abort(
1038 1038 msg,
1039 1039 hint=_(
1040 1040 b"use 'hg debugrebuildfncache' to "
1041 1041 b"rebuild the fncache"
1042 1042 ),
1043 1043 )
1044 1044 self._checkentries(fp, warn)
1045 1045 fp.close()
1046 1046
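# The chunked read above only ever splits at complete lines: rindex finds
# the last newline in the accumulated buffer, everything up to it is
# decoded into entries, and the partial tail is carried into the next
# iteration. A sketch with hypothetical tiny chunks:
#
#   chunk 1: b'data/a.i\ndata/b'  -> entries {'data/a.i'}, tail b'data/b'
#   chunk 2: tail + b'.i\n'       -> entries {'data/a.i', 'data/b.i'}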
1047 1047 def _checkentries(self, fp, warn):
1048 1048 """make sure there is no empty string in entries"""
1049 1049 if b'' in self.entries:
1050 1050 fp.seek(0)
1051 1051 for n, line in enumerate(fp):
1052 1052 if not line.rstrip(b'\n'):
1053 1053 t = _(b'invalid entry in fncache, line %d') % (n + 1)
1054 1054 if warn:
1055 1055 warn(t + b'\n')
1056 1056 else:
1057 1057 raise error.Abort(t)
1058 1058
1059 1059 def write(self, tr):
1060 1060 if self._dirty:
1061 1061 assert self.entries is not None
1062 1062 self.entries = self.entries | self.addls
1063 1063 self.addls = set()
1064 1064 tr.addbackup(b'fncache')
1065 1065 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
1066 1066 if self.entries:
1067 1067 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
1068 1068 fp.close()
1069 1069 self._dirty = False
1070 1070 if self.addls:
1071 1071 # if we have just new entries, let's append them to the fncache
1072 1072 tr.addbackup(b'fncache')
1073 1073 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
1074 1074 if self.addls:
1075 1075 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
1076 1076 fp.close()
1077 1077 self.entries = None
1078 1078 self.addls = set()
1079 1079
1080 1080 def addignore(self, fn):
1081 1081 self._ignores.add(fn)
1082 1082
1083 1083 def add(self, fn):
1084 1084 if fn in self._ignores:
1085 1085 return
1086 1086 if self.entries is None:
1087 1087 self._load()
1088 1088 if fn not in self.entries:
1089 1089 self.addls.add(fn)
1090 1090
1091 1091 def remove(self, fn):
1092 1092 if self.entries is None:
1093 1093 self._load()
1094 1094 if fn in self.addls:
1095 1095 self.addls.remove(fn)
1096 1096 return
1097 1097 try:
1098 1098 self.entries.remove(fn)
1099 1099 self._dirty = True
1100 1100 except KeyError:
1101 1101 pass
1102 1102
1103 1103 def __contains__(self, fn):
1104 1104 if fn in self.addls:
1105 1105 return True
1106 1106 if self.entries is None:
1107 1107 self._load()
1108 1108 return fn in self.entries
1109 1109
1110 1110 def __iter__(self):
1111 1111 if self.entries is None:
1112 1112 self._load()
1113 1113 return iter(self.entries | self.addls)
1114 1114
1115 1115
1116 1116 class _fncachevfs(vfsmod.proxyvfs):
1117 1117 def __init__(self, vfs, fnc, encode):
1118 1118 vfsmod.proxyvfs.__init__(self, vfs)
1119 1119 self.fncache = fnc
1120 1120 self.encode = encode
1121 1121
1122 1122 def __call__(self, path, mode=b'r', *args, **kw):
1123 1123 encoded = self.encode(path)
1124 1124 if (
1125 1125 mode not in (b'r', b'rb')
1126 1126 and (path.startswith(b'data/') or path.startswith(b'meta/'))
1127 1127 and is_revlog_file(path)
1128 1128 ):
1129 1129 # do not trigger a fncache load when adding a file that already is
1130 1130 # known to exist.
1131 1131 notload = self.fncache.entries is None and (
1132 1132 # if the file has size zero, it should be considered missing.
1133 1133 # Such zero-size files are the result of truncation when a
1134 1134 # transaction is aborted.
1135 1135 self.vfs.exists(encoded)
1136 1136 and self.vfs.stat(encoded).st_size
1137 1137 )
1138 1138 if not notload:
1139 1139 self.fncache.add(path)
1140 1140 return self.vfs(encoded, mode, *args, **kw)
1141 1141
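# In other words, merely opening a data/ or meta/ revlog file for writing
# registers it. A hypothetical usage sketch:
#
#   fp = vfs(b'data/newfile.i', b'wb')   # side effect: b'data/newfile.i'
#   fp.write(data)                       # is queued in fncache.addls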
1142 1142 def join(self, path: Optional[bytes], *insidef: bytes) -> bytes:
1143 1143 insidef = (self.encode(f) for f in insidef)
1144 1144
1145 1145 if path:
1146 1146 return self.vfs.join(self.encode(path), *insidef)
1147 1147 else:
1148 1148 return self.vfs.join(path, *insidef)
1149 1149
1150 1150 def register_file(self, path):
1151 1151 """generic hook point to lets fncache steer its stew"""
1152 1152 if path.startswith(b'data/') or path.startswith(b'meta/'):
1153 1153 self.fncache.add(path)
1154 1154
1155 1155
1156 1156 class fncachestore(basicstore):
1157 1157 def __init__(self, path, vfstype, dotencode):
1158 1158 if dotencode:
1159 1159 encode = _pathencode
1160 1160 else:
1161 1161 encode = _plainhybridencode
1162 1162 self.encode = encode
1163 1163 vfs = vfstype(path + b'/store')
1164 1164 self.path = vfs.base
1165 1165 self.pathsep = self.path + b'/'
1166 1166 self.createmode = _calcmode(vfs)
1167 1167 vfs.createmode = self.createmode
1168 1168 self.rawvfs = vfs
1169 1169 fnc = fncache(vfs)
1170 1170 self.fncache = fnc
1171 1171 self.vfs = _fncachevfs(vfs, fnc, encode)
1172 1172 self.opener = self.vfs
1173 1173
1174 1174 def join(self, f):
1175 1175 return self.pathsep + self.encode(f)
1176 1176
1177 1177 def getsize(self, path):
1178 1178 return self.rawvfs.stat(path).st_size
1179 1179
1180 1180 def data_entries(
1181 1181 self, matcher=None, undecodable=None
1182 1182 ) -> Generator[BaseStoreEntry, None, None]:
1183 1183 # Note: all files in fncache should be revlog related. However, the
1184 1184 # fncache might contain such files added by previous versions of
1185 1185 # Mercurial.
1186 1186 files = ((f, None) for f in self.fncache if is_revlog_file(f))
1187 1187 by_revlog = _gather_revlog(files)
1188 1188 for revlog, details in by_revlog:
1189 1189 if revlog.startswith(b'data/'):
1190 1190 rl_type = KIND_FILELOG
1191 1191 revlog_target_id = revlog.split(b'/', 1)[1]
1192 1192 elif revlog.startswith(b'meta/'):
1193 1193 rl_type = KIND_MANIFESTLOG
1194 1194 # drop the initial directory and the `00manifest` file part
1195 1195 tmp = revlog.split(b'/', 1)[1]
1196 1196 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
1197 1197 else:
1198 1198 # unreachable
1199 1199 assert False, revlog
1200 1200 entry = RevlogStoreEntry(
1201 1201 path_prefix=revlog,
1202 1202 revlog_type=rl_type,
1203 1203 target_id=revlog_target_id,
1204 1204 details=details,
1205 1205 )
1206 1206 if _match_tracked_entry(entry, matcher):
1207 1207 yield entry
1208 1208
1209 1209 def copylist(self):
1210 1210 d = (
1211 1211 b'bookmarks',
1212 1212 b'narrowspec',
1213 1213 b'data',
1214 1214 b'meta',
1215 1215 b'dh',
1216 1216 b'fncache',
1217 1217 b'phaseroots',
1218 1218 b'obsstore',
1219 1219 b'00manifest.d',
1220 1220 b'00manifest.i',
1221 1221 b'00changelog.d',
1222 1222 b'00changelog.i',
1223 1223 b'requires',
1224 1224 )
1225 1225 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1226 1226
1227 1227 def write(self, tr):
1228 1228 self.fncache.write(tr)
1229 1229
1230 1230 def invalidatecaches(self):
1231 1231 self.fncache.entries = None
1232 1232 self.fncache.addls = set()
1233 1233
1234 1234 def markremoved(self, fn):
1235 1235 self.fncache.remove(fn)
1236 1236
1237 1237 def _exists(self, f):
1238 1238 ef = self.encode(f)
1239 1239 try:
1240 1240 self.getsize(ef)
1241 1241 return True
1242 1242 except FileNotFoundError:
1243 1243 return False
1244 1244
1245 1245 def __contains__(self, path):
1246 1246 '''Checks if the store contains path'''
1247 1247 path = b"/".join((b"data", path))
1248 1248 # check for files (exact match)
1249 1249 e = path + b'.i'
1250 1250 if e in self.fncache and self._exists(e):
1251 1251 return True
1252 1252 # now check for directories (prefix match)
1253 1253 if not path.endswith(b'/'):
1254 1254 path += b'/'
1255 1255 for e in self.fncache:
1256 1256 if e.startswith(path) and self._exists(e):
1257 1257 return True
1258 1258 return False
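# Illustrative membership checks (paths made up): `b'foo/bar' in store` is
# true when the fncache lists an existing b'data/foo/bar.i' (a tracked
# file) or any existing entry under b'data/foo/bar/' (a directory of
# tracked files).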