##// END OF EJS Templates
manifest: help pytype to understant `writesubtrees`'s `getnode` type...
marmoute -
r52666:e2f1efa2 default
parent child Browse files
Show More
@@ -1,2434 +1,2436 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import heapq
10 10 import itertools
11 11 import struct
12 12 import weakref
13 13
14 14 from typing import (
15 15 Iterable,
16 16 )
17 17
18 18 from .i18n import _
19 19 from .node import (
20 20 bin,
21 21 hex,
22 22 nullrev,
23 23 )
24 24 from . import (
25 25 encoding,
26 26 error,
27 27 match as matchmod,
28 28 mdiff,
29 29 pathutil,
30 30 policy,
31 31 pycompat,
32 32 revlog,
33 33 util,
34 34 )
35 35 from .interfaces import (
36 36 repository,
37 37 util as interfaceutil,
38 38 )
39 39 from .revlogutils import (
40 40 constants as revlog_constants,
41 41 )
42 42
43 43 parsers = policy.importmod('parsers')
44 44 propertycache = util.propertycache
45 45
46 46 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
47 47 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
48 48
49 49
50 50 def _parse(nodelen, data):
51 51 # This method does a little bit of excessive-looking
52 52 # precondition checking. This is so that the behavior of this
53 53 # class exactly matches its C counterpart to try and help
54 54 # prevent surprise breakage for anyone that develops against
55 55 # the pure version.
56 56 if data and data[-1:] != b'\n':
57 57 raise ValueError(b'Manifest did not end in a newline.')
58 58 prev = None
59 59 for l in data.splitlines():
60 60 if prev is not None and prev > l:
61 61 raise ValueError(b'Manifest lines not in sorted order.')
62 62 prev = l
63 63 f, n = l.split(b'\0')
64 64 nl = len(n)
65 65 flags = n[-1:]
66 66 if flags in _manifestflags:
67 67 n = n[:-1]
68 68 nl -= 1
69 69 else:
70 70 flags = b''
71 71 if nl != 2 * nodelen:
72 72 raise ValueError(b'Invalid manifest line')
73 73
74 74 yield f, bin(n), flags
75 75
76 76
77 77 def _text(it):
78 78 files = []
79 79 lines = []
80 80 for f, n, fl in it:
81 81 files.append(f)
82 82 # if this is changed to support newlines in filenames,
83 83 # be sure to check the templates/ dir again (especially *-raw.tmpl)
84 84 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
85 85
86 86 _checkforbidden(files)
87 87 return b''.join(lines)
88 88
89 89
90 90 class lazymanifestiter:
91 91 def __init__(self, lm):
92 92 self.pos = 0
93 93 self.lm = lm
94 94
95 95 def __iter__(self):
96 96 return self
97 97
98 98 def next(self):
99 99 try:
100 100 data, pos = self.lm._get(self.pos)
101 101 except IndexError:
102 102 raise StopIteration
103 103 if pos == -1:
104 104 self.pos += 1
105 105 return data[0]
106 106 self.pos += 1
107 107 zeropos = data.find(b'\x00', pos)
108 108 return data[pos:zeropos]
109 109
110 110 __next__ = next
111 111
112 112
113 113 class lazymanifestiterentries:
114 114 def __init__(self, lm):
115 115 self.lm = lm
116 116 self.pos = 0
117 117
118 118 def __iter__(self):
119 119 return self
120 120
121 121 def next(self):
122 122 try:
123 123 data, pos = self.lm._get(self.pos)
124 124 except IndexError:
125 125 raise StopIteration
126 126 if pos == -1:
127 127 self.pos += 1
128 128 return data
129 129 zeropos = data.find(b'\x00', pos)
130 130 nlpos = data.find(b'\n', pos)
131 131 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
132 132 raise error.StorageError(b'Invalid manifest line')
133 133 flags = data[nlpos - 1 : nlpos]
134 134 if flags in _manifestflags:
135 135 hlen = nlpos - zeropos - 2
136 136 else:
137 137 hlen = nlpos - zeropos - 1
138 138 flags = b''
139 139 if hlen != 2 * self.lm._nodelen:
140 140 raise error.StorageError(b'Invalid manifest line')
141 141 hashval = unhexlify(
142 142 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
143 143 )
144 144 self.pos += 1
145 145 return (data[pos:zeropos], hashval, flags)
146 146
147 147 __next__ = next
148 148
149 149
150 150 def unhexlify(data: bytes, extra: int, pos, length: int):
151 151 s = bin(data[pos : pos + length])
152 152 if extra:
153 153 s += bytes([extra & 0xFF])
154 154 return s
155 155
156 156
157 157 def _cmp(a, b):
158 158 return (a > b) - (a < b)
159 159
160 160
161 161 _manifestflags = {b'', b'l', b't', b'x'}
162 162
163 163
164 164 class _LazyManifest:
165 165 """A pure python manifest backed by a byte string. It is supplimented with
166 166 internal lists as it is modified, until it is compacted back to a pure byte
167 167 string.
168 168
169 169 ``data`` is the initial manifest data.
170 170
171 171 ``positions`` is a list of offsets, one per manifest entry. Positive
172 172 values are offsets into ``data``, negative values are offsets into the
173 173 ``extradata`` list. When an entry is removed, its entry is dropped from
174 174 ``positions``. The values are encoded such that when walking the list and
175 175 indexing into ``data`` or ``extradata`` as appropriate, the entries are
176 176 sorted by filename.
177 177
178 178 ``extradata`` is a list of (key, hash, flags) for entries that were added or
179 179 modified since the manifest was created or compacted.
180 180 """
181 181
182 182 def __init__(
183 183 self,
184 184 nodelen,
185 185 data,
186 186 positions=None,
187 187 extrainfo=None,
188 188 extradata=None,
189 189 hasremovals=False,
190 190 ):
191 191 self._nodelen = nodelen
192 192 if positions is None:
193 193 self.positions = self.findlines(data)
194 194 self.extrainfo = [0] * len(self.positions)
195 195 self.data = data
196 196 self.extradata = []
197 197 self.hasremovals = False
198 198 else:
199 199 self.positions = positions[:]
200 200 self.extrainfo = extrainfo[:]
201 201 self.extradata = extradata[:]
202 202 self.data = data
203 203 self.hasremovals = hasremovals
204 204
205 205 def findlines(self, data):
206 206 if not data:
207 207 return []
208 208 pos = data.find(b"\n")
209 209 if pos == -1 or data[-1:] != b'\n':
210 210 raise ValueError(b"Manifest did not end in a newline.")
211 211 positions = [0]
212 212 prev = data[: data.find(b'\x00')]
213 213 while pos < len(data) - 1 and pos != -1:
214 214 positions.append(pos + 1)
215 215 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
216 216 if nexts < prev:
217 217 raise ValueError(b"Manifest lines not in sorted order.")
218 218 prev = nexts
219 219 pos = data.find(b"\n", pos + 1)
220 220 return positions
221 221
222 222 def _get(self, index):
223 223 # get the position encoded in pos:
224 224 # positive number is an index in 'data'
225 225 # negative number is in extrapieces
226 226 pos = self.positions[index]
227 227 if pos >= 0:
228 228 return self.data, pos
229 229 return self.extradata[-pos - 1], -1
230 230
231 231 def _getkey(self, pos):
232 232 if pos >= 0:
233 233 return self.data[pos : self.data.find(b'\x00', pos + 1)]
234 234 return self.extradata[-pos - 1][0]
235 235
236 236 def bsearch(self, key):
237 237 first = 0
238 238 last = len(self.positions) - 1
239 239
240 240 while first <= last:
241 241 midpoint = (first + last) // 2
242 242 nextpos = self.positions[midpoint]
243 243 candidate = self._getkey(nextpos)
244 244 r = _cmp(key, candidate)
245 245 if r == 0:
246 246 return midpoint
247 247 else:
248 248 if r < 0:
249 249 last = midpoint - 1
250 250 else:
251 251 first = midpoint + 1
252 252 return -1
253 253
254 254 def bsearch2(self, key):
255 255 # same as the above, but will always return the position
256 256 # done for performance reasons
257 257 first = 0
258 258 last = len(self.positions) - 1
259 259
260 260 while first <= last:
261 261 midpoint = (first + last) // 2
262 262 nextpos = self.positions[midpoint]
263 263 candidate = self._getkey(nextpos)
264 264 r = _cmp(key, candidate)
265 265 if r == 0:
266 266 return (midpoint, True)
267 267 else:
268 268 if r < 0:
269 269 last = midpoint - 1
270 270 else:
271 271 first = midpoint + 1
272 272 return (first, False)
273 273
274 274 def __contains__(self, key):
275 275 return self.bsearch(key) != -1
276 276
277 277 def __getitem__(self, key):
278 278 if not isinstance(key, bytes):
279 279 raise TypeError(b"getitem: manifest keys must be a bytes.")
280 280 needle = self.bsearch(key)
281 281 if needle == -1:
282 282 raise KeyError
283 283 data, pos = self._get(needle)
284 284 if pos == -1:
285 285 return (data[1], data[2])
286 286 zeropos = data.find(b'\x00', pos)
287 287 nlpos = data.find(b'\n', zeropos)
288 288 assert 0 <= needle <= len(self.positions)
289 289 assert len(self.extrainfo) == len(self.positions)
290 290 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
291 291 raise error.StorageError(b'Invalid manifest line')
292 292 hlen = nlpos - zeropos - 1
293 293 flags = data[nlpos - 1 : nlpos]
294 294 if flags in _manifestflags:
295 295 hlen -= 1
296 296 else:
297 297 flags = b''
298 298 if hlen != 2 * self._nodelen:
299 299 raise error.StorageError(b'Invalid manifest line')
300 300 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
301 301 return (hashval, flags)
302 302
303 303 def __delitem__(self, key):
304 304 needle, found = self.bsearch2(key)
305 305 if not found:
306 306 raise KeyError
307 307 cur = self.positions[needle]
308 308 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
309 309 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
310 310 if cur >= 0:
311 311 # This does NOT unsort the list as far as the search functions are
312 312 # concerned, as they only examine lines mapped by self.positions.
313 313 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
314 314 self.hasremovals = True
315 315
316 316 def __setitem__(self, key, value):
317 317 if not isinstance(key, bytes):
318 318 raise TypeError(b"setitem: manifest keys must be a byte string.")
319 319 if not isinstance(value, tuple) or len(value) != 2:
320 320 raise TypeError(
321 321 b"Manifest values must be a tuple of (node, flags)."
322 322 )
323 323 hashval = value[0]
324 324 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
325 325 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
326 326 flags = value[1]
327 327 if not isinstance(flags, bytes) or len(flags) > 1:
328 328 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
329 329 needle, found = self.bsearch2(key)
330 330 if found:
331 331 # put the item
332 332 pos = self.positions[needle]
333 333 if pos < 0:
334 334 self.extradata[-pos - 1] = (key, hashval, value[1])
335 335 else:
336 336 # just don't bother
337 337 self.extradata.append((key, hashval, value[1]))
338 338 self.positions[needle] = -len(self.extradata)
339 339 else:
340 340 # not found, put it in with extra positions
341 341 self.extradata.append((key, hashval, value[1]))
342 342 self.positions = (
343 343 self.positions[:needle]
344 344 + [-len(self.extradata)]
345 345 + self.positions[needle:]
346 346 )
347 347 self.extrainfo = (
348 348 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
349 349 )
350 350
351 351 def copy(self):
352 352 # XXX call _compact like in C?
353 353 return _lazymanifest(
354 354 self._nodelen,
355 355 self.data,
356 356 self.positions,
357 357 self.extrainfo,
358 358 self.extradata,
359 359 self.hasremovals,
360 360 )
361 361
362 362 def _compact(self):
363 363 # hopefully not called TOO often
364 364 if len(self.extradata) == 0 and not self.hasremovals:
365 365 return
366 366 l = []
367 367 i = 0
368 368 offset = 0
369 369 self.extrainfo = [0] * len(self.positions)
370 370 while i < len(self.positions):
371 371 if self.positions[i] >= 0:
372 372 cur = self.positions[i]
373 373 last_cut = cur
374 374
375 375 # Collect all contiguous entries in the buffer at the current
376 376 # offset, breaking out only for added/modified items held in
377 377 # extradata, or a deleted line prior to the next position.
378 378 while True:
379 379 self.positions[i] = offset
380 380 i += 1
381 381 if i == len(self.positions) or self.positions[i] < 0:
382 382 break
383 383
384 384 # A removed file has no positions[] entry, but does have an
385 385 # overwritten first byte. Break out and find the end of the
386 386 # current good entry/entries if there is a removed file
387 387 # before the next position.
388 388 if (
389 389 self.hasremovals
390 390 and self.data.find(b'\n\x00', cur, self.positions[i])
391 391 != -1
392 392 ):
393 393 break
394 394
395 395 offset += self.positions[i] - cur
396 396 cur = self.positions[i]
397 397 end_cut = self.data.find(b'\n', cur)
398 398 if end_cut != -1:
399 399 end_cut += 1
400 400 offset += end_cut - cur
401 401 l.append(self.data[last_cut:end_cut])
402 402 else:
403 403 while i < len(self.positions) and self.positions[i] < 0:
404 404 cur = self.positions[i]
405 405 t = self.extradata[-cur - 1]
406 406 l.append(self._pack(t))
407 407 self.positions[i] = offset
408 408 # Hashes are either 20 bytes (old sha1s) or 32
409 409 # bytes (new non-sha1).
410 410 hlen = 20
411 411 if len(t[1]) > 25:
412 412 hlen = 32
413 413 if len(t[1]) > hlen:
414 414 self.extrainfo[i] = ord(t[1][hlen + 1])
415 415 offset += len(l[-1])
416 416 i += 1
417 417 self.data = b''.join(l)
418 418 self.hasremovals = False
419 419 self.extradata = []
420 420
421 421 def _pack(self, d):
422 422 n = d[1]
423 423 assert len(n) in (20, 32)
424 424 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
425 425
426 426 def text(self):
427 427 self._compact()
428 428 return self.data
429 429
430 430 def diff(self, m2, clean=False):
431 431 '''Finds changes between the current manifest and m2.'''
432 432 # XXX think whether efficiency matters here
433 433 diff = {}
434 434
435 435 for fn, e1, flags in self.iterentries():
436 436 if fn not in m2:
437 437 diff[fn] = (e1, flags), (None, b'')
438 438 else:
439 439 e2 = m2[fn]
440 440 if (e1, flags) != e2:
441 441 diff[fn] = (e1, flags), e2
442 442 elif clean:
443 443 diff[fn] = None
444 444
445 445 for fn, e2, flags in m2.iterentries():
446 446 if fn not in self:
447 447 diff[fn] = (None, b''), (e2, flags)
448 448
449 449 return diff
450 450
451 451 def iterentries(self):
452 452 return lazymanifestiterentries(self)
453 453
454 454 def iterkeys(self):
455 455 return lazymanifestiter(self)
456 456
457 457 def __iter__(self):
458 458 return lazymanifestiter(self)
459 459
460 460 def __len__(self):
461 461 return len(self.positions)
462 462
463 463 def filtercopy(self, filterfn):
464 464 # XXX should be optimized
465 465 c = _lazymanifest(self._nodelen, b'')
466 466 for f, n, fl in self.iterentries():
467 467 if filterfn(f):
468 468 c[f] = n, fl
469 469 return c
470 470
471 471
472 472 try:
473 473 _lazymanifest = parsers.lazymanifest
474 474 except AttributeError:
475 475 _lazymanifest = _LazyManifest
476 476
477 477
478 478 class ManifestDict:
479 479 def __init__(self, nodelen, data=b''):
480 480 self._nodelen = nodelen
481 481 self._lm = _lazymanifest(nodelen, data)
482 482
483 483 def __getitem__(self, key):
484 484 return self._lm[key][0]
485 485
486 486 def find(self, key):
487 487 return self._lm[key]
488 488
489 489 def __len__(self):
490 490 return len(self._lm)
491 491
492 492 def __nonzero__(self):
493 493 # nonzero is covered by the __len__ function, but implementing it here
494 494 # makes it easier for extensions to override.
495 495 return len(self._lm) != 0
496 496
497 497 __bool__ = __nonzero__
498 498
499 499 def set(self, key, node, flags):
500 500 self._lm[key] = node, flags
501 501
502 502 def __setitem__(self, key, node):
503 503 self._lm[key] = node, self.flags(key)
504 504
505 505 def __contains__(self, key):
506 506 if key is None:
507 507 return False
508 508 return key in self._lm
509 509
510 510 def __delitem__(self, key):
511 511 del self._lm[key]
512 512
513 513 def __iter__(self):
514 514 return self._lm.__iter__()
515 515
516 516 def iterkeys(self):
517 517 return self._lm.iterkeys()
518 518
519 519 def keys(self):
520 520 return list(self.iterkeys())
521 521
522 522 def filesnotin(self, m2, match=None):
523 523 '''Set of files in this manifest that are not in the other'''
524 524 if match is not None:
525 525 match = matchmod.badmatch(match, lambda path, msg: None)
526 526 sm2 = set(m2.walk(match))
527 527 return {f for f in self.walk(match) if f not in sm2}
528 528 return {f for f in self if f not in m2}
529 529
530 530 @propertycache
531 531 def _dirs(self):
532 532 return pathutil.dirs(self)
533 533
534 534 def dirs(self):
535 535 return self._dirs
536 536
537 537 def hasdir(self, dir):
538 538 return dir in self._dirs
539 539
540 540 def _filesfastpath(self, match):
541 541 """Checks whether we can correctly and quickly iterate over matcher
542 542 files instead of over manifest files."""
543 543 files = match.files()
544 544 return len(files) < 100 and (
545 545 match.isexact()
546 546 or (match.prefix() and all(fn in self for fn in files))
547 547 )
548 548
549 549 def walk(self, match):
550 550 """Generates matching file names.
551 551
552 552 Equivalent to manifest.matches(match).iterkeys(), but without creating
553 553 an entirely new manifest.
554 554
555 555 It also reports nonexistent files by marking them bad with match.bad().
556 556 """
557 557 if match.always():
558 558 for f in iter(self):
559 559 yield f
560 560 return
561 561
562 562 fset = set(match.files())
563 563
564 564 # avoid the entire walk if we're only looking for specific files
565 565 if self._filesfastpath(match):
566 566 for fn in sorted(fset):
567 567 if fn in self:
568 568 yield fn
569 569 return
570 570
571 571 for fn in self:
572 572 if fn in fset:
573 573 # specified pattern is the exact name
574 574 fset.remove(fn)
575 575 if match(fn):
576 576 yield fn
577 577
578 578 # for dirstate.walk, files=[''] means "walk the whole tree".
579 579 # follow that here, too
580 580 fset.discard(b'')
581 581
582 582 for fn in sorted(fset):
583 583 if not self.hasdir(fn):
584 584 match.bad(fn, None)
585 585
586 586 def _matches(self, match):
587 587 '''generate a new manifest filtered by the match argument'''
588 588 if match.always():
589 589 return self.copy()
590 590
591 591 if self._filesfastpath(match):
592 592 m = manifestdict(self._nodelen)
593 593 lm = self._lm
594 594 for fn in match.files():
595 595 if fn in lm:
596 596 m._lm[fn] = lm[fn]
597 597 return m
598 598
599 599 m = manifestdict(self._nodelen)
600 600 m._lm = self._lm.filtercopy(match)
601 601 return m
602 602
603 603 def diff(self, m2, match=None, clean=False):
604 604 """Finds changes between the current manifest and m2.
605 605
606 606 Args:
607 607 m2: the manifest to which this manifest should be compared.
608 608 clean: if true, include files unchanged between these manifests
609 609 with a None value in the returned dictionary.
610 610
611 611 The result is returned as a dict with filename as key and
612 612 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
613 613 nodeid in the current/other manifest and fl1/fl2 is the flag
614 614 in the current/other manifest. Where the file does not exist,
615 615 the nodeid will be None and the flags will be the empty
616 616 string.
617 617 """
618 618 if match:
619 619 m1 = self._matches(match)
620 620 m2 = m2._matches(match)
621 621 return m1.diff(m2, clean=clean)
622 622 return self._lm.diff(m2._lm, clean)
623 623
624 624 def setflag(self, key, flag):
625 625 if flag not in _manifestflags:
626 626 raise TypeError(b"Invalid manifest flag set.")
627 627 self._lm[key] = self[key], flag
628 628
629 629 def get(self, key, default=None):
630 630 try:
631 631 return self._lm[key][0]
632 632 except KeyError:
633 633 return default
634 634
635 635 def flags(self, key):
636 636 try:
637 637 return self._lm[key][1]
638 638 except KeyError:
639 639 return b''
640 640
641 641 def copy(self):
642 642 c = manifestdict(self._nodelen)
643 643 c._lm = self._lm.copy()
644 644 return c
645 645
646 646 def items(self):
647 647 return (x[:2] for x in self._lm.iterentries())
648 648
649 649 def iteritems(self):
650 650 return (x[:2] for x in self._lm.iterentries())
651 651
652 652 def iterentries(self):
653 653 return self._lm.iterentries()
654 654
655 655 def text(self):
656 656 # most likely uses native version
657 657 return self._lm.text()
658 658
659 659 def fastdelta(self, base, changes):
660 660 """Given a base manifest text as a bytearray and a list of changes
661 661 relative to that text, compute a delta that can be used by revlog.
662 662 """
663 663 delta = []
664 664 dstart = None
665 665 dend = None
666 666 dline = [b""]
667 667 start = 0
668 668 # zero copy representation of base as a buffer
669 669 addbuf = util.buffer(base)
670 670
671 671 changes = list(changes)
672 672 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
673 673 # start with a readonly loop that finds the offset of
674 674 # each line and creates the deltas
675 675 for f, todelete in changes:
676 676 # bs will either be the index of the item or the insert point
677 677 start, end = _msearch(addbuf, f, start)
678 678 if not todelete:
679 679 h, fl = self._lm[f]
680 680 l = b"%s\0%s%s\n" % (f, hex(h), fl)
681 681 else:
682 682 if start == end:
683 683 # item we want to delete was not found, error out
684 684 raise AssertionError(
685 685 _(b"failed to remove %s from manifest") % f
686 686 )
687 687 l = b""
688 688 if dstart is not None and dstart <= start and dend >= start:
689 689 if dend < end:
690 690 dend = end
691 691 if l:
692 692 dline.append(l)
693 693 else:
694 694 if dstart is not None:
695 695 delta.append((dstart, dend, b"".join(dline)))
696 696 dstart = start
697 697 dend = end
698 698 dline = [l]
699 699
700 700 if dstart is not None:
701 701 delta.append((dstart, dend, b"".join(dline)))
702 702 # apply the delta to the base, and get a delta for addrevision
703 703 deltatext, arraytext = _addlistdelta(base, delta)
704 704 else:
705 705 # For large changes, it's much cheaper to just build the text and
706 706 # diff it.
707 707 arraytext = bytearray(self.text())
708 708 deltatext = mdiff.textdiff(
709 709 util.buffer(base), util.buffer(arraytext)
710 710 )
711 711
712 712 return arraytext, deltatext
713 713
714 714
715 715 manifestdict = interfaceutil.implementer(repository.imanifestdict)(ManifestDict)
716 716
717 717
718 718 def _msearch(m, s, lo=0, hi=None):
719 719 """return a tuple (start, end) that says where to find s within m.
720 720
721 721 If the string is found m[start:end] are the line containing
722 722 that string. If start == end the string was not found and
723 723 they indicate the proper sorted insertion point.
724 724
725 725 m should be a buffer, a memoryview or a byte string.
726 726 s is a byte string"""
727 727
728 728 def advance(i, c):
729 729 while i < lenm and m[i : i + 1] != c:
730 730 i += 1
731 731 return i
732 732
733 733 if not s:
734 734 return (lo, lo)
735 735 lenm = len(m)
736 736 if not hi:
737 737 hi = lenm
738 738 while lo < hi:
739 739 mid = (lo + hi) // 2
740 740 start = mid
741 741 while start > 0 and m[start - 1 : start] != b'\n':
742 742 start -= 1
743 743 end = advance(start, b'\0')
744 744 if bytes(m[start:end]) < s:
745 745 # we know that after the null there are 40 bytes of sha1
746 746 # this translates to the bisect lo = mid + 1
747 747 lo = advance(end + 40, b'\n') + 1
748 748 else:
749 749 # this translates to the bisect hi = mid
750 750 hi = start
751 751 end = advance(lo, b'\0')
752 752 found = m[lo:end]
753 753 if s == found:
754 754 # we know that after the null there are 40 bytes of sha1
755 755 end = advance(end + 40, b'\n')
756 756 return (lo, end + 1)
757 757 else:
758 758 return (lo, lo)
759 759
760 760
761 761 def _checkforbidden(l):
762 762 """Check filenames for illegal characters."""
763 763 for f in l:
764 764 if b'\n' in f or b'\r' in f:
765 765 raise error.StorageError(
766 766 _(b"'\\n' and '\\r' disallowed in filenames: %r")
767 767 % pycompat.bytestr(f)
768 768 )
769 769
770 770
771 771 # apply the changes collected during the bisect loop to our addlist
772 772 # return a delta suitable for addrevision
773 773 def _addlistdelta(addlist, x):
774 774 # for large addlist arrays, building a new array is cheaper
775 775 # than repeatedly modifying the existing one
776 776 currentposition = 0
777 777 newaddlist = bytearray()
778 778
779 779 for start, end, content in x:
780 780 newaddlist += addlist[currentposition:start]
781 781 if content:
782 782 newaddlist += bytearray(content)
783 783
784 784 currentposition = end
785 785
786 786 newaddlist += addlist[currentposition:]
787 787
788 788 deltatext = b"".join(
789 789 struct.pack(b">lll", start, end, len(content)) + content
790 790 for start, end, content in x
791 791 )
792 792 return deltatext, newaddlist
793 793
794 794
795 795 def _splittopdir(f):
796 796 if b'/' in f:
797 797 dir, subpath = f.split(b'/', 1)
798 798 return dir + b'/', subpath
799 799 else:
800 800 return b'', f
801 801
802 802
803 803 _noop = lambda s: None
804 804
805 805
806 806 class TreeManifest:
807 807 def __init__(self, nodeconstants, dir=b'', text=b''):
808 808 self._dir = dir
809 809 self.nodeconstants = nodeconstants
810 810 self._node = self.nodeconstants.nullid
811 811 self._nodelen = self.nodeconstants.nodelen
812 812 self._loadfunc = _noop
813 813 self._copyfunc = _noop
814 814 self._dirty = False
815 815 self._dirs = {}
816 816 self._lazydirs = {}
817 817 # Using _lazymanifest here is a little slower than plain old dicts
818 818 self._files = {}
819 819 self._flags = {}
820 820 if text:
821 821
822 822 def readsubtree(subdir, subm):
823 823 raise AssertionError(
824 824 b'treemanifest constructor only accepts flat manifests'
825 825 )
826 826
827 827 self.parse(text, readsubtree)
828 828 self._dirty = True # Mark flat manifest dirty after parsing
829 829
830 830 def _subpath(self, path):
831 831 return self._dir + path
832 832
833 833 def _loadalllazy(self):
834 834 selfdirs = self._dirs
835 835 subpath = self._subpath
836 836 for d, (node, readsubtree, docopy) in self._lazydirs.items():
837 837 if docopy:
838 838 selfdirs[d] = readsubtree(subpath(d), node).copy()
839 839 else:
840 840 selfdirs[d] = readsubtree(subpath(d), node)
841 841 self._lazydirs.clear()
842 842
843 843 def _loadlazy(self, d):
844 844 v = self._lazydirs.get(d)
845 845 if v is not None:
846 846 node, readsubtree, docopy = v
847 847 if docopy:
848 848 self._dirs[d] = readsubtree(self._subpath(d), node).copy()
849 849 else:
850 850 self._dirs[d] = readsubtree(self._subpath(d), node)
851 851 del self._lazydirs[d]
852 852
853 853 def _loadchildrensetlazy(self, visit):
854 854 if not visit:
855 855 return None
856 856 if visit == b'all' or visit == b'this':
857 857 self._loadalllazy()
858 858 return None
859 859
860 860 loadlazy = self._loadlazy
861 861 for k in visit:
862 862 loadlazy(k + b'/')
863 863 return visit
864 864
865 865 def _loaddifflazy(self, t1, t2):
866 866 """load items in t1 and t2 if they're needed for diffing.
867 867
868 868 The criteria currently is:
869 869 - if it's not present in _lazydirs in either t1 or t2, load it in the
870 870 other (it may already be loaded or it may not exist, doesn't matter)
871 871 - if it's present in _lazydirs in both, compare the nodeid; if it
872 872 differs, load it in both
873 873 """
874 874 toloadlazy = []
875 875 for d, v1 in t1._lazydirs.items():
876 876 v2 = t2._lazydirs.get(d)
877 877 if v2 is None or v2[0] != v1[0]:
878 878 toloadlazy.append(d)
879 879 for d, v1 in t2._lazydirs.items():
880 880 if d not in t1._lazydirs:
881 881 toloadlazy.append(d)
882 882
883 883 for d in toloadlazy:
884 884 t1._loadlazy(d)
885 885 t2._loadlazy(d)
886 886
887 887 def __len__(self):
888 888 self._load()
889 889 size = len(self._files)
890 890 self._loadalllazy()
891 891 for m in self._dirs.values():
892 892 size += m.__len__()
893 893 return size
894 894
895 895 def __nonzero__(self):
896 896 # Faster than "__len() != 0" since it avoids loading sub-manifests
897 897 return not self._isempty()
898 898
899 899 __bool__ = __nonzero__
900 900
901 901 def _isempty(self):
902 902 self._load() # for consistency; already loaded by all callers
903 903 # See if we can skip loading everything.
904 904 if self._files or (
905 905 self._dirs and any(not m._isempty() for m in self._dirs.values())
906 906 ):
907 907 return False
908 908 self._loadalllazy()
909 909 return not self._dirs or all(m._isempty() for m in self._dirs.values())
910 910
911 911 @encoding.strmethod
912 912 def __repr__(self):
913 913 return (
914 914 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
915 915 % (
916 916 self._dir,
917 917 hex(self._node),
918 918 bool(self._loadfunc is _noop),
919 919 self._dirty,
920 920 id(self),
921 921 )
922 922 )
923 923
924 924 def dir(self):
925 925 """The directory that this tree manifest represents, including a
926 926 trailing '/'. Empty string for the repo root directory."""
927 927 return self._dir
928 928
929 929 def node(self):
930 930 """This node of this instance. nullid for unsaved instances. Should
931 931 be updated when the instance is read or written from a revlog.
932 932 """
933 933 assert not self._dirty
934 934 return self._node
935 935
936 936 def setnode(self, node):
937 937 self._node = node
938 938 self._dirty = False
939 939
940 940 def iterentries(self):
941 941 self._load()
942 942 self._loadalllazy()
943 943 for p, n in sorted(
944 944 itertools.chain(self._dirs.items(), self._files.items())
945 945 ):
946 946 if p in self._files:
947 947 yield self._subpath(p), n, self._flags.get(p, b'')
948 948 else:
949 949 for x in n.iterentries():
950 950 yield x
951 951
952 952 def items(self):
953 953 self._load()
954 954 self._loadalllazy()
955 955 for p, n in sorted(
956 956 itertools.chain(self._dirs.items(), self._files.items())
957 957 ):
958 958 if p in self._files:
959 959 yield self._subpath(p), n
960 960 else:
961 961 for f, sn in n.items():
962 962 yield f, sn
963 963
964 964 iteritems = items
965 965
966 966 def iterkeys(self):
967 967 self._load()
968 968 self._loadalllazy()
969 969 for p in sorted(itertools.chain(self._dirs, self._files)):
970 970 if p in self._files:
971 971 yield self._subpath(p)
972 972 else:
973 973 for f in self._dirs[p]:
974 974 yield f
975 975
976 976 def keys(self):
977 977 return list(self.iterkeys())
978 978
979 979 def __iter__(self):
980 980 return self.iterkeys()
981 981
982 982 def __contains__(self, f):
983 983 if f is None:
984 984 return False
985 985 self._load()
986 986 dir, subpath = _splittopdir(f)
987 987 if dir:
988 988 self._loadlazy(dir)
989 989
990 990 if dir not in self._dirs:
991 991 return False
992 992
993 993 return self._dirs[dir].__contains__(subpath)
994 994 else:
995 995 return f in self._files
996 996
997 997 def get(self, f, default=None):
998 998 self._load()
999 999 dir, subpath = _splittopdir(f)
1000 1000 if dir:
1001 1001 self._loadlazy(dir)
1002 1002
1003 1003 if dir not in self._dirs:
1004 1004 return default
1005 1005 return self._dirs[dir].get(subpath, default)
1006 1006 else:
1007 1007 return self._files.get(f, default)
1008 1008
1009 1009 def __getitem__(self, f):
1010 1010 self._load()
1011 1011 dir, subpath = _splittopdir(f)
1012 1012 if dir:
1013 1013 self._loadlazy(dir)
1014 1014
1015 1015 return self._dirs[dir].__getitem__(subpath)
1016 1016 else:
1017 1017 return self._files[f]
1018 1018
1019 1019 def flags(self, f):
1020 1020 self._load()
1021 1021 dir, subpath = _splittopdir(f)
1022 1022 if dir:
1023 1023 self._loadlazy(dir)
1024 1024
1025 1025 if dir not in self._dirs:
1026 1026 return b''
1027 1027 return self._dirs[dir].flags(subpath)
1028 1028 else:
1029 1029 if f in self._lazydirs or f in self._dirs:
1030 1030 return b''
1031 1031 return self._flags.get(f, b'')
1032 1032
1033 1033 def find(self, f):
1034 1034 self._load()
1035 1035 dir, subpath = _splittopdir(f)
1036 1036 if dir:
1037 1037 self._loadlazy(dir)
1038 1038
1039 1039 return self._dirs[dir].find(subpath)
1040 1040 else:
1041 1041 return self._files[f], self._flags.get(f, b'')
1042 1042
1043 1043 def __delitem__(self, f):
1044 1044 self._load()
1045 1045 dir, subpath = _splittopdir(f)
1046 1046 if dir:
1047 1047 self._loadlazy(dir)
1048 1048
1049 1049 self._dirs[dir].__delitem__(subpath)
1050 1050 # If the directory is now empty, remove it
1051 1051 if self._dirs[dir]._isempty():
1052 1052 del self._dirs[dir]
1053 1053 else:
1054 1054 del self._files[f]
1055 1055 if f in self._flags:
1056 1056 del self._flags[f]
1057 1057 self._dirty = True
1058 1058
1059 1059 def set(self, f, node, flags):
1060 1060 """Set both the node and the flags for path f."""
1061 1061 assert node is not None
1062 1062 if flags not in _manifestflags:
1063 1063 raise TypeError(b"Invalid manifest flag set.")
1064 1064 self._load()
1065 1065 dir, subpath = _splittopdir(f)
1066 1066 if dir:
1067 1067 self._loadlazy(dir)
1068 1068 if dir not in self._dirs:
1069 1069 self._dirs[dir] = treemanifest(
1070 1070 self.nodeconstants, self._subpath(dir)
1071 1071 )
1072 1072 self._dirs[dir].set(subpath, node, flags)
1073 1073 else:
1074 1074 assert len(node) in (20, 32)
1075 1075 self._files[f] = node
1076 1076 self._flags[f] = flags
1077 1077 self._dirty = True
1078 1078
1079 1079 def __setitem__(self, f, n):
1080 1080 assert n is not None
1081 1081 self._load()
1082 1082 dir, subpath = _splittopdir(f)
1083 1083 if dir:
1084 1084 self._loadlazy(dir)
1085 1085 if dir not in self._dirs:
1086 1086 self._dirs[dir] = treemanifest(
1087 1087 self.nodeconstants, self._subpath(dir)
1088 1088 )
1089 1089 self._dirs[dir].__setitem__(subpath, n)
1090 1090 else:
1091 1091 # manifest nodes are either 20 bytes or 32 bytes,
1092 1092 # depending on the hash in use. Assert this as historically
1093 1093 # sometimes extra bytes were added.
1094 1094 assert len(n) in (20, 32)
1095 1095 self._files[f] = n
1096 1096 self._dirty = True
1097 1097
1098 1098 def _load(self):
1099 1099 if self._loadfunc is not _noop:
1100 1100 lf, self._loadfunc = self._loadfunc, _noop
1101 1101 lf(self)
1102 1102 elif self._copyfunc is not _noop:
1103 1103 cf, self._copyfunc = self._copyfunc, _noop
1104 1104 cf(self)
1105 1105
1106 1106 def setflag(self, f, flags):
1107 1107 """Set the flags (symlink, executable) for path f."""
1108 1108 if flags not in _manifestflags:
1109 1109 raise TypeError(b"Invalid manifest flag set.")
1110 1110 self._load()
1111 1111 dir, subpath = _splittopdir(f)
1112 1112 if dir:
1113 1113 self._loadlazy(dir)
1114 1114 if dir not in self._dirs:
1115 1115 self._dirs[dir] = treemanifest(
1116 1116 self.nodeconstants, self._subpath(dir)
1117 1117 )
1118 1118 self._dirs[dir].setflag(subpath, flags)
1119 1119 else:
1120 1120 self._flags[f] = flags
1121 1121 self._dirty = True
1122 1122
1123 1123 def copy(self):
1124 1124 copy = treemanifest(self.nodeconstants, self._dir)
1125 1125 copy._node = self._node
1126 1126 copy._dirty = self._dirty
1127 1127 if self._copyfunc is _noop:
1128 1128
1129 1129 def _copyfunc(s):
1130 1130 self._load()
1131 1131 s._lazydirs = {
1132 1132 d: (n, r, True) for d, (n, r, c) in self._lazydirs.items()
1133 1133 }
1134 1134 sdirs = s._dirs
1135 1135 for d, v in self._dirs.items():
1136 1136 sdirs[d] = v.copy()
1137 1137 s._files = dict.copy(self._files)
1138 1138 s._flags = dict.copy(self._flags)
1139 1139
1140 1140 if self._loadfunc is _noop:
1141 1141 _copyfunc(copy)
1142 1142 else:
1143 1143 copy._copyfunc = _copyfunc
1144 1144 else:
1145 1145 copy._copyfunc = self._copyfunc
1146 1146 return copy
1147 1147
1148 1148 def filesnotin(self, m2, match=None):
1149 1149 '''Set of files in this manifest that are not in the other'''
1150 1150 if match and not match.always():
1151 1151 m1 = self._matches(match)
1152 1152 m2 = m2._matches(match)
1153 1153 return m1.filesnotin(m2)
1154 1154
1155 1155 files = set()
1156 1156
1157 1157 def _filesnotin(t1, t2):
1158 1158 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1159 1159 return
1160 1160 t1._load()
1161 1161 t2._load()
1162 1162 self._loaddifflazy(t1, t2)
1163 1163 for d, m1 in t1._dirs.items():
1164 1164 if d in t2._dirs:
1165 1165 m2 = t2._dirs[d]
1166 1166 _filesnotin(m1, m2)
1167 1167 else:
1168 1168 files.update(m1.iterkeys())
1169 1169
1170 1170 for fn in t1._files:
1171 1171 if fn not in t2._files:
1172 1172 files.add(t1._subpath(fn))
1173 1173
1174 1174 _filesnotin(self, m2)
1175 1175 return files
1176 1176
1177 1177 @propertycache
1178 1178 def _alldirs(self):
1179 1179 return pathutil.dirs(self)
1180 1180
1181 1181 def dirs(self):
1182 1182 return self._alldirs
1183 1183
1184 1184 def hasdir(self, dir):
1185 1185 self._load()
1186 1186 topdir, subdir = _splittopdir(dir)
1187 1187 if topdir:
1188 1188 self._loadlazy(topdir)
1189 1189 if topdir in self._dirs:
1190 1190 return self._dirs[topdir].hasdir(subdir)
1191 1191 return False
1192 1192 dirslash = dir + b'/'
1193 1193 return dirslash in self._dirs or dirslash in self._lazydirs
1194 1194
1195 1195 def walk(self, match):
1196 1196 """Generates matching file names.
1197 1197
1198 1198 It also reports nonexistent files by marking them bad with match.bad().
1199 1199 """
1200 1200 if match.always():
1201 1201 for f in iter(self):
1202 1202 yield f
1203 1203 return
1204 1204
1205 1205 fset = set(match.files())
1206 1206
1207 1207 for fn in self._walk(match):
1208 1208 if fn in fset:
1209 1209 # specified pattern is the exact name
1210 1210 fset.remove(fn)
1211 1211 yield fn
1212 1212
1213 1213 # for dirstate.walk, files=[''] means "walk the whole tree".
1214 1214 # follow that here, too
1215 1215 fset.discard(b'')
1216 1216
1217 1217 for fn in sorted(fset):
1218 1218 if not self.hasdir(fn):
1219 1219 match.bad(fn, None)
1220 1220
1221 1221 def _walk(self, match):
1222 1222 '''Recursively generates matching file names for walk().'''
1223 1223 visit = match.visitchildrenset(self._dir[:-1])
1224 1224 if not visit:
1225 1225 return
1226 1226
1227 1227 # yield this dir's files and walk its submanifests
1228 1228 self._load()
1229 1229 visit = self._loadchildrensetlazy(visit)
1230 1230 for p in sorted(list(self._dirs) + list(self._files)):
1231 1231 if p in self._files:
1232 1232 fullp = self._subpath(p)
1233 1233 if match(fullp):
1234 1234 yield fullp
1235 1235 else:
1236 1236 if not visit or p[:-1] in visit:
1237 1237 for f in self._dirs[p]._walk(match):
1238 1238 yield f
1239 1239
1240 1240 def _matches(self, match):
1241 1241 """recursively generate a new manifest filtered by the match argument."""
1242 1242 if match.always():
1243 1243 return self.copy()
1244 1244 return self._matches_inner(match)
1245 1245
1246 1246 def _matches_inner(self, match):
1247 1247 if match.always():
1248 1248 return self.copy()
1249 1249
1250 1250 visit = match.visitchildrenset(self._dir[:-1])
1251 1251 if visit == b'all':
1252 1252 return self.copy()
1253 1253 ret = treemanifest(self.nodeconstants, self._dir)
1254 1254 if not visit:
1255 1255 return ret
1256 1256
1257 1257 self._load()
1258 1258 for fn in self._files:
1259 1259 # While visitchildrenset *usually* lists only subdirs, this is
1260 1260 # actually up to the matcher and may have some files in the set().
1261 1261 # If visit == 'this', we should obviously look at the files in this
1262 1262 # directory; if visit is a set, and fn is in it, we should inspect
1263 1263 # fn (but no need to inspect things not in the set).
1264 1264 if visit != b'this' and fn not in visit:
1265 1265 continue
1266 1266 fullp = self._subpath(fn)
1267 1267 # visitchildrenset isn't perfect, we still need to call the regular
1268 1268 # matcher code to further filter results.
1269 1269 if not match(fullp):
1270 1270 continue
1271 1271 ret._files[fn] = self._files[fn]
1272 1272 if fn in self._flags:
1273 1273 ret._flags[fn] = self._flags[fn]
1274 1274
1275 1275 visit = self._loadchildrensetlazy(visit)
1276 1276 for dir, subm in self._dirs.items():
1277 1277 if visit and dir[:-1] not in visit:
1278 1278 continue
1279 1279 m = subm._matches_inner(match)
1280 1280 if not m._isempty():
1281 1281 ret._dirs[dir] = m
1282 1282
1283 1283 if not ret._isempty():
1284 1284 ret._dirty = True
1285 1285 return ret
1286 1286
1287 1287 def fastdelta(self, base, changes):
1288 1288 raise FastdeltaUnavailable()
1289 1289
1290 1290 def diff(self, m2, match=None, clean=False):
1291 1291 """Finds changes between the current manifest and m2.
1292 1292
1293 1293 Args:
1294 1294 m2: the manifest to which this manifest should be compared.
1295 1295 clean: if true, include files unchanged between these manifests
1296 1296 with a None value in the returned dictionary.
1297 1297
1298 1298 The result is returned as a dict with filename as key and
1299 1299 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1300 1300 nodeid in the current/other manifest and fl1/fl2 is the flag
1301 1301 in the current/other manifest. Where the file does not exist,
1302 1302 the nodeid will be None and the flags will be the empty
1303 1303 string.
1304 1304 """
1305 1305 if match and not match.always():
1306 1306 m1 = self._matches(match)
1307 1307 m2 = m2._matches(match)
1308 1308 return m1.diff(m2, clean=clean)
1309 1309 result = {}
1310 1310 emptytree = treemanifest(self.nodeconstants)
1311 1311
1312 1312 def _iterativediff(t1, t2, stack):
1313 1313 """compares two tree manifests and append new tree-manifests which
1314 1314 needs to be compared to stack"""
1315 1315 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1316 1316 return
1317 1317 t1._load()
1318 1318 t2._load()
1319 1319 self._loaddifflazy(t1, t2)
1320 1320
1321 1321 for d, m1 in t1._dirs.items():
1322 1322 m2 = t2._dirs.get(d, emptytree)
1323 1323 stack.append((m1, m2))
1324 1324
1325 1325 for d, m2 in t2._dirs.items():
1326 1326 if d not in t1._dirs:
1327 1327 stack.append((emptytree, m2))
1328 1328
1329 1329 for fn, n1 in t1._files.items():
1330 1330 fl1 = t1._flags.get(fn, b'')
1331 1331 n2 = t2._files.get(fn, None)
1332 1332 fl2 = t2._flags.get(fn, b'')
1333 1333 if n1 != n2 or fl1 != fl2:
1334 1334 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1335 1335 elif clean:
1336 1336 result[t1._subpath(fn)] = None
1337 1337
1338 1338 for fn, n2 in t2._files.items():
1339 1339 if fn not in t1._files:
1340 1340 fl2 = t2._flags.get(fn, b'')
1341 1341 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1342 1342
1343 1343 stackls = []
1344 1344 _iterativediff(self, m2, stackls)
1345 1345 while stackls:
1346 1346 t1, t2 = stackls.pop()
1347 1347 # stackls is populated in the function call
1348 1348 _iterativediff(t1, t2, stackls)
1349 1349 return result
1350 1350
1351 1351 def unmodifiedsince(self, m2):
1352 1352 return not self._dirty and not m2._dirty and self._node == m2._node
1353 1353
1354 1354 def parse(self, text, readsubtree):
1355 1355 selflazy = self._lazydirs
1356 1356 for f, n, fl in _parse(self._nodelen, text):
1357 1357 if fl == b't':
1358 1358 f = f + b'/'
1359 1359 # False below means "doesn't need to be copied" and can use the
1360 1360 # cached value from readsubtree directly.
1361 1361 selflazy[f] = (n, readsubtree, False)
1362 1362 elif b'/' in f:
1363 1363 # This is a flat manifest, so use __setitem__ and setflag rather
1364 1364 # than assigning directly to _files and _flags, so we can
1365 1365 # assign a path in a subdirectory, and to mark dirty (compared
1366 1366 # to nullid).
1367 1367 self[f] = n
1368 1368 if fl:
1369 1369 self.setflag(f, fl)
1370 1370 else:
1371 1371 # Assigning to _files and _flags avoids marking as dirty,
1372 1372 # and should be a little faster.
1373 1373 self._files[f] = n
1374 1374 if fl:
1375 1375 self._flags[f] = fl
1376 1376
1377 1377 def text(self):
1378 1378 """Get the full data of this manifest as a bytestring."""
1379 1379 self._load()
1380 1380 return _text(self.iterentries())
1381 1381
1382 1382 def dirtext(self):
1383 1383 """Get the full data of this directory as a bytestring. Make sure that
1384 1384 any submanifests have been written first, so their nodeids are correct.
1385 1385 """
1386 1386 self._load()
1387 1387 flags = self.flags
1388 1388 lazydirs = [(d[:-1], v[0], b't') for d, v in self._lazydirs.items()]
1389 1389 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1390 1390 files = [(f, self._files[f], flags(f)) for f in self._files]
1391 1391 return _text(sorted(dirs + files + lazydirs))
1392 1392
1393 1393 def read(self, gettext, readsubtree):
1394 1394 def _load_for_read(s):
1395 1395 s.parse(gettext(), readsubtree)
1396 1396 s._dirty = False
1397 1397
1398 1398 self._loadfunc = _load_for_read
1399 1399
1400 1400 def writesubtrees(self, m1, m2, writesubtree, match):
1401 1401 self._load() # for consistency; should never have any effect here
1402 1402 m1._load()
1403 1403 m2._load()
1404 1404 emptytree = treemanifest(self.nodeconstants)
1405 1405
1406 1406 def getnode(m, d):
1407 1407 ld = m._lazydirs.get(d)
1408 1408 if ld:
1409 1409 return ld[0]
1410 return m._dirs.get(d, emptytree)._node
1410 tree = m._dirs.get(d, emptytree)
1411 assert tree is not None # helps pytype
1412 return tree._node
1411 1413
1412 1414 # let's skip investigating things that `match` says we do not need.
1413 1415 visit = match.visitchildrenset(self._dir[:-1])
1414 1416 visit = self._loadchildrensetlazy(visit)
1415 1417 if visit == b'this' or visit == b'all':
1416 1418 visit = None
1417 1419 for d, subm in self._dirs.items():
1418 1420 if visit and d[:-1] not in visit:
1419 1421 continue
1420 1422 subp1 = getnode(m1, d)
1421 1423 subp2 = getnode(m2, d)
1422 1424 if subp1 == self.nodeconstants.nullid:
1423 1425 subp1, subp2 = subp2, subp1
1424 1426 writesubtree(subm, subp1, subp2, match)
1425 1427
1426 1428 def walksubtrees(self, matcher=None):
1427 1429 """Returns an iterator of the subtrees of this manifest, including this
1428 1430 manifest itself.
1429 1431
1430 1432 If `matcher` is provided, it only returns subtrees that match.
1431 1433 """
1432 1434 if matcher and not matcher.visitdir(self._dir[:-1]):
1433 1435 return
1434 1436 if not matcher or matcher(self._dir[:-1]):
1435 1437 yield self
1436 1438
1437 1439 self._load()
1438 1440 # OPT: use visitchildrenset to avoid loading everything.
1439 1441 self._loadalllazy()
1440 1442 for d, subm in self._dirs.items():
1441 1443 for subtree in subm.walksubtrees(matcher=matcher):
1442 1444 yield subtree
1443 1445
1444 1446
1445 1447 treemanifest = interfaceutil.implementer(repository.imanifestdict)(TreeManifest)
1446 1448
1447 1449
1448 1450 class manifestfulltextcache(util.lrucachedict):
1449 1451 """File-backed LRU cache for the manifest cache
1450 1452
1451 1453 File consists of entries, up to EOF:
1452 1454
1453 1455 - 20 bytes node, 4 bytes length, <length> manifest data
1454 1456
1455 1457 These are written in reverse cache order (oldest to newest).
1456 1458
1457 1459 """
1458 1460
1459 1461 _file = b'manifestfulltextcache'
1460 1462
1461 1463 def __init__(self, max):
1462 1464 super(manifestfulltextcache, self).__init__(max)
1463 1465 self._dirty = False
1464 1466 self._read = False
1465 1467 self._opener = None
1466 1468
1467 1469 def read(self):
1468 1470 if self._read or self._opener is None:
1469 1471 return
1470 1472
1471 1473 try:
1472 1474 with self._opener(self._file) as fp:
1473 1475 set = super(manifestfulltextcache, self).__setitem__
1474 1476 # ignore trailing data, this is a cache, corruption is skipped
1475 1477 while True:
1476 1478 # TODO do we need to do work here for sha1 portability?
1477 1479 node = fp.read(20)
1478 1480 if len(node) < 20:
1479 1481 break
1480 1482 try:
1481 1483 size = struct.unpack(b'>L', fp.read(4))[0]
1482 1484 except struct.error:
1483 1485 break
1484 1486 value = bytearray(fp.read(size))
1485 1487 if len(value) != size:
1486 1488 break
1487 1489 set(node, value)
1488 1490 except IOError:
1489 1491 # the file is allowed to be missing
1490 1492 pass
1491 1493
1492 1494 self._read = True
1493 1495 self._dirty = False
1494 1496
1495 1497 def write(self):
1496 1498 if not self._dirty or self._opener is None:
1497 1499 return
1498 1500 # rotate backwards to the first used node
1499 1501 try:
1500 1502 with self._opener(
1501 1503 self._file, b'w', atomictemp=True, checkambig=True
1502 1504 ) as fp:
1503 1505 node = self._head.prev
1504 1506 while True:
1505 1507 if node.key in self._cache:
1506 1508 fp.write(node.key)
1507 1509 fp.write(struct.pack(b'>L', len(node.value)))
1508 1510 fp.write(node.value)
1509 1511 if node is self._head:
1510 1512 break
1511 1513 node = node.prev
1512 1514 except IOError:
1513 1515 # We could not write the cache (eg: permission error)
1514 1516 # the content can be missing.
1515 1517 #
1516 1518 # We could try harder and see if we could recreate a wcache
1517 1519 # directory were we coudl write too.
1518 1520 #
1519 1521 # XXX the error pass silently, having some way to issue an error
1520 1522 # log `ui.log` would be nice.
1521 1523 pass
1522 1524
1523 1525 def __len__(self):
1524 1526 if not self._read:
1525 1527 self.read()
1526 1528 return super(manifestfulltextcache, self).__len__()
1527 1529
1528 1530 def __contains__(self, k):
1529 1531 if not self._read:
1530 1532 self.read()
1531 1533 return super(manifestfulltextcache, self).__contains__(k)
1532 1534
1533 1535 def __iter__(self):
1534 1536 if not self._read:
1535 1537 self.read()
1536 1538 return super(manifestfulltextcache, self).__iter__()
1537 1539
1538 1540 def __getitem__(self, k):
1539 1541 if not self._read:
1540 1542 self.read()
1541 1543 # the cache lru order can change on read
1542 1544 setdirty = self._cache.get(k) is not self._head
1543 1545 value = super(manifestfulltextcache, self).__getitem__(k)
1544 1546 if setdirty:
1545 1547 self._dirty = True
1546 1548 return value
1547 1549
1548 1550 def __setitem__(self, k, v):
1549 1551 if not self._read:
1550 1552 self.read()
1551 1553 super(manifestfulltextcache, self).__setitem__(k, v)
1552 1554 self._dirty = True
1553 1555
1554 1556 def __delitem__(self, k):
1555 1557 if not self._read:
1556 1558 self.read()
1557 1559 super(manifestfulltextcache, self).__delitem__(k)
1558 1560 self._dirty = True
1559 1561
1560 1562 def get(self, k, default=None):
1561 1563 if not self._read:
1562 1564 self.read()
1563 1565 return super(manifestfulltextcache, self).get(k, default=default)
1564 1566
1565 1567 def clear(self, clear_persisted_data=False):
1566 1568 super(manifestfulltextcache, self).clear()
1567 1569 if clear_persisted_data:
1568 1570 self._dirty = True
1569 1571 self.write()
1570 1572 self._read = False
1571 1573
1572 1574
1573 1575 # and upper bound of what we expect from compression
1574 1576 # (real live value seems to be "3")
1575 1577 MAXCOMPRESSION = 3
1576 1578
1577 1579
1578 1580 class FastdeltaUnavailable(Exception):
1579 1581 """Exception raised when fastdelta isn't usable on a manifest."""
1580 1582
1581 1583
1582 1584 class ManifestRevlog:
1583 1585 """A revlog that stores manifest texts. This is responsible for caching the
1584 1586 full-text manifest contents.
1585 1587 """
1586 1588
1587 1589 def __init__(
1588 1590 self,
1589 1591 nodeconstants,
1590 1592 opener,
1591 1593 tree=b'',
1592 1594 dirlogcache=None,
1593 1595 treemanifest=False,
1594 1596 ):
1595 1597 """Constructs a new manifest revlog
1596 1598
1597 1599 `indexfile` - used by extensions to have two manifests at once, like
1598 1600 when transitioning between flatmanifeset and treemanifests.
1599 1601
1600 1602 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1601 1603 options can also be used to make this a tree manifest revlog. The opener
1602 1604 option takes precedence, so if it is set to True, we ignore whatever
1603 1605 value is passed in to the constructor.
1604 1606 """
1605 1607 self.nodeconstants = nodeconstants
1606 1608 # During normal operations, we expect to deal with not more than four
1607 1609 # revs at a time (such as during commit --amend). When rebasing large
1608 1610 # stacks of commits, the number can go up, hence the config knob below.
1609 1611 cachesize = 4
1610 1612 optiontreemanifest = False
1611 1613 persistentnodemap = False
1612 1614 opts = getattr(opener, 'options', None)
1613 1615 if opts is not None:
1614 1616 cachesize = opts.get(b'manifestcachesize', cachesize)
1615 1617 optiontreemanifest = opts.get(b'treemanifest', False)
1616 1618 persistentnodemap = opts.get(b'persistent-nodemap', False)
1617 1619
1618 1620 self._treeondisk = optiontreemanifest or treemanifest
1619 1621
1620 1622 self._fulltextcache = manifestfulltextcache(cachesize)
1621 1623
1622 1624 if tree:
1623 1625 assert self._treeondisk, (tree, b'opts is %r' % opts)
1624 1626
1625 1627 radix = b'00manifest'
1626 1628 if tree:
1627 1629 radix = b"meta/" + tree + radix
1628 1630
1629 1631 self.tree = tree
1630 1632
1631 1633 # The dirlogcache is kept on the root manifest log
1632 1634 if tree:
1633 1635 self._dirlogcache = dirlogcache
1634 1636 else:
1635 1637 self._dirlogcache = {b'': self}
1636 1638
1637 1639 self._revlog = revlog.revlog(
1638 1640 opener,
1639 1641 target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
1640 1642 radix=radix,
1641 1643 # only root indexfile is cached
1642 1644 checkambig=not bool(tree),
1643 1645 mmaplargeindex=True,
1644 1646 upperboundcomp=MAXCOMPRESSION,
1645 1647 persistentnodemap=persistentnodemap,
1646 1648 )
1647 1649
1648 1650 self.index = self._revlog.index
1649 1651
1650 1652 def get_revlog(self):
1651 1653 """return an actual revlog instance if any
1652 1654
1653 1655 This exist because a lot of code leverage the fact the underlying
1654 1656 storage is a revlog for optimization, so giving simple way to access
1655 1657 the revlog instance helps such code.
1656 1658 """
1657 1659 return self._revlog
1658 1660
1659 1661 def _setupmanifestcachehooks(self, repo):
1660 1662 """Persist the manifestfulltextcache on lock release"""
1661 1663 if not hasattr(repo, '_wlockref'):
1662 1664 return
1663 1665
1664 1666 self._fulltextcache._opener = repo.wcachevfs
1665 1667 if repo._currentlock(repo._wlockref) is None:
1666 1668 return
1667 1669
1668 1670 reporef = weakref.ref(repo)
1669 1671 manifestrevlogref = weakref.ref(self)
1670 1672
1671 1673 def persistmanifestcache(success):
1672 1674 # Repo is in an unknown state, do not persist.
1673 1675 if not success:
1674 1676 return
1675 1677
1676 1678 repo = reporef()
1677 1679 self = manifestrevlogref()
1678 1680 if repo is None or self is None:
1679 1681 return
1680 1682 if repo.manifestlog.getstorage(b'') is not self:
1681 1683 # there's a different manifest in play now, abort
1682 1684 return
1683 1685 self._fulltextcache.write()
1684 1686
1685 1687 repo._afterlock(persistmanifestcache)
1686 1688
1687 1689 @property
1688 1690 def fulltextcache(self):
1689 1691 return self._fulltextcache
1690 1692
1691 1693 def clearcaches(self, clear_persisted_data=False):
1692 1694 self._revlog.clearcaches()
1693 1695 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1694 1696 self._dirlogcache = {self.tree: self}
1695 1697
1696 1698 def dirlog(self, d):
1697 1699 if d:
1698 1700 assert self._treeondisk
1699 1701 if d not in self._dirlogcache:
1700 1702 mfrevlog = manifestrevlog(
1701 1703 self.nodeconstants,
1702 1704 self.opener,
1703 1705 d,
1704 1706 self._dirlogcache,
1705 1707 treemanifest=self._treeondisk,
1706 1708 )
1707 1709 self._dirlogcache[d] = mfrevlog
1708 1710 return self._dirlogcache[d]
1709 1711
1710 1712 def add(
1711 1713 self,
1712 1714 m,
1713 1715 transaction,
1714 1716 link,
1715 1717 p1,
1716 1718 p2,
1717 1719 added: Iterable[Iterable],
1718 1720 removed: Iterable[Iterable],
1719 1721 readtree=None,
1720 1722 match=None,
1721 1723 ):
1722 1724 """add some manifest entry in to the manifest log
1723 1725
1724 1726 input:
1725 1727
1726 1728 m: the manifest dict we want to store
1727 1729 transaction: the open transaction
1728 1730 p1: manifest-node of p1
1729 1731 p2: manifest-node of p2
1730 1732 added: file added/changed compared to parent
1731 1733 removed: file removed compared to parent
1732 1734
1733 1735 tree manifest input:
1734 1736
1735 1737 readtree: a function to read a subtree
1736 1738 match: a filematcher for the subpart of the tree manifest
1737 1739 """
1738 1740 try:
1739 1741 if p1 not in self.fulltextcache:
1740 1742 raise FastdeltaUnavailable()
1741 1743 # If our first parent is in the manifest cache, we can
1742 1744 # compute a delta here using properties we know about the
1743 1745 # manifest up-front, which may save time later for the
1744 1746 # revlog layer.
1745 1747
1746 1748 _checkforbidden(added)
1747 1749 # combine the changed lists into one sorted iterator
1748 1750 work = heapq.merge(
1749 1751 [(x, False) for x in sorted(added)],
1750 1752 [(x, True) for x in sorted(removed)],
1751 1753 )
1752 1754
1753 1755 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1754 1756 cachedelta = self._revlog.rev(p1), deltatext
1755 1757 text = util.buffer(arraytext)
1756 1758 rev = self._revlog.addrevision(
1757 1759 text, transaction, link, p1, p2, cachedelta
1758 1760 )
1759 1761 n = self._revlog.node(rev)
1760 1762 except FastdeltaUnavailable:
1761 1763 # The first parent manifest isn't already loaded or the
1762 1764 # manifest implementation doesn't support fastdelta, so
1763 1765 # we'll just encode a fulltext of the manifest and pass
1764 1766 # that through to the revlog layer, and let it handle the
1765 1767 # delta process.
1766 1768 if self._treeondisk:
1767 1769 assert readtree, b"readtree must be set for treemanifest writes"
1768 1770 assert match, b"match must be specified for treemanifest writes"
1769 1771 m1 = readtree(self.tree, p1)
1770 1772 m2 = readtree(self.tree, p2)
1771 1773 n = self._addtree(
1772 1774 m, transaction, link, m1, m2, readtree, match=match
1773 1775 )
1774 1776 arraytext = None
1775 1777 else:
1776 1778 text = m.text()
1777 1779 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1778 1780 n = self._revlog.node(rev)
1779 1781 arraytext = bytearray(text)
1780 1782
1781 1783 if arraytext is not None:
1782 1784 self.fulltextcache[n] = arraytext
1783 1785
1784 1786 return n
1785 1787
1786 1788 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1787 1789 # If the manifest is unchanged compared to one parent,
1788 1790 # don't write a new revision
1789 1791 if self.tree != b'' and (
1790 1792 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1791 1793 ):
1792 1794 return m.node()
1793 1795
1794 1796 def writesubtree(subm, subp1, subp2, match):
1795 1797 sublog = self.dirlog(subm.dir())
1796 1798 sublog.add(
1797 1799 subm,
1798 1800 transaction,
1799 1801 link,
1800 1802 subp1,
1801 1803 subp2,
1802 1804 None,
1803 1805 None,
1804 1806 readtree=readtree,
1805 1807 match=match,
1806 1808 )
1807 1809
1808 1810 m.writesubtrees(m1, m2, writesubtree, match)
1809 1811 text = m.dirtext()
1810 1812 n = None
1811 1813 if self.tree != b'':
1812 1814 # Double-check whether contents are unchanged to one parent
1813 1815 if text == m1.dirtext():
1814 1816 n = m1.node()
1815 1817 elif text == m2.dirtext():
1816 1818 n = m2.node()
1817 1819
1818 1820 if not n:
1819 1821 rev = self._revlog.addrevision(
1820 1822 text, transaction, link, m1.node(), m2.node()
1821 1823 )
1822 1824 n = self._revlog.node(rev)
1823 1825
1824 1826 # Save nodeid so parent manifest can calculate its nodeid
1825 1827 m.setnode(n)
1826 1828 return n
1827 1829
1828 1830 def __len__(self):
1829 1831 return len(self._revlog)
1830 1832
1831 1833 def __iter__(self):
1832 1834 return self._revlog.__iter__()
1833 1835
1834 1836 def rev(self, node):
1835 1837 return self._revlog.rev(node)
1836 1838
1837 1839 def node(self, rev):
1838 1840 return self._revlog.node(rev)
1839 1841
1840 1842 def lookup(self, value):
1841 1843 return self._revlog.lookup(value)
1842 1844
1843 1845 def parentrevs(self, rev):
1844 1846 return self._revlog.parentrevs(rev)
1845 1847
1846 1848 def parents(self, node):
1847 1849 return self._revlog.parents(node)
1848 1850
1849 1851 def linkrev(self, rev):
1850 1852 return self._revlog.linkrev(rev)
1851 1853
1852 1854 def checksize(self):
1853 1855 return self._revlog.checksize()
1854 1856
1855 1857 def revision(self, node):
1856 1858 return self._revlog.revision(node)
1857 1859
1858 1860 def rawdata(self, node):
1859 1861 return self._revlog.rawdata(node)
1860 1862
1861 1863 def revdiff(self, rev1, rev2):
1862 1864 return self._revlog.revdiff(rev1, rev2)
1863 1865
1864 1866 def cmp(self, node, text):
1865 1867 return self._revlog.cmp(node, text)
1866 1868
1867 1869 def deltaparent(self, rev):
1868 1870 return self._revlog.deltaparent(rev)
1869 1871
1870 1872 def emitrevisions(
1871 1873 self,
1872 1874 nodes,
1873 1875 nodesorder=None,
1874 1876 revisiondata=False,
1875 1877 assumehaveparentrevisions=False,
1876 1878 deltamode=repository.CG_DELTAMODE_STD,
1877 1879 sidedata_helpers=None,
1878 1880 debug_info=None,
1879 1881 ):
1880 1882 return self._revlog.emitrevisions(
1881 1883 nodes,
1882 1884 nodesorder=nodesorder,
1883 1885 revisiondata=revisiondata,
1884 1886 assumehaveparentrevisions=assumehaveparentrevisions,
1885 1887 deltamode=deltamode,
1886 1888 sidedata_helpers=sidedata_helpers,
1887 1889 debug_info=debug_info,
1888 1890 )
1889 1891
1890 1892 def addgroup(
1891 1893 self,
1892 1894 deltas,
1893 1895 linkmapper,
1894 1896 transaction,
1895 1897 alwayscache=False,
1896 1898 addrevisioncb=None,
1897 1899 duplicaterevisioncb=None,
1898 1900 debug_info=None,
1899 1901 delta_base_reuse_policy=None,
1900 1902 ):
1901 1903 return self._revlog.addgroup(
1902 1904 deltas,
1903 1905 linkmapper,
1904 1906 transaction,
1905 1907 alwayscache=alwayscache,
1906 1908 addrevisioncb=addrevisioncb,
1907 1909 duplicaterevisioncb=duplicaterevisioncb,
1908 1910 debug_info=debug_info,
1909 1911 delta_base_reuse_policy=delta_base_reuse_policy,
1910 1912 )
1911 1913
1912 1914 def rawsize(self, rev):
1913 1915 return self._revlog.rawsize(rev)
1914 1916
1915 1917 def getstrippoint(self, minlink):
1916 1918 return self._revlog.getstrippoint(minlink)
1917 1919
1918 1920 def strip(self, minlink, transaction):
1919 1921 return self._revlog.strip(minlink, transaction)
1920 1922
1921 1923 def files(self):
1922 1924 return self._revlog.files()
1923 1925
1924 1926 def clone(self, tr, destrevlog, **kwargs):
1925 1927 if not isinstance(destrevlog, manifestrevlog):
1926 1928 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1927 1929
1928 1930 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1929 1931
1930 1932 def storageinfo(
1931 1933 self,
1932 1934 exclusivefiles=False,
1933 1935 sharedfiles=False,
1934 1936 revisionscount=False,
1935 1937 trackedsize=False,
1936 1938 storedsize=False,
1937 1939 ):
1938 1940 return self._revlog.storageinfo(
1939 1941 exclusivefiles=exclusivefiles,
1940 1942 sharedfiles=sharedfiles,
1941 1943 revisionscount=revisionscount,
1942 1944 trackedsize=trackedsize,
1943 1945 storedsize=storedsize,
1944 1946 )
1945 1947
1946 1948 @property
1947 1949 def opener(self):
1948 1950 return self._revlog.opener
1949 1951
1950 1952 @opener.setter
1951 1953 def opener(self, value):
1952 1954 self._revlog.opener = value
1953 1955
1954 1956
1955 1957 manifestrevlog = interfaceutil.implementer(repository.imanifeststorage)(
1956 1958 ManifestRevlog
1957 1959 )
1958 1960
1959 1961
1960 1962 @interfaceutil.implementer(repository.imanifestlog)
1961 1963 class manifestlog:
1962 1964 """A collection class representing the collection of manifest snapshots
1963 1965 referenced by commits in the repository.
1964 1966
1965 1967 In this situation, 'manifest' refers to the abstract concept of a snapshot
1966 1968 of the list of files in the given commit. Consumers of the output of this
1967 1969 class do not care about the implementation details of the actual manifests
1968 1970 they receive (i.e. tree or flat or lazily loaded, etc)."""
1969 1971
1970 1972 def __init__(self, opener, repo, rootstore, narrowmatch):
1971 1973 self.nodeconstants = repo.nodeconstants
1972 1974 usetreemanifest = False
1973 1975 cachesize = 4
1974 1976
1975 1977 opts = getattr(opener, 'options', None)
1976 1978 if opts is not None:
1977 1979 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1978 1980 cachesize = opts.get(b'manifestcachesize', cachesize)
1979 1981
1980 1982 self._treemanifests = usetreemanifest
1981 1983
1982 1984 self._rootstore = rootstore
1983 1985 self._rootstore._setupmanifestcachehooks(repo)
1984 1986 self._narrowmatch = narrowmatch
1985 1987
1986 1988 # A cache of the manifestctx or treemanifestctx for each directory
1987 1989 self._dirmancache = {}
1988 1990 self._dirmancache[b''] = util.lrucachedict(cachesize)
1989 1991
1990 1992 self._cachesize = cachesize
1991 1993
1992 1994 def __getitem__(self, node):
1993 1995 """Retrieves the manifest instance for the given node. Throws a
1994 1996 LookupError if not found.
1995 1997 """
1996 1998 return self.get(b'', node)
1997 1999
1998 2000 def get(self, tree, node, verify=True):
1999 2001 """Retrieves the manifest instance for the given node. Throws a
2000 2002 LookupError if not found.
2001 2003
2002 2004 `verify` - if True an exception will be thrown if the node is not in
2003 2005 the revlog
2004 2006 """
2005 2007 if node in self._dirmancache.get(tree, ()):
2006 2008 return self._dirmancache[tree][node]
2007 2009
2008 2010 if not self._narrowmatch.always():
2009 2011 if not self._narrowmatch.visitdir(tree[:-1]):
2010 2012 return excludeddirmanifestctx(self.nodeconstants, tree, node)
2011 2013 if tree:
2012 2014 if self._rootstore._treeondisk:
2013 2015 if verify:
2014 2016 # Side-effect is LookupError is raised if node doesn't
2015 2017 # exist.
2016 2018 self.getstorage(tree).rev(node)
2017 2019
2018 2020 m = treemanifestctx(self, tree, node)
2019 2021 else:
2020 2022 raise error.Abort(
2021 2023 _(
2022 2024 b"cannot ask for manifest directory '%s' in a flat "
2023 2025 b"manifest"
2024 2026 )
2025 2027 % tree
2026 2028 )
2027 2029 else:
2028 2030 if verify:
2029 2031 # Side-effect is LookupError is raised if node doesn't exist.
2030 2032 self._rootstore.rev(node)
2031 2033
2032 2034 if self._treemanifests:
2033 2035 m = treemanifestctx(self, b'', node)
2034 2036 else:
2035 2037 m = manifestctx(self, node)
2036 2038
2037 2039 if node != self.nodeconstants.nullid:
2038 2040 mancache = self._dirmancache.get(tree)
2039 2041 if not mancache:
2040 2042 mancache = util.lrucachedict(self._cachesize)
2041 2043 self._dirmancache[tree] = mancache
2042 2044 mancache[node] = m
2043 2045 return m
2044 2046
2045 2047 def getstorage(self, tree):
2046 2048 return self._rootstore.dirlog(tree)
2047 2049
2048 2050 def clearcaches(self, clear_persisted_data=False):
2049 2051 self._dirmancache.clear()
2050 2052 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2051 2053
2052 2054 def rev(self, node):
2053 2055 return self._rootstore.rev(node)
2054 2056
2055 2057 def update_caches(self, transaction):
2056 2058 return self._rootstore._revlog.update_caches(transaction=transaction)
2057 2059
2058 2060
2059 2061 class MemManifestCtx:
2060 2062 def __init__(self, manifestlog):
2061 2063 self._manifestlog = manifestlog
2062 2064 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2063 2065
2064 2066 def _storage(self):
2065 2067 return self._manifestlog.getstorage(b'')
2066 2068
2067 2069 def copy(self):
2068 2070 memmf = memmanifestctx(self._manifestlog)
2069 2071 memmf._manifestdict = self.read().copy()
2070 2072 return memmf
2071 2073
2072 2074 def read(self):
2073 2075 return self._manifestdict
2074 2076
2075 2077 def write(self, transaction, link, p1, p2, added, removed, match=None):
2076 2078 return self._storage().add(
2077 2079 self._manifestdict,
2078 2080 transaction,
2079 2081 link,
2080 2082 p1,
2081 2083 p2,
2082 2084 added,
2083 2085 removed,
2084 2086 match=match,
2085 2087 )
2086 2088
2087 2089
2088 2090 memmanifestctx = interfaceutil.implementer(
2089 2091 repository.imanifestrevisionwritable
2090 2092 )(MemManifestCtx)
2091 2093
2092 2094
2093 2095 class ManifestCtx:
2094 2096 """A class representing a single revision of a manifest, including its
2095 2097 contents, its parent revs, and its linkrev.
2096 2098 """
2097 2099
2098 2100 def __init__(self, manifestlog, node):
2099 2101 self._manifestlog = manifestlog
2100 2102 self._data = None
2101 2103
2102 2104 self._node = node
2103 2105
2104 2106 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2105 2107 # but let's add it later when something needs it and we can load it
2106 2108 # lazily.
2107 2109 # self.p1, self.p2 = store.parents(node)
2108 2110 # rev = store.rev(node)
2109 2111 # self.linkrev = store.linkrev(rev)
2110 2112
2111 2113 def _storage(self):
2112 2114 return self._manifestlog.getstorage(b'')
2113 2115
2114 2116 def node(self):
2115 2117 return self._node
2116 2118
2117 2119 def copy(self):
2118 2120 memmf = memmanifestctx(self._manifestlog)
2119 2121 memmf._manifestdict = self.read().copy()
2120 2122 return memmf
2121 2123
2122 2124 @propertycache
2123 2125 def parents(self):
2124 2126 return self._storage().parents(self._node)
2125 2127
2126 2128 def read(self):
2127 2129 if self._data is None:
2128 2130 nc = self._manifestlog.nodeconstants
2129 2131 if self._node == nc.nullid:
2130 2132 self._data = manifestdict(nc.nodelen)
2131 2133 else:
2132 2134 store = self._storage()
2133 2135 if self._node in store.fulltextcache:
2134 2136 text = pycompat.bytestr(store.fulltextcache[self._node])
2135 2137 else:
2136 2138 text = store.revision(self._node)
2137 2139 arraytext = bytearray(text)
2138 2140 store.fulltextcache[self._node] = arraytext
2139 2141 self._data = manifestdict(nc.nodelen, text)
2140 2142 return self._data
2141 2143
2142 2144 def readfast(self, shallow=False):
2143 2145 """Calls either readdelta or read, based on which would be less work.
2144 2146 readdelta is called if the delta is against the p1, and therefore can be
2145 2147 read quickly.
2146 2148
2147 2149 If `shallow` is True, nothing changes since this is a flat manifest.
2148 2150 """
2149 2151 store = self._storage()
2150 2152 r = store.rev(self._node)
2151 2153 deltaparent = store.deltaparent(r)
2152 2154 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2153 2155 return self.readdelta()
2154 2156 return self.read()
2155 2157
2156 2158 def readdelta(self, shallow=False):
2157 2159 """Returns a manifest containing just the entries that are present
2158 2160 in this manifest, but not in its p1 manifest. This is efficient to read
2159 2161 if the revlog delta is already p1.
2160 2162
2161 2163 Changing the value of `shallow` has no effect on flat manifests.
2162 2164 """
2163 2165 store = self._storage()
2164 2166 r = store.rev(self._node)
2165 2167 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2166 2168 return manifestdict(store.nodeconstants.nodelen, d)
2167 2169
2168 2170 def find(self, key):
2169 2171 return self.read().find(key)
2170 2172
2171 2173
2172 2174 manifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2173 2175 ManifestCtx
2174 2176 )
2175 2177
2176 2178
2177 2179 class MemTreeManifestCtx:
2178 2180 def __init__(self, manifestlog, dir=b''):
2179 2181 self._manifestlog = manifestlog
2180 2182 self._dir = dir
2181 2183 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2182 2184
2183 2185 def _storage(self):
2184 2186 return self._manifestlog.getstorage(b'')
2185 2187
2186 2188 def copy(self):
2187 2189 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2188 2190 memmf._treemanifest = self._treemanifest.copy()
2189 2191 return memmf
2190 2192
2191 2193 def read(self):
2192 2194 return self._treemanifest
2193 2195
2194 2196 def write(self, transaction, link, p1, p2, added, removed, match=None):
2195 2197 def readtree(dir, node):
2196 2198 return self._manifestlog.get(dir, node).read()
2197 2199
2198 2200 return self._storage().add(
2199 2201 self._treemanifest,
2200 2202 transaction,
2201 2203 link,
2202 2204 p1,
2203 2205 p2,
2204 2206 added,
2205 2207 removed,
2206 2208 readtree=readtree,
2207 2209 match=match,
2208 2210 )
2209 2211
2210 2212
2211 2213 memtreemanifestctx = interfaceutil.implementer(
2212 2214 repository.imanifestrevisionwritable
2213 2215 )(MemTreeManifestCtx)
2214 2216
2215 2217
2216 2218 class TreeManifestCtx:
2217 2219 def __init__(self, manifestlog, dir, node):
2218 2220 self._manifestlog = manifestlog
2219 2221 self._dir = dir
2220 2222 self._data = None
2221 2223
2222 2224 self._node = node
2223 2225
2224 2226 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2225 2227 # we can instantiate treemanifestctx objects for directories we don't
2226 2228 # have on disk.
2227 2229 # self.p1, self.p2 = store.parents(node)
2228 2230 # rev = store.rev(node)
2229 2231 # self.linkrev = store.linkrev(rev)
2230 2232
2231 2233 def _storage(self):
2232 2234 narrowmatch = self._manifestlog._narrowmatch
2233 2235 if not narrowmatch.always():
2234 2236 if not narrowmatch.visitdir(self._dir[:-1]):
2235 2237 return excludedmanifestrevlog(
2236 2238 self._manifestlog.nodeconstants, self._dir
2237 2239 )
2238 2240 return self._manifestlog.getstorage(self._dir)
2239 2241
2240 2242 def read(self):
2241 2243 if self._data is None:
2242 2244 store = self._storage()
2243 2245 if self._node == self._manifestlog.nodeconstants.nullid:
2244 2246 self._data = treemanifest(self._manifestlog.nodeconstants)
2245 2247 # TODO accessing non-public API
2246 2248 elif store._treeondisk:
2247 2249 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2248 2250
2249 2251 def gettext():
2250 2252 return store.revision(self._node)
2251 2253
2252 2254 def readsubtree(dir, subm):
2253 2255 # Set verify to False since we need to be able to create
2254 2256 # subtrees for trees that don't exist on disk.
2255 2257 return self._manifestlog.get(dir, subm, verify=False).read()
2256 2258
2257 2259 m.read(gettext, readsubtree)
2258 2260 m.setnode(self._node)
2259 2261 self._data = m
2260 2262 else:
2261 2263 if self._node in store.fulltextcache:
2262 2264 text = pycompat.bytestr(store.fulltextcache[self._node])
2263 2265 else:
2264 2266 text = store.revision(self._node)
2265 2267 arraytext = bytearray(text)
2266 2268 store.fulltextcache[self._node] = arraytext
2267 2269 self._data = treemanifest(
2268 2270 self._manifestlog.nodeconstants, dir=self._dir, text=text
2269 2271 )
2270 2272
2271 2273 return self._data
2272 2274
2273 2275 def node(self):
2274 2276 return self._node
2275 2277
2276 2278 def copy(self):
2277 2279 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2278 2280 memmf._treemanifest = self.read().copy()
2279 2281 return memmf
2280 2282
2281 2283 @propertycache
2282 2284 def parents(self):
2283 2285 return self._storage().parents(self._node)
2284 2286
2285 2287 def readdelta(self, shallow=False):
2286 2288 """Returns a manifest containing just the entries that are present
2287 2289 in this manifest, but not in its p1 manifest. This is efficient to read
2288 2290 if the revlog delta is already p1.
2289 2291
2290 2292 If `shallow` is True, this will read the delta for this directory,
2291 2293 without recursively reading subdirectory manifests. Instead, any
2292 2294 subdirectory entry will be reported as it appears in the manifest, i.e.
2293 2295 the subdirectory will be reported among files and distinguished only by
2294 2296 its 't' flag.
2295 2297 """
2296 2298 store = self._storage()
2297 2299 if shallow:
2298 2300 r = store.rev(self._node)
2299 2301 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2300 2302 return manifestdict(store.nodeconstants.nodelen, d)
2301 2303 else:
2302 2304 # Need to perform a slow delta
2303 2305 r0 = store.deltaparent(store.rev(self._node))
2304 2306 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2305 2307 m1 = self.read()
2306 2308 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2307 2309 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2308 2310 if n1:
2309 2311 md[f] = n1
2310 2312 if fl1:
2311 2313 md.setflag(f, fl1)
2312 2314 return md
2313 2315
2314 2316 def readfast(self, shallow=False):
2315 2317 """Calls either readdelta or read, based on which would be less work.
2316 2318 readdelta is called if the delta is against the p1, and therefore can be
2317 2319 read quickly.
2318 2320
2319 2321 If `shallow` is True, it only returns the entries from this manifest,
2320 2322 and not any submanifests.
2321 2323 """
2322 2324 store = self._storage()
2323 2325 r = store.rev(self._node)
2324 2326 deltaparent = store.deltaparent(r)
2325 2327 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2326 2328 return self.readdelta(shallow=shallow)
2327 2329
2328 2330 if shallow:
2329 2331 return manifestdict(
2330 2332 store.nodeconstants.nodelen, store.revision(self._node)
2331 2333 )
2332 2334 else:
2333 2335 return self.read()
2334 2336
2335 2337 def find(self, key):
2336 2338 return self.read().find(key)
2337 2339
2338 2340
2339 2341 treemanifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
2340 2342 TreeManifestCtx
2341 2343 )
2342 2344
2343 2345
2344 2346 class excludeddir(treemanifest):
2345 2347 """Stand-in for a directory that is excluded from the repository.
2346 2348
2347 2349 With narrowing active on a repository that uses treemanifests,
2348 2350 some of the directory revlogs will be excluded from the resulting
2349 2351 clone. This is a huge storage win for clients, but means we need
2350 2352 some sort of pseudo-manifest to surface to internals so we can
2351 2353 detect a merge conflict outside the narrowspec. That's what this
2352 2354 class is: it stands in for a directory whose node is known, but
2353 2355 whose contents are unknown.
2354 2356 """
2355 2357
2356 2358 def __init__(self, nodeconstants, dir, node):
2357 2359 super(excludeddir, self).__init__(nodeconstants, dir)
2358 2360 self._node = node
2359 2361 # Add an empty file, which will be included by iterators and such,
2360 2362 # appearing as the directory itself (i.e. something like "dir/")
2361 2363 self._files[b''] = node
2362 2364 self._flags[b''] = b't'
2363 2365
2364 2366 # Manifests outside the narrowspec should never be modified, so avoid
2365 2367 # copying. This makes a noticeable difference when there are very many
2366 2368 # directories outside the narrowspec. Also, it makes sense for the copy to
2367 2369 # be of the same type as the original, which would not happen with the
2368 2370 # super type's copy().
2369 2371 def copy(self):
2370 2372 return self
2371 2373
2372 2374
2373 2375 class excludeddirmanifestctx(treemanifestctx):
2374 2376 """context wrapper for excludeddir - see that docstring for rationale"""
2375 2377
2376 2378 def __init__(self, nodeconstants, dir, node):
2377 2379 self.nodeconstants = nodeconstants
2378 2380 self._dir = dir
2379 2381 self._node = node
2380 2382
2381 2383 def read(self):
2382 2384 return excludeddir(self.nodeconstants, self._dir, self._node)
2383 2385
2384 2386 def readfast(self, shallow=False):
2385 2387 # special version of readfast since we don't have underlying storage
2386 2388 return self.read()
2387 2389
2388 2390 def write(self, *args):
2389 2391 raise error.ProgrammingError(
2390 2392 b'attempt to write manifest from excluded dir %s' % self._dir
2391 2393 )
2392 2394
2393 2395
2394 2396 class excludedmanifestrevlog(manifestrevlog):
2395 2397 """Stand-in for excluded treemanifest revlogs.
2396 2398
2397 2399 When narrowing is active on a treemanifest repository, we'll have
2398 2400 references to directories we can't see due to the revlog being
2399 2401 skipped. This class exists to conform to the manifestrevlog
2400 2402 interface for those directories and proactively prevent writes to
2401 2403 outside the narrowspec.
2402 2404 """
2403 2405
2404 2406 def __init__(self, nodeconstants, dir):
2405 2407 self.nodeconstants = nodeconstants
2406 2408 self._dir = dir
2407 2409
2408 2410 def __len__(self):
2409 2411 raise error.ProgrammingError(
2410 2412 b'attempt to get length of excluded dir %s' % self._dir
2411 2413 )
2412 2414
2413 2415 def rev(self, node):
2414 2416 raise error.ProgrammingError(
2415 2417 b'attempt to get rev from excluded dir %s' % self._dir
2416 2418 )
2417 2419
2418 2420 def linkrev(self, node):
2419 2421 raise error.ProgrammingError(
2420 2422 b'attempt to get linkrev from excluded dir %s' % self._dir
2421 2423 )
2422 2424
2423 2425 def node(self, rev):
2424 2426 raise error.ProgrammingError(
2425 2427 b'attempt to get node from excluded dir %s' % self._dir
2426 2428 )
2427 2429
2428 2430 def add(self, *args, **kwargs):
2429 2431 # We should never write entries in dirlogs outside the narrow clone.
2430 2432 # However, the method still gets called from writesubtree() in
2431 2433 # _addtree(), so we need to handle it. We should possibly make that
2432 2434 # avoid calling add() with a clean manifest (_dirty is always False
2433 2435 # in excludeddir instances).
2434 2436 pass
General Comments 0
You need to be logged in to leave comments. Login now