##// END OF EJS Templates
narrow: overwrite readfast in excludeddirmanifestctx...
marmoute -
r46864:a3ccbac6 default
parent child Browse files
Show More
@@ -1,2345 +1,2349
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from .pycompat import getattr
23 23 from . import (
24 24 encoding,
25 25 error,
26 26 match as matchmod,
27 27 mdiff,
28 28 pathutil,
29 29 policy,
30 30 pycompat,
31 31 revlog,
32 32 util,
33 33 )
34 34 from .interfaces import (
35 35 repository,
36 36 util as interfaceutil,
37 37 )
38 38
39 39 parsers = policy.importmod('parsers')
40 40 propertycache = util.propertycache
41 41
42 42 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
43 43 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
44 44
45 45
46 46 def _parse(data):
47 47 # This method does a little bit of excessive-looking
48 48 # precondition checking. This is so that the behavior of this
49 49 # class exactly matches its C counterpart to try and help
50 50 # prevent surprise breakage for anyone that develops against
51 51 # the pure version.
52 52 if data and data[-1:] != b'\n':
53 53 raise ValueError(b'Manifest did not end in a newline.')
54 54 prev = None
55 55 for l in data.splitlines():
56 56 if prev is not None and prev > l:
57 57 raise ValueError(b'Manifest lines not in sorted order.')
58 58 prev = l
59 59 f, n = l.split(b'\0')
60 60 nl = len(n)
61 61 flags = n[-1:]
62 62 if flags in _manifestflags:
63 63 n = n[:-1]
64 64 nl -= 1
65 65 else:
66 66 flags = b''
67 67 if nl not in (40, 64):
68 68 raise ValueError(b'Invalid manifest line')
69 69
70 70 yield f, bin(n), flags
71 71
72 72
73 73 def _text(it):
74 74 files = []
75 75 lines = []
76 76 for f, n, fl in it:
77 77 files.append(f)
78 78 # if this is changed to support newlines in filenames,
79 79 # be sure to check the templates/ dir again (especially *-raw.tmpl)
80 80 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
81 81
82 82 _checkforbidden(files)
83 83 return b''.join(lines)
84 84
85 85
86 86 class lazymanifestiter(object):
87 87 def __init__(self, lm):
88 88 self.pos = 0
89 89 self.lm = lm
90 90
91 91 def __iter__(self):
92 92 return self
93 93
94 94 def next(self):
95 95 try:
96 96 data, pos = self.lm._get(self.pos)
97 97 except IndexError:
98 98 raise StopIteration
99 99 if pos == -1:
100 100 self.pos += 1
101 101 return data[0]
102 102 self.pos += 1
103 103 zeropos = data.find(b'\x00', pos)
104 104 return data[pos:zeropos]
105 105
106 106 __next__ = next
107 107
108 108
109 109 class lazymanifestiterentries(object):
110 110 def __init__(self, lm):
111 111 self.lm = lm
112 112 self.pos = 0
113 113
114 114 def __iter__(self):
115 115 return self
116 116
117 117 def next(self):
118 118 try:
119 119 data, pos = self.lm._get(self.pos)
120 120 except IndexError:
121 121 raise StopIteration
122 122 if pos == -1:
123 123 self.pos += 1
124 124 return data
125 125 zeropos = data.find(b'\x00', pos)
126 126 nlpos = data.find(b'\n', pos)
127 127 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
128 128 raise error.StorageError(b'Invalid manifest line')
129 129 flags = data[nlpos - 1 : nlpos]
130 130 if flags in _manifestflags:
131 131 hlen = nlpos - zeropos - 2
132 132 else:
133 133 hlen = nlpos - zeropos - 1
134 134 flags = b''
135 135 if hlen not in (40, 64):
136 136 raise error.StorageError(b'Invalid manifest line')
137 137 hashval = unhexlify(
138 138 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
139 139 )
140 140 self.pos += 1
141 141 return (data[pos:zeropos], hashval, flags)
142 142
143 143 __next__ = next
144 144
145 145
146 146 def unhexlify(data, extra, pos, length):
147 147 s = bin(data[pos : pos + length])
148 148 if extra:
149 149 s += chr(extra & 0xFF)
150 150 return s
151 151
152 152
153 153 def _cmp(a, b):
154 154 return (a > b) - (a < b)
155 155
156 156
157 157 _manifestflags = {b'', b'l', b't', b'x'}
158 158
159 159
160 160 class _lazymanifest(object):
161 161 """A pure python manifest backed by a byte string. It is supplimented with
162 162 internal lists as it is modified, until it is compacted back to a pure byte
163 163 string.
164 164
165 165 ``data`` is the initial manifest data.
166 166
167 167 ``positions`` is a list of offsets, one per manifest entry. Positive
168 168 values are offsets into ``data``, negative values are offsets into the
169 169 ``extradata`` list. When an entry is removed, its entry is dropped from
170 170 ``positions``. The values are encoded such that when walking the list and
171 171 indexing into ``data`` or ``extradata`` as appropriate, the entries are
172 172 sorted by filename.
173 173
174 174 ``extradata`` is a list of (key, hash, flags) for entries that were added or
175 175 modified since the manifest was created or compacted.
176 176 """
177 177
178 178 def __init__(
179 179 self,
180 180 data,
181 181 positions=None,
182 182 extrainfo=None,
183 183 extradata=None,
184 184 hasremovals=False,
185 185 ):
186 186 if positions is None:
187 187 self.positions = self.findlines(data)
188 188 self.extrainfo = [0] * len(self.positions)
189 189 self.data = data
190 190 self.extradata = []
191 191 self.hasremovals = False
192 192 else:
193 193 self.positions = positions[:]
194 194 self.extrainfo = extrainfo[:]
195 195 self.extradata = extradata[:]
196 196 self.data = data
197 197 self.hasremovals = hasremovals
198 198
199 199 def findlines(self, data):
200 200 if not data:
201 201 return []
202 202 pos = data.find(b"\n")
203 203 if pos == -1 or data[-1:] != b'\n':
204 204 raise ValueError(b"Manifest did not end in a newline.")
205 205 positions = [0]
206 206 prev = data[: data.find(b'\x00')]
207 207 while pos < len(data) - 1 and pos != -1:
208 208 positions.append(pos + 1)
209 209 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
210 210 if nexts < prev:
211 211 raise ValueError(b"Manifest lines not in sorted order.")
212 212 prev = nexts
213 213 pos = data.find(b"\n", pos + 1)
214 214 return positions
215 215
216 216 def _get(self, index):
217 217 # get the position encoded in pos:
218 218 # positive number is an index in 'data'
219 219 # negative number is in extrapieces
220 220 pos = self.positions[index]
221 221 if pos >= 0:
222 222 return self.data, pos
223 223 return self.extradata[-pos - 1], -1
224 224
225 225 def _getkey(self, pos):
226 226 if pos >= 0:
227 227 return self.data[pos : self.data.find(b'\x00', pos + 1)]
228 228 return self.extradata[-pos - 1][0]
229 229
230 230 def bsearch(self, key):
231 231 first = 0
232 232 last = len(self.positions) - 1
233 233
234 234 while first <= last:
235 235 midpoint = (first + last) // 2
236 236 nextpos = self.positions[midpoint]
237 237 candidate = self._getkey(nextpos)
238 238 r = _cmp(key, candidate)
239 239 if r == 0:
240 240 return midpoint
241 241 else:
242 242 if r < 0:
243 243 last = midpoint - 1
244 244 else:
245 245 first = midpoint + 1
246 246 return -1
247 247
248 248 def bsearch2(self, key):
249 249 # same as the above, but will always return the position
250 250 # done for performance reasons
251 251 first = 0
252 252 last = len(self.positions) - 1
253 253
254 254 while first <= last:
255 255 midpoint = (first + last) // 2
256 256 nextpos = self.positions[midpoint]
257 257 candidate = self._getkey(nextpos)
258 258 r = _cmp(key, candidate)
259 259 if r == 0:
260 260 return (midpoint, True)
261 261 else:
262 262 if r < 0:
263 263 last = midpoint - 1
264 264 else:
265 265 first = midpoint + 1
266 266 return (first, False)
267 267
268 268 def __contains__(self, key):
269 269 return self.bsearch(key) != -1
270 270
271 271 def __getitem__(self, key):
272 272 if not isinstance(key, bytes):
273 273 raise TypeError(b"getitem: manifest keys must be a bytes.")
274 274 needle = self.bsearch(key)
275 275 if needle == -1:
276 276 raise KeyError
277 277 data, pos = self._get(needle)
278 278 if pos == -1:
279 279 return (data[1], data[2])
280 280 zeropos = data.find(b'\x00', pos)
281 281 nlpos = data.find(b'\n', zeropos)
282 282 assert 0 <= needle <= len(self.positions)
283 283 assert len(self.extrainfo) == len(self.positions)
284 284 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
285 285 raise error.StorageError(b'Invalid manifest line')
286 286 hlen = nlpos - zeropos - 1
287 287 flags = data[nlpos - 1 : nlpos]
288 288 if flags in _manifestflags:
289 289 hlen -= 1
290 290 else:
291 291 flags = b''
292 292 if hlen not in (40, 64):
293 293 raise error.StorageError(b'Invalid manifest line')
294 294 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
295 295 return (hashval, flags)
296 296
297 297 def __delitem__(self, key):
298 298 needle, found = self.bsearch2(key)
299 299 if not found:
300 300 raise KeyError
301 301 cur = self.positions[needle]
302 302 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
303 303 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
304 304 if cur >= 0:
305 305 # This does NOT unsort the list as far as the search functions are
306 306 # concerned, as they only examine lines mapped by self.positions.
307 307 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
308 308 self.hasremovals = True
309 309
310 310 def __setitem__(self, key, value):
311 311 if not isinstance(key, bytes):
312 312 raise TypeError(b"setitem: manifest keys must be a byte string.")
313 313 if not isinstance(value, tuple) or len(value) != 2:
314 314 raise TypeError(
315 315 b"Manifest values must be a tuple of (node, flags)."
316 316 )
317 317 hashval = value[0]
318 318 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
319 319 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
320 320 flags = value[1]
321 321 if not isinstance(flags, bytes) or len(flags) > 1:
322 322 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
323 323 needle, found = self.bsearch2(key)
324 324 if found:
325 325 # put the item
326 326 pos = self.positions[needle]
327 327 if pos < 0:
328 328 self.extradata[-pos - 1] = (key, hashval, value[1])
329 329 else:
330 330 # just don't bother
331 331 self.extradata.append((key, hashval, value[1]))
332 332 self.positions[needle] = -len(self.extradata)
333 333 else:
334 334 # not found, put it in with extra positions
335 335 self.extradata.append((key, hashval, value[1]))
336 336 self.positions = (
337 337 self.positions[:needle]
338 338 + [-len(self.extradata)]
339 339 + self.positions[needle:]
340 340 )
341 341 self.extrainfo = (
342 342 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
343 343 )
344 344
345 345 def copy(self):
346 346 # XXX call _compact like in C?
347 347 return _lazymanifest(
348 348 self.data,
349 349 self.positions,
350 350 self.extrainfo,
351 351 self.extradata,
352 352 self.hasremovals,
353 353 )
354 354
355 355 def _compact(self):
356 356 # hopefully not called TOO often
357 357 if len(self.extradata) == 0 and not self.hasremovals:
358 358 return
359 359 l = []
360 360 i = 0
361 361 offset = 0
362 362 self.extrainfo = [0] * len(self.positions)
363 363 while i < len(self.positions):
364 364 if self.positions[i] >= 0:
365 365 cur = self.positions[i]
366 366 last_cut = cur
367 367
368 368 # Collect all contiguous entries in the buffer at the current
369 369 # offset, breaking out only for added/modified items held in
370 370 # extradata, or a deleted line prior to the next position.
371 371 while True:
372 372 self.positions[i] = offset
373 373 i += 1
374 374 if i == len(self.positions) or self.positions[i] < 0:
375 375 break
376 376
377 377 # A removed file has no positions[] entry, but does have an
378 378 # overwritten first byte. Break out and find the end of the
379 379 # current good entry/entries if there is a removed file
380 380 # before the next position.
381 381 if (
382 382 self.hasremovals
383 383 and self.data.find(b'\n\x00', cur, self.positions[i])
384 384 != -1
385 385 ):
386 386 break
387 387
388 388 offset += self.positions[i] - cur
389 389 cur = self.positions[i]
390 390 end_cut = self.data.find(b'\n', cur)
391 391 if end_cut != -1:
392 392 end_cut += 1
393 393 offset += end_cut - cur
394 394 l.append(self.data[last_cut:end_cut])
395 395 else:
396 396 while i < len(self.positions) and self.positions[i] < 0:
397 397 cur = self.positions[i]
398 398 t = self.extradata[-cur - 1]
399 399 l.append(self._pack(t))
400 400 self.positions[i] = offset
401 401 # Hashes are either 20 bytes (old sha1s) or 32
402 402 # bytes (new non-sha1).
403 403 hlen = 20
404 404 if len(t[1]) > 25:
405 405 hlen = 32
406 406 if len(t[1]) > hlen:
407 407 self.extrainfo[i] = ord(t[1][hlen + 1])
408 408 offset += len(l[-1])
409 409 i += 1
410 410 self.data = b''.join(l)
411 411 self.hasremovals = False
412 412 self.extradata = []
413 413
414 414 def _pack(self, d):
415 415 n = d[1]
416 416 assert len(n) in (20, 32)
417 417 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
418 418
419 419 def text(self):
420 420 self._compact()
421 421 return self.data
422 422
423 423 def diff(self, m2, clean=False):
424 424 '''Finds changes between the current manifest and m2.'''
425 425 # XXX think whether efficiency matters here
426 426 diff = {}
427 427
428 428 for fn, e1, flags in self.iterentries():
429 429 if fn not in m2:
430 430 diff[fn] = (e1, flags), (None, b'')
431 431 else:
432 432 e2 = m2[fn]
433 433 if (e1, flags) != e2:
434 434 diff[fn] = (e1, flags), e2
435 435 elif clean:
436 436 diff[fn] = None
437 437
438 438 for fn, e2, flags in m2.iterentries():
439 439 if fn not in self:
440 440 diff[fn] = (None, b''), (e2, flags)
441 441
442 442 return diff
443 443
444 444 def iterentries(self):
445 445 return lazymanifestiterentries(self)
446 446
447 447 def iterkeys(self):
448 448 return lazymanifestiter(self)
449 449
450 450 def __iter__(self):
451 451 return lazymanifestiter(self)
452 452
453 453 def __len__(self):
454 454 return len(self.positions)
455 455
456 456 def filtercopy(self, filterfn):
457 457 # XXX should be optimized
458 458 c = _lazymanifest(b'')
459 459 for f, n, fl in self.iterentries():
460 460 if filterfn(f):
461 461 c[f] = n, fl
462 462 return c
463 463
464 464
465 465 try:
466 466 _lazymanifest = parsers.lazymanifest
467 467 except AttributeError:
468 468 pass
469 469
470 470
471 471 @interfaceutil.implementer(repository.imanifestdict)
472 472 class manifestdict(object):
473 473 def __init__(self, data=b''):
474 474 self._lm = _lazymanifest(data)
475 475
476 476 def __getitem__(self, key):
477 477 return self._lm[key][0]
478 478
479 479 def find(self, key):
480 480 return self._lm[key]
481 481
482 482 def __len__(self):
483 483 return len(self._lm)
484 484
485 485 def __nonzero__(self):
486 486 # nonzero is covered by the __len__ function, but implementing it here
487 487 # makes it easier for extensions to override.
488 488 return len(self._lm) != 0
489 489
490 490 __bool__ = __nonzero__
491 491
492 492 def __setitem__(self, key, node):
493 493 self._lm[key] = node, self.flags(key)
494 494
495 495 def __contains__(self, key):
496 496 if key is None:
497 497 return False
498 498 return key in self._lm
499 499
500 500 def __delitem__(self, key):
501 501 del self._lm[key]
502 502
503 503 def __iter__(self):
504 504 return self._lm.__iter__()
505 505
506 506 def iterkeys(self):
507 507 return self._lm.iterkeys()
508 508
509 509 def keys(self):
510 510 return list(self.iterkeys())
511 511
512 512 def filesnotin(self, m2, match=None):
513 513 '''Set of files in this manifest that are not in the other'''
514 514 if match is not None:
515 515 match = matchmod.badmatch(match, lambda path, msg: None)
516 516 sm2 = set(m2.walk(match))
517 517 return {f for f in self.walk(match) if f not in sm2}
518 518 return {f for f in self if f not in m2}
519 519
520 520 @propertycache
521 521 def _dirs(self):
522 522 return pathutil.dirs(self)
523 523
524 524 def dirs(self):
525 525 return self._dirs
526 526
527 527 def hasdir(self, dir):
528 528 return dir in self._dirs
529 529
530 530 def _filesfastpath(self, match):
531 531 """Checks whether we can correctly and quickly iterate over matcher
532 532 files instead of over manifest files."""
533 533 files = match.files()
534 534 return len(files) < 100 and (
535 535 match.isexact()
536 536 or (match.prefix() and all(fn in self for fn in files))
537 537 )
538 538
539 539 def walk(self, match):
540 540 """Generates matching file names.
541 541
542 542 Equivalent to manifest.matches(match).iterkeys(), but without creating
543 543 an entirely new manifest.
544 544
545 545 It also reports nonexistent files by marking them bad with match.bad().
546 546 """
547 547 if match.always():
548 548 for f in iter(self):
549 549 yield f
550 550 return
551 551
552 552 fset = set(match.files())
553 553
554 554 # avoid the entire walk if we're only looking for specific files
555 555 if self._filesfastpath(match):
556 556 for fn in sorted(fset):
557 557 if fn in self:
558 558 yield fn
559 559 return
560 560
561 561 for fn in self:
562 562 if fn in fset:
563 563 # specified pattern is the exact name
564 564 fset.remove(fn)
565 565 if match(fn):
566 566 yield fn
567 567
568 568 # for dirstate.walk, files=[''] means "walk the whole tree".
569 569 # follow that here, too
570 570 fset.discard(b'')
571 571
572 572 for fn in sorted(fset):
573 573 if not self.hasdir(fn):
574 574 match.bad(fn, None)
575 575
576 576 def _matches(self, match):
577 577 '''generate a new manifest filtered by the match argument'''
578 578 if match.always():
579 579 return self.copy()
580 580
581 581 if self._filesfastpath(match):
582 582 m = manifestdict()
583 583 lm = self._lm
584 584 for fn in match.files():
585 585 if fn in lm:
586 586 m._lm[fn] = lm[fn]
587 587 return m
588 588
589 589 m = manifestdict()
590 590 m._lm = self._lm.filtercopy(match)
591 591 return m
592 592
593 593 def diff(self, m2, match=None, clean=False):
594 594 """Finds changes between the current manifest and m2.
595 595
596 596 Args:
597 597 m2: the manifest to which this manifest should be compared.
598 598 clean: if true, include files unchanged between these manifests
599 599 with a None value in the returned dictionary.
600 600
601 601 The result is returned as a dict with filename as key and
602 602 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
603 603 nodeid in the current/other manifest and fl1/fl2 is the flag
604 604 in the current/other manifest. Where the file does not exist,
605 605 the nodeid will be None and the flags will be the empty
606 606 string.
607 607 """
608 608 if match:
609 609 m1 = self._matches(match)
610 610 m2 = m2._matches(match)
611 611 return m1.diff(m2, clean=clean)
612 612 return self._lm.diff(m2._lm, clean)
613 613
614 614 def setflag(self, key, flag):
615 615 if flag not in _manifestflags:
616 616 raise TypeError(b"Invalid manifest flag set.")
617 617 self._lm[key] = self[key], flag
618 618
619 619 def get(self, key, default=None):
620 620 try:
621 621 return self._lm[key][0]
622 622 except KeyError:
623 623 return default
624 624
625 625 def flags(self, key):
626 626 try:
627 627 return self._lm[key][1]
628 628 except KeyError:
629 629 return b''
630 630
631 631 def copy(self):
632 632 c = manifestdict()
633 633 c._lm = self._lm.copy()
634 634 return c
635 635
636 636 def items(self):
637 637 return (x[:2] for x in self._lm.iterentries())
638 638
639 639 def iteritems(self):
640 640 return (x[:2] for x in self._lm.iterentries())
641 641
642 642 def iterentries(self):
643 643 return self._lm.iterentries()
644 644
645 645 def text(self):
646 646 # most likely uses native version
647 647 return self._lm.text()
648 648
649 649 def fastdelta(self, base, changes):
650 650 """Given a base manifest text as a bytearray and a list of changes
651 651 relative to that text, compute a delta that can be used by revlog.
652 652 """
653 653 delta = []
654 654 dstart = None
655 655 dend = None
656 656 dline = [b""]
657 657 start = 0
658 658 # zero copy representation of base as a buffer
659 659 addbuf = util.buffer(base)
660 660
661 661 changes = list(changes)
662 662 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
663 663 # start with a readonly loop that finds the offset of
664 664 # each line and creates the deltas
665 665 for f, todelete in changes:
666 666 # bs will either be the index of the item or the insert point
667 667 start, end = _msearch(addbuf, f, start)
668 668 if not todelete:
669 669 h, fl = self._lm[f]
670 670 l = b"%s\0%s%s\n" % (f, hex(h), fl)
671 671 else:
672 672 if start == end:
673 673 # item we want to delete was not found, error out
674 674 raise AssertionError(
675 675 _(b"failed to remove %s from manifest") % f
676 676 )
677 677 l = b""
678 678 if dstart is not None and dstart <= start and dend >= start:
679 679 if dend < end:
680 680 dend = end
681 681 if l:
682 682 dline.append(l)
683 683 else:
684 684 if dstart is not None:
685 685 delta.append([dstart, dend, b"".join(dline)])
686 686 dstart = start
687 687 dend = end
688 688 dline = [l]
689 689
690 690 if dstart is not None:
691 691 delta.append([dstart, dend, b"".join(dline)])
692 692 # apply the delta to the base, and get a delta for addrevision
693 693 deltatext, arraytext = _addlistdelta(base, delta)
694 694 else:
695 695 # For large changes, it's much cheaper to just build the text and
696 696 # diff it.
697 697 arraytext = bytearray(self.text())
698 698 deltatext = mdiff.textdiff(
699 699 util.buffer(base), util.buffer(arraytext)
700 700 )
701 701
702 702 return arraytext, deltatext
703 703
704 704
705 705 def _msearch(m, s, lo=0, hi=None):
706 706 """return a tuple (start, end) that says where to find s within m.
707 707
708 708 If the string is found m[start:end] are the line containing
709 709 that string. If start == end the string was not found and
710 710 they indicate the proper sorted insertion point.
711 711
712 712 m should be a buffer, a memoryview or a byte string.
713 713 s is a byte string"""
714 714
715 715 def advance(i, c):
716 716 while i < lenm and m[i : i + 1] != c:
717 717 i += 1
718 718 return i
719 719
720 720 if not s:
721 721 return (lo, lo)
722 722 lenm = len(m)
723 723 if not hi:
724 724 hi = lenm
725 725 while lo < hi:
726 726 mid = (lo + hi) // 2
727 727 start = mid
728 728 while start > 0 and m[start - 1 : start] != b'\n':
729 729 start -= 1
730 730 end = advance(start, b'\0')
731 731 if bytes(m[start:end]) < s:
732 732 # we know that after the null there are 40 bytes of sha1
733 733 # this translates to the bisect lo = mid + 1
734 734 lo = advance(end + 40, b'\n') + 1
735 735 else:
736 736 # this translates to the bisect hi = mid
737 737 hi = start
738 738 end = advance(lo, b'\0')
739 739 found = m[lo:end]
740 740 if s == found:
741 741 # we know that after the null there are 40 bytes of sha1
742 742 end = advance(end + 40, b'\n')
743 743 return (lo, end + 1)
744 744 else:
745 745 return (lo, lo)
746 746
747 747
748 748 def _checkforbidden(l):
749 749 """Check filenames for illegal characters."""
750 750 for f in l:
751 751 if b'\n' in f or b'\r' in f:
752 752 raise error.StorageError(
753 753 _(b"'\\n' and '\\r' disallowed in filenames: %r")
754 754 % pycompat.bytestr(f)
755 755 )
756 756
757 757
758 758 # apply the changes collected during the bisect loop to our addlist
759 759 # return a delta suitable for addrevision
760 760 def _addlistdelta(addlist, x):
761 761 # for large addlist arrays, building a new array is cheaper
762 762 # than repeatedly modifying the existing one
763 763 currentposition = 0
764 764 newaddlist = bytearray()
765 765
766 766 for start, end, content in x:
767 767 newaddlist += addlist[currentposition:start]
768 768 if content:
769 769 newaddlist += bytearray(content)
770 770
771 771 currentposition = end
772 772
773 773 newaddlist += addlist[currentposition:]
774 774
775 775 deltatext = b"".join(
776 776 struct.pack(b">lll", start, end, len(content)) + content
777 777 for start, end, content in x
778 778 )
779 779 return deltatext, newaddlist
780 780
781 781
782 782 def _splittopdir(f):
783 783 if b'/' in f:
784 784 dir, subpath = f.split(b'/', 1)
785 785 return dir + b'/', subpath
786 786 else:
787 787 return b'', f
788 788
789 789
790 790 _noop = lambda s: None
791 791
792 792
793 793 @interfaceutil.implementer(repository.imanifestdict)
794 794 class treemanifest(object):
795 795 def __init__(self, dir=b'', text=b''):
796 796 self._dir = dir
797 797 self._node = nullid
798 798 self._loadfunc = _noop
799 799 self._copyfunc = _noop
800 800 self._dirty = False
801 801 self._dirs = {}
802 802 self._lazydirs = {}
803 803 # Using _lazymanifest here is a little slower than plain old dicts
804 804 self._files = {}
805 805 self._flags = {}
806 806 if text:
807 807
808 808 def readsubtree(subdir, subm):
809 809 raise AssertionError(
810 810 b'treemanifest constructor only accepts flat manifests'
811 811 )
812 812
813 813 self.parse(text, readsubtree)
814 814 self._dirty = True # Mark flat manifest dirty after parsing
815 815
816 816 def _subpath(self, path):
817 817 return self._dir + path
818 818
819 819 def _loadalllazy(self):
820 820 selfdirs = self._dirs
821 821 subpath = self._subpath
822 822 for d, (node, readsubtree, docopy) in pycompat.iteritems(
823 823 self._lazydirs
824 824 ):
825 825 if docopy:
826 826 selfdirs[d] = readsubtree(subpath(d), node).copy()
827 827 else:
828 828 selfdirs[d] = readsubtree(subpath(d), node)
829 829 self._lazydirs = {}
830 830
831 831 def _loadlazy(self, d):
832 832 v = self._lazydirs.get(d)
833 833 if v:
834 834 node, readsubtree, docopy = v
835 835 if docopy:
836 836 self._dirs[d] = readsubtree(self._subpath(d), node).copy()
837 837 else:
838 838 self._dirs[d] = readsubtree(self._subpath(d), node)
839 839 del self._lazydirs[d]
840 840
841 841 def _loadchildrensetlazy(self, visit):
842 842 if not visit:
843 843 return None
844 844 if visit == b'all' or visit == b'this':
845 845 self._loadalllazy()
846 846 return None
847 847
848 848 loadlazy = self._loadlazy
849 849 for k in visit:
850 850 loadlazy(k + b'/')
851 851 return visit
852 852
853 853 def _loaddifflazy(self, t1, t2):
854 854 """load items in t1 and t2 if they're needed for diffing.
855 855
856 856 The criteria currently is:
857 857 - if it's not present in _lazydirs in either t1 or t2, load it in the
858 858 other (it may already be loaded or it may not exist, doesn't matter)
859 859 - if it's present in _lazydirs in both, compare the nodeid; if it
860 860 differs, load it in both
861 861 """
862 862 toloadlazy = []
863 863 for d, v1 in pycompat.iteritems(t1._lazydirs):
864 864 v2 = t2._lazydirs.get(d)
865 865 if not v2 or v2[0] != v1[0]:
866 866 toloadlazy.append(d)
867 867 for d, v1 in pycompat.iteritems(t2._lazydirs):
868 868 if d not in t1._lazydirs:
869 869 toloadlazy.append(d)
870 870
871 871 for d in toloadlazy:
872 872 t1._loadlazy(d)
873 873 t2._loadlazy(d)
874 874
875 875 def __len__(self):
876 876 self._load()
877 877 size = len(self._files)
878 878 self._loadalllazy()
879 879 for m in self._dirs.values():
880 880 size += m.__len__()
881 881 return size
882 882
883 883 def __nonzero__(self):
884 884 # Faster than "__len() != 0" since it avoids loading sub-manifests
885 885 return not self._isempty()
886 886
887 887 __bool__ = __nonzero__
888 888
889 889 def _isempty(self):
890 890 self._load() # for consistency; already loaded by all callers
891 891 # See if we can skip loading everything.
892 892 if self._files or (
893 893 self._dirs and any(not m._isempty() for m in self._dirs.values())
894 894 ):
895 895 return False
896 896 self._loadalllazy()
897 897 return not self._dirs or all(m._isempty() for m in self._dirs.values())
898 898
899 899 @encoding.strmethod
900 900 def __repr__(self):
901 901 return (
902 902 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
903 903 % (
904 904 self._dir,
905 905 hex(self._node),
906 906 bool(self._loadfunc is _noop),
907 907 self._dirty,
908 908 id(self),
909 909 )
910 910 )
911 911
912 912 def dir(self):
913 913 """The directory that this tree manifest represents, including a
914 914 trailing '/'. Empty string for the repo root directory."""
915 915 return self._dir
916 916
917 917 def node(self):
918 918 """This node of this instance. nullid for unsaved instances. Should
919 919 be updated when the instance is read or written from a revlog.
920 920 """
921 921 assert not self._dirty
922 922 return self._node
923 923
924 924 def setnode(self, node):
925 925 self._node = node
926 926 self._dirty = False
927 927
928 928 def iterentries(self):
929 929 self._load()
930 930 self._loadalllazy()
931 931 for p, n in sorted(
932 932 itertools.chain(self._dirs.items(), self._files.items())
933 933 ):
934 934 if p in self._files:
935 935 yield self._subpath(p), n, self._flags.get(p, b'')
936 936 else:
937 937 for x in n.iterentries():
938 938 yield x
939 939
940 940 def items(self):
941 941 self._load()
942 942 self._loadalllazy()
943 943 for p, n in sorted(
944 944 itertools.chain(self._dirs.items(), self._files.items())
945 945 ):
946 946 if p in self._files:
947 947 yield self._subpath(p), n
948 948 else:
949 949 for f, sn in pycompat.iteritems(n):
950 950 yield f, sn
951 951
952 952 iteritems = items
953 953
954 954 def iterkeys(self):
955 955 self._load()
956 956 self._loadalllazy()
957 957 for p in sorted(itertools.chain(self._dirs, self._files)):
958 958 if p in self._files:
959 959 yield self._subpath(p)
960 960 else:
961 961 for f in self._dirs[p]:
962 962 yield f
963 963
964 964 def keys(self):
965 965 return list(self.iterkeys())
966 966
967 967 def __iter__(self):
968 968 return self.iterkeys()
969 969
970 970 def __contains__(self, f):
971 971 if f is None:
972 972 return False
973 973 self._load()
974 974 dir, subpath = _splittopdir(f)
975 975 if dir:
976 976 self._loadlazy(dir)
977 977
978 978 if dir not in self._dirs:
979 979 return False
980 980
981 981 return self._dirs[dir].__contains__(subpath)
982 982 else:
983 983 return f in self._files
984 984
985 985 def get(self, f, default=None):
986 986 self._load()
987 987 dir, subpath = _splittopdir(f)
988 988 if dir:
989 989 self._loadlazy(dir)
990 990
991 991 if dir not in self._dirs:
992 992 return default
993 993 return self._dirs[dir].get(subpath, default)
994 994 else:
995 995 return self._files.get(f, default)
996 996
997 997 def __getitem__(self, f):
998 998 self._load()
999 999 dir, subpath = _splittopdir(f)
1000 1000 if dir:
1001 1001 self._loadlazy(dir)
1002 1002
1003 1003 return self._dirs[dir].__getitem__(subpath)
1004 1004 else:
1005 1005 return self._files[f]
1006 1006
1007 1007 def flags(self, f):
1008 1008 self._load()
1009 1009 dir, subpath = _splittopdir(f)
1010 1010 if dir:
1011 1011 self._loadlazy(dir)
1012 1012
1013 1013 if dir not in self._dirs:
1014 1014 return b''
1015 1015 return self._dirs[dir].flags(subpath)
1016 1016 else:
1017 1017 if f in self._lazydirs or f in self._dirs:
1018 1018 return b''
1019 1019 return self._flags.get(f, b'')
1020 1020
1021 1021 def find(self, f):
1022 1022 self._load()
1023 1023 dir, subpath = _splittopdir(f)
1024 1024 if dir:
1025 1025 self._loadlazy(dir)
1026 1026
1027 1027 return self._dirs[dir].find(subpath)
1028 1028 else:
1029 1029 return self._files[f], self._flags.get(f, b'')
1030 1030
1031 1031 def __delitem__(self, f):
1032 1032 self._load()
1033 1033 dir, subpath = _splittopdir(f)
1034 1034 if dir:
1035 1035 self._loadlazy(dir)
1036 1036
1037 1037 self._dirs[dir].__delitem__(subpath)
1038 1038 # If the directory is now empty, remove it
1039 1039 if self._dirs[dir]._isempty():
1040 1040 del self._dirs[dir]
1041 1041 else:
1042 1042 del self._files[f]
1043 1043 if f in self._flags:
1044 1044 del self._flags[f]
1045 1045 self._dirty = True
1046 1046
1047 1047 def __setitem__(self, f, n):
1048 1048 assert n is not None
1049 1049 self._load()
1050 1050 dir, subpath = _splittopdir(f)
1051 1051 if dir:
1052 1052 self._loadlazy(dir)
1053 1053 if dir not in self._dirs:
1054 1054 self._dirs[dir] = treemanifest(self._subpath(dir))
1055 1055 self._dirs[dir].__setitem__(subpath, n)
1056 1056 else:
1057 1057 # manifest nodes are either 20 bytes or 32 bytes,
1058 1058 # depending on the hash in use. Assert this as historically
1059 1059 # sometimes extra bytes were added.
1060 1060 assert len(n) in (20, 32)
1061 1061 self._files[f] = n
1062 1062 self._dirty = True
1063 1063
1064 1064 def _load(self):
1065 1065 if self._loadfunc is not _noop:
1066 1066 lf, self._loadfunc = self._loadfunc, _noop
1067 1067 lf(self)
1068 1068 elif self._copyfunc is not _noop:
1069 1069 cf, self._copyfunc = self._copyfunc, _noop
1070 1070 cf(self)
1071 1071
1072 1072 def setflag(self, f, flags):
1073 1073 """Set the flags (symlink, executable) for path f."""
1074 1074 if flags not in _manifestflags:
1075 1075 raise TypeError(b"Invalid manifest flag set.")
1076 1076 self._load()
1077 1077 dir, subpath = _splittopdir(f)
1078 1078 if dir:
1079 1079 self._loadlazy(dir)
1080 1080 if dir not in self._dirs:
1081 1081 self._dirs[dir] = treemanifest(self._subpath(dir))
1082 1082 self._dirs[dir].setflag(subpath, flags)
1083 1083 else:
1084 1084 self._flags[f] = flags
1085 1085 self._dirty = True
1086 1086
1087 1087 def copy(self):
1088 1088 copy = treemanifest(self._dir)
1089 1089 copy._node = self._node
1090 1090 copy._dirty = self._dirty
1091 1091 if self._copyfunc is _noop:
1092 1092
1093 1093 def _copyfunc(s):
1094 1094 self._load()
1095 1095 s._lazydirs = {
1096 1096 d: (n, r, True)
1097 1097 for d, (n, r, c) in pycompat.iteritems(self._lazydirs)
1098 1098 }
1099 1099 sdirs = s._dirs
1100 1100 for d, v in pycompat.iteritems(self._dirs):
1101 1101 sdirs[d] = v.copy()
1102 1102 s._files = dict.copy(self._files)
1103 1103 s._flags = dict.copy(self._flags)
1104 1104
1105 1105 if self._loadfunc is _noop:
1106 1106 _copyfunc(copy)
1107 1107 else:
1108 1108 copy._copyfunc = _copyfunc
1109 1109 else:
1110 1110 copy._copyfunc = self._copyfunc
1111 1111 return copy
1112 1112
1113 1113 def filesnotin(self, m2, match=None):
1114 1114 '''Set of files in this manifest that are not in the other'''
1115 1115 if match and not match.always():
1116 1116 m1 = self._matches(match)
1117 1117 m2 = m2._matches(match)
1118 1118 return m1.filesnotin(m2)
1119 1119
1120 1120 files = set()
1121 1121
1122 1122 def _filesnotin(t1, t2):
1123 1123 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1124 1124 return
1125 1125 t1._load()
1126 1126 t2._load()
1127 1127 self._loaddifflazy(t1, t2)
1128 1128 for d, m1 in pycompat.iteritems(t1._dirs):
1129 1129 if d in t2._dirs:
1130 1130 m2 = t2._dirs[d]
1131 1131 _filesnotin(m1, m2)
1132 1132 else:
1133 1133 files.update(m1.iterkeys())
1134 1134
1135 1135 for fn in t1._files:
1136 1136 if fn not in t2._files:
1137 1137 files.add(t1._subpath(fn))
1138 1138
1139 1139 _filesnotin(self, m2)
1140 1140 return files
1141 1141
1142 1142 @propertycache
1143 1143 def _alldirs(self):
1144 1144 return pathutil.dirs(self)
1145 1145
1146 1146 def dirs(self):
1147 1147 return self._alldirs
1148 1148
1149 1149 def hasdir(self, dir):
1150 1150 self._load()
1151 1151 topdir, subdir = _splittopdir(dir)
1152 1152 if topdir:
1153 1153 self._loadlazy(topdir)
1154 1154 if topdir in self._dirs:
1155 1155 return self._dirs[topdir].hasdir(subdir)
1156 1156 return False
1157 1157 dirslash = dir + b'/'
1158 1158 return dirslash in self._dirs or dirslash in self._lazydirs
1159 1159
1160 1160 def walk(self, match):
1161 1161 """Generates matching file names.
1162 1162
1163 1163 It also reports nonexistent files by marking them bad with match.bad().
1164 1164 """
1165 1165 if match.always():
1166 1166 for f in iter(self):
1167 1167 yield f
1168 1168 return
1169 1169
1170 1170 fset = set(match.files())
1171 1171
1172 1172 for fn in self._walk(match):
1173 1173 if fn in fset:
1174 1174 # specified pattern is the exact name
1175 1175 fset.remove(fn)
1176 1176 yield fn
1177 1177
1178 1178 # for dirstate.walk, files=[''] means "walk the whole tree".
1179 1179 # follow that here, too
1180 1180 fset.discard(b'')
1181 1181
1182 1182 for fn in sorted(fset):
1183 1183 if not self.hasdir(fn):
1184 1184 match.bad(fn, None)
1185 1185
1186 1186 def _walk(self, match):
1187 1187 '''Recursively generates matching file names for walk().'''
1188 1188 visit = match.visitchildrenset(self._dir[:-1])
1189 1189 if not visit:
1190 1190 return
1191 1191
1192 1192 # yield this dir's files and walk its submanifests
1193 1193 self._load()
1194 1194 visit = self._loadchildrensetlazy(visit)
1195 1195 for p in sorted(list(self._dirs) + list(self._files)):
1196 1196 if p in self._files:
1197 1197 fullp = self._subpath(p)
1198 1198 if match(fullp):
1199 1199 yield fullp
1200 1200 else:
1201 1201 if not visit or p[:-1] in visit:
1202 1202 for f in self._dirs[p]._walk(match):
1203 1203 yield f
1204 1204
1205 1205 def _matches(self, match):
1206 1206 """recursively generate a new manifest filtered by the match argument."""
1207 1207 if match.always():
1208 1208 return self.copy()
1209 1209 return self._matches_inner(match)
1210 1210
1211 1211 def _matches_inner(self, match):
1212 1212 if match.always():
1213 1213 return self.copy()
1214 1214
1215 1215 visit = match.visitchildrenset(self._dir[:-1])
1216 1216 if visit == b'all':
1217 1217 return self.copy()
1218 1218 ret = treemanifest(self._dir)
1219 1219 if not visit:
1220 1220 return ret
1221 1221
1222 1222 self._load()
1223 1223 for fn in self._files:
1224 1224 # While visitchildrenset *usually* lists only subdirs, this is
1225 1225 # actually up to the matcher and may have some files in the set().
1226 1226 # If visit == 'this', we should obviously look at the files in this
1227 1227 # directory; if visit is a set, and fn is in it, we should inspect
1228 1228 # fn (but no need to inspect things not in the set).
1229 1229 if visit != b'this' and fn not in visit:
1230 1230 continue
1231 1231 fullp = self._subpath(fn)
1232 1232 # visitchildrenset isn't perfect, we still need to call the regular
1233 1233 # matcher code to further filter results.
1234 1234 if not match(fullp):
1235 1235 continue
1236 1236 ret._files[fn] = self._files[fn]
1237 1237 if fn in self._flags:
1238 1238 ret._flags[fn] = self._flags[fn]
1239 1239
1240 1240 visit = self._loadchildrensetlazy(visit)
1241 1241 for dir, subm in pycompat.iteritems(self._dirs):
1242 1242 if visit and dir[:-1] not in visit:
1243 1243 continue
1244 1244 m = subm._matches_inner(match)
1245 1245 if not m._isempty():
1246 1246 ret._dirs[dir] = m
1247 1247
1248 1248 if not ret._isempty():
1249 1249 ret._dirty = True
1250 1250 return ret
1251 1251
1252 1252 def fastdelta(self, base, changes):
1253 1253 raise FastdeltaUnavailable()
1254 1254
1255 1255 def diff(self, m2, match=None, clean=False):
1256 1256 """Finds changes between the current manifest and m2.
1257 1257
1258 1258 Args:
1259 1259 m2: the manifest to which this manifest should be compared.
1260 1260 clean: if true, include files unchanged between these manifests
1261 1261 with a None value in the returned dictionary.
1262 1262
1263 1263 The result is returned as a dict with filename as key and
1264 1264 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1265 1265 nodeid in the current/other manifest and fl1/fl2 is the flag
1266 1266 in the current/other manifest. Where the file does not exist,
1267 1267 the nodeid will be None and the flags will be the empty
1268 1268 string.
1269 1269 """
1270 1270 if match and not match.always():
1271 1271 m1 = self._matches(match)
1272 1272 m2 = m2._matches(match)
1273 1273 return m1.diff(m2, clean=clean)
1274 1274 result = {}
1275 1275 emptytree = treemanifest()
1276 1276
1277 1277 def _iterativediff(t1, t2, stack):
1278 1278 """compares two tree manifests and append new tree-manifests which
1279 1279 needs to be compared to stack"""
1280 1280 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1281 1281 return
1282 1282 t1._load()
1283 1283 t2._load()
1284 1284 self._loaddifflazy(t1, t2)
1285 1285
1286 1286 for d, m1 in pycompat.iteritems(t1._dirs):
1287 1287 m2 = t2._dirs.get(d, emptytree)
1288 1288 stack.append((m1, m2))
1289 1289
1290 1290 for d, m2 in pycompat.iteritems(t2._dirs):
1291 1291 if d not in t1._dirs:
1292 1292 stack.append((emptytree, m2))
1293 1293
1294 1294 for fn, n1 in pycompat.iteritems(t1._files):
1295 1295 fl1 = t1._flags.get(fn, b'')
1296 1296 n2 = t2._files.get(fn, None)
1297 1297 fl2 = t2._flags.get(fn, b'')
1298 1298 if n1 != n2 or fl1 != fl2:
1299 1299 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1300 1300 elif clean:
1301 1301 result[t1._subpath(fn)] = None
1302 1302
1303 1303 for fn, n2 in pycompat.iteritems(t2._files):
1304 1304 if fn not in t1._files:
1305 1305 fl2 = t2._flags.get(fn, b'')
1306 1306 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1307 1307
1308 1308 stackls = []
1309 1309 _iterativediff(self, m2, stackls)
1310 1310 while stackls:
1311 1311 t1, t2 = stackls.pop()
1312 1312 # stackls is populated in the function call
1313 1313 _iterativediff(t1, t2, stackls)
1314 1314 return result
1315 1315
1316 1316 def unmodifiedsince(self, m2):
1317 1317 return not self._dirty and not m2._dirty and self._node == m2._node
1318 1318
1319 1319 def parse(self, text, readsubtree):
1320 1320 selflazy = self._lazydirs
1321 1321 for f, n, fl in _parse(text):
1322 1322 if fl == b't':
1323 1323 f = f + b'/'
1324 1324 # False below means "doesn't need to be copied" and can use the
1325 1325 # cached value from readsubtree directly.
1326 1326 selflazy[f] = (n, readsubtree, False)
1327 1327 elif b'/' in f:
1328 1328 # This is a flat manifest, so use __setitem__ and setflag rather
1329 1329 # than assigning directly to _files and _flags, so we can
1330 1330 # assign a path in a subdirectory, and to mark dirty (compared
1331 1331 # to nullid).
1332 1332 self[f] = n
1333 1333 if fl:
1334 1334 self.setflag(f, fl)
1335 1335 else:
1336 1336 # Assigning to _files and _flags avoids marking as dirty,
1337 1337 # and should be a little faster.
1338 1338 self._files[f] = n
1339 1339 if fl:
1340 1340 self._flags[f] = fl
1341 1341
1342 1342 def text(self):
1343 1343 """Get the full data of this manifest as a bytestring."""
1344 1344 self._load()
1345 1345 return _text(self.iterentries())
1346 1346
1347 1347 def dirtext(self):
1348 1348 """Get the full data of this directory as a bytestring. Make sure that
1349 1349 any submanifests have been written first, so their nodeids are correct.
1350 1350 """
1351 1351 self._load()
1352 1352 flags = self.flags
1353 1353 lazydirs = [
1354 1354 (d[:-1], v[0], b't') for d, v in pycompat.iteritems(self._lazydirs)
1355 1355 ]
1356 1356 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1357 1357 files = [(f, self._files[f], flags(f)) for f in self._files]
1358 1358 return _text(sorted(dirs + files + lazydirs))
1359 1359
1360 1360 def read(self, gettext, readsubtree):
1361 1361 def _load_for_read(s):
1362 1362 s.parse(gettext(), readsubtree)
1363 1363 s._dirty = False
1364 1364
1365 1365 self._loadfunc = _load_for_read
1366 1366
1367 1367 def writesubtrees(self, m1, m2, writesubtree, match):
1368 1368 self._load() # for consistency; should never have any effect here
1369 1369 m1._load()
1370 1370 m2._load()
1371 1371 emptytree = treemanifest()
1372 1372
1373 1373 def getnode(m, d):
1374 1374 ld = m._lazydirs.get(d)
1375 1375 if ld:
1376 1376 return ld[0]
1377 1377 return m._dirs.get(d, emptytree)._node
1378 1378
1379 1379 # let's skip investigating things that `match` says we do not need.
1380 1380 visit = match.visitchildrenset(self._dir[:-1])
1381 1381 visit = self._loadchildrensetlazy(visit)
1382 1382 if visit == b'this' or visit == b'all':
1383 1383 visit = None
1384 1384 for d, subm in pycompat.iteritems(self._dirs):
1385 1385 if visit and d[:-1] not in visit:
1386 1386 continue
1387 1387 subp1 = getnode(m1, d)
1388 1388 subp2 = getnode(m2, d)
1389 1389 if subp1 == nullid:
1390 1390 subp1, subp2 = subp2, subp1
1391 1391 writesubtree(subm, subp1, subp2, match)
1392 1392
1393 1393 def walksubtrees(self, matcher=None):
1394 1394 """Returns an iterator of the subtrees of this manifest, including this
1395 1395 manifest itself.
1396 1396
1397 1397 If `matcher` is provided, it only returns subtrees that match.
1398 1398 """
1399 1399 if matcher and not matcher.visitdir(self._dir[:-1]):
1400 1400 return
1401 1401 if not matcher or matcher(self._dir[:-1]):
1402 1402 yield self
1403 1403
1404 1404 self._load()
1405 1405 # OPT: use visitchildrenset to avoid loading everything.
1406 1406 self._loadalllazy()
1407 1407 for d, subm in pycompat.iteritems(self._dirs):
1408 1408 for subtree in subm.walksubtrees(matcher=matcher):
1409 1409 yield subtree
1410 1410
1411 1411
1412 1412 class manifestfulltextcache(util.lrucachedict):
1413 1413 """File-backed LRU cache for the manifest cache
1414 1414
1415 1415 File consists of entries, up to EOF:
1416 1416
1417 1417 - 20 bytes node, 4 bytes length, <length> manifest data
1418 1418
1419 1419 These are written in reverse cache order (oldest to newest).
1420 1420
1421 1421 """
1422 1422
1423 1423 _file = b'manifestfulltextcache'
1424 1424
1425 1425 def __init__(self, max):
1426 1426 super(manifestfulltextcache, self).__init__(max)
1427 1427 self._dirty = False
1428 1428 self._read = False
1429 1429 self._opener = None
1430 1430
1431 1431 def read(self):
1432 1432 if self._read or self._opener is None:
1433 1433 return
1434 1434
1435 1435 try:
1436 1436 with self._opener(self._file) as fp:
1437 1437 set = super(manifestfulltextcache, self).__setitem__
1438 1438 # ignore trailing data, this is a cache, corruption is skipped
1439 1439 while True:
1440 1440 # TODO do we need to do work here for sha1 portability?
1441 1441 node = fp.read(20)
1442 1442 if len(node) < 20:
1443 1443 break
1444 1444 try:
1445 1445 size = struct.unpack(b'>L', fp.read(4))[0]
1446 1446 except struct.error:
1447 1447 break
1448 1448 value = bytearray(fp.read(size))
1449 1449 if len(value) != size:
1450 1450 break
1451 1451 set(node, value)
1452 1452 except IOError:
1453 1453 # the file is allowed to be missing
1454 1454 pass
1455 1455
1456 1456 self._read = True
1457 1457 self._dirty = False
1458 1458
1459 1459 def write(self):
1460 1460 if not self._dirty or self._opener is None:
1461 1461 return
1462 1462 # rotate backwards to the first used node
1463 1463 try:
1464 1464 with self._opener(
1465 1465 self._file, b'w', atomictemp=True, checkambig=True
1466 1466 ) as fp:
1467 1467 node = self._head.prev
1468 1468 while True:
1469 1469 if node.key in self._cache:
1470 1470 fp.write(node.key)
1471 1471 fp.write(struct.pack(b'>L', len(node.value)))
1472 1472 fp.write(node.value)
1473 1473 if node is self._head:
1474 1474 break
1475 1475 node = node.prev
1476 1476 except IOError:
1477 1477 # We could not write the cache (eg: permission error)
1478 1478 # the content can be missing.
1479 1479 #
1480 1480 # We could try harder and see if we could recreate a wcache
1481 1481 # directory were we coudl write too.
1482 1482 #
1483 1483 # XXX the error pass silently, having some way to issue an error
1484 1484 # log `ui.log` would be nice.
1485 1485 pass
1486 1486
1487 1487 def __len__(self):
1488 1488 if not self._read:
1489 1489 self.read()
1490 1490 return super(manifestfulltextcache, self).__len__()
1491 1491
1492 1492 def __contains__(self, k):
1493 1493 if not self._read:
1494 1494 self.read()
1495 1495 return super(manifestfulltextcache, self).__contains__(k)
1496 1496
1497 1497 def __iter__(self):
1498 1498 if not self._read:
1499 1499 self.read()
1500 1500 return super(manifestfulltextcache, self).__iter__()
1501 1501
1502 1502 def __getitem__(self, k):
1503 1503 if not self._read:
1504 1504 self.read()
1505 1505 # the cache lru order can change on read
1506 1506 setdirty = self._cache.get(k) is not self._head
1507 1507 value = super(manifestfulltextcache, self).__getitem__(k)
1508 1508 if setdirty:
1509 1509 self._dirty = True
1510 1510 return value
1511 1511
1512 1512 def __setitem__(self, k, v):
1513 1513 if not self._read:
1514 1514 self.read()
1515 1515 super(manifestfulltextcache, self).__setitem__(k, v)
1516 1516 self._dirty = True
1517 1517
1518 1518 def __delitem__(self, k):
1519 1519 if not self._read:
1520 1520 self.read()
1521 1521 super(manifestfulltextcache, self).__delitem__(k)
1522 1522 self._dirty = True
1523 1523
1524 1524 def get(self, k, default=None):
1525 1525 if not self._read:
1526 1526 self.read()
1527 1527 return super(manifestfulltextcache, self).get(k, default=default)
1528 1528
1529 1529 def clear(self, clear_persisted_data=False):
1530 1530 super(manifestfulltextcache, self).clear()
1531 1531 if clear_persisted_data:
1532 1532 self._dirty = True
1533 1533 self.write()
1534 1534 self._read = False
1535 1535
1536 1536
1537 1537 # and upper bound of what we expect from compression
1538 1538 # (real live value seems to be "3")
1539 1539 MAXCOMPRESSION = 3
1540 1540
1541 1541
1542 1542 class FastdeltaUnavailable(Exception):
1543 1543 """Exception raised when fastdelta isn't usable on a manifest."""
1544 1544
1545 1545
1546 1546 @interfaceutil.implementer(repository.imanifeststorage)
1547 1547 class manifestrevlog(object):
1548 1548 """A revlog that stores manifest texts. This is responsible for caching the
1549 1549 full-text manifest contents.
1550 1550 """
1551 1551
1552 1552 def __init__(
1553 1553 self,
1554 1554 opener,
1555 1555 tree=b'',
1556 1556 dirlogcache=None,
1557 1557 indexfile=None,
1558 1558 treemanifest=False,
1559 1559 ):
1560 1560 """Constructs a new manifest revlog
1561 1561
1562 1562 `indexfile` - used by extensions to have two manifests at once, like
1563 1563 when transitioning between flatmanifeset and treemanifests.
1564 1564
1565 1565 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1566 1566 options can also be used to make this a tree manifest revlog. The opener
1567 1567 option takes precedence, so if it is set to True, we ignore whatever
1568 1568 value is passed in to the constructor.
1569 1569 """
1570 1570 # During normal operations, we expect to deal with not more than four
1571 1571 # revs at a time (such as during commit --amend). When rebasing large
1572 1572 # stacks of commits, the number can go up, hence the config knob below.
1573 1573 cachesize = 4
1574 1574 optiontreemanifest = False
1575 1575 opts = getattr(opener, 'options', None)
1576 1576 if opts is not None:
1577 1577 cachesize = opts.get(b'manifestcachesize', cachesize)
1578 1578 optiontreemanifest = opts.get(b'treemanifest', False)
1579 1579
1580 1580 self._treeondisk = optiontreemanifest or treemanifest
1581 1581
1582 1582 self._fulltextcache = manifestfulltextcache(cachesize)
1583 1583
1584 1584 if tree:
1585 1585 assert self._treeondisk, b'opts is %r' % opts
1586 1586
1587 1587 if indexfile is None:
1588 1588 indexfile = b'00manifest.i'
1589 1589 if tree:
1590 1590 indexfile = b"meta/" + tree + indexfile
1591 1591
1592 1592 self.tree = tree
1593 1593
1594 1594 # The dirlogcache is kept on the root manifest log
1595 1595 if tree:
1596 1596 self._dirlogcache = dirlogcache
1597 1597 else:
1598 1598 self._dirlogcache = {b'': self}
1599 1599
1600 1600 self._revlog = revlog.revlog(
1601 1601 opener,
1602 1602 indexfile,
1603 1603 # only root indexfile is cached
1604 1604 checkambig=not bool(tree),
1605 1605 mmaplargeindex=True,
1606 1606 upperboundcomp=MAXCOMPRESSION,
1607 1607 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
1608 1608 )
1609 1609
1610 1610 self.index = self._revlog.index
1611 1611 self.version = self._revlog.version
1612 1612 self._generaldelta = self._revlog._generaldelta
1613 1613
1614 1614 def _setupmanifestcachehooks(self, repo):
1615 1615 """Persist the manifestfulltextcache on lock release"""
1616 1616 if not util.safehasattr(repo, b'_wlockref'):
1617 1617 return
1618 1618
1619 1619 self._fulltextcache._opener = repo.wcachevfs
1620 1620 if repo._currentlock(repo._wlockref) is None:
1621 1621 return
1622 1622
1623 1623 reporef = weakref.ref(repo)
1624 1624 manifestrevlogref = weakref.ref(self)
1625 1625
1626 1626 def persistmanifestcache(success):
1627 1627 # Repo is in an unknown state, do not persist.
1628 1628 if not success:
1629 1629 return
1630 1630
1631 1631 repo = reporef()
1632 1632 self = manifestrevlogref()
1633 1633 if repo is None or self is None:
1634 1634 return
1635 1635 if repo.manifestlog.getstorage(b'') is not self:
1636 1636 # there's a different manifest in play now, abort
1637 1637 return
1638 1638 self._fulltextcache.write()
1639 1639
1640 1640 repo._afterlock(persistmanifestcache)
1641 1641
1642 1642 @property
1643 1643 def fulltextcache(self):
1644 1644 return self._fulltextcache
1645 1645
1646 1646 def clearcaches(self, clear_persisted_data=False):
1647 1647 self._revlog.clearcaches()
1648 1648 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1649 1649 self._dirlogcache = {self.tree: self}
1650 1650
1651 1651 def dirlog(self, d):
1652 1652 if d:
1653 1653 assert self._treeondisk
1654 1654 if d not in self._dirlogcache:
1655 1655 mfrevlog = manifestrevlog(
1656 1656 self.opener, d, self._dirlogcache, treemanifest=self._treeondisk
1657 1657 )
1658 1658 self._dirlogcache[d] = mfrevlog
1659 1659 return self._dirlogcache[d]
1660 1660
1661 1661 def add(
1662 1662 self,
1663 1663 m,
1664 1664 transaction,
1665 1665 link,
1666 1666 p1,
1667 1667 p2,
1668 1668 added,
1669 1669 removed,
1670 1670 readtree=None,
1671 1671 match=None,
1672 1672 ):
1673 1673 """add some manifest entry in to the manifest log
1674 1674
1675 1675 input:
1676 1676
1677 1677 m: the manifest dict we want to store
1678 1678 transaction: the open transaction
1679 1679 p1: manifest-node of p1
1680 1680 p2: manifest-node of p2
1681 1681 added: file added/changed compared to parent
1682 1682 removed: file removed compared to parent
1683 1683
1684 1684 tree manifest input:
1685 1685
1686 1686 readtree: a function to read a subtree
1687 1687 match: a filematcher for the subpart of the tree manifest
1688 1688 """
1689 1689 try:
1690 1690 if p1 not in self.fulltextcache:
1691 1691 raise FastdeltaUnavailable()
1692 1692 # If our first parent is in the manifest cache, we can
1693 1693 # compute a delta here using properties we know about the
1694 1694 # manifest up-front, which may save time later for the
1695 1695 # revlog layer.
1696 1696
1697 1697 _checkforbidden(added)
1698 1698 # combine the changed lists into one sorted iterator
1699 1699 work = heapq.merge(
1700 1700 [(x, False) for x in sorted(added)],
1701 1701 [(x, True) for x in sorted(removed)],
1702 1702 )
1703 1703
1704 1704 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1705 1705 cachedelta = self._revlog.rev(p1), deltatext
1706 1706 text = util.buffer(arraytext)
1707 1707 n = self._revlog.addrevision(
1708 1708 text, transaction, link, p1, p2, cachedelta
1709 1709 )
1710 1710 except FastdeltaUnavailable:
1711 1711 # The first parent manifest isn't already loaded or the
1712 1712 # manifest implementation doesn't support fastdelta, so
1713 1713 # we'll just encode a fulltext of the manifest and pass
1714 1714 # that through to the revlog layer, and let it handle the
1715 1715 # delta process.
1716 1716 if self._treeondisk:
1717 1717 assert readtree, b"readtree must be set for treemanifest writes"
1718 1718 assert match, b"match must be specified for treemanifest writes"
1719 1719 m1 = readtree(self.tree, p1)
1720 1720 m2 = readtree(self.tree, p2)
1721 1721 n = self._addtree(
1722 1722 m, transaction, link, m1, m2, readtree, match=match
1723 1723 )
1724 1724 arraytext = None
1725 1725 else:
1726 1726 text = m.text()
1727 1727 n = self._revlog.addrevision(text, transaction, link, p1, p2)
1728 1728 arraytext = bytearray(text)
1729 1729
1730 1730 if arraytext is not None:
1731 1731 self.fulltextcache[n] = arraytext
1732 1732
1733 1733 return n
1734 1734
1735 1735 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1736 1736 # If the manifest is unchanged compared to one parent,
1737 1737 # don't write a new revision
1738 1738 if self.tree != b'' and (
1739 1739 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1740 1740 ):
1741 1741 return m.node()
1742 1742
1743 1743 def writesubtree(subm, subp1, subp2, match):
1744 1744 sublog = self.dirlog(subm.dir())
1745 1745 sublog.add(
1746 1746 subm,
1747 1747 transaction,
1748 1748 link,
1749 1749 subp1,
1750 1750 subp2,
1751 1751 None,
1752 1752 None,
1753 1753 readtree=readtree,
1754 1754 match=match,
1755 1755 )
1756 1756
1757 1757 m.writesubtrees(m1, m2, writesubtree, match)
1758 1758 text = m.dirtext()
1759 1759 n = None
1760 1760 if self.tree != b'':
1761 1761 # Double-check whether contents are unchanged to one parent
1762 1762 if text == m1.dirtext():
1763 1763 n = m1.node()
1764 1764 elif text == m2.dirtext():
1765 1765 n = m2.node()
1766 1766
1767 1767 if not n:
1768 1768 n = self._revlog.addrevision(
1769 1769 text, transaction, link, m1.node(), m2.node()
1770 1770 )
1771 1771
1772 1772 # Save nodeid so parent manifest can calculate its nodeid
1773 1773 m.setnode(n)
1774 1774 return n
1775 1775
1776 1776 def __len__(self):
1777 1777 return len(self._revlog)
1778 1778
1779 1779 def __iter__(self):
1780 1780 return self._revlog.__iter__()
1781 1781
1782 1782 def rev(self, node):
1783 1783 return self._revlog.rev(node)
1784 1784
1785 1785 def node(self, rev):
1786 1786 return self._revlog.node(rev)
1787 1787
1788 1788 def lookup(self, value):
1789 1789 return self._revlog.lookup(value)
1790 1790
1791 1791 def parentrevs(self, rev):
1792 1792 return self._revlog.parentrevs(rev)
1793 1793
1794 1794 def parents(self, node):
1795 1795 return self._revlog.parents(node)
1796 1796
1797 1797 def linkrev(self, rev):
1798 1798 return self._revlog.linkrev(rev)
1799 1799
1800 1800 def checksize(self):
1801 1801 return self._revlog.checksize()
1802 1802
1803 1803 def revision(self, node, _df=None, raw=False):
1804 1804 return self._revlog.revision(node, _df=_df, raw=raw)
1805 1805
1806 1806 def rawdata(self, node, _df=None):
1807 1807 return self._revlog.rawdata(node, _df=_df)
1808 1808
1809 1809 def revdiff(self, rev1, rev2):
1810 1810 return self._revlog.revdiff(rev1, rev2)
1811 1811
1812 1812 def cmp(self, node, text):
1813 1813 return self._revlog.cmp(node, text)
1814 1814
1815 1815 def deltaparent(self, rev):
1816 1816 return self._revlog.deltaparent(rev)
1817 1817
1818 1818 def emitrevisions(
1819 1819 self,
1820 1820 nodes,
1821 1821 nodesorder=None,
1822 1822 revisiondata=False,
1823 1823 assumehaveparentrevisions=False,
1824 1824 deltamode=repository.CG_DELTAMODE_STD,
1825 1825 ):
1826 1826 return self._revlog.emitrevisions(
1827 1827 nodes,
1828 1828 nodesorder=nodesorder,
1829 1829 revisiondata=revisiondata,
1830 1830 assumehaveparentrevisions=assumehaveparentrevisions,
1831 1831 deltamode=deltamode,
1832 1832 )
1833 1833
1834 1834 def addgroup(
1835 1835 self,
1836 1836 deltas,
1837 1837 linkmapper,
1838 1838 transaction,
1839 1839 addrevisioncb=None,
1840 1840 duplicaterevisioncb=None,
1841 1841 ):
1842 1842 return self._revlog.addgroup(
1843 1843 deltas,
1844 1844 linkmapper,
1845 1845 transaction,
1846 1846 addrevisioncb=addrevisioncb,
1847 1847 duplicaterevisioncb=duplicaterevisioncb,
1848 1848 )
1849 1849
1850 1850 def rawsize(self, rev):
1851 1851 return self._revlog.rawsize(rev)
1852 1852
1853 1853 def getstrippoint(self, minlink):
1854 1854 return self._revlog.getstrippoint(minlink)
1855 1855
1856 1856 def strip(self, minlink, transaction):
1857 1857 return self._revlog.strip(minlink, transaction)
1858 1858
1859 1859 def files(self):
1860 1860 return self._revlog.files()
1861 1861
1862 1862 def clone(self, tr, destrevlog, **kwargs):
1863 1863 if not isinstance(destrevlog, manifestrevlog):
1864 1864 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1865 1865
1866 1866 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1867 1867
1868 1868 def storageinfo(
1869 1869 self,
1870 1870 exclusivefiles=False,
1871 1871 sharedfiles=False,
1872 1872 revisionscount=False,
1873 1873 trackedsize=False,
1874 1874 storedsize=False,
1875 1875 ):
1876 1876 return self._revlog.storageinfo(
1877 1877 exclusivefiles=exclusivefiles,
1878 1878 sharedfiles=sharedfiles,
1879 1879 revisionscount=revisionscount,
1880 1880 trackedsize=trackedsize,
1881 1881 storedsize=storedsize,
1882 1882 )
1883 1883
1884 1884 @property
1885 1885 def indexfile(self):
1886 1886 return self._revlog.indexfile
1887 1887
1888 1888 @indexfile.setter
1889 1889 def indexfile(self, value):
1890 1890 self._revlog.indexfile = value
1891 1891
1892 1892 @property
1893 1893 def opener(self):
1894 1894 return self._revlog.opener
1895 1895
1896 1896 @opener.setter
1897 1897 def opener(self, value):
1898 1898 self._revlog.opener = value
1899 1899
1900 1900
1901 1901 @interfaceutil.implementer(repository.imanifestlog)
1902 1902 class manifestlog(object):
1903 1903 """A collection class representing the collection of manifest snapshots
1904 1904 referenced by commits in the repository.
1905 1905
1906 1906 In this situation, 'manifest' refers to the abstract concept of a snapshot
1907 1907 of the list of files in the given commit. Consumers of the output of this
1908 1908 class do not care about the implementation details of the actual manifests
1909 1909 they receive (i.e. tree or flat or lazily loaded, etc)."""
1910 1910
1911 1911 def __init__(self, opener, repo, rootstore, narrowmatch):
1912 1912 usetreemanifest = False
1913 1913 cachesize = 4
1914 1914
1915 1915 opts = getattr(opener, 'options', None)
1916 1916 if opts is not None:
1917 1917 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1918 1918 cachesize = opts.get(b'manifestcachesize', cachesize)
1919 1919
1920 1920 self._treemanifests = usetreemanifest
1921 1921
1922 1922 self._rootstore = rootstore
1923 1923 self._rootstore._setupmanifestcachehooks(repo)
1924 1924 self._narrowmatch = narrowmatch
1925 1925
1926 1926 # A cache of the manifestctx or treemanifestctx for each directory
1927 1927 self._dirmancache = {}
1928 1928 self._dirmancache[b''] = util.lrucachedict(cachesize)
1929 1929
1930 1930 self._cachesize = cachesize
1931 1931
1932 1932 def __getitem__(self, node):
1933 1933 """Retrieves the manifest instance for the given node. Throws a
1934 1934 LookupError if not found.
1935 1935 """
1936 1936 return self.get(b'', node)
1937 1937
1938 1938 def get(self, tree, node, verify=True):
1939 1939 """Retrieves the manifest instance for the given node. Throws a
1940 1940 LookupError if not found.
1941 1941
1942 1942 `verify` - if True an exception will be thrown if the node is not in
1943 1943 the revlog
1944 1944 """
1945 1945 if node in self._dirmancache.get(tree, ()):
1946 1946 return self._dirmancache[tree][node]
1947 1947
1948 1948 if not self._narrowmatch.always():
1949 1949 if not self._narrowmatch.visitdir(tree[:-1]):
1950 1950 return excludeddirmanifestctx(tree, node)
1951 1951 if tree:
1952 1952 if self._rootstore._treeondisk:
1953 1953 if verify:
1954 1954 # Side-effect is LookupError is raised if node doesn't
1955 1955 # exist.
1956 1956 self.getstorage(tree).rev(node)
1957 1957
1958 1958 m = treemanifestctx(self, tree, node)
1959 1959 else:
1960 1960 raise error.Abort(
1961 1961 _(
1962 1962 b"cannot ask for manifest directory '%s' in a flat "
1963 1963 b"manifest"
1964 1964 )
1965 1965 % tree
1966 1966 )
1967 1967 else:
1968 1968 if verify:
1969 1969 # Side-effect is LookupError is raised if node doesn't exist.
1970 1970 self._rootstore.rev(node)
1971 1971
1972 1972 if self._treemanifests:
1973 1973 m = treemanifestctx(self, b'', node)
1974 1974 else:
1975 1975 m = manifestctx(self, node)
1976 1976
1977 1977 if node != nullid:
1978 1978 mancache = self._dirmancache.get(tree)
1979 1979 if not mancache:
1980 1980 mancache = util.lrucachedict(self._cachesize)
1981 1981 self._dirmancache[tree] = mancache
1982 1982 mancache[node] = m
1983 1983 return m
1984 1984
1985 1985 def getstorage(self, tree):
1986 1986 return self._rootstore.dirlog(tree)
1987 1987
1988 1988 def clearcaches(self, clear_persisted_data=False):
1989 1989 self._dirmancache.clear()
1990 1990 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
1991 1991
1992 1992 def rev(self, node):
1993 1993 return self._rootstore.rev(node)
1994 1994
1995 1995 def update_caches(self, transaction):
1996 1996 return self._rootstore._revlog.update_caches(transaction=transaction)
1997 1997
1998 1998
1999 1999 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2000 2000 class memmanifestctx(object):
2001 2001 def __init__(self, manifestlog):
2002 2002 self._manifestlog = manifestlog
2003 2003 self._manifestdict = manifestdict()
2004 2004
2005 2005 def _storage(self):
2006 2006 return self._manifestlog.getstorage(b'')
2007 2007
2008 2008 def copy(self):
2009 2009 memmf = memmanifestctx(self._manifestlog)
2010 2010 memmf._manifestdict = self.read().copy()
2011 2011 return memmf
2012 2012
2013 2013 def read(self):
2014 2014 return self._manifestdict
2015 2015
2016 2016 def write(self, transaction, link, p1, p2, added, removed, match=None):
2017 2017 return self._storage().add(
2018 2018 self._manifestdict,
2019 2019 transaction,
2020 2020 link,
2021 2021 p1,
2022 2022 p2,
2023 2023 added,
2024 2024 removed,
2025 2025 match=match,
2026 2026 )
2027 2027
2028 2028
2029 2029 @interfaceutil.implementer(repository.imanifestrevisionstored)
2030 2030 class manifestctx(object):
2031 2031 """A class representing a single revision of a manifest, including its
2032 2032 contents, its parent revs, and its linkrev.
2033 2033 """
2034 2034
2035 2035 def __init__(self, manifestlog, node):
2036 2036 self._manifestlog = manifestlog
2037 2037 self._data = None
2038 2038
2039 2039 self._node = node
2040 2040
2041 2041 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2042 2042 # but let's add it later when something needs it and we can load it
2043 2043 # lazily.
2044 2044 # self.p1, self.p2 = store.parents(node)
2045 2045 # rev = store.rev(node)
2046 2046 # self.linkrev = store.linkrev(rev)
2047 2047
2048 2048 def _storage(self):
2049 2049 return self._manifestlog.getstorage(b'')
2050 2050
2051 2051 def node(self):
2052 2052 return self._node
2053 2053
2054 2054 def copy(self):
2055 2055 memmf = memmanifestctx(self._manifestlog)
2056 2056 memmf._manifestdict = self.read().copy()
2057 2057 return memmf
2058 2058
2059 2059 @propertycache
2060 2060 def parents(self):
2061 2061 return self._storage().parents(self._node)
2062 2062
2063 2063 def read(self):
2064 2064 if self._data is None:
2065 2065 if self._node == nullid:
2066 2066 self._data = manifestdict()
2067 2067 else:
2068 2068 store = self._storage()
2069 2069 if self._node in store.fulltextcache:
2070 2070 text = pycompat.bytestr(store.fulltextcache[self._node])
2071 2071 else:
2072 2072 text = store.revision(self._node)
2073 2073 arraytext = bytearray(text)
2074 2074 store.fulltextcache[self._node] = arraytext
2075 2075 self._data = manifestdict(text)
2076 2076 return self._data
2077 2077
2078 2078 def readfast(self, shallow=False):
2079 2079 """Calls either readdelta or read, based on which would be less work.
2080 2080 readdelta is called if the delta is against the p1, and therefore can be
2081 2081 read quickly.
2082 2082
2083 2083 If `shallow` is True, nothing changes since this is a flat manifest.
2084 2084 """
2085 2085 store = self._storage()
2086 2086 r = store.rev(self._node)
2087 2087 deltaparent = store.deltaparent(r)
2088 2088 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2089 2089 return self.readdelta()
2090 2090 return self.read()
2091 2091
2092 2092 def readdelta(self, shallow=False):
2093 2093 """Returns a manifest containing just the entries that are present
2094 2094 in this manifest, but not in its p1 manifest. This is efficient to read
2095 2095 if the revlog delta is already p1.
2096 2096
2097 2097 Changing the value of `shallow` has no effect on flat manifests.
2098 2098 """
2099 2099 store = self._storage()
2100 2100 r = store.rev(self._node)
2101 2101 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2102 2102 return manifestdict(d)
2103 2103
2104 2104 def find(self, key):
2105 2105 return self.read().find(key)
2106 2106
2107 2107
2108 2108 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2109 2109 class memtreemanifestctx(object):
2110 2110 def __init__(self, manifestlog, dir=b''):
2111 2111 self._manifestlog = manifestlog
2112 2112 self._dir = dir
2113 2113 self._treemanifest = treemanifest()
2114 2114
2115 2115 def _storage(self):
2116 2116 return self._manifestlog.getstorage(b'')
2117 2117
2118 2118 def copy(self):
2119 2119 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2120 2120 memmf._treemanifest = self._treemanifest.copy()
2121 2121 return memmf
2122 2122
2123 2123 def read(self):
2124 2124 return self._treemanifest
2125 2125
2126 2126 def write(self, transaction, link, p1, p2, added, removed, match=None):
2127 2127 def readtree(dir, node):
2128 2128 return self._manifestlog.get(dir, node).read()
2129 2129
2130 2130 return self._storage().add(
2131 2131 self._treemanifest,
2132 2132 transaction,
2133 2133 link,
2134 2134 p1,
2135 2135 p2,
2136 2136 added,
2137 2137 removed,
2138 2138 readtree=readtree,
2139 2139 match=match,
2140 2140 )
2141 2141
2142 2142
2143 2143 @interfaceutil.implementer(repository.imanifestrevisionstored)
2144 2144 class treemanifestctx(object):
2145 2145 def __init__(self, manifestlog, dir, node):
2146 2146 self._manifestlog = manifestlog
2147 2147 self._dir = dir
2148 2148 self._data = None
2149 2149
2150 2150 self._node = node
2151 2151
2152 2152 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2153 2153 # we can instantiate treemanifestctx objects for directories we don't
2154 2154 # have on disk.
2155 2155 # self.p1, self.p2 = store.parents(node)
2156 2156 # rev = store.rev(node)
2157 2157 # self.linkrev = store.linkrev(rev)
2158 2158
2159 2159 def _storage(self):
2160 2160 narrowmatch = self._manifestlog._narrowmatch
2161 2161 if not narrowmatch.always():
2162 2162 if not narrowmatch.visitdir(self._dir[:-1]):
2163 2163 return excludedmanifestrevlog(self._dir)
2164 2164 return self._manifestlog.getstorage(self._dir)
2165 2165
2166 2166 def read(self):
2167 2167 if self._data is None:
2168 2168 store = self._storage()
2169 2169 if self._node == nullid:
2170 2170 self._data = treemanifest()
2171 2171 # TODO accessing non-public API
2172 2172 elif store._treeondisk:
2173 2173 m = treemanifest(dir=self._dir)
2174 2174
2175 2175 def gettext():
2176 2176 return store.revision(self._node)
2177 2177
2178 2178 def readsubtree(dir, subm):
2179 2179 # Set verify to False since we need to be able to create
2180 2180 # subtrees for trees that don't exist on disk.
2181 2181 return self._manifestlog.get(dir, subm, verify=False).read()
2182 2182
2183 2183 m.read(gettext, readsubtree)
2184 2184 m.setnode(self._node)
2185 2185 self._data = m
2186 2186 else:
2187 2187 if self._node in store.fulltextcache:
2188 2188 text = pycompat.bytestr(store.fulltextcache[self._node])
2189 2189 else:
2190 2190 text = store.revision(self._node)
2191 2191 arraytext = bytearray(text)
2192 2192 store.fulltextcache[self._node] = arraytext
2193 2193 self._data = treemanifest(dir=self._dir, text=text)
2194 2194
2195 2195 return self._data
2196 2196
2197 2197 def node(self):
2198 2198 return self._node
2199 2199
2200 2200 def copy(self):
2201 2201 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2202 2202 memmf._treemanifest = self.read().copy()
2203 2203 return memmf
2204 2204
2205 2205 @propertycache
2206 2206 def parents(self):
2207 2207 return self._storage().parents(self._node)
2208 2208
2209 2209 def readdelta(self, shallow=False):
2210 2210 """Returns a manifest containing just the entries that are present
2211 2211 in this manifest, but not in its p1 manifest. This is efficient to read
2212 2212 if the revlog delta is already p1.
2213 2213
2214 2214 If `shallow` is True, this will read the delta for this directory,
2215 2215 without recursively reading subdirectory manifests. Instead, any
2216 2216 subdirectory entry will be reported as it appears in the manifest, i.e.
2217 2217 the subdirectory will be reported among files and distinguished only by
2218 2218 its 't' flag.
2219 2219 """
2220 2220 store = self._storage()
2221 2221 if shallow:
2222 2222 r = store.rev(self._node)
2223 2223 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2224 2224 return manifestdict(d)
2225 2225 else:
2226 2226 # Need to perform a slow delta
2227 2227 r0 = store.deltaparent(store.rev(self._node))
2228 2228 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2229 2229 m1 = self.read()
2230 2230 md = treemanifest(dir=self._dir)
2231 2231 for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
2232 2232 if n1:
2233 2233 md[f] = n1
2234 2234 if fl1:
2235 2235 md.setflag(f, fl1)
2236 2236 return md
2237 2237
2238 2238 def readfast(self, shallow=False):
2239 2239 """Calls either readdelta or read, based on which would be less work.
2240 2240 readdelta is called if the delta is against the p1, and therefore can be
2241 2241 read quickly.
2242 2242
2243 2243 If `shallow` is True, it only returns the entries from this manifest,
2244 2244 and not any submanifests.
2245 2245 """
2246 2246 store = self._storage()
2247 2247 r = store.rev(self._node)
2248 2248 deltaparent = store.deltaparent(r)
2249 2249 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2250 2250 return self.readdelta(shallow=shallow)
2251 2251
2252 2252 if shallow:
2253 2253 return manifestdict(store.revision(self._node))
2254 2254 else:
2255 2255 return self.read()
2256 2256
2257 2257 def find(self, key):
2258 2258 return self.read().find(key)
2259 2259
2260 2260
2261 2261 class excludeddir(treemanifest):
2262 2262 """Stand-in for a directory that is excluded from the repository.
2263 2263
2264 2264 With narrowing active on a repository that uses treemanifests,
2265 2265 some of the directory revlogs will be excluded from the resulting
2266 2266 clone. This is a huge storage win for clients, but means we need
2267 2267 some sort of pseudo-manifest to surface to internals so we can
2268 2268 detect a merge conflict outside the narrowspec. That's what this
2269 2269 class is: it stands in for a directory whose node is known, but
2270 2270 whose contents are unknown.
2271 2271 """
2272 2272
2273 2273 def __init__(self, dir, node):
2274 2274 super(excludeddir, self).__init__(dir)
2275 2275 self._node = node
2276 2276 # Add an empty file, which will be included by iterators and such,
2277 2277 # appearing as the directory itself (i.e. something like "dir/")
2278 2278 self._files[b''] = node
2279 2279 self._flags[b''] = b't'
2280 2280
2281 2281 # Manifests outside the narrowspec should never be modified, so avoid
2282 2282 # copying. This makes a noticeable difference when there are very many
2283 2283 # directories outside the narrowspec. Also, it makes sense for the copy to
2284 2284 # be of the same type as the original, which would not happen with the
2285 2285 # super type's copy().
2286 2286 def copy(self):
2287 2287 return self
2288 2288
2289 2289
2290 2290 class excludeddirmanifestctx(treemanifestctx):
2291 2291 """context wrapper for excludeddir - see that docstring for rationale"""
2292 2292
2293 2293 def __init__(self, dir, node):
2294 2294 self._dir = dir
2295 2295 self._node = node
2296 2296
2297 2297 def read(self):
2298 2298 return excludeddir(self._dir, self._node)
2299 2299
2300 def readfast(self, shallow=False):
2301 # special version of readfast since we don't have underlying storage
2302 return self.read()
2303
2300 2304 def write(self, *args):
2301 2305 raise error.ProgrammingError(
2302 2306 b'attempt to write manifest from excluded dir %s' % self._dir
2303 2307 )
2304 2308
2305 2309
2306 2310 class excludedmanifestrevlog(manifestrevlog):
2307 2311 """Stand-in for excluded treemanifest revlogs.
2308 2312
2309 2313 When narrowing is active on a treemanifest repository, we'll have
2310 2314 references to directories we can't see due to the revlog being
2311 2315 skipped. This class exists to conform to the manifestrevlog
2312 2316 interface for those directories and proactively prevent writes to
2313 2317 outside the narrowspec.
2314 2318 """
2315 2319
2316 2320 def __init__(self, dir):
2317 2321 self._dir = dir
2318 2322
2319 2323 def __len__(self):
2320 2324 raise error.ProgrammingError(
2321 2325 b'attempt to get length of excluded dir %s' % self._dir
2322 2326 )
2323 2327
2324 2328 def rev(self, node):
2325 2329 raise error.ProgrammingError(
2326 2330 b'attempt to get rev from excluded dir %s' % self._dir
2327 2331 )
2328 2332
2329 2333 def linkrev(self, node):
2330 2334 raise error.ProgrammingError(
2331 2335 b'attempt to get linkrev from excluded dir %s' % self._dir
2332 2336 )
2333 2337
2334 2338 def node(self, rev):
2335 2339 raise error.ProgrammingError(
2336 2340 b'attempt to get node from excluded dir %s' % self._dir
2337 2341 )
2338 2342
2339 2343 def add(self, *args, **kwargs):
2340 2344 # We should never write entries in dirlogs outside the narrow clone.
2341 2345 # However, the method still gets called from writesubtree() in
2342 2346 # _addtree(), so we need to handle it. We should possibly make that
2343 2347 # avoid calling add() with a clean manifest (_dirty is always False
2344 2348 # in excludeddir instances).
2345 2349 pass
General Comments 0
You need to be logged in to leave comments. Login now