##// END OF EJS Templates
manifest: type and fix unhexlify...
marmoute -
r52657:2f88df88 default
parent child Browse files
Show More
@@ -1,2404 +1,2404 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import heapq
10 10 import itertools
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 bin,
17 17 hex,
18 18 nullrev,
19 19 )
20 20 from . import (
21 21 encoding,
22 22 error,
23 23 match as matchmod,
24 24 mdiff,
25 25 pathutil,
26 26 policy,
27 27 pycompat,
28 28 revlog,
29 29 util,
30 30 )
31 31 from .interfaces import (
32 32 repository,
33 33 util as interfaceutil,
34 34 )
35 35 from .revlogutils import (
36 36 constants as revlog_constants,
37 37 )
38 38
39 39 parsers = policy.importmod('parsers')
40 40 propertycache = util.propertycache
41 41
42 42 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
43 43 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
44 44
45 45
46 46 def _parse(nodelen, data):
47 47 # This method does a little bit of excessive-looking
48 48 # precondition checking. This is so that the behavior of this
49 49 # class exactly matches its C counterpart to try and help
50 50 # prevent surprise breakage for anyone that develops against
51 51 # the pure version.
52 52 if data and data[-1:] != b'\n':
53 53 raise ValueError(b'Manifest did not end in a newline.')
54 54 prev = None
55 55 for l in data.splitlines():
56 56 if prev is not None and prev > l:
57 57 raise ValueError(b'Manifest lines not in sorted order.')
58 58 prev = l
59 59 f, n = l.split(b'\0')
60 60 nl = len(n)
61 61 flags = n[-1:]
62 62 if flags in _manifestflags:
63 63 n = n[:-1]
64 64 nl -= 1
65 65 else:
66 66 flags = b''
67 67 if nl != 2 * nodelen:
68 68 raise ValueError(b'Invalid manifest line')
69 69
70 70 yield f, bin(n), flags
71 71
72 72
73 73 def _text(it):
74 74 files = []
75 75 lines = []
76 76 for f, n, fl in it:
77 77 files.append(f)
78 78 # if this is changed to support newlines in filenames,
79 79 # be sure to check the templates/ dir again (especially *-raw.tmpl)
80 80 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
81 81
82 82 _checkforbidden(files)
83 83 return b''.join(lines)
84 84
85 85
86 86 class lazymanifestiter:
87 87 def __init__(self, lm):
88 88 self.pos = 0
89 89 self.lm = lm
90 90
91 91 def __iter__(self):
92 92 return self
93 93
94 94 def next(self):
95 95 try:
96 96 data, pos = self.lm._get(self.pos)
97 97 except IndexError:
98 98 raise StopIteration
99 99 if pos == -1:
100 100 self.pos += 1
101 101 return data[0]
102 102 self.pos += 1
103 103 zeropos = data.find(b'\x00', pos)
104 104 return data[pos:zeropos]
105 105
106 106 __next__ = next
107 107
108 108
109 109 class lazymanifestiterentries:
110 110 def __init__(self, lm):
111 111 self.lm = lm
112 112 self.pos = 0
113 113
114 114 def __iter__(self):
115 115 return self
116 116
117 117 def next(self):
118 118 try:
119 119 data, pos = self.lm._get(self.pos)
120 120 except IndexError:
121 121 raise StopIteration
122 122 if pos == -1:
123 123 self.pos += 1
124 124 return data
125 125 zeropos = data.find(b'\x00', pos)
126 126 nlpos = data.find(b'\n', pos)
127 127 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
128 128 raise error.StorageError(b'Invalid manifest line')
129 129 flags = data[nlpos - 1 : nlpos]
130 130 if flags in _manifestflags:
131 131 hlen = nlpos - zeropos - 2
132 132 else:
133 133 hlen = nlpos - zeropos - 1
134 134 flags = b''
135 135 if hlen != 2 * self.lm._nodelen:
136 136 raise error.StorageError(b'Invalid manifest line')
137 137 hashval = unhexlify(
138 138 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
139 139 )
140 140 self.pos += 1
141 141 return (data[pos:zeropos], hashval, flags)
142 142
143 143 __next__ = next
144 144
145 145
146 def unhexlify(data, extra, pos, length):
146 def unhexlify(data: bytes, extra: int, pos, length: int):
147 147 s = bin(data[pos : pos + length])
148 148 if extra:
149 s += chr(extra & 0xFF)
149 s += bytes([extra & 0xFF])
150 150 return s
151 151
152 152
153 153 def _cmp(a, b):
154 154 return (a > b) - (a < b)
155 155
156 156
157 157 _manifestflags = {b'', b'l', b't', b'x'}
158 158
159 159
160 160 class _lazymanifest:
161 161 """A pure python manifest backed by a byte string. It is supplimented with
162 162 internal lists as it is modified, until it is compacted back to a pure byte
163 163 string.
164 164
165 165 ``data`` is the initial manifest data.
166 166
167 167 ``positions`` is a list of offsets, one per manifest entry. Positive
168 168 values are offsets into ``data``, negative values are offsets into the
169 169 ``extradata`` list. When an entry is removed, its entry is dropped from
170 170 ``positions``. The values are encoded such that when walking the list and
171 171 indexing into ``data`` or ``extradata`` as appropriate, the entries are
172 172 sorted by filename.
173 173
174 174 ``extradata`` is a list of (key, hash, flags) for entries that were added or
175 175 modified since the manifest was created or compacted.
176 176 """
177 177
178 178 def __init__(
179 179 self,
180 180 nodelen,
181 181 data,
182 182 positions=None,
183 183 extrainfo=None,
184 184 extradata=None,
185 185 hasremovals=False,
186 186 ):
187 187 self._nodelen = nodelen
188 188 if positions is None:
189 189 self.positions = self.findlines(data)
190 190 self.extrainfo = [0] * len(self.positions)
191 191 self.data = data
192 192 self.extradata = []
193 193 self.hasremovals = False
194 194 else:
195 195 self.positions = positions[:]
196 196 self.extrainfo = extrainfo[:]
197 197 self.extradata = extradata[:]
198 198 self.data = data
199 199 self.hasremovals = hasremovals
200 200
201 201 def findlines(self, data):
202 202 if not data:
203 203 return []
204 204 pos = data.find(b"\n")
205 205 if pos == -1 or data[-1:] != b'\n':
206 206 raise ValueError(b"Manifest did not end in a newline.")
207 207 positions = [0]
208 208 prev = data[: data.find(b'\x00')]
209 209 while pos < len(data) - 1 and pos != -1:
210 210 positions.append(pos + 1)
211 211 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
212 212 if nexts < prev:
213 213 raise ValueError(b"Manifest lines not in sorted order.")
214 214 prev = nexts
215 215 pos = data.find(b"\n", pos + 1)
216 216 return positions
217 217
218 218 def _get(self, index):
219 219 # get the position encoded in pos:
220 220 # positive number is an index in 'data'
221 221 # negative number is in extrapieces
222 222 pos = self.positions[index]
223 223 if pos >= 0:
224 224 return self.data, pos
225 225 return self.extradata[-pos - 1], -1
226 226
227 227 def _getkey(self, pos):
228 228 if pos >= 0:
229 229 return self.data[pos : self.data.find(b'\x00', pos + 1)]
230 230 return self.extradata[-pos - 1][0]
231 231
232 232 def bsearch(self, key):
233 233 first = 0
234 234 last = len(self.positions) - 1
235 235
236 236 while first <= last:
237 237 midpoint = (first + last) // 2
238 238 nextpos = self.positions[midpoint]
239 239 candidate = self._getkey(nextpos)
240 240 r = _cmp(key, candidate)
241 241 if r == 0:
242 242 return midpoint
243 243 else:
244 244 if r < 0:
245 245 last = midpoint - 1
246 246 else:
247 247 first = midpoint + 1
248 248 return -1
249 249
250 250 def bsearch2(self, key):
251 251 # same as the above, but will always return the position
252 252 # done for performance reasons
253 253 first = 0
254 254 last = len(self.positions) - 1
255 255
256 256 while first <= last:
257 257 midpoint = (first + last) // 2
258 258 nextpos = self.positions[midpoint]
259 259 candidate = self._getkey(nextpos)
260 260 r = _cmp(key, candidate)
261 261 if r == 0:
262 262 return (midpoint, True)
263 263 else:
264 264 if r < 0:
265 265 last = midpoint - 1
266 266 else:
267 267 first = midpoint + 1
268 268 return (first, False)
269 269
270 270 def __contains__(self, key):
271 271 return self.bsearch(key) != -1
272 272
273 273 def __getitem__(self, key):
274 274 if not isinstance(key, bytes):
275 275 raise TypeError(b"getitem: manifest keys must be a bytes.")
276 276 needle = self.bsearch(key)
277 277 if needle == -1:
278 278 raise KeyError
279 279 data, pos = self._get(needle)
280 280 if pos == -1:
281 281 return (data[1], data[2])
282 282 zeropos = data.find(b'\x00', pos)
283 283 nlpos = data.find(b'\n', zeropos)
284 284 assert 0 <= needle <= len(self.positions)
285 285 assert len(self.extrainfo) == len(self.positions)
286 286 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
287 287 raise error.StorageError(b'Invalid manifest line')
288 288 hlen = nlpos - zeropos - 1
289 289 flags = data[nlpos - 1 : nlpos]
290 290 if flags in _manifestflags:
291 291 hlen -= 1
292 292 else:
293 293 flags = b''
294 294 if hlen != 2 * self._nodelen:
295 295 raise error.StorageError(b'Invalid manifest line')
296 296 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
297 297 return (hashval, flags)
298 298
299 299 def __delitem__(self, key):
300 300 needle, found = self.bsearch2(key)
301 301 if not found:
302 302 raise KeyError
303 303 cur = self.positions[needle]
304 304 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
305 305 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
306 306 if cur >= 0:
307 307 # This does NOT unsort the list as far as the search functions are
308 308 # concerned, as they only examine lines mapped by self.positions.
309 309 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
310 310 self.hasremovals = True
311 311
312 312 def __setitem__(self, key, value):
313 313 if not isinstance(key, bytes):
314 314 raise TypeError(b"setitem: manifest keys must be a byte string.")
315 315 if not isinstance(value, tuple) or len(value) != 2:
316 316 raise TypeError(
317 317 b"Manifest values must be a tuple of (node, flags)."
318 318 )
319 319 hashval = value[0]
320 320 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
321 321 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
322 322 flags = value[1]
323 323 if not isinstance(flags, bytes) or len(flags) > 1:
324 324 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
325 325 needle, found = self.bsearch2(key)
326 326 if found:
327 327 # put the item
328 328 pos = self.positions[needle]
329 329 if pos < 0:
330 330 self.extradata[-pos - 1] = (key, hashval, value[1])
331 331 else:
332 332 # just don't bother
333 333 self.extradata.append((key, hashval, value[1]))
334 334 self.positions[needle] = -len(self.extradata)
335 335 else:
336 336 # not found, put it in with extra positions
337 337 self.extradata.append((key, hashval, value[1]))
338 338 self.positions = (
339 339 self.positions[:needle]
340 340 + [-len(self.extradata)]
341 341 + self.positions[needle:]
342 342 )
343 343 self.extrainfo = (
344 344 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
345 345 )
346 346
347 347 def copy(self):
348 348 # XXX call _compact like in C?
349 349 return _lazymanifest(
350 350 self._nodelen,
351 351 self.data,
352 352 self.positions,
353 353 self.extrainfo,
354 354 self.extradata,
355 355 self.hasremovals,
356 356 )
357 357
358 358 def _compact(self):
359 359 # hopefully not called TOO often
360 360 if len(self.extradata) == 0 and not self.hasremovals:
361 361 return
362 362 l = []
363 363 i = 0
364 364 offset = 0
365 365 self.extrainfo = [0] * len(self.positions)
366 366 while i < len(self.positions):
367 367 if self.positions[i] >= 0:
368 368 cur = self.positions[i]
369 369 last_cut = cur
370 370
371 371 # Collect all contiguous entries in the buffer at the current
372 372 # offset, breaking out only for added/modified items held in
373 373 # extradata, or a deleted line prior to the next position.
374 374 while True:
375 375 self.positions[i] = offset
376 376 i += 1
377 377 if i == len(self.positions) or self.positions[i] < 0:
378 378 break
379 379
380 380 # A removed file has no positions[] entry, but does have an
381 381 # overwritten first byte. Break out and find the end of the
382 382 # current good entry/entries if there is a removed file
383 383 # before the next position.
384 384 if (
385 385 self.hasremovals
386 386 and self.data.find(b'\n\x00', cur, self.positions[i])
387 387 != -1
388 388 ):
389 389 break
390 390
391 391 offset += self.positions[i] - cur
392 392 cur = self.positions[i]
393 393 end_cut = self.data.find(b'\n', cur)
394 394 if end_cut != -1:
395 395 end_cut += 1
396 396 offset += end_cut - cur
397 397 l.append(self.data[last_cut:end_cut])
398 398 else:
399 399 while i < len(self.positions) and self.positions[i] < 0:
400 400 cur = self.positions[i]
401 401 t = self.extradata[-cur - 1]
402 402 l.append(self._pack(t))
403 403 self.positions[i] = offset
404 404 # Hashes are either 20 bytes (old sha1s) or 32
405 405 # bytes (new non-sha1).
406 406 hlen = 20
407 407 if len(t[1]) > 25:
408 408 hlen = 32
409 409 if len(t[1]) > hlen:
410 410 self.extrainfo[i] = ord(t[1][hlen + 1])
411 411 offset += len(l[-1])
412 412 i += 1
413 413 self.data = b''.join(l)
414 414 self.hasremovals = False
415 415 self.extradata = []
416 416
417 417 def _pack(self, d):
418 418 n = d[1]
419 419 assert len(n) in (20, 32)
420 420 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
421 421
422 422 def text(self):
423 423 self._compact()
424 424 return self.data
425 425
426 426 def diff(self, m2, clean=False):
427 427 '''Finds changes between the current manifest and m2.'''
428 428 # XXX think whether efficiency matters here
429 429 diff = {}
430 430
431 431 for fn, e1, flags in self.iterentries():
432 432 if fn not in m2:
433 433 diff[fn] = (e1, flags), (None, b'')
434 434 else:
435 435 e2 = m2[fn]
436 436 if (e1, flags) != e2:
437 437 diff[fn] = (e1, flags), e2
438 438 elif clean:
439 439 diff[fn] = None
440 440
441 441 for fn, e2, flags in m2.iterentries():
442 442 if fn not in self:
443 443 diff[fn] = (None, b''), (e2, flags)
444 444
445 445 return diff
446 446
447 447 def iterentries(self):
448 448 return lazymanifestiterentries(self)
449 449
450 450 def iterkeys(self):
451 451 return lazymanifestiter(self)
452 452
453 453 def __iter__(self):
454 454 return lazymanifestiter(self)
455 455
456 456 def __len__(self):
457 457 return len(self.positions)
458 458
459 459 def filtercopy(self, filterfn):
460 460 # XXX should be optimized
461 461 c = _lazymanifest(self._nodelen, b'')
462 462 for f, n, fl in self.iterentries():
463 463 if filterfn(f):
464 464 c[f] = n, fl
465 465 return c
466 466
467 467
468 468 try:
469 469 _lazymanifest = parsers.lazymanifest
470 470 except AttributeError:
471 471 pass
472 472
473 473
474 474 @interfaceutil.implementer(repository.imanifestdict)
475 475 class manifestdict:
476 476 def __init__(self, nodelen, data=b''):
477 477 self._nodelen = nodelen
478 478 self._lm = _lazymanifest(nodelen, data)
479 479
480 480 def __getitem__(self, key):
481 481 return self._lm[key][0]
482 482
483 483 def find(self, key):
484 484 return self._lm[key]
485 485
486 486 def __len__(self):
487 487 return len(self._lm)
488 488
489 489 def __nonzero__(self):
490 490 # nonzero is covered by the __len__ function, but implementing it here
491 491 # makes it easier for extensions to override.
492 492 return len(self._lm) != 0
493 493
494 494 __bool__ = __nonzero__
495 495
496 496 def set(self, key, node, flags):
497 497 self._lm[key] = node, flags
498 498
499 499 def __setitem__(self, key, node):
500 500 self._lm[key] = node, self.flags(key)
501 501
502 502 def __contains__(self, key):
503 503 if key is None:
504 504 return False
505 505 return key in self._lm
506 506
507 507 def __delitem__(self, key):
508 508 del self._lm[key]
509 509
510 510 def __iter__(self):
511 511 return self._lm.__iter__()
512 512
513 513 def iterkeys(self):
514 514 return self._lm.iterkeys()
515 515
516 516 def keys(self):
517 517 return list(self.iterkeys())
518 518
519 519 def filesnotin(self, m2, match=None):
520 520 '''Set of files in this manifest that are not in the other'''
521 521 if match is not None:
522 522 match = matchmod.badmatch(match, lambda path, msg: None)
523 523 sm2 = set(m2.walk(match))
524 524 return {f for f in self.walk(match) if f not in sm2}
525 525 return {f for f in self if f not in m2}
526 526
527 527 @propertycache
528 528 def _dirs(self):
529 529 return pathutil.dirs(self)
530 530
531 531 def dirs(self):
532 532 return self._dirs
533 533
534 534 def hasdir(self, dir):
535 535 return dir in self._dirs
536 536
537 537 def _filesfastpath(self, match):
538 538 """Checks whether we can correctly and quickly iterate over matcher
539 539 files instead of over manifest files."""
540 540 files = match.files()
541 541 return len(files) < 100 and (
542 542 match.isexact()
543 543 or (match.prefix() and all(fn in self for fn in files))
544 544 )
545 545
546 546 def walk(self, match):
547 547 """Generates matching file names.
548 548
549 549 Equivalent to manifest.matches(match).iterkeys(), but without creating
550 550 an entirely new manifest.
551 551
552 552 It also reports nonexistent files by marking them bad with match.bad().
553 553 """
554 554 if match.always():
555 555 for f in iter(self):
556 556 yield f
557 557 return
558 558
559 559 fset = set(match.files())
560 560
561 561 # avoid the entire walk if we're only looking for specific files
562 562 if self._filesfastpath(match):
563 563 for fn in sorted(fset):
564 564 if fn in self:
565 565 yield fn
566 566 return
567 567
568 568 for fn in self:
569 569 if fn in fset:
570 570 # specified pattern is the exact name
571 571 fset.remove(fn)
572 572 if match(fn):
573 573 yield fn
574 574
575 575 # for dirstate.walk, files=[''] means "walk the whole tree".
576 576 # follow that here, too
577 577 fset.discard(b'')
578 578
579 579 for fn in sorted(fset):
580 580 if not self.hasdir(fn):
581 581 match.bad(fn, None)
582 582
583 583 def _matches(self, match):
584 584 '''generate a new manifest filtered by the match argument'''
585 585 if match.always():
586 586 return self.copy()
587 587
588 588 if self._filesfastpath(match):
589 589 m = manifestdict(self._nodelen)
590 590 lm = self._lm
591 591 for fn in match.files():
592 592 if fn in lm:
593 593 m._lm[fn] = lm[fn]
594 594 return m
595 595
596 596 m = manifestdict(self._nodelen)
597 597 m._lm = self._lm.filtercopy(match)
598 598 return m
599 599
600 600 def diff(self, m2, match=None, clean=False):
601 601 """Finds changes between the current manifest and m2.
602 602
603 603 Args:
604 604 m2: the manifest to which this manifest should be compared.
605 605 clean: if true, include files unchanged between these manifests
606 606 with a None value in the returned dictionary.
607 607
608 608 The result is returned as a dict with filename as key and
609 609 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
610 610 nodeid in the current/other manifest and fl1/fl2 is the flag
611 611 in the current/other manifest. Where the file does not exist,
612 612 the nodeid will be None and the flags will be the empty
613 613 string.
614 614 """
615 615 if match:
616 616 m1 = self._matches(match)
617 617 m2 = m2._matches(match)
618 618 return m1.diff(m2, clean=clean)
619 619 return self._lm.diff(m2._lm, clean)
620 620
621 621 def setflag(self, key, flag):
622 622 if flag not in _manifestflags:
623 623 raise TypeError(b"Invalid manifest flag set.")
624 624 self._lm[key] = self[key], flag
625 625
626 626 def get(self, key, default=None):
627 627 try:
628 628 return self._lm[key][0]
629 629 except KeyError:
630 630 return default
631 631
632 632 def flags(self, key):
633 633 try:
634 634 return self._lm[key][1]
635 635 except KeyError:
636 636 return b''
637 637
638 638 def copy(self):
639 639 c = manifestdict(self._nodelen)
640 640 c._lm = self._lm.copy()
641 641 return c
642 642
643 643 def items(self):
644 644 return (x[:2] for x in self._lm.iterentries())
645 645
646 646 def iteritems(self):
647 647 return (x[:2] for x in self._lm.iterentries())
648 648
649 649 def iterentries(self):
650 650 return self._lm.iterentries()
651 651
652 652 def text(self):
653 653 # most likely uses native version
654 654 return self._lm.text()
655 655
656 656 def fastdelta(self, base, changes):
657 657 """Given a base manifest text as a bytearray and a list of changes
658 658 relative to that text, compute a delta that can be used by revlog.
659 659 """
660 660 delta = []
661 661 dstart = None
662 662 dend = None
663 663 dline = [b""]
664 664 start = 0
665 665 # zero copy representation of base as a buffer
666 666 addbuf = util.buffer(base)
667 667
668 668 changes = list(changes)
669 669 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
670 670 # start with a readonly loop that finds the offset of
671 671 # each line and creates the deltas
672 672 for f, todelete in changes:
673 673 # bs will either be the index of the item or the insert point
674 674 start, end = _msearch(addbuf, f, start)
675 675 if not todelete:
676 676 h, fl = self._lm[f]
677 677 l = b"%s\0%s%s\n" % (f, hex(h), fl)
678 678 else:
679 679 if start == end:
680 680 # item we want to delete was not found, error out
681 681 raise AssertionError(
682 682 _(b"failed to remove %s from manifest") % f
683 683 )
684 684 l = b""
685 685 if dstart is not None and dstart <= start and dend >= start:
686 686 if dend < end:
687 687 dend = end
688 688 if l:
689 689 dline.append(l)
690 690 else:
691 691 if dstart is not None:
692 692 delta.append([dstart, dend, b"".join(dline)])
693 693 dstart = start
694 694 dend = end
695 695 dline = [l]
696 696
697 697 if dstart is not None:
698 698 delta.append([dstart, dend, b"".join(dline)])
699 699 # apply the delta to the base, and get a delta for addrevision
700 700 deltatext, arraytext = _addlistdelta(base, delta)
701 701 else:
702 702 # For large changes, it's much cheaper to just build the text and
703 703 # diff it.
704 704 arraytext = bytearray(self.text())
705 705 deltatext = mdiff.textdiff(
706 706 util.buffer(base), util.buffer(arraytext)
707 707 )
708 708
709 709 return arraytext, deltatext
710 710
711 711
712 712 def _msearch(m, s, lo=0, hi=None):
713 713 """return a tuple (start, end) that says where to find s within m.
714 714
715 715 If the string is found m[start:end] are the line containing
716 716 that string. If start == end the string was not found and
717 717 they indicate the proper sorted insertion point.
718 718
719 719 m should be a buffer, a memoryview or a byte string.
720 720 s is a byte string"""
721 721
722 722 def advance(i, c):
723 723 while i < lenm and m[i : i + 1] != c:
724 724 i += 1
725 725 return i
726 726
727 727 if not s:
728 728 return (lo, lo)
729 729 lenm = len(m)
730 730 if not hi:
731 731 hi = lenm
732 732 while lo < hi:
733 733 mid = (lo + hi) // 2
734 734 start = mid
735 735 while start > 0 and m[start - 1 : start] != b'\n':
736 736 start -= 1
737 737 end = advance(start, b'\0')
738 738 if bytes(m[start:end]) < s:
739 739 # we know that after the null there are 40 bytes of sha1
740 740 # this translates to the bisect lo = mid + 1
741 741 lo = advance(end + 40, b'\n') + 1
742 742 else:
743 743 # this translates to the bisect hi = mid
744 744 hi = start
745 745 end = advance(lo, b'\0')
746 746 found = m[lo:end]
747 747 if s == found:
748 748 # we know that after the null there are 40 bytes of sha1
749 749 end = advance(end + 40, b'\n')
750 750 return (lo, end + 1)
751 751 else:
752 752 return (lo, lo)
753 753
754 754
755 755 def _checkforbidden(l):
756 756 """Check filenames for illegal characters."""
757 757 for f in l:
758 758 if b'\n' in f or b'\r' in f:
759 759 raise error.StorageError(
760 760 _(b"'\\n' and '\\r' disallowed in filenames: %r")
761 761 % pycompat.bytestr(f)
762 762 )
763 763
764 764
765 765 # apply the changes collected during the bisect loop to our addlist
766 766 # return a delta suitable for addrevision
767 767 def _addlistdelta(addlist, x):
768 768 # for large addlist arrays, building a new array is cheaper
769 769 # than repeatedly modifying the existing one
770 770 currentposition = 0
771 771 newaddlist = bytearray()
772 772
773 773 for start, end, content in x:
774 774 newaddlist += addlist[currentposition:start]
775 775 if content:
776 776 newaddlist += bytearray(content)
777 777
778 778 currentposition = end
779 779
780 780 newaddlist += addlist[currentposition:]
781 781
782 782 deltatext = b"".join(
783 783 struct.pack(b">lll", start, end, len(content)) + content
784 784 for start, end, content in x
785 785 )
786 786 return deltatext, newaddlist
787 787
788 788
789 789 def _splittopdir(f):
790 790 if b'/' in f:
791 791 dir, subpath = f.split(b'/', 1)
792 792 return dir + b'/', subpath
793 793 else:
794 794 return b'', f
795 795
796 796
797 797 _noop = lambda s: None
798 798
799 799
800 800 @interfaceutil.implementer(repository.imanifestdict)
801 801 class treemanifest:
802 802 def __init__(self, nodeconstants, dir=b'', text=b''):
803 803 self._dir = dir
804 804 self.nodeconstants = nodeconstants
805 805 self._node = self.nodeconstants.nullid
806 806 self._nodelen = self.nodeconstants.nodelen
807 807 self._loadfunc = _noop
808 808 self._copyfunc = _noop
809 809 self._dirty = False
810 810 self._dirs = {}
811 811 self._lazydirs = {}
812 812 # Using _lazymanifest here is a little slower than plain old dicts
813 813 self._files = {}
814 814 self._flags = {}
815 815 if text:
816 816
817 817 def readsubtree(subdir, subm):
818 818 raise AssertionError(
819 819 b'treemanifest constructor only accepts flat manifests'
820 820 )
821 821
822 822 self.parse(text, readsubtree)
823 823 self._dirty = True # Mark flat manifest dirty after parsing
824 824
825 825 def _subpath(self, path):
826 826 return self._dir + path
827 827
828 828 def _loadalllazy(self):
829 829 selfdirs = self._dirs
830 830 subpath = self._subpath
831 831 for d, (node, readsubtree, docopy) in self._lazydirs.items():
832 832 if docopy:
833 833 selfdirs[d] = readsubtree(subpath(d), node).copy()
834 834 else:
835 835 selfdirs[d] = readsubtree(subpath(d), node)
836 836 self._lazydirs = {}
837 837
838 838 def _loadlazy(self, d):
839 839 v = self._lazydirs.get(d)
840 840 if v:
841 841 node, readsubtree, docopy = v
842 842 if docopy:
843 843 self._dirs[d] = readsubtree(self._subpath(d), node).copy()
844 844 else:
845 845 self._dirs[d] = readsubtree(self._subpath(d), node)
846 846 del self._lazydirs[d]
847 847
848 848 def _loadchildrensetlazy(self, visit):
849 849 if not visit:
850 850 return None
851 851 if visit == b'all' or visit == b'this':
852 852 self._loadalllazy()
853 853 return None
854 854
855 855 loadlazy = self._loadlazy
856 856 for k in visit:
857 857 loadlazy(k + b'/')
858 858 return visit
859 859
860 860 def _loaddifflazy(self, t1, t2):
861 861 """load items in t1 and t2 if they're needed for diffing.
862 862
863 863 The criteria currently is:
864 864 - if it's not present in _lazydirs in either t1 or t2, load it in the
865 865 other (it may already be loaded or it may not exist, doesn't matter)
866 866 - if it's present in _lazydirs in both, compare the nodeid; if it
867 867 differs, load it in both
868 868 """
869 869 toloadlazy = []
870 870 for d, v1 in t1._lazydirs.items():
871 871 v2 = t2._lazydirs.get(d)
872 872 if not v2 or v2[0] != v1[0]:
873 873 toloadlazy.append(d)
874 874 for d, v1 in t2._lazydirs.items():
875 875 if d not in t1._lazydirs:
876 876 toloadlazy.append(d)
877 877
878 878 for d in toloadlazy:
879 879 t1._loadlazy(d)
880 880 t2._loadlazy(d)
881 881
882 882 def __len__(self):
883 883 self._load()
884 884 size = len(self._files)
885 885 self._loadalllazy()
886 886 for m in self._dirs.values():
887 887 size += m.__len__()
888 888 return size
889 889
890 890 def __nonzero__(self):
891 891 # Faster than "__len() != 0" since it avoids loading sub-manifests
892 892 return not self._isempty()
893 893
894 894 __bool__ = __nonzero__
895 895
896 896 def _isempty(self):
897 897 self._load() # for consistency; already loaded by all callers
898 898 # See if we can skip loading everything.
899 899 if self._files or (
900 900 self._dirs and any(not m._isempty() for m in self._dirs.values())
901 901 ):
902 902 return False
903 903 self._loadalllazy()
904 904 return not self._dirs or all(m._isempty() for m in self._dirs.values())
905 905
906 906 @encoding.strmethod
907 907 def __repr__(self):
908 908 return (
909 909 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
910 910 % (
911 911 self._dir,
912 912 hex(self._node),
913 913 bool(self._loadfunc is _noop),
914 914 self._dirty,
915 915 id(self),
916 916 )
917 917 )
918 918
919 919 def dir(self):
920 920 """The directory that this tree manifest represents, including a
921 921 trailing '/'. Empty string for the repo root directory."""
922 922 return self._dir
923 923
924 924 def node(self):
925 925 """This node of this instance. nullid for unsaved instances. Should
926 926 be updated when the instance is read or written from a revlog.
927 927 """
928 928 assert not self._dirty
929 929 return self._node
930 930
931 931 def setnode(self, node):
932 932 self._node = node
933 933 self._dirty = False
934 934
935 935 def iterentries(self):
936 936 self._load()
937 937 self._loadalllazy()
938 938 for p, n in sorted(
939 939 itertools.chain(self._dirs.items(), self._files.items())
940 940 ):
941 941 if p in self._files:
942 942 yield self._subpath(p), n, self._flags.get(p, b'')
943 943 else:
944 944 for x in n.iterentries():
945 945 yield x
946 946
947 947 def items(self):
948 948 self._load()
949 949 self._loadalllazy()
950 950 for p, n in sorted(
951 951 itertools.chain(self._dirs.items(), self._files.items())
952 952 ):
953 953 if p in self._files:
954 954 yield self._subpath(p), n
955 955 else:
956 956 for f, sn in n.items():
957 957 yield f, sn
958 958
959 959 iteritems = items
960 960
961 961 def iterkeys(self):
962 962 self._load()
963 963 self._loadalllazy()
964 964 for p in sorted(itertools.chain(self._dirs, self._files)):
965 965 if p in self._files:
966 966 yield self._subpath(p)
967 967 else:
968 968 for f in self._dirs[p]:
969 969 yield f
970 970
971 971 def keys(self):
972 972 return list(self.iterkeys())
973 973
974 974 def __iter__(self):
975 975 return self.iterkeys()
976 976
977 977 def __contains__(self, f):
978 978 if f is None:
979 979 return False
980 980 self._load()
981 981 dir, subpath = _splittopdir(f)
982 982 if dir:
983 983 self._loadlazy(dir)
984 984
985 985 if dir not in self._dirs:
986 986 return False
987 987
988 988 return self._dirs[dir].__contains__(subpath)
989 989 else:
990 990 return f in self._files
991 991
992 992 def get(self, f, default=None):
993 993 self._load()
994 994 dir, subpath = _splittopdir(f)
995 995 if dir:
996 996 self._loadlazy(dir)
997 997
998 998 if dir not in self._dirs:
999 999 return default
1000 1000 return self._dirs[dir].get(subpath, default)
1001 1001 else:
1002 1002 return self._files.get(f, default)
1003 1003
1004 1004 def __getitem__(self, f):
1005 1005 self._load()
1006 1006 dir, subpath = _splittopdir(f)
1007 1007 if dir:
1008 1008 self._loadlazy(dir)
1009 1009
1010 1010 return self._dirs[dir].__getitem__(subpath)
1011 1011 else:
1012 1012 return self._files[f]
1013 1013
1014 1014 def flags(self, f):
1015 1015 self._load()
1016 1016 dir, subpath = _splittopdir(f)
1017 1017 if dir:
1018 1018 self._loadlazy(dir)
1019 1019
1020 1020 if dir not in self._dirs:
1021 1021 return b''
1022 1022 return self._dirs[dir].flags(subpath)
1023 1023 else:
1024 1024 if f in self._lazydirs or f in self._dirs:
1025 1025 return b''
1026 1026 return self._flags.get(f, b'')
1027 1027
1028 1028 def find(self, f):
1029 1029 self._load()
1030 1030 dir, subpath = _splittopdir(f)
1031 1031 if dir:
1032 1032 self._loadlazy(dir)
1033 1033
1034 1034 return self._dirs[dir].find(subpath)
1035 1035 else:
1036 1036 return self._files[f], self._flags.get(f, b'')
1037 1037
1038 1038 def __delitem__(self, f):
1039 1039 self._load()
1040 1040 dir, subpath = _splittopdir(f)
1041 1041 if dir:
1042 1042 self._loadlazy(dir)
1043 1043
1044 1044 self._dirs[dir].__delitem__(subpath)
1045 1045 # If the directory is now empty, remove it
1046 1046 if self._dirs[dir]._isempty():
1047 1047 del self._dirs[dir]
1048 1048 else:
1049 1049 del self._files[f]
1050 1050 if f in self._flags:
1051 1051 del self._flags[f]
1052 1052 self._dirty = True
1053 1053
1054 1054 def set(self, f, node, flags):
1055 1055 """Set both the node and the flags for path f."""
1056 1056 assert node is not None
1057 1057 if flags not in _manifestflags:
1058 1058 raise TypeError(b"Invalid manifest flag set.")
1059 1059 self._load()
1060 1060 dir, subpath = _splittopdir(f)
1061 1061 if dir:
1062 1062 self._loadlazy(dir)
1063 1063 if dir not in self._dirs:
1064 1064 self._dirs[dir] = treemanifest(
1065 1065 self.nodeconstants, self._subpath(dir)
1066 1066 )
1067 1067 self._dirs[dir].set(subpath, node, flags)
1068 1068 else:
1069 1069 assert len(node) in (20, 32)
1070 1070 self._files[f] = node
1071 1071 self._flags[f] = flags
1072 1072 self._dirty = True
1073 1073
1074 1074 def __setitem__(self, f, n):
1075 1075 assert n is not None
1076 1076 self._load()
1077 1077 dir, subpath = _splittopdir(f)
1078 1078 if dir:
1079 1079 self._loadlazy(dir)
1080 1080 if dir not in self._dirs:
1081 1081 self._dirs[dir] = treemanifest(
1082 1082 self.nodeconstants, self._subpath(dir)
1083 1083 )
1084 1084 self._dirs[dir].__setitem__(subpath, n)
1085 1085 else:
1086 1086 # manifest nodes are either 20 bytes or 32 bytes,
1087 1087 # depending on the hash in use. Assert this as historically
1088 1088 # sometimes extra bytes were added.
1089 1089 assert len(n) in (20, 32)
1090 1090 self._files[f] = n
1091 1091 self._dirty = True
1092 1092
1093 1093 def _load(self):
1094 1094 if self._loadfunc is not _noop:
1095 1095 lf, self._loadfunc = self._loadfunc, _noop
1096 1096 lf(self)
1097 1097 elif self._copyfunc is not _noop:
1098 1098 cf, self._copyfunc = self._copyfunc, _noop
1099 1099 cf(self)
1100 1100
1101 1101 def setflag(self, f, flags):
1102 1102 """Set the flags (symlink, executable) for path f."""
1103 1103 if flags not in _manifestflags:
1104 1104 raise TypeError(b"Invalid manifest flag set.")
1105 1105 self._load()
1106 1106 dir, subpath = _splittopdir(f)
1107 1107 if dir:
1108 1108 self._loadlazy(dir)
1109 1109 if dir not in self._dirs:
1110 1110 self._dirs[dir] = treemanifest(
1111 1111 self.nodeconstants, self._subpath(dir)
1112 1112 )
1113 1113 self._dirs[dir].setflag(subpath, flags)
1114 1114 else:
1115 1115 self._flags[f] = flags
1116 1116 self._dirty = True
1117 1117
1118 1118 def copy(self):
1119 1119 copy = treemanifest(self.nodeconstants, self._dir)
1120 1120 copy._node = self._node
1121 1121 copy._dirty = self._dirty
1122 1122 if self._copyfunc is _noop:
1123 1123
1124 1124 def _copyfunc(s):
1125 1125 self._load()
1126 1126 s._lazydirs = {
1127 1127 d: (n, r, True) for d, (n, r, c) in self._lazydirs.items()
1128 1128 }
1129 1129 sdirs = s._dirs
1130 1130 for d, v in self._dirs.items():
1131 1131 sdirs[d] = v.copy()
1132 1132 s._files = dict.copy(self._files)
1133 1133 s._flags = dict.copy(self._flags)
1134 1134
1135 1135 if self._loadfunc is _noop:
1136 1136 _copyfunc(copy)
1137 1137 else:
1138 1138 copy._copyfunc = _copyfunc
1139 1139 else:
1140 1140 copy._copyfunc = self._copyfunc
1141 1141 return copy
1142 1142
1143 1143 def filesnotin(self, m2, match=None):
1144 1144 '''Set of files in this manifest that are not in the other'''
1145 1145 if match and not match.always():
1146 1146 m1 = self._matches(match)
1147 1147 m2 = m2._matches(match)
1148 1148 return m1.filesnotin(m2)
1149 1149
1150 1150 files = set()
1151 1151
1152 1152 def _filesnotin(t1, t2):
1153 1153 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1154 1154 return
1155 1155 t1._load()
1156 1156 t2._load()
1157 1157 self._loaddifflazy(t1, t2)
1158 1158 for d, m1 in t1._dirs.items():
1159 1159 if d in t2._dirs:
1160 1160 m2 = t2._dirs[d]
1161 1161 _filesnotin(m1, m2)
1162 1162 else:
1163 1163 files.update(m1.iterkeys())
1164 1164
1165 1165 for fn in t1._files:
1166 1166 if fn not in t2._files:
1167 1167 files.add(t1._subpath(fn))
1168 1168
1169 1169 _filesnotin(self, m2)
1170 1170 return files
1171 1171
1172 1172 @propertycache
1173 1173 def _alldirs(self):
1174 1174 return pathutil.dirs(self)
1175 1175
1176 1176 def dirs(self):
1177 1177 return self._alldirs
1178 1178
1179 1179 def hasdir(self, dir):
1180 1180 self._load()
1181 1181 topdir, subdir = _splittopdir(dir)
1182 1182 if topdir:
1183 1183 self._loadlazy(topdir)
1184 1184 if topdir in self._dirs:
1185 1185 return self._dirs[topdir].hasdir(subdir)
1186 1186 return False
1187 1187 dirslash = dir + b'/'
1188 1188 return dirslash in self._dirs or dirslash in self._lazydirs
1189 1189
1190 1190 def walk(self, match):
1191 1191 """Generates matching file names.
1192 1192
1193 1193 It also reports nonexistent files by marking them bad with match.bad().
1194 1194 """
1195 1195 if match.always():
1196 1196 for f in iter(self):
1197 1197 yield f
1198 1198 return
1199 1199
1200 1200 fset = set(match.files())
1201 1201
1202 1202 for fn in self._walk(match):
1203 1203 if fn in fset:
1204 1204 # specified pattern is the exact name
1205 1205 fset.remove(fn)
1206 1206 yield fn
1207 1207
1208 1208 # for dirstate.walk, files=[''] means "walk the whole tree".
1209 1209 # follow that here, too
1210 1210 fset.discard(b'')
1211 1211
1212 1212 for fn in sorted(fset):
1213 1213 if not self.hasdir(fn):
1214 1214 match.bad(fn, None)
1215 1215
1216 1216 def _walk(self, match):
1217 1217 '''Recursively generates matching file names for walk().'''
1218 1218 visit = match.visitchildrenset(self._dir[:-1])
1219 1219 if not visit:
1220 1220 return
1221 1221
1222 1222 # yield this dir's files and walk its submanifests
1223 1223 self._load()
1224 1224 visit = self._loadchildrensetlazy(visit)
1225 1225 for p in sorted(list(self._dirs) + list(self._files)):
1226 1226 if p in self._files:
1227 1227 fullp = self._subpath(p)
1228 1228 if match(fullp):
1229 1229 yield fullp
1230 1230 else:
1231 1231 if not visit or p[:-1] in visit:
1232 1232 for f in self._dirs[p]._walk(match):
1233 1233 yield f
1234 1234
1235 1235 def _matches(self, match):
1236 1236 """recursively generate a new manifest filtered by the match argument."""
1237 1237 if match.always():
1238 1238 return self.copy()
1239 1239 return self._matches_inner(match)
1240 1240
1241 1241 def _matches_inner(self, match):
1242 1242 if match.always():
1243 1243 return self.copy()
1244 1244
1245 1245 visit = match.visitchildrenset(self._dir[:-1])
1246 1246 if visit == b'all':
1247 1247 return self.copy()
1248 1248 ret = treemanifest(self.nodeconstants, self._dir)
1249 1249 if not visit:
1250 1250 return ret
1251 1251
1252 1252 self._load()
1253 1253 for fn in self._files:
1254 1254 # While visitchildrenset *usually* lists only subdirs, this is
1255 1255 # actually up to the matcher and may have some files in the set().
1256 1256 # If visit == 'this', we should obviously look at the files in this
1257 1257 # directory; if visit is a set, and fn is in it, we should inspect
1258 1258 # fn (but no need to inspect things not in the set).
1259 1259 if visit != b'this' and fn not in visit:
1260 1260 continue
1261 1261 fullp = self._subpath(fn)
1262 1262 # visitchildrenset isn't perfect, we still need to call the regular
1263 1263 # matcher code to further filter results.
1264 1264 if not match(fullp):
1265 1265 continue
1266 1266 ret._files[fn] = self._files[fn]
1267 1267 if fn in self._flags:
1268 1268 ret._flags[fn] = self._flags[fn]
1269 1269
1270 1270 visit = self._loadchildrensetlazy(visit)
1271 1271 for dir, subm in self._dirs.items():
1272 1272 if visit and dir[:-1] not in visit:
1273 1273 continue
1274 1274 m = subm._matches_inner(match)
1275 1275 if not m._isempty():
1276 1276 ret._dirs[dir] = m
1277 1277
1278 1278 if not ret._isempty():
1279 1279 ret._dirty = True
1280 1280 return ret
1281 1281
1282 1282 def fastdelta(self, base, changes):
1283 1283 raise FastdeltaUnavailable()
1284 1284
1285 1285 def diff(self, m2, match=None, clean=False):
1286 1286 """Finds changes between the current manifest and m2.
1287 1287
1288 1288 Args:
1289 1289 m2: the manifest to which this manifest should be compared.
1290 1290 clean: if true, include files unchanged between these manifests
1291 1291 with a None value in the returned dictionary.
1292 1292
1293 1293 The result is returned as a dict with filename as key and
1294 1294 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1295 1295 nodeid in the current/other manifest and fl1/fl2 is the flag
1296 1296 in the current/other manifest. Where the file does not exist,
1297 1297 the nodeid will be None and the flags will be the empty
1298 1298 string.
1299 1299 """
1300 1300 if match and not match.always():
1301 1301 m1 = self._matches(match)
1302 1302 m2 = m2._matches(match)
1303 1303 return m1.diff(m2, clean=clean)
1304 1304 result = {}
1305 1305 emptytree = treemanifest(self.nodeconstants)
1306 1306
1307 1307 def _iterativediff(t1, t2, stack):
1308 1308 """compares two tree manifests and append new tree-manifests which
1309 1309 needs to be compared to stack"""
1310 1310 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1311 1311 return
1312 1312 t1._load()
1313 1313 t2._load()
1314 1314 self._loaddifflazy(t1, t2)
1315 1315
1316 1316 for d, m1 in t1._dirs.items():
1317 1317 m2 = t2._dirs.get(d, emptytree)
1318 1318 stack.append((m1, m2))
1319 1319
1320 1320 for d, m2 in t2._dirs.items():
1321 1321 if d not in t1._dirs:
1322 1322 stack.append((emptytree, m2))
1323 1323
1324 1324 for fn, n1 in t1._files.items():
1325 1325 fl1 = t1._flags.get(fn, b'')
1326 1326 n2 = t2._files.get(fn, None)
1327 1327 fl2 = t2._flags.get(fn, b'')
1328 1328 if n1 != n2 or fl1 != fl2:
1329 1329 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1330 1330 elif clean:
1331 1331 result[t1._subpath(fn)] = None
1332 1332
1333 1333 for fn, n2 in t2._files.items():
1334 1334 if fn not in t1._files:
1335 1335 fl2 = t2._flags.get(fn, b'')
1336 1336 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1337 1337
1338 1338 stackls = []
1339 1339 _iterativediff(self, m2, stackls)
1340 1340 while stackls:
1341 1341 t1, t2 = stackls.pop()
1342 1342 # stackls is populated in the function call
1343 1343 _iterativediff(t1, t2, stackls)
1344 1344 return result
1345 1345
1346 1346 def unmodifiedsince(self, m2):
1347 1347 return not self._dirty and not m2._dirty and self._node == m2._node
1348 1348
1349 1349 def parse(self, text, readsubtree):
1350 1350 selflazy = self._lazydirs
1351 1351 for f, n, fl in _parse(self._nodelen, text):
1352 1352 if fl == b't':
1353 1353 f = f + b'/'
1354 1354 # False below means "doesn't need to be copied" and can use the
1355 1355 # cached value from readsubtree directly.
1356 1356 selflazy[f] = (n, readsubtree, False)
1357 1357 elif b'/' in f:
1358 1358 # This is a flat manifest, so use __setitem__ and setflag rather
1359 1359 # than assigning directly to _files and _flags, so we can
1360 1360 # assign a path in a subdirectory, and to mark dirty (compared
1361 1361 # to nullid).
1362 1362 self[f] = n
1363 1363 if fl:
1364 1364 self.setflag(f, fl)
1365 1365 else:
1366 1366 # Assigning to _files and _flags avoids marking as dirty,
1367 1367 # and should be a little faster.
1368 1368 self._files[f] = n
1369 1369 if fl:
1370 1370 self._flags[f] = fl
1371 1371
1372 1372 def text(self):
1373 1373 """Get the full data of this manifest as a bytestring."""
1374 1374 self._load()
1375 1375 return _text(self.iterentries())
1376 1376
1377 1377 def dirtext(self):
1378 1378 """Get the full data of this directory as a bytestring. Make sure that
1379 1379 any submanifests have been written first, so their nodeids are correct.
1380 1380 """
1381 1381 self._load()
1382 1382 flags = self.flags
1383 1383 lazydirs = [(d[:-1], v[0], b't') for d, v in self._lazydirs.items()]
1384 1384 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1385 1385 files = [(f, self._files[f], flags(f)) for f in self._files]
1386 1386 return _text(sorted(dirs + files + lazydirs))
1387 1387
1388 1388 def read(self, gettext, readsubtree):
1389 1389 def _load_for_read(s):
1390 1390 s.parse(gettext(), readsubtree)
1391 1391 s._dirty = False
1392 1392
1393 1393 self._loadfunc = _load_for_read
1394 1394
1395 1395 def writesubtrees(self, m1, m2, writesubtree, match):
1396 1396 self._load() # for consistency; should never have any effect here
1397 1397 m1._load()
1398 1398 m2._load()
1399 1399 emptytree = treemanifest(self.nodeconstants)
1400 1400
1401 1401 def getnode(m, d):
1402 1402 ld = m._lazydirs.get(d)
1403 1403 if ld:
1404 1404 return ld[0]
1405 1405 return m._dirs.get(d, emptytree)._node
1406 1406
1407 1407 # let's skip investigating things that `match` says we do not need.
1408 1408 visit = match.visitchildrenset(self._dir[:-1])
1409 1409 visit = self._loadchildrensetlazy(visit)
1410 1410 if visit == b'this' or visit == b'all':
1411 1411 visit = None
1412 1412 for d, subm in self._dirs.items():
1413 1413 if visit and d[:-1] not in visit:
1414 1414 continue
1415 1415 subp1 = getnode(m1, d)
1416 1416 subp2 = getnode(m2, d)
1417 1417 if subp1 == self.nodeconstants.nullid:
1418 1418 subp1, subp2 = subp2, subp1
1419 1419 writesubtree(subm, subp1, subp2, match)
1420 1420
1421 1421 def walksubtrees(self, matcher=None):
1422 1422 """Returns an iterator of the subtrees of this manifest, including this
1423 1423 manifest itself.
1424 1424
1425 1425 If `matcher` is provided, it only returns subtrees that match.
1426 1426 """
1427 1427 if matcher and not matcher.visitdir(self._dir[:-1]):
1428 1428 return
1429 1429 if not matcher or matcher(self._dir[:-1]):
1430 1430 yield self
1431 1431
1432 1432 self._load()
1433 1433 # OPT: use visitchildrenset to avoid loading everything.
1434 1434 self._loadalllazy()
1435 1435 for d, subm in self._dirs.items():
1436 1436 for subtree in subm.walksubtrees(matcher=matcher):
1437 1437 yield subtree
1438 1438
1439 1439
1440 1440 class manifestfulltextcache(util.lrucachedict):
1441 1441 """File-backed LRU cache for the manifest cache
1442 1442
1443 1443 File consists of entries, up to EOF:
1444 1444
1445 1445 - 20 bytes node, 4 bytes length, <length> manifest data
1446 1446
1447 1447 These are written in reverse cache order (oldest to newest).
1448 1448
1449 1449 """
1450 1450
1451 1451 _file = b'manifestfulltextcache'
1452 1452
1453 1453 def __init__(self, max):
1454 1454 super(manifestfulltextcache, self).__init__(max)
1455 1455 self._dirty = False
1456 1456 self._read = False
1457 1457 self._opener = None
1458 1458
1459 1459 def read(self):
1460 1460 if self._read or self._opener is None:
1461 1461 return
1462 1462
1463 1463 try:
1464 1464 with self._opener(self._file) as fp:
1465 1465 set = super(manifestfulltextcache, self).__setitem__
1466 1466 # ignore trailing data, this is a cache, corruption is skipped
1467 1467 while True:
1468 1468 # TODO do we need to do work here for sha1 portability?
1469 1469 node = fp.read(20)
1470 1470 if len(node) < 20:
1471 1471 break
1472 1472 try:
1473 1473 size = struct.unpack(b'>L', fp.read(4))[0]
1474 1474 except struct.error:
1475 1475 break
1476 1476 value = bytearray(fp.read(size))
1477 1477 if len(value) != size:
1478 1478 break
1479 1479 set(node, value)
1480 1480 except IOError:
1481 1481 # the file is allowed to be missing
1482 1482 pass
1483 1483
1484 1484 self._read = True
1485 1485 self._dirty = False
1486 1486
1487 1487 def write(self):
1488 1488 if not self._dirty or self._opener is None:
1489 1489 return
1490 1490 # rotate backwards to the first used node
1491 1491 try:
1492 1492 with self._opener(
1493 1493 self._file, b'w', atomictemp=True, checkambig=True
1494 1494 ) as fp:
1495 1495 node = self._head.prev
1496 1496 while True:
1497 1497 if node.key in self._cache:
1498 1498 fp.write(node.key)
1499 1499 fp.write(struct.pack(b'>L', len(node.value)))
1500 1500 fp.write(node.value)
1501 1501 if node is self._head:
1502 1502 break
1503 1503 node = node.prev
1504 1504 except IOError:
1505 1505 # We could not write the cache (eg: permission error)
1506 1506 # the content can be missing.
1507 1507 #
1508 1508 # We could try harder and see if we could recreate a wcache
1509 1509 # directory were we coudl write too.
1510 1510 #
1511 1511 # XXX the error pass silently, having some way to issue an error
1512 1512 # log `ui.log` would be nice.
1513 1513 pass
1514 1514
1515 1515 def __len__(self):
1516 1516 if not self._read:
1517 1517 self.read()
1518 1518 return super(manifestfulltextcache, self).__len__()
1519 1519
1520 1520 def __contains__(self, k):
1521 1521 if not self._read:
1522 1522 self.read()
1523 1523 return super(manifestfulltextcache, self).__contains__(k)
1524 1524
1525 1525 def __iter__(self):
1526 1526 if not self._read:
1527 1527 self.read()
1528 1528 return super(manifestfulltextcache, self).__iter__()
1529 1529
1530 1530 def __getitem__(self, k):
1531 1531 if not self._read:
1532 1532 self.read()
1533 1533 # the cache lru order can change on read
1534 1534 setdirty = self._cache.get(k) is not self._head
1535 1535 value = super(manifestfulltextcache, self).__getitem__(k)
1536 1536 if setdirty:
1537 1537 self._dirty = True
1538 1538 return value
1539 1539
1540 1540 def __setitem__(self, k, v):
1541 1541 if not self._read:
1542 1542 self.read()
1543 1543 super(manifestfulltextcache, self).__setitem__(k, v)
1544 1544 self._dirty = True
1545 1545
1546 1546 def __delitem__(self, k):
1547 1547 if not self._read:
1548 1548 self.read()
1549 1549 super(manifestfulltextcache, self).__delitem__(k)
1550 1550 self._dirty = True
1551 1551
1552 1552 def get(self, k, default=None):
1553 1553 if not self._read:
1554 1554 self.read()
1555 1555 return super(manifestfulltextcache, self).get(k, default=default)
1556 1556
1557 1557 def clear(self, clear_persisted_data=False):
1558 1558 super(manifestfulltextcache, self).clear()
1559 1559 if clear_persisted_data:
1560 1560 self._dirty = True
1561 1561 self.write()
1562 1562 self._read = False
1563 1563
1564 1564
1565 1565 # and upper bound of what we expect from compression
1566 1566 # (real live value seems to be "3")
1567 1567 MAXCOMPRESSION = 3
1568 1568
1569 1569
1570 1570 class FastdeltaUnavailable(Exception):
1571 1571 """Exception raised when fastdelta isn't usable on a manifest."""
1572 1572
1573 1573
1574 1574 @interfaceutil.implementer(repository.imanifeststorage)
1575 1575 class manifestrevlog:
1576 1576 """A revlog that stores manifest texts. This is responsible for caching the
1577 1577 full-text manifest contents.
1578 1578 """
1579 1579
1580 1580 def __init__(
1581 1581 self,
1582 1582 nodeconstants,
1583 1583 opener,
1584 1584 tree=b'',
1585 1585 dirlogcache=None,
1586 1586 treemanifest=False,
1587 1587 ):
1588 1588 """Constructs a new manifest revlog
1589 1589
1590 1590 `indexfile` - used by extensions to have two manifests at once, like
1591 1591 when transitioning between flatmanifeset and treemanifests.
1592 1592
1593 1593 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1594 1594 options can also be used to make this a tree manifest revlog. The opener
1595 1595 option takes precedence, so if it is set to True, we ignore whatever
1596 1596 value is passed in to the constructor.
1597 1597 """
1598 1598 self.nodeconstants = nodeconstants
1599 1599 # During normal operations, we expect to deal with not more than four
1600 1600 # revs at a time (such as during commit --amend). When rebasing large
1601 1601 # stacks of commits, the number can go up, hence the config knob below.
1602 1602 cachesize = 4
1603 1603 optiontreemanifest = False
1604 1604 opts = getattr(opener, 'options', None)
1605 1605 if opts is not None:
1606 1606 cachesize = opts.get(b'manifestcachesize', cachesize)
1607 1607 optiontreemanifest = opts.get(b'treemanifest', False)
1608 1608
1609 1609 self._treeondisk = optiontreemanifest or treemanifest
1610 1610
1611 1611 self._fulltextcache = manifestfulltextcache(cachesize)
1612 1612
1613 1613 if tree:
1614 1614 assert self._treeondisk, (tree, b'opts is %r' % opts)
1615 1615
1616 1616 radix = b'00manifest'
1617 1617 if tree:
1618 1618 radix = b"meta/" + tree + radix
1619 1619
1620 1620 self.tree = tree
1621 1621
1622 1622 # The dirlogcache is kept on the root manifest log
1623 1623 if tree:
1624 1624 self._dirlogcache = dirlogcache
1625 1625 else:
1626 1626 self._dirlogcache = {b'': self}
1627 1627
1628 1628 self._revlog = revlog.revlog(
1629 1629 opener,
1630 1630 target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
1631 1631 radix=radix,
1632 1632 # only root indexfile is cached
1633 1633 checkambig=not bool(tree),
1634 1634 mmaplargeindex=True,
1635 1635 upperboundcomp=MAXCOMPRESSION,
1636 1636 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
1637 1637 )
1638 1638
1639 1639 self.index = self._revlog.index
1640 1640
1641 1641 def get_revlog(self):
1642 1642 """return an actual revlog instance if any
1643 1643
1644 1644 This exist because a lot of code leverage the fact the underlying
1645 1645 storage is a revlog for optimization, so giving simple way to access
1646 1646 the revlog instance helps such code.
1647 1647 """
1648 1648 return self._revlog
1649 1649
1650 1650 def _setupmanifestcachehooks(self, repo):
1651 1651 """Persist the manifestfulltextcache on lock release"""
1652 1652 if not hasattr(repo, '_wlockref'):
1653 1653 return
1654 1654
1655 1655 self._fulltextcache._opener = repo.wcachevfs
1656 1656 if repo._currentlock(repo._wlockref) is None:
1657 1657 return
1658 1658
1659 1659 reporef = weakref.ref(repo)
1660 1660 manifestrevlogref = weakref.ref(self)
1661 1661
1662 1662 def persistmanifestcache(success):
1663 1663 # Repo is in an unknown state, do not persist.
1664 1664 if not success:
1665 1665 return
1666 1666
1667 1667 repo = reporef()
1668 1668 self = manifestrevlogref()
1669 1669 if repo is None or self is None:
1670 1670 return
1671 1671 if repo.manifestlog.getstorage(b'') is not self:
1672 1672 # there's a different manifest in play now, abort
1673 1673 return
1674 1674 self._fulltextcache.write()
1675 1675
1676 1676 repo._afterlock(persistmanifestcache)
1677 1677
1678 1678 @property
1679 1679 def fulltextcache(self):
1680 1680 return self._fulltextcache
1681 1681
1682 1682 def clearcaches(self, clear_persisted_data=False):
1683 1683 self._revlog.clearcaches()
1684 1684 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1685 1685 self._dirlogcache = {self.tree: self}
1686 1686
1687 1687 def dirlog(self, d):
1688 1688 if d:
1689 1689 assert self._treeondisk
1690 1690 if d not in self._dirlogcache:
1691 1691 mfrevlog = manifestrevlog(
1692 1692 self.nodeconstants,
1693 1693 self.opener,
1694 1694 d,
1695 1695 self._dirlogcache,
1696 1696 treemanifest=self._treeondisk,
1697 1697 )
1698 1698 self._dirlogcache[d] = mfrevlog
1699 1699 return self._dirlogcache[d]
1700 1700
1701 1701 def add(
1702 1702 self,
1703 1703 m,
1704 1704 transaction,
1705 1705 link,
1706 1706 p1,
1707 1707 p2,
1708 1708 added,
1709 1709 removed,
1710 1710 readtree=None,
1711 1711 match=None,
1712 1712 ):
1713 1713 """add some manifest entry in to the manifest log
1714 1714
1715 1715 input:
1716 1716
1717 1717 m: the manifest dict we want to store
1718 1718 transaction: the open transaction
1719 1719 p1: manifest-node of p1
1720 1720 p2: manifest-node of p2
1721 1721 added: file added/changed compared to parent
1722 1722 removed: file removed compared to parent
1723 1723
1724 1724 tree manifest input:
1725 1725
1726 1726 readtree: a function to read a subtree
1727 1727 match: a filematcher for the subpart of the tree manifest
1728 1728 """
1729 1729 try:
1730 1730 if p1 not in self.fulltextcache:
1731 1731 raise FastdeltaUnavailable()
1732 1732 # If our first parent is in the manifest cache, we can
1733 1733 # compute a delta here using properties we know about the
1734 1734 # manifest up-front, which may save time later for the
1735 1735 # revlog layer.
1736 1736
1737 1737 _checkforbidden(added)
1738 1738 # combine the changed lists into one sorted iterator
1739 1739 work = heapq.merge(
1740 1740 [(x, False) for x in sorted(added)],
1741 1741 [(x, True) for x in sorted(removed)],
1742 1742 )
1743 1743
1744 1744 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1745 1745 cachedelta = self._revlog.rev(p1), deltatext
1746 1746 text = util.buffer(arraytext)
1747 1747 rev = self._revlog.addrevision(
1748 1748 text, transaction, link, p1, p2, cachedelta
1749 1749 )
1750 1750 n = self._revlog.node(rev)
1751 1751 except FastdeltaUnavailable:
1752 1752 # The first parent manifest isn't already loaded or the
1753 1753 # manifest implementation doesn't support fastdelta, so
1754 1754 # we'll just encode a fulltext of the manifest and pass
1755 1755 # that through to the revlog layer, and let it handle the
1756 1756 # delta process.
1757 1757 if self._treeondisk:
1758 1758 assert readtree, b"readtree must be set for treemanifest writes"
1759 1759 assert match, b"match must be specified for treemanifest writes"
1760 1760 m1 = readtree(self.tree, p1)
1761 1761 m2 = readtree(self.tree, p2)
1762 1762 n = self._addtree(
1763 1763 m, transaction, link, m1, m2, readtree, match=match
1764 1764 )
1765 1765 arraytext = None
1766 1766 else:
1767 1767 text = m.text()
1768 1768 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1769 1769 n = self._revlog.node(rev)
1770 1770 arraytext = bytearray(text)
1771 1771
1772 1772 if arraytext is not None:
1773 1773 self.fulltextcache[n] = arraytext
1774 1774
1775 1775 return n
1776 1776
1777 1777 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1778 1778 # If the manifest is unchanged compared to one parent,
1779 1779 # don't write a new revision
1780 1780 if self.tree != b'' and (
1781 1781 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1782 1782 ):
1783 1783 return m.node()
1784 1784
1785 1785 def writesubtree(subm, subp1, subp2, match):
1786 1786 sublog = self.dirlog(subm.dir())
1787 1787 sublog.add(
1788 1788 subm,
1789 1789 transaction,
1790 1790 link,
1791 1791 subp1,
1792 1792 subp2,
1793 1793 None,
1794 1794 None,
1795 1795 readtree=readtree,
1796 1796 match=match,
1797 1797 )
1798 1798
1799 1799 m.writesubtrees(m1, m2, writesubtree, match)
1800 1800 text = m.dirtext()
1801 1801 n = None
1802 1802 if self.tree != b'':
1803 1803 # Double-check whether contents are unchanged to one parent
1804 1804 if text == m1.dirtext():
1805 1805 n = m1.node()
1806 1806 elif text == m2.dirtext():
1807 1807 n = m2.node()
1808 1808
1809 1809 if not n:
1810 1810 rev = self._revlog.addrevision(
1811 1811 text, transaction, link, m1.node(), m2.node()
1812 1812 )
1813 1813 n = self._revlog.node(rev)
1814 1814
1815 1815 # Save nodeid so parent manifest can calculate its nodeid
1816 1816 m.setnode(n)
1817 1817 return n
1818 1818
1819 1819 def __len__(self):
1820 1820 return len(self._revlog)
1821 1821
1822 1822 def __iter__(self):
1823 1823 return self._revlog.__iter__()
1824 1824
1825 1825 def rev(self, node):
1826 1826 return self._revlog.rev(node)
1827 1827
1828 1828 def node(self, rev):
1829 1829 return self._revlog.node(rev)
1830 1830
1831 1831 def lookup(self, value):
1832 1832 return self._revlog.lookup(value)
1833 1833
1834 1834 def parentrevs(self, rev):
1835 1835 return self._revlog.parentrevs(rev)
1836 1836
1837 1837 def parents(self, node):
1838 1838 return self._revlog.parents(node)
1839 1839
1840 1840 def linkrev(self, rev):
1841 1841 return self._revlog.linkrev(rev)
1842 1842
1843 1843 def checksize(self):
1844 1844 return self._revlog.checksize()
1845 1845
1846 1846 def revision(self, node):
1847 1847 return self._revlog.revision(node)
1848 1848
1849 1849 def rawdata(self, node):
1850 1850 return self._revlog.rawdata(node)
1851 1851
1852 1852 def revdiff(self, rev1, rev2):
1853 1853 return self._revlog.revdiff(rev1, rev2)
1854 1854
1855 1855 def cmp(self, node, text):
1856 1856 return self._revlog.cmp(node, text)
1857 1857
1858 1858 def deltaparent(self, rev):
1859 1859 return self._revlog.deltaparent(rev)
1860 1860
1861 1861 def emitrevisions(
1862 1862 self,
1863 1863 nodes,
1864 1864 nodesorder=None,
1865 1865 revisiondata=False,
1866 1866 assumehaveparentrevisions=False,
1867 1867 deltamode=repository.CG_DELTAMODE_STD,
1868 1868 sidedata_helpers=None,
1869 1869 debug_info=None,
1870 1870 ):
1871 1871 return self._revlog.emitrevisions(
1872 1872 nodes,
1873 1873 nodesorder=nodesorder,
1874 1874 revisiondata=revisiondata,
1875 1875 assumehaveparentrevisions=assumehaveparentrevisions,
1876 1876 deltamode=deltamode,
1877 1877 sidedata_helpers=sidedata_helpers,
1878 1878 debug_info=debug_info,
1879 1879 )
1880 1880
1881 1881 def addgroup(
1882 1882 self,
1883 1883 deltas,
1884 1884 linkmapper,
1885 1885 transaction,
1886 1886 alwayscache=False,
1887 1887 addrevisioncb=None,
1888 1888 duplicaterevisioncb=None,
1889 1889 debug_info=None,
1890 1890 delta_base_reuse_policy=None,
1891 1891 ):
1892 1892 return self._revlog.addgroup(
1893 1893 deltas,
1894 1894 linkmapper,
1895 1895 transaction,
1896 1896 alwayscache=alwayscache,
1897 1897 addrevisioncb=addrevisioncb,
1898 1898 duplicaterevisioncb=duplicaterevisioncb,
1899 1899 debug_info=debug_info,
1900 1900 delta_base_reuse_policy=delta_base_reuse_policy,
1901 1901 )
1902 1902
1903 1903 def rawsize(self, rev):
1904 1904 return self._revlog.rawsize(rev)
1905 1905
1906 1906 def getstrippoint(self, minlink):
1907 1907 return self._revlog.getstrippoint(minlink)
1908 1908
1909 1909 def strip(self, minlink, transaction):
1910 1910 return self._revlog.strip(minlink, transaction)
1911 1911
1912 1912 def files(self):
1913 1913 return self._revlog.files()
1914 1914
1915 1915 def clone(self, tr, destrevlog, **kwargs):
1916 1916 if not isinstance(destrevlog, manifestrevlog):
1917 1917 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1918 1918
1919 1919 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1920 1920
1921 1921 def storageinfo(
1922 1922 self,
1923 1923 exclusivefiles=False,
1924 1924 sharedfiles=False,
1925 1925 revisionscount=False,
1926 1926 trackedsize=False,
1927 1927 storedsize=False,
1928 1928 ):
1929 1929 return self._revlog.storageinfo(
1930 1930 exclusivefiles=exclusivefiles,
1931 1931 sharedfiles=sharedfiles,
1932 1932 revisionscount=revisionscount,
1933 1933 trackedsize=trackedsize,
1934 1934 storedsize=storedsize,
1935 1935 )
1936 1936
1937 1937 @property
1938 1938 def opener(self):
1939 1939 return self._revlog.opener
1940 1940
1941 1941 @opener.setter
1942 1942 def opener(self, value):
1943 1943 self._revlog.opener = value
1944 1944
1945 1945
1946 1946 @interfaceutil.implementer(repository.imanifestlog)
1947 1947 class manifestlog:
1948 1948 """A collection class representing the collection of manifest snapshots
1949 1949 referenced by commits in the repository.
1950 1950
1951 1951 In this situation, 'manifest' refers to the abstract concept of a snapshot
1952 1952 of the list of files in the given commit. Consumers of the output of this
1953 1953 class do not care about the implementation details of the actual manifests
1954 1954 they receive (i.e. tree or flat or lazily loaded, etc)."""
1955 1955
1956 1956 def __init__(self, opener, repo, rootstore, narrowmatch):
1957 1957 self.nodeconstants = repo.nodeconstants
1958 1958 usetreemanifest = False
1959 1959 cachesize = 4
1960 1960
1961 1961 opts = getattr(opener, 'options', None)
1962 1962 if opts is not None:
1963 1963 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1964 1964 cachesize = opts.get(b'manifestcachesize', cachesize)
1965 1965
1966 1966 self._treemanifests = usetreemanifest
1967 1967
1968 1968 self._rootstore = rootstore
1969 1969 self._rootstore._setupmanifestcachehooks(repo)
1970 1970 self._narrowmatch = narrowmatch
1971 1971
1972 1972 # A cache of the manifestctx or treemanifestctx for each directory
1973 1973 self._dirmancache = {}
1974 1974 self._dirmancache[b''] = util.lrucachedict(cachesize)
1975 1975
1976 1976 self._cachesize = cachesize
1977 1977
1978 1978 def __getitem__(self, node):
1979 1979 """Retrieves the manifest instance for the given node. Throws a
1980 1980 LookupError if not found.
1981 1981 """
1982 1982 return self.get(b'', node)
1983 1983
1984 1984 def get(self, tree, node, verify=True):
1985 1985 """Retrieves the manifest instance for the given node. Throws a
1986 1986 LookupError if not found.
1987 1987
1988 1988 `verify` - if True an exception will be thrown if the node is not in
1989 1989 the revlog
1990 1990 """
1991 1991 if node in self._dirmancache.get(tree, ()):
1992 1992 return self._dirmancache[tree][node]
1993 1993
1994 1994 if not self._narrowmatch.always():
1995 1995 if not self._narrowmatch.visitdir(tree[:-1]):
1996 1996 return excludeddirmanifestctx(self.nodeconstants, tree, node)
1997 1997 if tree:
1998 1998 if self._rootstore._treeondisk:
1999 1999 if verify:
2000 2000 # Side-effect is LookupError is raised if node doesn't
2001 2001 # exist.
2002 2002 self.getstorage(tree).rev(node)
2003 2003
2004 2004 m = treemanifestctx(self, tree, node)
2005 2005 else:
2006 2006 raise error.Abort(
2007 2007 _(
2008 2008 b"cannot ask for manifest directory '%s' in a flat "
2009 2009 b"manifest"
2010 2010 )
2011 2011 % tree
2012 2012 )
2013 2013 else:
2014 2014 if verify:
2015 2015 # Side-effect is LookupError is raised if node doesn't exist.
2016 2016 self._rootstore.rev(node)
2017 2017
2018 2018 if self._treemanifests:
2019 2019 m = treemanifestctx(self, b'', node)
2020 2020 else:
2021 2021 m = manifestctx(self, node)
2022 2022
2023 2023 if node != self.nodeconstants.nullid:
2024 2024 mancache = self._dirmancache.get(tree)
2025 2025 if not mancache:
2026 2026 mancache = util.lrucachedict(self._cachesize)
2027 2027 self._dirmancache[tree] = mancache
2028 2028 mancache[node] = m
2029 2029 return m
2030 2030
2031 2031 def getstorage(self, tree):
2032 2032 return self._rootstore.dirlog(tree)
2033 2033
2034 2034 def clearcaches(self, clear_persisted_data=False):
2035 2035 self._dirmancache.clear()
2036 2036 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2037 2037
2038 2038 def rev(self, node):
2039 2039 return self._rootstore.rev(node)
2040 2040
2041 2041 def update_caches(self, transaction):
2042 2042 return self._rootstore._revlog.update_caches(transaction=transaction)
2043 2043
2044 2044
2045 2045 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2046 2046 class memmanifestctx:
2047 2047 def __init__(self, manifestlog):
2048 2048 self._manifestlog = manifestlog
2049 2049 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2050 2050
2051 2051 def _storage(self):
2052 2052 return self._manifestlog.getstorage(b'')
2053 2053
2054 2054 def copy(self):
2055 2055 memmf = memmanifestctx(self._manifestlog)
2056 2056 memmf._manifestdict = self.read().copy()
2057 2057 return memmf
2058 2058
2059 2059 def read(self):
2060 2060 return self._manifestdict
2061 2061
2062 2062 def write(self, transaction, link, p1, p2, added, removed, match=None):
2063 2063 return self._storage().add(
2064 2064 self._manifestdict,
2065 2065 transaction,
2066 2066 link,
2067 2067 p1,
2068 2068 p2,
2069 2069 added,
2070 2070 removed,
2071 2071 match=match,
2072 2072 )
2073 2073
2074 2074
2075 2075 @interfaceutil.implementer(repository.imanifestrevisionstored)
2076 2076 class manifestctx:
2077 2077 """A class representing a single revision of a manifest, including its
2078 2078 contents, its parent revs, and its linkrev.
2079 2079 """
2080 2080
2081 2081 def __init__(self, manifestlog, node):
2082 2082 self._manifestlog = manifestlog
2083 2083 self._data = None
2084 2084
2085 2085 self._node = node
2086 2086
2087 2087 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2088 2088 # but let's add it later when something needs it and we can load it
2089 2089 # lazily.
2090 2090 # self.p1, self.p2 = store.parents(node)
2091 2091 # rev = store.rev(node)
2092 2092 # self.linkrev = store.linkrev(rev)
2093 2093
2094 2094 def _storage(self):
2095 2095 return self._manifestlog.getstorage(b'')
2096 2096
2097 2097 def node(self):
2098 2098 return self._node
2099 2099
2100 2100 def copy(self):
2101 2101 memmf = memmanifestctx(self._manifestlog)
2102 2102 memmf._manifestdict = self.read().copy()
2103 2103 return memmf
2104 2104
2105 2105 @propertycache
2106 2106 def parents(self):
2107 2107 return self._storage().parents(self._node)
2108 2108
2109 2109 def read(self):
2110 2110 if self._data is None:
2111 2111 nc = self._manifestlog.nodeconstants
2112 2112 if self._node == nc.nullid:
2113 2113 self._data = manifestdict(nc.nodelen)
2114 2114 else:
2115 2115 store = self._storage()
2116 2116 if self._node in store.fulltextcache:
2117 2117 text = pycompat.bytestr(store.fulltextcache[self._node])
2118 2118 else:
2119 2119 text = store.revision(self._node)
2120 2120 arraytext = bytearray(text)
2121 2121 store.fulltextcache[self._node] = arraytext
2122 2122 self._data = manifestdict(nc.nodelen, text)
2123 2123 return self._data
2124 2124
2125 2125 def readfast(self, shallow=False):
2126 2126 """Calls either readdelta or read, based on which would be less work.
2127 2127 readdelta is called if the delta is against the p1, and therefore can be
2128 2128 read quickly.
2129 2129
2130 2130 If `shallow` is True, nothing changes since this is a flat manifest.
2131 2131 """
2132 2132 store = self._storage()
2133 2133 r = store.rev(self._node)
2134 2134 deltaparent = store.deltaparent(r)
2135 2135 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2136 2136 return self.readdelta()
2137 2137 return self.read()
2138 2138
2139 2139 def readdelta(self, shallow=False):
2140 2140 """Returns a manifest containing just the entries that are present
2141 2141 in this manifest, but not in its p1 manifest. This is efficient to read
2142 2142 if the revlog delta is already p1.
2143 2143
2144 2144 Changing the value of `shallow` has no effect on flat manifests.
2145 2145 """
2146 2146 store = self._storage()
2147 2147 r = store.rev(self._node)
2148 2148 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2149 2149 return manifestdict(store.nodeconstants.nodelen, d)
2150 2150
2151 2151 def find(self, key):
2152 2152 return self.read().find(key)
2153 2153
2154 2154
2155 2155 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2156 2156 class memtreemanifestctx:
2157 2157 def __init__(self, manifestlog, dir=b''):
2158 2158 self._manifestlog = manifestlog
2159 2159 self._dir = dir
2160 2160 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2161 2161
2162 2162 def _storage(self):
2163 2163 return self._manifestlog.getstorage(b'')
2164 2164
2165 2165 def copy(self):
2166 2166 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2167 2167 memmf._treemanifest = self._treemanifest.copy()
2168 2168 return memmf
2169 2169
2170 2170 def read(self):
2171 2171 return self._treemanifest
2172 2172
2173 2173 def write(self, transaction, link, p1, p2, added, removed, match=None):
2174 2174 def readtree(dir, node):
2175 2175 return self._manifestlog.get(dir, node).read()
2176 2176
2177 2177 return self._storage().add(
2178 2178 self._treemanifest,
2179 2179 transaction,
2180 2180 link,
2181 2181 p1,
2182 2182 p2,
2183 2183 added,
2184 2184 removed,
2185 2185 readtree=readtree,
2186 2186 match=match,
2187 2187 )
2188 2188
2189 2189
2190 2190 @interfaceutil.implementer(repository.imanifestrevisionstored)
2191 2191 class treemanifestctx:
2192 2192 def __init__(self, manifestlog, dir, node):
2193 2193 self._manifestlog = manifestlog
2194 2194 self._dir = dir
2195 2195 self._data = None
2196 2196
2197 2197 self._node = node
2198 2198
2199 2199 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2200 2200 # we can instantiate treemanifestctx objects for directories we don't
2201 2201 # have on disk.
2202 2202 # self.p1, self.p2 = store.parents(node)
2203 2203 # rev = store.rev(node)
2204 2204 # self.linkrev = store.linkrev(rev)
2205 2205
2206 2206 def _storage(self):
2207 2207 narrowmatch = self._manifestlog._narrowmatch
2208 2208 if not narrowmatch.always():
2209 2209 if not narrowmatch.visitdir(self._dir[:-1]):
2210 2210 return excludedmanifestrevlog(
2211 2211 self._manifestlog.nodeconstants, self._dir
2212 2212 )
2213 2213 return self._manifestlog.getstorage(self._dir)
2214 2214
2215 2215 def read(self):
2216 2216 if self._data is None:
2217 2217 store = self._storage()
2218 2218 if self._node == self._manifestlog.nodeconstants.nullid:
2219 2219 self._data = treemanifest(self._manifestlog.nodeconstants)
2220 2220 # TODO accessing non-public API
2221 2221 elif store._treeondisk:
2222 2222 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2223 2223
2224 2224 def gettext():
2225 2225 return store.revision(self._node)
2226 2226
2227 2227 def readsubtree(dir, subm):
2228 2228 # Set verify to False since we need to be able to create
2229 2229 # subtrees for trees that don't exist on disk.
2230 2230 return self._manifestlog.get(dir, subm, verify=False).read()
2231 2231
2232 2232 m.read(gettext, readsubtree)
2233 2233 m.setnode(self._node)
2234 2234 self._data = m
2235 2235 else:
2236 2236 if self._node in store.fulltextcache:
2237 2237 text = pycompat.bytestr(store.fulltextcache[self._node])
2238 2238 else:
2239 2239 text = store.revision(self._node)
2240 2240 arraytext = bytearray(text)
2241 2241 store.fulltextcache[self._node] = arraytext
2242 2242 self._data = treemanifest(
2243 2243 self._manifestlog.nodeconstants, dir=self._dir, text=text
2244 2244 )
2245 2245
2246 2246 return self._data
2247 2247
2248 2248 def node(self):
2249 2249 return self._node
2250 2250
2251 2251 def copy(self):
2252 2252 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2253 2253 memmf._treemanifest = self.read().copy()
2254 2254 return memmf
2255 2255
2256 2256 @propertycache
2257 2257 def parents(self):
2258 2258 return self._storage().parents(self._node)
2259 2259
2260 2260 def readdelta(self, shallow=False):
2261 2261 """Returns a manifest containing just the entries that are present
2262 2262 in this manifest, but not in its p1 manifest. This is efficient to read
2263 2263 if the revlog delta is already p1.
2264 2264
2265 2265 If `shallow` is True, this will read the delta for this directory,
2266 2266 without recursively reading subdirectory manifests. Instead, any
2267 2267 subdirectory entry will be reported as it appears in the manifest, i.e.
2268 2268 the subdirectory will be reported among files and distinguished only by
2269 2269 its 't' flag.
2270 2270 """
2271 2271 store = self._storage()
2272 2272 if shallow:
2273 2273 r = store.rev(self._node)
2274 2274 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2275 2275 return manifestdict(store.nodeconstants.nodelen, d)
2276 2276 else:
2277 2277 # Need to perform a slow delta
2278 2278 r0 = store.deltaparent(store.rev(self._node))
2279 2279 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2280 2280 m1 = self.read()
2281 2281 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2282 2282 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2283 2283 if n1:
2284 2284 md[f] = n1
2285 2285 if fl1:
2286 2286 md.setflag(f, fl1)
2287 2287 return md
2288 2288
2289 2289 def readfast(self, shallow=False):
2290 2290 """Calls either readdelta or read, based on which would be less work.
2291 2291 readdelta is called if the delta is against the p1, and therefore can be
2292 2292 read quickly.
2293 2293
2294 2294 If `shallow` is True, it only returns the entries from this manifest,
2295 2295 and not any submanifests.
2296 2296 """
2297 2297 store = self._storage()
2298 2298 r = store.rev(self._node)
2299 2299 deltaparent = store.deltaparent(r)
2300 2300 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2301 2301 return self.readdelta(shallow=shallow)
2302 2302
2303 2303 if shallow:
2304 2304 return manifestdict(
2305 2305 store.nodeconstants.nodelen, store.revision(self._node)
2306 2306 )
2307 2307 else:
2308 2308 return self.read()
2309 2309
2310 2310 def find(self, key):
2311 2311 return self.read().find(key)
2312 2312
2313 2313
2314 2314 class excludeddir(treemanifest):
2315 2315 """Stand-in for a directory that is excluded from the repository.
2316 2316
2317 2317 With narrowing active on a repository that uses treemanifests,
2318 2318 some of the directory revlogs will be excluded from the resulting
2319 2319 clone. This is a huge storage win for clients, but means we need
2320 2320 some sort of pseudo-manifest to surface to internals so we can
2321 2321 detect a merge conflict outside the narrowspec. That's what this
2322 2322 class is: it stands in for a directory whose node is known, but
2323 2323 whose contents are unknown.
2324 2324 """
2325 2325
2326 2326 def __init__(self, nodeconstants, dir, node):
2327 2327 super(excludeddir, self).__init__(nodeconstants, dir)
2328 2328 self._node = node
2329 2329 # Add an empty file, which will be included by iterators and such,
2330 2330 # appearing as the directory itself (i.e. something like "dir/")
2331 2331 self._files[b''] = node
2332 2332 self._flags[b''] = b't'
2333 2333
2334 2334 # Manifests outside the narrowspec should never be modified, so avoid
2335 2335 # copying. This makes a noticeable difference when there are very many
2336 2336 # directories outside the narrowspec. Also, it makes sense for the copy to
2337 2337 # be of the same type as the original, which would not happen with the
2338 2338 # super type's copy().
2339 2339 def copy(self):
2340 2340 return self
2341 2341
2342 2342
2343 2343 class excludeddirmanifestctx(treemanifestctx):
2344 2344 """context wrapper for excludeddir - see that docstring for rationale"""
2345 2345
2346 2346 def __init__(self, nodeconstants, dir, node):
2347 2347 self.nodeconstants = nodeconstants
2348 2348 self._dir = dir
2349 2349 self._node = node
2350 2350
2351 2351 def read(self):
2352 2352 return excludeddir(self.nodeconstants, self._dir, self._node)
2353 2353
2354 2354 def readfast(self, shallow=False):
2355 2355 # special version of readfast since we don't have underlying storage
2356 2356 return self.read()
2357 2357
2358 2358 def write(self, *args):
2359 2359 raise error.ProgrammingError(
2360 2360 b'attempt to write manifest from excluded dir %s' % self._dir
2361 2361 )
2362 2362
2363 2363
2364 2364 class excludedmanifestrevlog(manifestrevlog):
2365 2365 """Stand-in for excluded treemanifest revlogs.
2366 2366
2367 2367 When narrowing is active on a treemanifest repository, we'll have
2368 2368 references to directories we can't see due to the revlog being
2369 2369 skipped. This class exists to conform to the manifestrevlog
2370 2370 interface for those directories and proactively prevent writes to
2371 2371 outside the narrowspec.
2372 2372 """
2373 2373
2374 2374 def __init__(self, nodeconstants, dir):
2375 2375 self.nodeconstants = nodeconstants
2376 2376 self._dir = dir
2377 2377
2378 2378 def __len__(self):
2379 2379 raise error.ProgrammingError(
2380 2380 b'attempt to get length of excluded dir %s' % self._dir
2381 2381 )
2382 2382
2383 2383 def rev(self, node):
2384 2384 raise error.ProgrammingError(
2385 2385 b'attempt to get rev from excluded dir %s' % self._dir
2386 2386 )
2387 2387
2388 2388 def linkrev(self, node):
2389 2389 raise error.ProgrammingError(
2390 2390 b'attempt to get linkrev from excluded dir %s' % self._dir
2391 2391 )
2392 2392
2393 2393 def node(self, rev):
2394 2394 raise error.ProgrammingError(
2395 2395 b'attempt to get node from excluded dir %s' % self._dir
2396 2396 )
2397 2397
2398 2398 def add(self, *args, **kwargs):
2399 2399 # We should never write entries in dirlogs outside the narrow clone.
2400 2400 # However, the method still gets called from writesubtree() in
2401 2401 # _addtree(), so we need to handle it. We should possibly make that
2402 2402 # avoid calling add() with a clean manifest (_dirty is always False
2403 2403 # in excludeddir instances).
2404 2404 pass
General Comments 0
You need to be logged in to leave comments. Login now