##// END OF EJS Templates
commitctx: document the manifest writing function...
marmoute -
r45626:5a80915e default
parent child Browse files
Show More
@@ -1,2313 +1,2329 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from .pycompat import getattr
23 23 from . import (
24 24 encoding,
25 25 error,
26 26 match as matchmod,
27 27 mdiff,
28 28 pathutil,
29 29 policy,
30 30 pycompat,
31 31 revlog,
32 32 util,
33 33 )
34 34 from .interfaces import (
35 35 repository,
36 36 util as interfaceutil,
37 37 )
38 38
39 39 parsers = policy.importmod('parsers')
40 40 propertycache = util.propertycache
41 41
42 42 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
43 43 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
44 44
45 45
46 46 def _parse(data):
47 47 # This method does a little bit of excessive-looking
48 48 # precondition checking. This is so that the behavior of this
49 49 # class exactly matches its C counterpart to try and help
50 50 # prevent surprise breakage for anyone that develops against
51 51 # the pure version.
52 52 if data and data[-1:] != b'\n':
53 53 raise ValueError(b'Manifest did not end in a newline.')
54 54 prev = None
55 55 for l in data.splitlines():
56 56 if prev is not None and prev > l:
57 57 raise ValueError(b'Manifest lines not in sorted order.')
58 58 prev = l
59 59 f, n = l.split(b'\0')
60 60 nl = len(n)
61 61 if 64 < nl:
62 62 # modern hash, full width
63 63 yield f, bin(n[:64]), n[64:]
64 64 elif 40 < nl < 45:
65 65 # legacy hash, always sha1
66 66 yield f, bin(n[:40]), n[40:]
67 67 else:
68 68 yield f, bin(n), b''
69 69
70 70
71 71 def _text(it):
72 72 files = []
73 73 lines = []
74 74 for f, n, fl in it:
75 75 files.append(f)
76 76 # if this is changed to support newlines in filenames,
77 77 # be sure to check the templates/ dir again (especially *-raw.tmpl)
78 78 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
79 79
80 80 _checkforbidden(files)
81 81 return b''.join(lines)
82 82
83 83
84 84 class lazymanifestiter(object):
85 85 def __init__(self, lm):
86 86 self.pos = 0
87 87 self.lm = lm
88 88
89 89 def __iter__(self):
90 90 return self
91 91
92 92 def next(self):
93 93 try:
94 94 data, pos = self.lm._get(self.pos)
95 95 except IndexError:
96 96 raise StopIteration
97 97 if pos == -1:
98 98 self.pos += 1
99 99 return data[0]
100 100 self.pos += 1
101 101 zeropos = data.find(b'\x00', pos)
102 102 return data[pos:zeropos]
103 103
104 104 __next__ = next
105 105
106 106
107 107 class lazymanifestiterentries(object):
108 108 def __init__(self, lm):
109 109 self.lm = lm
110 110 self.pos = 0
111 111
112 112 def __iter__(self):
113 113 return self
114 114
115 115 def next(self):
116 116 try:
117 117 data, pos = self.lm._get(self.pos)
118 118 except IndexError:
119 119 raise StopIteration
120 120 if pos == -1:
121 121 self.pos += 1
122 122 return data
123 123 zeropos = data.find(b'\x00', pos)
124 124 hashval = unhexlify(data, self.lm.extrainfo[self.pos], zeropos + 1, 40)
125 125 flags = self.lm._getflags(data, self.pos, zeropos)
126 126 self.pos += 1
127 127 return (data[pos:zeropos], hashval, flags)
128 128
129 129 __next__ = next
130 130
131 131
132 132 def unhexlify(data, extra, pos, length):
133 133 s = bin(data[pos : pos + length])
134 134 if extra:
135 135 s += chr(extra & 0xFF)
136 136 return s
137 137
138 138
139 139 def _cmp(a, b):
140 140 return (a > b) - (a < b)
141 141
142 142
143 143 class _lazymanifest(object):
144 144 """A pure python manifest backed by a byte string. It is supplimented with
145 145 internal lists as it is modified, until it is compacted back to a pure byte
146 146 string.
147 147
148 148 ``data`` is the initial manifest data.
149 149
150 150 ``positions`` is a list of offsets, one per manifest entry. Positive
151 151 values are offsets into ``data``, negative values are offsets into the
152 152 ``extradata`` list. When an entry is removed, its entry is dropped from
153 153 ``positions``. The values are encoded such that when walking the list and
154 154 indexing into ``data`` or ``extradata`` as appropriate, the entries are
155 155 sorted by filename.
156 156
157 157 ``extradata`` is a list of (key, hash, flags) for entries that were added or
158 158 modified since the manifest was created or compacted.
159 159 """
160 160
161 161 def __init__(
162 162 self,
163 163 data,
164 164 positions=None,
165 165 extrainfo=None,
166 166 extradata=None,
167 167 hasremovals=False,
168 168 ):
169 169 if positions is None:
170 170 self.positions = self.findlines(data)
171 171 self.extrainfo = [0] * len(self.positions)
172 172 self.data = data
173 173 self.extradata = []
174 174 self.hasremovals = False
175 175 else:
176 176 self.positions = positions[:]
177 177 self.extrainfo = extrainfo[:]
178 178 self.extradata = extradata[:]
179 179 self.data = data
180 180 self.hasremovals = hasremovals
181 181
182 182 def findlines(self, data):
183 183 if not data:
184 184 return []
185 185 pos = data.find(b"\n")
186 186 if pos == -1 or data[-1:] != b'\n':
187 187 raise ValueError(b"Manifest did not end in a newline.")
188 188 positions = [0]
189 189 prev = data[: data.find(b'\x00')]
190 190 while pos < len(data) - 1 and pos != -1:
191 191 positions.append(pos + 1)
192 192 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
193 193 if nexts < prev:
194 194 raise ValueError(b"Manifest lines not in sorted order.")
195 195 prev = nexts
196 196 pos = data.find(b"\n", pos + 1)
197 197 return positions
198 198
199 199 def _get(self, index):
200 200 # get the position encoded in pos:
201 201 # positive number is an index in 'data'
202 202 # negative number is in extrapieces
203 203 pos = self.positions[index]
204 204 if pos >= 0:
205 205 return self.data, pos
206 206 return self.extradata[-pos - 1], -1
207 207
208 208 def _getkey(self, pos):
209 209 if pos >= 0:
210 210 return self.data[pos : self.data.find(b'\x00', pos + 1)]
211 211 return self.extradata[-pos - 1][0]
212 212
213 213 def bsearch(self, key):
214 214 first = 0
215 215 last = len(self.positions) - 1
216 216
217 217 while first <= last:
218 218 midpoint = (first + last) // 2
219 219 nextpos = self.positions[midpoint]
220 220 candidate = self._getkey(nextpos)
221 221 r = _cmp(key, candidate)
222 222 if r == 0:
223 223 return midpoint
224 224 else:
225 225 if r < 0:
226 226 last = midpoint - 1
227 227 else:
228 228 first = midpoint + 1
229 229 return -1
230 230
231 231 def bsearch2(self, key):
232 232 # same as the above, but will always return the position
233 233 # done for performance reasons
234 234 first = 0
235 235 last = len(self.positions) - 1
236 236
237 237 while first <= last:
238 238 midpoint = (first + last) // 2
239 239 nextpos = self.positions[midpoint]
240 240 candidate = self._getkey(nextpos)
241 241 r = _cmp(key, candidate)
242 242 if r == 0:
243 243 return (midpoint, True)
244 244 else:
245 245 if r < 0:
246 246 last = midpoint - 1
247 247 else:
248 248 first = midpoint + 1
249 249 return (first, False)
250 250
251 251 def __contains__(self, key):
252 252 return self.bsearch(key) != -1
253 253
254 254 def _getflags(self, data, needle, pos):
255 255 start = pos + 41
256 256 end = data.find(b"\n", start)
257 257 if end == -1:
258 258 end = len(data) - 1
259 259 if start == end:
260 260 return b''
261 261 return self.data[start:end]
262 262
263 263 def __getitem__(self, key):
264 264 if not isinstance(key, bytes):
265 265 raise TypeError(b"getitem: manifest keys must be a bytes.")
266 266 needle = self.bsearch(key)
267 267 if needle == -1:
268 268 raise KeyError
269 269 data, pos = self._get(needle)
270 270 if pos == -1:
271 271 return (data[1], data[2])
272 272 zeropos = data.find(b'\x00', pos)
273 273 nlpos = data.find(b'\n', zeropos)
274 274 assert 0 <= needle <= len(self.positions)
275 275 assert len(self.extrainfo) == len(self.positions)
276 276 hlen = nlpos - zeropos - 1
277 277 # Hashes sometimes have an extra byte tucked on the end, so
278 278 # detect that.
279 279 if hlen % 2:
280 280 hlen -= 1
281 281 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
282 282 flags = self._getflags(data, needle, zeropos)
283 283 return (hashval, flags)
284 284
285 285 def __delitem__(self, key):
286 286 needle, found = self.bsearch2(key)
287 287 if not found:
288 288 raise KeyError
289 289 cur = self.positions[needle]
290 290 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
291 291 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
292 292 if cur >= 0:
293 293 # This does NOT unsort the list as far as the search functions are
294 294 # concerned, as they only examine lines mapped by self.positions.
295 295 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
296 296 self.hasremovals = True
297 297
298 298 def __setitem__(self, key, value):
299 299 if not isinstance(key, bytes):
300 300 raise TypeError(b"setitem: manifest keys must be a byte string.")
301 301 if not isinstance(value, tuple) or len(value) != 2:
302 302 raise TypeError(
303 303 b"Manifest values must be a tuple of (node, flags)."
304 304 )
305 305 hashval = value[0]
306 306 # hashes are either 20 or 32 bytes (sha1 or its replacement),
307 307 # and allow one extra byte taht won't be persisted to disk but
308 308 # is sometimes used in memory.
309 309 if not isinstance(hashval, bytes) or not (
310 310 20 <= len(hashval) <= 22 or 32 <= len(hashval) <= 34
311 311 ):
312 312 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
313 313 flags = value[1]
314 314 if len(hashval) == 22:
315 315 hashval = hashval[:-1]
316 316 if not isinstance(flags, bytes) or len(flags) > 1:
317 317 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
318 318 needle, found = self.bsearch2(key)
319 319 if found:
320 320 # put the item
321 321 pos = self.positions[needle]
322 322 if pos < 0:
323 323 self.extradata[-pos - 1] = (key, hashval, value[1])
324 324 else:
325 325 # just don't bother
326 326 self.extradata.append((key, hashval, value[1]))
327 327 self.positions[needle] = -len(self.extradata)
328 328 else:
329 329 # not found, put it in with extra positions
330 330 self.extradata.append((key, hashval, value[1]))
331 331 self.positions = (
332 332 self.positions[:needle]
333 333 + [-len(self.extradata)]
334 334 + self.positions[needle:]
335 335 )
336 336 self.extrainfo = (
337 337 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
338 338 )
339 339
340 340 def copy(self):
341 341 # XXX call _compact like in C?
342 342 return _lazymanifest(
343 343 self.data,
344 344 self.positions,
345 345 self.extrainfo,
346 346 self.extradata,
347 347 self.hasremovals,
348 348 )
349 349
350 350 def _compact(self):
351 351 # hopefully not called TOO often
352 352 if len(self.extradata) == 0 and not self.hasremovals:
353 353 return
354 354 l = []
355 355 i = 0
356 356 offset = 0
357 357 self.extrainfo = [0] * len(self.positions)
358 358 while i < len(self.positions):
359 359 if self.positions[i] >= 0:
360 360 cur = self.positions[i]
361 361 last_cut = cur
362 362
363 363 # Collect all contiguous entries in the buffer at the current
364 364 # offset, breaking out only for added/modified items held in
365 365 # extradata, or a deleted line prior to the next position.
366 366 while True:
367 367 self.positions[i] = offset
368 368 i += 1
369 369 if i == len(self.positions) or self.positions[i] < 0:
370 370 break
371 371
372 372 # A removed file has no positions[] entry, but does have an
373 373 # overwritten first byte. Break out and find the end of the
374 374 # current good entry/entries if there is a removed file
375 375 # before the next position.
376 376 if (
377 377 self.hasremovals
378 378 and self.data.find(b'\n\x00', cur, self.positions[i])
379 379 != -1
380 380 ):
381 381 break
382 382
383 383 offset += self.positions[i] - cur
384 384 cur = self.positions[i]
385 385 end_cut = self.data.find(b'\n', cur)
386 386 if end_cut != -1:
387 387 end_cut += 1
388 388 offset += end_cut - cur
389 389 l.append(self.data[last_cut:end_cut])
390 390 else:
391 391 while i < len(self.positions) and self.positions[i] < 0:
392 392 cur = self.positions[i]
393 393 t = self.extradata[-cur - 1]
394 394 l.append(self._pack(t))
395 395 self.positions[i] = offset
396 396 # Hashes are either 20 bytes (old sha1s) or 32
397 397 # bytes (new non-sha1).
398 398 hlen = 20
399 399 if len(t[1]) > 25:
400 400 hlen = 32
401 401 if len(t[1]) > hlen:
402 402 self.extrainfo[i] = ord(t[1][hlen + 1])
403 403 offset += len(l[-1])
404 404 i += 1
405 405 self.data = b''.join(l)
406 406 self.hasremovals = False
407 407 self.extradata = []
408 408
409 409 def _pack(self, d):
410 410 n = d[1]
411 411 if len(n) == 21 or len(n) == 33:
412 412 n = n[:-1]
413 413 assert len(n) == 20 or len(n) == 32
414 414 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
415 415
416 416 def text(self):
417 417 self._compact()
418 418 return self.data
419 419
420 420 def diff(self, m2, clean=False):
421 421 '''Finds changes between the current manifest and m2.'''
422 422 # XXX think whether efficiency matters here
423 423 diff = {}
424 424
425 425 for fn, e1, flags in self.iterentries():
426 426 if fn not in m2:
427 427 diff[fn] = (e1, flags), (None, b'')
428 428 else:
429 429 e2 = m2[fn]
430 430 if (e1, flags) != e2:
431 431 diff[fn] = (e1, flags), e2
432 432 elif clean:
433 433 diff[fn] = None
434 434
435 435 for fn, e2, flags in m2.iterentries():
436 436 if fn not in self:
437 437 diff[fn] = (None, b''), (e2, flags)
438 438
439 439 return diff
440 440
441 441 def iterentries(self):
442 442 return lazymanifestiterentries(self)
443 443
444 444 def iterkeys(self):
445 445 return lazymanifestiter(self)
446 446
447 447 def __iter__(self):
448 448 return lazymanifestiter(self)
449 449
450 450 def __len__(self):
451 451 return len(self.positions)
452 452
453 453 def filtercopy(self, filterfn):
454 454 # XXX should be optimized
455 455 c = _lazymanifest(b'')
456 456 for f, n, fl in self.iterentries():
457 457 if filterfn(f):
458 458 c[f] = n, fl
459 459 return c
460 460
461 461
462 462 try:
463 463 _lazymanifest = parsers.lazymanifest
464 464 except AttributeError:
465 465 pass
466 466
467 467
468 468 @interfaceutil.implementer(repository.imanifestdict)
469 469 class manifestdict(object):
470 470 def __init__(self, data=b''):
471 471 self._lm = _lazymanifest(data)
472 472
473 473 def __getitem__(self, key):
474 474 return self._lm[key][0]
475 475
476 476 def find(self, key):
477 477 return self._lm[key]
478 478
479 479 def __len__(self):
480 480 return len(self._lm)
481 481
482 482 def __nonzero__(self):
483 483 # nonzero is covered by the __len__ function, but implementing it here
484 484 # makes it easier for extensions to override.
485 485 return len(self._lm) != 0
486 486
487 487 __bool__ = __nonzero__
488 488
489 489 def __setitem__(self, key, node):
490 490 self._lm[key] = node, self.flags(key)
491 491
492 492 def __contains__(self, key):
493 493 if key is None:
494 494 return False
495 495 return key in self._lm
496 496
497 497 def __delitem__(self, key):
498 498 del self._lm[key]
499 499
500 500 def __iter__(self):
501 501 return self._lm.__iter__()
502 502
503 503 def iterkeys(self):
504 504 return self._lm.iterkeys()
505 505
506 506 def keys(self):
507 507 return list(self.iterkeys())
508 508
509 509 def filesnotin(self, m2, match=None):
510 510 '''Set of files in this manifest that are not in the other'''
511 511 if match is not None:
512 512 match = matchmod.badmatch(match, lambda path, msg: None)
513 513 sm2 = set(m2.walk(match))
514 514 return {f for f in self.walk(match) if f not in sm2}
515 515 return {f for f in self if f not in m2}
516 516
517 517 @propertycache
518 518 def _dirs(self):
519 519 return pathutil.dirs(self)
520 520
521 521 def dirs(self):
522 522 return self._dirs
523 523
524 524 def hasdir(self, dir):
525 525 return dir in self._dirs
526 526
527 527 def _filesfastpath(self, match):
528 528 '''Checks whether we can correctly and quickly iterate over matcher
529 529 files instead of over manifest files.'''
530 530 files = match.files()
531 531 return len(files) < 100 and (
532 532 match.isexact()
533 533 or (match.prefix() and all(fn in self for fn in files))
534 534 )
535 535
536 536 def walk(self, match):
537 537 '''Generates matching file names.
538 538
539 539 Equivalent to manifest.matches(match).iterkeys(), but without creating
540 540 an entirely new manifest.
541 541
542 542 It also reports nonexistent files by marking them bad with match.bad().
543 543 '''
544 544 if match.always():
545 545 for f in iter(self):
546 546 yield f
547 547 return
548 548
549 549 fset = set(match.files())
550 550
551 551 # avoid the entire walk if we're only looking for specific files
552 552 if self._filesfastpath(match):
553 553 for fn in sorted(fset):
554 554 if fn in self:
555 555 yield fn
556 556 return
557 557
558 558 for fn in self:
559 559 if fn in fset:
560 560 # specified pattern is the exact name
561 561 fset.remove(fn)
562 562 if match(fn):
563 563 yield fn
564 564
565 565 # for dirstate.walk, files=[''] means "walk the whole tree".
566 566 # follow that here, too
567 567 fset.discard(b'')
568 568
569 569 for fn in sorted(fset):
570 570 if not self.hasdir(fn):
571 571 match.bad(fn, None)
572 572
573 573 def _matches(self, match):
574 574 '''generate a new manifest filtered by the match argument'''
575 575 if match.always():
576 576 return self.copy()
577 577
578 578 if self._filesfastpath(match):
579 579 m = manifestdict()
580 580 lm = self._lm
581 581 for fn in match.files():
582 582 if fn in lm:
583 583 m._lm[fn] = lm[fn]
584 584 return m
585 585
586 586 m = manifestdict()
587 587 m._lm = self._lm.filtercopy(match)
588 588 return m
589 589
590 590 def diff(self, m2, match=None, clean=False):
591 591 '''Finds changes between the current manifest and m2.
592 592
593 593 Args:
594 594 m2: the manifest to which this manifest should be compared.
595 595 clean: if true, include files unchanged between these manifests
596 596 with a None value in the returned dictionary.
597 597
598 598 The result is returned as a dict with filename as key and
599 599 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
600 600 nodeid in the current/other manifest and fl1/fl2 is the flag
601 601 in the current/other manifest. Where the file does not exist,
602 602 the nodeid will be None and the flags will be the empty
603 603 string.
604 604 '''
605 605 if match:
606 606 m1 = self._matches(match)
607 607 m2 = m2._matches(match)
608 608 return m1.diff(m2, clean=clean)
609 609 return self._lm.diff(m2._lm, clean)
610 610
611 611 def setflag(self, key, flag):
612 612 self._lm[key] = self[key], flag
613 613
614 614 def get(self, key, default=None):
615 615 try:
616 616 return self._lm[key][0]
617 617 except KeyError:
618 618 return default
619 619
620 620 def flags(self, key):
621 621 try:
622 622 return self._lm[key][1]
623 623 except KeyError:
624 624 return b''
625 625
626 626 def copy(self):
627 627 c = manifestdict()
628 628 c._lm = self._lm.copy()
629 629 return c
630 630
631 631 def items(self):
632 632 return (x[:2] for x in self._lm.iterentries())
633 633
634 634 def iteritems(self):
635 635 return (x[:2] for x in self._lm.iterentries())
636 636
637 637 def iterentries(self):
638 638 return self._lm.iterentries()
639 639
640 640 def text(self):
641 641 # most likely uses native version
642 642 return self._lm.text()
643 643
644 644 def fastdelta(self, base, changes):
645 645 """Given a base manifest text as a bytearray and a list of changes
646 646 relative to that text, compute a delta that can be used by revlog.
647 647 """
648 648 delta = []
649 649 dstart = None
650 650 dend = None
651 651 dline = [b""]
652 652 start = 0
653 653 # zero copy representation of base as a buffer
654 654 addbuf = util.buffer(base)
655 655
656 656 changes = list(changes)
657 657 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
658 658 # start with a readonly loop that finds the offset of
659 659 # each line and creates the deltas
660 660 for f, todelete in changes:
661 661 # bs will either be the index of the item or the insert point
662 662 start, end = _msearch(addbuf, f, start)
663 663 if not todelete:
664 664 h, fl = self._lm[f]
665 665 l = b"%s\0%s%s\n" % (f, hex(h), fl)
666 666 else:
667 667 if start == end:
668 668 # item we want to delete was not found, error out
669 669 raise AssertionError(
670 670 _(b"failed to remove %s from manifest") % f
671 671 )
672 672 l = b""
673 673 if dstart is not None and dstart <= start and dend >= start:
674 674 if dend < end:
675 675 dend = end
676 676 if l:
677 677 dline.append(l)
678 678 else:
679 679 if dstart is not None:
680 680 delta.append([dstart, dend, b"".join(dline)])
681 681 dstart = start
682 682 dend = end
683 683 dline = [l]
684 684
685 685 if dstart is not None:
686 686 delta.append([dstart, dend, b"".join(dline)])
687 687 # apply the delta to the base, and get a delta for addrevision
688 688 deltatext, arraytext = _addlistdelta(base, delta)
689 689 else:
690 690 # For large changes, it's much cheaper to just build the text and
691 691 # diff it.
692 692 arraytext = bytearray(self.text())
693 693 deltatext = mdiff.textdiff(
694 694 util.buffer(base), util.buffer(arraytext)
695 695 )
696 696
697 697 return arraytext, deltatext
698 698
699 699
700 700 def _msearch(m, s, lo=0, hi=None):
701 701 '''return a tuple (start, end) that says where to find s within m.
702 702
703 703 If the string is found m[start:end] are the line containing
704 704 that string. If start == end the string was not found and
705 705 they indicate the proper sorted insertion point.
706 706
707 707 m should be a buffer, a memoryview or a byte string.
708 708 s is a byte string'''
709 709
710 710 def advance(i, c):
711 711 while i < lenm and m[i : i + 1] != c:
712 712 i += 1
713 713 return i
714 714
715 715 if not s:
716 716 return (lo, lo)
717 717 lenm = len(m)
718 718 if not hi:
719 719 hi = lenm
720 720 while lo < hi:
721 721 mid = (lo + hi) // 2
722 722 start = mid
723 723 while start > 0 and m[start - 1 : start] != b'\n':
724 724 start -= 1
725 725 end = advance(start, b'\0')
726 726 if bytes(m[start:end]) < s:
727 727 # we know that after the null there are 40 bytes of sha1
728 728 # this translates to the bisect lo = mid + 1
729 729 lo = advance(end + 40, b'\n') + 1
730 730 else:
731 731 # this translates to the bisect hi = mid
732 732 hi = start
733 733 end = advance(lo, b'\0')
734 734 found = m[lo:end]
735 735 if s == found:
736 736 # we know that after the null there are 40 bytes of sha1
737 737 end = advance(end + 40, b'\n')
738 738 return (lo, end + 1)
739 739 else:
740 740 return (lo, lo)
741 741
742 742
743 743 def _checkforbidden(l):
744 744 """Check filenames for illegal characters."""
745 745 for f in l:
746 746 if b'\n' in f or b'\r' in f:
747 747 raise error.StorageError(
748 748 _(b"'\\n' and '\\r' disallowed in filenames: %r")
749 749 % pycompat.bytestr(f)
750 750 )
751 751
752 752
753 753 # apply the changes collected during the bisect loop to our addlist
754 754 # return a delta suitable for addrevision
755 755 def _addlistdelta(addlist, x):
756 756 # for large addlist arrays, building a new array is cheaper
757 757 # than repeatedly modifying the existing one
758 758 currentposition = 0
759 759 newaddlist = bytearray()
760 760
761 761 for start, end, content in x:
762 762 newaddlist += addlist[currentposition:start]
763 763 if content:
764 764 newaddlist += bytearray(content)
765 765
766 766 currentposition = end
767 767
768 768 newaddlist += addlist[currentposition:]
769 769
770 770 deltatext = b"".join(
771 771 struct.pack(b">lll", start, end, len(content)) + content
772 772 for start, end, content in x
773 773 )
774 774 return deltatext, newaddlist
775 775
776 776
777 777 def _splittopdir(f):
778 778 if b'/' in f:
779 779 dir, subpath = f.split(b'/', 1)
780 780 return dir + b'/', subpath
781 781 else:
782 782 return b'', f
783 783
784 784
785 785 _noop = lambda s: None
786 786
787 787
788 788 @interfaceutil.implementer(repository.imanifestdict)
789 789 class treemanifest(object):
790 790 def __init__(self, dir=b'', text=b''):
791 791 self._dir = dir
792 792 self._node = nullid
793 793 self._loadfunc = _noop
794 794 self._copyfunc = _noop
795 795 self._dirty = False
796 796 self._dirs = {}
797 797 self._lazydirs = {}
798 798 # Using _lazymanifest here is a little slower than plain old dicts
799 799 self._files = {}
800 800 self._flags = {}
801 801 if text:
802 802
803 803 def readsubtree(subdir, subm):
804 804 raise AssertionError(
805 805 b'treemanifest constructor only accepts flat manifests'
806 806 )
807 807
808 808 self.parse(text, readsubtree)
809 809 self._dirty = True # Mark flat manifest dirty after parsing
810 810
811 811 def _subpath(self, path):
812 812 return self._dir + path
813 813
814 814 def _loadalllazy(self):
815 815 selfdirs = self._dirs
816 816 for d, (path, node, readsubtree, docopy) in pycompat.iteritems(
817 817 self._lazydirs
818 818 ):
819 819 if docopy:
820 820 selfdirs[d] = readsubtree(path, node).copy()
821 821 else:
822 822 selfdirs[d] = readsubtree(path, node)
823 823 self._lazydirs = {}
824 824
825 825 def _loadlazy(self, d):
826 826 v = self._lazydirs.get(d)
827 827 if v:
828 828 path, node, readsubtree, docopy = v
829 829 if docopy:
830 830 self._dirs[d] = readsubtree(path, node).copy()
831 831 else:
832 832 self._dirs[d] = readsubtree(path, node)
833 833 del self._lazydirs[d]
834 834
835 835 def _loadchildrensetlazy(self, visit):
836 836 if not visit:
837 837 return None
838 838 if visit == b'all' or visit == b'this':
839 839 self._loadalllazy()
840 840 return None
841 841
842 842 loadlazy = self._loadlazy
843 843 for k in visit:
844 844 loadlazy(k + b'/')
845 845 return visit
846 846
847 847 def _loaddifflazy(self, t1, t2):
848 848 """load items in t1 and t2 if they're needed for diffing.
849 849
850 850 The criteria currently is:
851 851 - if it's not present in _lazydirs in either t1 or t2, load it in the
852 852 other (it may already be loaded or it may not exist, doesn't matter)
853 853 - if it's present in _lazydirs in both, compare the nodeid; if it
854 854 differs, load it in both
855 855 """
856 856 toloadlazy = []
857 857 for d, v1 in pycompat.iteritems(t1._lazydirs):
858 858 v2 = t2._lazydirs.get(d)
859 859 if not v2 or v2[1] != v1[1]:
860 860 toloadlazy.append(d)
861 861 for d, v1 in pycompat.iteritems(t2._lazydirs):
862 862 if d not in t1._lazydirs:
863 863 toloadlazy.append(d)
864 864
865 865 for d in toloadlazy:
866 866 t1._loadlazy(d)
867 867 t2._loadlazy(d)
868 868
869 869 def __len__(self):
870 870 self._load()
871 871 size = len(self._files)
872 872 self._loadalllazy()
873 873 for m in self._dirs.values():
874 874 size += m.__len__()
875 875 return size
876 876
877 877 def __nonzero__(self):
878 878 # Faster than "__len() != 0" since it avoids loading sub-manifests
879 879 return not self._isempty()
880 880
881 881 __bool__ = __nonzero__
882 882
883 883 def _isempty(self):
884 884 self._load() # for consistency; already loaded by all callers
885 885 # See if we can skip loading everything.
886 886 if self._files or (
887 887 self._dirs and any(not m._isempty() for m in self._dirs.values())
888 888 ):
889 889 return False
890 890 self._loadalllazy()
891 891 return not self._dirs or all(m._isempty() for m in self._dirs.values())
892 892
893 893 @encoding.strmethod
894 894 def __repr__(self):
895 895 return (
896 896 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
897 897 % (
898 898 self._dir,
899 899 hex(self._node),
900 900 bool(self._loadfunc is _noop),
901 901 self._dirty,
902 902 id(self),
903 903 )
904 904 )
905 905
906 906 def dir(self):
907 907 '''The directory that this tree manifest represents, including a
908 908 trailing '/'. Empty string for the repo root directory.'''
909 909 return self._dir
910 910
911 911 def node(self):
912 912 '''This node of this instance. nullid for unsaved instances. Should
913 913 be updated when the instance is read or written from a revlog.
914 914 '''
915 915 assert not self._dirty
916 916 return self._node
917 917
918 918 def setnode(self, node):
919 919 self._node = node
920 920 self._dirty = False
921 921
922 922 def iterentries(self):
923 923 self._load()
924 924 self._loadalllazy()
925 925 for p, n in sorted(
926 926 itertools.chain(self._dirs.items(), self._files.items())
927 927 ):
928 928 if p in self._files:
929 929 yield self._subpath(p), n, self._flags.get(p, b'')
930 930 else:
931 931 for x in n.iterentries():
932 932 yield x
933 933
934 934 def items(self):
935 935 self._load()
936 936 self._loadalllazy()
937 937 for p, n in sorted(
938 938 itertools.chain(self._dirs.items(), self._files.items())
939 939 ):
940 940 if p in self._files:
941 941 yield self._subpath(p), n
942 942 else:
943 943 for f, sn in pycompat.iteritems(n):
944 944 yield f, sn
945 945
946 946 iteritems = items
947 947
948 948 def iterkeys(self):
949 949 self._load()
950 950 self._loadalllazy()
951 951 for p in sorted(itertools.chain(self._dirs, self._files)):
952 952 if p in self._files:
953 953 yield self._subpath(p)
954 954 else:
955 955 for f in self._dirs[p]:
956 956 yield f
957 957
958 958 def keys(self):
959 959 return list(self.iterkeys())
960 960
961 961 def __iter__(self):
962 962 return self.iterkeys()
963 963
964 964 def __contains__(self, f):
965 965 if f is None:
966 966 return False
967 967 self._load()
968 968 dir, subpath = _splittopdir(f)
969 969 if dir:
970 970 self._loadlazy(dir)
971 971
972 972 if dir not in self._dirs:
973 973 return False
974 974
975 975 return self._dirs[dir].__contains__(subpath)
976 976 else:
977 977 return f in self._files
978 978
979 979 def get(self, f, default=None):
980 980 self._load()
981 981 dir, subpath = _splittopdir(f)
982 982 if dir:
983 983 self._loadlazy(dir)
984 984
985 985 if dir not in self._dirs:
986 986 return default
987 987 return self._dirs[dir].get(subpath, default)
988 988 else:
989 989 return self._files.get(f, default)
990 990
991 991 def __getitem__(self, f):
992 992 self._load()
993 993 dir, subpath = _splittopdir(f)
994 994 if dir:
995 995 self._loadlazy(dir)
996 996
997 997 return self._dirs[dir].__getitem__(subpath)
998 998 else:
999 999 return self._files[f]
1000 1000
1001 1001 def flags(self, f):
1002 1002 self._load()
1003 1003 dir, subpath = _splittopdir(f)
1004 1004 if dir:
1005 1005 self._loadlazy(dir)
1006 1006
1007 1007 if dir not in self._dirs:
1008 1008 return b''
1009 1009 return self._dirs[dir].flags(subpath)
1010 1010 else:
1011 1011 if f in self._lazydirs or f in self._dirs:
1012 1012 return b''
1013 1013 return self._flags.get(f, b'')
1014 1014
1015 1015 def find(self, f):
1016 1016 self._load()
1017 1017 dir, subpath = _splittopdir(f)
1018 1018 if dir:
1019 1019 self._loadlazy(dir)
1020 1020
1021 1021 return self._dirs[dir].find(subpath)
1022 1022 else:
1023 1023 return self._files[f], self._flags.get(f, b'')
1024 1024
1025 1025 def __delitem__(self, f):
1026 1026 self._load()
1027 1027 dir, subpath = _splittopdir(f)
1028 1028 if dir:
1029 1029 self._loadlazy(dir)
1030 1030
1031 1031 self._dirs[dir].__delitem__(subpath)
1032 1032 # If the directory is now empty, remove it
1033 1033 if self._dirs[dir]._isempty():
1034 1034 del self._dirs[dir]
1035 1035 else:
1036 1036 del self._files[f]
1037 1037 if f in self._flags:
1038 1038 del self._flags[f]
1039 1039 self._dirty = True
1040 1040
1041 1041 def __setitem__(self, f, n):
1042 1042 assert n is not None
1043 1043 self._load()
1044 1044 dir, subpath = _splittopdir(f)
1045 1045 if dir:
1046 1046 self._loadlazy(dir)
1047 1047 if dir not in self._dirs:
1048 1048 self._dirs[dir] = treemanifest(self._subpath(dir))
1049 1049 self._dirs[dir].__setitem__(subpath, n)
1050 1050 else:
1051 1051 # manifest nodes are either 20 bytes or 32 bytes,
1052 1052 # depending on the hash in use. An extra byte is
1053 1053 # occasionally used by hg, but won't ever be
1054 1054 # persisted. Trim to 21 or 33 bytes as appropriate.
1055 1055 trim = 21 if len(n) < 25 else 33
1056 1056 self._files[f] = n[:trim] # to match manifestdict's behavior
1057 1057 self._dirty = True
1058 1058
1059 1059 def _load(self):
1060 1060 if self._loadfunc is not _noop:
1061 1061 lf, self._loadfunc = self._loadfunc, _noop
1062 1062 lf(self)
1063 1063 elif self._copyfunc is not _noop:
1064 1064 cf, self._copyfunc = self._copyfunc, _noop
1065 1065 cf(self)
1066 1066
1067 1067 def setflag(self, f, flags):
1068 1068 """Set the flags (symlink, executable) for path f."""
1069 1069 self._load()
1070 1070 dir, subpath = _splittopdir(f)
1071 1071 if dir:
1072 1072 self._loadlazy(dir)
1073 1073 if dir not in self._dirs:
1074 1074 self._dirs[dir] = treemanifest(self._subpath(dir))
1075 1075 self._dirs[dir].setflag(subpath, flags)
1076 1076 else:
1077 1077 self._flags[f] = flags
1078 1078 self._dirty = True
1079 1079
1080 1080 def copy(self):
1081 1081 copy = treemanifest(self._dir)
1082 1082 copy._node = self._node
1083 1083 copy._dirty = self._dirty
1084 1084 if self._copyfunc is _noop:
1085 1085
1086 1086 def _copyfunc(s):
1087 1087 self._load()
1088 1088 s._lazydirs = {
1089 1089 d: (p, n, r, True)
1090 1090 for d, (p, n, r, c) in pycompat.iteritems(self._lazydirs)
1091 1091 }
1092 1092 sdirs = s._dirs
1093 1093 for d, v in pycompat.iteritems(self._dirs):
1094 1094 sdirs[d] = v.copy()
1095 1095 s._files = dict.copy(self._files)
1096 1096 s._flags = dict.copy(self._flags)
1097 1097
1098 1098 if self._loadfunc is _noop:
1099 1099 _copyfunc(copy)
1100 1100 else:
1101 1101 copy._copyfunc = _copyfunc
1102 1102 else:
1103 1103 copy._copyfunc = self._copyfunc
1104 1104 return copy
1105 1105
1106 1106 def filesnotin(self, m2, match=None):
1107 1107 '''Set of files in this manifest that are not in the other'''
1108 1108 if match and not match.always():
1109 1109 m1 = self._matches(match)
1110 1110 m2 = m2._matches(match)
1111 1111 return m1.filesnotin(m2)
1112 1112
1113 1113 files = set()
1114 1114
1115 1115 def _filesnotin(t1, t2):
1116 1116 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1117 1117 return
1118 1118 t1._load()
1119 1119 t2._load()
1120 1120 self._loaddifflazy(t1, t2)
1121 1121 for d, m1 in pycompat.iteritems(t1._dirs):
1122 1122 if d in t2._dirs:
1123 1123 m2 = t2._dirs[d]
1124 1124 _filesnotin(m1, m2)
1125 1125 else:
1126 1126 files.update(m1.iterkeys())
1127 1127
1128 1128 for fn in t1._files:
1129 1129 if fn not in t2._files:
1130 1130 files.add(t1._subpath(fn))
1131 1131
1132 1132 _filesnotin(self, m2)
1133 1133 return files
1134 1134
1135 1135 @propertycache
1136 1136 def _alldirs(self):
1137 1137 return pathutil.dirs(self)
1138 1138
1139 1139 def dirs(self):
1140 1140 return self._alldirs
1141 1141
1142 1142 def hasdir(self, dir):
1143 1143 self._load()
1144 1144 topdir, subdir = _splittopdir(dir)
1145 1145 if topdir:
1146 1146 self._loadlazy(topdir)
1147 1147 if topdir in self._dirs:
1148 1148 return self._dirs[topdir].hasdir(subdir)
1149 1149 return False
1150 1150 dirslash = dir + b'/'
1151 1151 return dirslash in self._dirs or dirslash in self._lazydirs
1152 1152
1153 1153 def walk(self, match):
1154 1154 '''Generates matching file names.
1155 1155
1156 1156 It also reports nonexistent files by marking them bad with match.bad().
1157 1157 '''
1158 1158 if match.always():
1159 1159 for f in iter(self):
1160 1160 yield f
1161 1161 return
1162 1162
1163 1163 fset = set(match.files())
1164 1164
1165 1165 for fn in self._walk(match):
1166 1166 if fn in fset:
1167 1167 # specified pattern is the exact name
1168 1168 fset.remove(fn)
1169 1169 yield fn
1170 1170
1171 1171 # for dirstate.walk, files=[''] means "walk the whole tree".
1172 1172 # follow that here, too
1173 1173 fset.discard(b'')
1174 1174
1175 1175 for fn in sorted(fset):
1176 1176 if not self.hasdir(fn):
1177 1177 match.bad(fn, None)
1178 1178
1179 1179 def _walk(self, match):
1180 1180 '''Recursively generates matching file names for walk().'''
1181 1181 visit = match.visitchildrenset(self._dir[:-1])
1182 1182 if not visit:
1183 1183 return
1184 1184
1185 1185 # yield this dir's files and walk its submanifests
1186 1186 self._load()
1187 1187 visit = self._loadchildrensetlazy(visit)
1188 1188 for p in sorted(list(self._dirs) + list(self._files)):
1189 1189 if p in self._files:
1190 1190 fullp = self._subpath(p)
1191 1191 if match(fullp):
1192 1192 yield fullp
1193 1193 else:
1194 1194 if not visit or p[:-1] in visit:
1195 1195 for f in self._dirs[p]._walk(match):
1196 1196 yield f
1197 1197
1198 1198 def _matches(self, match):
1199 1199 '''recursively generate a new manifest filtered by the match argument.
1200 1200 '''
1201 1201 if match.always():
1202 1202 return self.copy()
1203 1203 return self._matches_inner(match)
1204 1204
1205 1205 def _matches_inner(self, match):
1206 1206 if match.always():
1207 1207 return self.copy()
1208 1208
1209 1209 visit = match.visitchildrenset(self._dir[:-1])
1210 1210 if visit == b'all':
1211 1211 return self.copy()
1212 1212 ret = treemanifest(self._dir)
1213 1213 if not visit:
1214 1214 return ret
1215 1215
1216 1216 self._load()
1217 1217 for fn in self._files:
1218 1218 # While visitchildrenset *usually* lists only subdirs, this is
1219 1219 # actually up to the matcher and may have some files in the set().
1220 1220 # If visit == 'this', we should obviously look at the files in this
1221 1221 # directory; if visit is a set, and fn is in it, we should inspect
1222 1222 # fn (but no need to inspect things not in the set).
1223 1223 if visit != b'this' and fn not in visit:
1224 1224 continue
1225 1225 fullp = self._subpath(fn)
1226 1226 # visitchildrenset isn't perfect, we still need to call the regular
1227 1227 # matcher code to further filter results.
1228 1228 if not match(fullp):
1229 1229 continue
1230 1230 ret._files[fn] = self._files[fn]
1231 1231 if fn in self._flags:
1232 1232 ret._flags[fn] = self._flags[fn]
1233 1233
1234 1234 visit = self._loadchildrensetlazy(visit)
1235 1235 for dir, subm in pycompat.iteritems(self._dirs):
1236 1236 if visit and dir[:-1] not in visit:
1237 1237 continue
1238 1238 m = subm._matches_inner(match)
1239 1239 if not m._isempty():
1240 1240 ret._dirs[dir] = m
1241 1241
1242 1242 if not ret._isempty():
1243 1243 ret._dirty = True
1244 1244 return ret
1245 1245
1246 1246 def fastdelta(self, base, changes):
1247 1247 raise FastdeltaUnavailable()
1248 1248
1249 1249 def diff(self, m2, match=None, clean=False):
1250 1250 '''Finds changes between the current manifest and m2.
1251 1251
1252 1252 Args:
1253 1253 m2: the manifest to which this manifest should be compared.
1254 1254 clean: if true, include files unchanged between these manifests
1255 1255 with a None value in the returned dictionary.
1256 1256
1257 1257 The result is returned as a dict with filename as key and
1258 1258 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1259 1259 nodeid in the current/other manifest and fl1/fl2 is the flag
1260 1260 in the current/other manifest. Where the file does not exist,
1261 1261 the nodeid will be None and the flags will be the empty
1262 1262 string.
1263 1263 '''
1264 1264 if match and not match.always():
1265 1265 m1 = self._matches(match)
1266 1266 m2 = m2._matches(match)
1267 1267 return m1.diff(m2, clean=clean)
1268 1268 result = {}
1269 1269 emptytree = treemanifest()
1270 1270
1271 1271 def _iterativediff(t1, t2, stack):
1272 1272 """compares two tree manifests and append new tree-manifests which
1273 1273 needs to be compared to stack"""
1274 1274 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1275 1275 return
1276 1276 t1._load()
1277 1277 t2._load()
1278 1278 self._loaddifflazy(t1, t2)
1279 1279
1280 1280 for d, m1 in pycompat.iteritems(t1._dirs):
1281 1281 m2 = t2._dirs.get(d, emptytree)
1282 1282 stack.append((m1, m2))
1283 1283
1284 1284 for d, m2 in pycompat.iteritems(t2._dirs):
1285 1285 if d not in t1._dirs:
1286 1286 stack.append((emptytree, m2))
1287 1287
1288 1288 for fn, n1 in pycompat.iteritems(t1._files):
1289 1289 fl1 = t1._flags.get(fn, b'')
1290 1290 n2 = t2._files.get(fn, None)
1291 1291 fl2 = t2._flags.get(fn, b'')
1292 1292 if n1 != n2 or fl1 != fl2:
1293 1293 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1294 1294 elif clean:
1295 1295 result[t1._subpath(fn)] = None
1296 1296
1297 1297 for fn, n2 in pycompat.iteritems(t2._files):
1298 1298 if fn not in t1._files:
1299 1299 fl2 = t2._flags.get(fn, b'')
1300 1300 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1301 1301
1302 1302 stackls = []
1303 1303 _iterativediff(self, m2, stackls)
1304 1304 while stackls:
1305 1305 t1, t2 = stackls.pop()
1306 1306 # stackls is populated in the function call
1307 1307 _iterativediff(t1, t2, stackls)
1308 1308 return result
1309 1309
1310 1310 def unmodifiedsince(self, m2):
1311 1311 return not self._dirty and not m2._dirty and self._node == m2._node
1312 1312
1313 1313 def parse(self, text, readsubtree):
1314 1314 selflazy = self._lazydirs
1315 1315 subpath = self._subpath
1316 1316 for f, n, fl in _parse(text):
1317 1317 if fl == b't':
1318 1318 f = f + b'/'
1319 1319 # False below means "doesn't need to be copied" and can use the
1320 1320 # cached value from readsubtree directly.
1321 1321 selflazy[f] = (subpath(f), n, readsubtree, False)
1322 1322 elif b'/' in f:
1323 1323 # This is a flat manifest, so use __setitem__ and setflag rather
1324 1324 # than assigning directly to _files and _flags, so we can
1325 1325 # assign a path in a subdirectory, and to mark dirty (compared
1326 1326 # to nullid).
1327 1327 self[f] = n
1328 1328 if fl:
1329 1329 self.setflag(f, fl)
1330 1330 else:
1331 1331 # Assigning to _files and _flags avoids marking as dirty,
1332 1332 # and should be a little faster.
1333 1333 self._files[f] = n
1334 1334 if fl:
1335 1335 self._flags[f] = fl
1336 1336
1337 1337 def text(self):
1338 1338 """Get the full data of this manifest as a bytestring."""
1339 1339 self._load()
1340 1340 return _text(self.iterentries())
1341 1341
1342 1342 def dirtext(self):
1343 1343 """Get the full data of this directory as a bytestring. Make sure that
1344 1344 any submanifests have been written first, so their nodeids are correct.
1345 1345 """
1346 1346 self._load()
1347 1347 flags = self.flags
1348 1348 lazydirs = [
1349 1349 (d[:-1], v[1], b't') for d, v in pycompat.iteritems(self._lazydirs)
1350 1350 ]
1351 1351 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1352 1352 files = [(f, self._files[f], flags(f)) for f in self._files]
1353 1353 return _text(sorted(dirs + files + lazydirs))
1354 1354
1355 1355 def read(self, gettext, readsubtree):
1356 1356 def _load_for_read(s):
1357 1357 s.parse(gettext(), readsubtree)
1358 1358 s._dirty = False
1359 1359
1360 1360 self._loadfunc = _load_for_read
1361 1361
1362 1362 def writesubtrees(self, m1, m2, writesubtree, match):
1363 1363 self._load() # for consistency; should never have any effect here
1364 1364 m1._load()
1365 1365 m2._load()
1366 1366 emptytree = treemanifest()
1367 1367
1368 1368 def getnode(m, d):
1369 1369 ld = m._lazydirs.get(d)
1370 1370 if ld:
1371 1371 return ld[1]
1372 1372 return m._dirs.get(d, emptytree)._node
1373 1373
1374 1374 # let's skip investigating things that `match` says we do not need.
1375 1375 visit = match.visitchildrenset(self._dir[:-1])
1376 1376 visit = self._loadchildrensetlazy(visit)
1377 1377 if visit == b'this' or visit == b'all':
1378 1378 visit = None
1379 1379 for d, subm in pycompat.iteritems(self._dirs):
1380 1380 if visit and d[:-1] not in visit:
1381 1381 continue
1382 1382 subp1 = getnode(m1, d)
1383 1383 subp2 = getnode(m2, d)
1384 1384 if subp1 == nullid:
1385 1385 subp1, subp2 = subp2, subp1
1386 1386 writesubtree(subm, subp1, subp2, match)
1387 1387
1388 1388 def walksubtrees(self, matcher=None):
1389 1389 """Returns an iterator of the subtrees of this manifest, including this
1390 1390 manifest itself.
1391 1391
1392 1392 If `matcher` is provided, it only returns subtrees that match.
1393 1393 """
1394 1394 if matcher and not matcher.visitdir(self._dir[:-1]):
1395 1395 return
1396 1396 if not matcher or matcher(self._dir[:-1]):
1397 1397 yield self
1398 1398
1399 1399 self._load()
1400 1400 # OPT: use visitchildrenset to avoid loading everything.
1401 1401 self._loadalllazy()
1402 1402 for d, subm in pycompat.iteritems(self._dirs):
1403 1403 for subtree in subm.walksubtrees(matcher=matcher):
1404 1404 yield subtree
1405 1405
1406 1406
1407 1407 class manifestfulltextcache(util.lrucachedict):
1408 1408 """File-backed LRU cache for the manifest cache
1409 1409
1410 1410 File consists of entries, up to EOF:
1411 1411
1412 1412 - 20 bytes node, 4 bytes length, <length> manifest data
1413 1413
1414 1414 These are written in reverse cache order (oldest to newest).
1415 1415
1416 1416 """
1417 1417
1418 1418 _file = b'manifestfulltextcache'
1419 1419
1420 1420 def __init__(self, max):
1421 1421 super(manifestfulltextcache, self).__init__(max)
1422 1422 self._dirty = False
1423 1423 self._read = False
1424 1424 self._opener = None
1425 1425
1426 1426 def read(self):
1427 1427 if self._read or self._opener is None:
1428 1428 return
1429 1429
1430 1430 try:
1431 1431 with self._opener(self._file) as fp:
1432 1432 set = super(manifestfulltextcache, self).__setitem__
1433 1433 # ignore trailing data, this is a cache, corruption is skipped
1434 1434 while True:
1435 1435 # TODO do we need to do work here for sha1 portability?
1436 1436 node = fp.read(20)
1437 1437 if len(node) < 20:
1438 1438 break
1439 1439 try:
1440 1440 size = struct.unpack(b'>L', fp.read(4))[0]
1441 1441 except struct.error:
1442 1442 break
1443 1443 value = bytearray(fp.read(size))
1444 1444 if len(value) != size:
1445 1445 break
1446 1446 set(node, value)
1447 1447 except IOError:
1448 1448 # the file is allowed to be missing
1449 1449 pass
1450 1450
1451 1451 self._read = True
1452 1452 self._dirty = False
1453 1453
1454 1454 def write(self):
1455 1455 if not self._dirty or self._opener is None:
1456 1456 return
1457 1457 # rotate backwards to the first used node
1458 1458 try:
1459 1459 with self._opener(
1460 1460 self._file, b'w', atomictemp=True, checkambig=True
1461 1461 ) as fp:
1462 1462 node = self._head.prev
1463 1463 while True:
1464 1464 if node.key in self._cache:
1465 1465 fp.write(node.key)
1466 1466 fp.write(struct.pack(b'>L', len(node.value)))
1467 1467 fp.write(node.value)
1468 1468 if node is self._head:
1469 1469 break
1470 1470 node = node.prev
1471 1471 except IOError:
1472 1472 # We could not write the cache (eg: permission error)
1473 1473 # the content can be missing.
1474 1474 #
1475 1475 # We could try harder and see if we could recreate a wcache
1476 1476 # directory were we coudl write too.
1477 1477 #
1478 1478 # XXX the error pass silently, having some way to issue an error
1479 1479 # log `ui.log` would be nice.
1480 1480 pass
1481 1481
1482 1482 def __len__(self):
1483 1483 if not self._read:
1484 1484 self.read()
1485 1485 return super(manifestfulltextcache, self).__len__()
1486 1486
1487 1487 def __contains__(self, k):
1488 1488 if not self._read:
1489 1489 self.read()
1490 1490 return super(manifestfulltextcache, self).__contains__(k)
1491 1491
1492 1492 def __iter__(self):
1493 1493 if not self._read:
1494 1494 self.read()
1495 1495 return super(manifestfulltextcache, self).__iter__()
1496 1496
1497 1497 def __getitem__(self, k):
1498 1498 if not self._read:
1499 1499 self.read()
1500 1500 # the cache lru order can change on read
1501 1501 setdirty = self._cache.get(k) is not self._head
1502 1502 value = super(manifestfulltextcache, self).__getitem__(k)
1503 1503 if setdirty:
1504 1504 self._dirty = True
1505 1505 return value
1506 1506
1507 1507 def __setitem__(self, k, v):
1508 1508 if not self._read:
1509 1509 self.read()
1510 1510 super(manifestfulltextcache, self).__setitem__(k, v)
1511 1511 self._dirty = True
1512 1512
1513 1513 def __delitem__(self, k):
1514 1514 if not self._read:
1515 1515 self.read()
1516 1516 super(manifestfulltextcache, self).__delitem__(k)
1517 1517 self._dirty = True
1518 1518
1519 1519 def get(self, k, default=None):
1520 1520 if not self._read:
1521 1521 self.read()
1522 1522 return super(manifestfulltextcache, self).get(k, default=default)
1523 1523
1524 1524 def clear(self, clear_persisted_data=False):
1525 1525 super(manifestfulltextcache, self).clear()
1526 1526 if clear_persisted_data:
1527 1527 self._dirty = True
1528 1528 self.write()
1529 1529 self._read = False
1530 1530
1531 1531
1532 1532 # and upper bound of what we expect from compression
1533 1533 # (real live value seems to be "3")
1534 1534 MAXCOMPRESSION = 3
1535 1535
1536 1536
1537 1537 class FastdeltaUnavailable(Exception):
1538 1538 """Exception raised when fastdelta isn't usable on a manifest."""
1539 1539
1540 1540
1541 1541 @interfaceutil.implementer(repository.imanifeststorage)
1542 1542 class manifestrevlog(object):
1543 1543 '''A revlog that stores manifest texts. This is responsible for caching the
1544 1544 full-text manifest contents.
1545 1545 '''
1546 1546
1547 1547 def __init__(
1548 1548 self,
1549 1549 opener,
1550 1550 tree=b'',
1551 1551 dirlogcache=None,
1552 1552 indexfile=None,
1553 1553 treemanifest=False,
1554 1554 ):
1555 1555 """Constructs a new manifest revlog
1556 1556
1557 1557 `indexfile` - used by extensions to have two manifests at once, like
1558 1558 when transitioning between flatmanifeset and treemanifests.
1559 1559
1560 1560 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1561 1561 options can also be used to make this a tree manifest revlog. The opener
1562 1562 option takes precedence, so if it is set to True, we ignore whatever
1563 1563 value is passed in to the constructor.
1564 1564 """
1565 1565 # During normal operations, we expect to deal with not more than four
1566 1566 # revs at a time (such as during commit --amend). When rebasing large
1567 1567 # stacks of commits, the number can go up, hence the config knob below.
1568 1568 cachesize = 4
1569 1569 optiontreemanifest = False
1570 1570 opts = getattr(opener, 'options', None)
1571 1571 if opts is not None:
1572 1572 cachesize = opts.get(b'manifestcachesize', cachesize)
1573 1573 optiontreemanifest = opts.get(b'treemanifest', False)
1574 1574
1575 1575 self._treeondisk = optiontreemanifest or treemanifest
1576 1576
1577 1577 self._fulltextcache = manifestfulltextcache(cachesize)
1578 1578
1579 1579 if tree:
1580 1580 assert self._treeondisk, b'opts is %r' % opts
1581 1581
1582 1582 if indexfile is None:
1583 1583 indexfile = b'00manifest.i'
1584 1584 if tree:
1585 1585 indexfile = b"meta/" + tree + indexfile
1586 1586
1587 1587 self.tree = tree
1588 1588
1589 1589 # The dirlogcache is kept on the root manifest log
1590 1590 if tree:
1591 1591 self._dirlogcache = dirlogcache
1592 1592 else:
1593 1593 self._dirlogcache = {b'': self}
1594 1594
1595 1595 self._revlog = revlog.revlog(
1596 1596 opener,
1597 1597 indexfile,
1598 1598 # only root indexfile is cached
1599 1599 checkambig=not bool(tree),
1600 1600 mmaplargeindex=True,
1601 1601 upperboundcomp=MAXCOMPRESSION,
1602 1602 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
1603 1603 )
1604 1604
1605 1605 self.index = self._revlog.index
1606 1606 self.version = self._revlog.version
1607 1607 self._generaldelta = self._revlog._generaldelta
1608 1608
1609 1609 def _setupmanifestcachehooks(self, repo):
1610 1610 """Persist the manifestfulltextcache on lock release"""
1611 1611 if not util.safehasattr(repo, b'_wlockref'):
1612 1612 return
1613 1613
1614 1614 self._fulltextcache._opener = repo.wcachevfs
1615 1615 if repo._currentlock(repo._wlockref) is None:
1616 1616 return
1617 1617
1618 1618 reporef = weakref.ref(repo)
1619 1619 manifestrevlogref = weakref.ref(self)
1620 1620
1621 1621 def persistmanifestcache(success):
1622 1622 # Repo is in an unknown state, do not persist.
1623 1623 if not success:
1624 1624 return
1625 1625
1626 1626 repo = reporef()
1627 1627 self = manifestrevlogref()
1628 1628 if repo is None or self is None:
1629 1629 return
1630 1630 if repo.manifestlog.getstorage(b'') is not self:
1631 1631 # there's a different manifest in play now, abort
1632 1632 return
1633 1633 self._fulltextcache.write()
1634 1634
1635 1635 repo._afterlock(persistmanifestcache)
1636 1636
1637 1637 @property
1638 1638 def fulltextcache(self):
1639 1639 return self._fulltextcache
1640 1640
1641 1641 def clearcaches(self, clear_persisted_data=False):
1642 1642 self._revlog.clearcaches()
1643 1643 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1644 1644 self._dirlogcache = {self.tree: self}
1645 1645
1646 1646 def dirlog(self, d):
1647 1647 if d:
1648 1648 assert self._treeondisk
1649 1649 if d not in self._dirlogcache:
1650 1650 mfrevlog = manifestrevlog(
1651 1651 self.opener, d, self._dirlogcache, treemanifest=self._treeondisk
1652 1652 )
1653 1653 self._dirlogcache[d] = mfrevlog
1654 1654 return self._dirlogcache[d]
1655 1655
1656 1656 def add(
1657 1657 self,
1658 1658 m,
1659 1659 transaction,
1660 1660 link,
1661 1661 p1,
1662 1662 p2,
1663 1663 added,
1664 1664 removed,
1665 1665 readtree=None,
1666 1666 match=None,
1667 1667 ):
1668 """add some manifest entry in to the manifest log
1669
1670 input:
1671
1672 m: the manifest dict we want to store
1673 transaction: the open transaction
1674 p1: manifest-node of p1
1675 p2: manifest-node of p2
1676 added: file added/changed compared to parent
1677 removed: file removed compared to parent
1678
1679 tree manifest input:
1680
1681 readtree: a function to read a subtree
1682 match: a filematcher for the subpart of the tree manifest
1683 """
1668 1684 try:
1669 1685 if p1 not in self.fulltextcache:
1670 1686 raise FastdeltaUnavailable()
1671 1687 # If our first parent is in the manifest cache, we can
1672 1688 # compute a delta here using properties we know about the
1673 1689 # manifest up-front, which may save time later for the
1674 1690 # revlog layer.
1675 1691
1676 1692 _checkforbidden(added)
1677 1693 # combine the changed lists into one sorted iterator
1678 1694 work = heapq.merge(
1679 1695 [(x, False) for x in sorted(added)],
1680 1696 [(x, True) for x in sorted(removed)],
1681 1697 )
1682 1698
1683 1699 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1684 1700 cachedelta = self._revlog.rev(p1), deltatext
1685 1701 text = util.buffer(arraytext)
1686 1702 n = self._revlog.addrevision(
1687 1703 text, transaction, link, p1, p2, cachedelta
1688 1704 )
1689 1705 except FastdeltaUnavailable:
1690 1706 # The first parent manifest isn't already loaded or the
1691 1707 # manifest implementation doesn't support fastdelta, so
1692 1708 # we'll just encode a fulltext of the manifest and pass
1693 1709 # that through to the revlog layer, and let it handle the
1694 1710 # delta process.
1695 1711 if self._treeondisk:
1696 1712 assert readtree, b"readtree must be set for treemanifest writes"
1697 1713 assert match, b"match must be specified for treemanifest writes"
1698 1714 m1 = readtree(self.tree, p1)
1699 1715 m2 = readtree(self.tree, p2)
1700 1716 n = self._addtree(
1701 1717 m, transaction, link, m1, m2, readtree, match=match
1702 1718 )
1703 1719 arraytext = None
1704 1720 else:
1705 1721 text = m.text()
1706 1722 n = self._revlog.addrevision(text, transaction, link, p1, p2)
1707 1723 arraytext = bytearray(text)
1708 1724
1709 1725 if arraytext is not None:
1710 1726 self.fulltextcache[n] = arraytext
1711 1727
1712 1728 return n
1713 1729
1714 1730 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1715 1731 # If the manifest is unchanged compared to one parent,
1716 1732 # don't write a new revision
1717 1733 if self.tree != b'' and (
1718 1734 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1719 1735 ):
1720 1736 return m.node()
1721 1737
1722 1738 def writesubtree(subm, subp1, subp2, match):
1723 1739 sublog = self.dirlog(subm.dir())
1724 1740 sublog.add(
1725 1741 subm,
1726 1742 transaction,
1727 1743 link,
1728 1744 subp1,
1729 1745 subp2,
1730 1746 None,
1731 1747 None,
1732 1748 readtree=readtree,
1733 1749 match=match,
1734 1750 )
1735 1751
1736 1752 m.writesubtrees(m1, m2, writesubtree, match)
1737 1753 text = m.dirtext()
1738 1754 n = None
1739 1755 if self.tree != b'':
1740 1756 # Double-check whether contents are unchanged to one parent
1741 1757 if text == m1.dirtext():
1742 1758 n = m1.node()
1743 1759 elif text == m2.dirtext():
1744 1760 n = m2.node()
1745 1761
1746 1762 if not n:
1747 1763 n = self._revlog.addrevision(
1748 1764 text, transaction, link, m1.node(), m2.node()
1749 1765 )
1750 1766
1751 1767 # Save nodeid so parent manifest can calculate its nodeid
1752 1768 m.setnode(n)
1753 1769 return n
1754 1770
1755 1771 def __len__(self):
1756 1772 return len(self._revlog)
1757 1773
1758 1774 def __iter__(self):
1759 1775 return self._revlog.__iter__()
1760 1776
1761 1777 def rev(self, node):
1762 1778 return self._revlog.rev(node)
1763 1779
1764 1780 def node(self, rev):
1765 1781 return self._revlog.node(rev)
1766 1782
1767 1783 def lookup(self, value):
1768 1784 return self._revlog.lookup(value)
1769 1785
1770 1786 def parentrevs(self, rev):
1771 1787 return self._revlog.parentrevs(rev)
1772 1788
1773 1789 def parents(self, node):
1774 1790 return self._revlog.parents(node)
1775 1791
1776 1792 def linkrev(self, rev):
1777 1793 return self._revlog.linkrev(rev)
1778 1794
1779 1795 def checksize(self):
1780 1796 return self._revlog.checksize()
1781 1797
1782 1798 def revision(self, node, _df=None, raw=False):
1783 1799 return self._revlog.revision(node, _df=_df, raw=raw)
1784 1800
1785 1801 def rawdata(self, node, _df=None):
1786 1802 return self._revlog.rawdata(node, _df=_df)
1787 1803
1788 1804 def revdiff(self, rev1, rev2):
1789 1805 return self._revlog.revdiff(rev1, rev2)
1790 1806
1791 1807 def cmp(self, node, text):
1792 1808 return self._revlog.cmp(node, text)
1793 1809
1794 1810 def deltaparent(self, rev):
1795 1811 return self._revlog.deltaparent(rev)
1796 1812
1797 1813 def emitrevisions(
1798 1814 self,
1799 1815 nodes,
1800 1816 nodesorder=None,
1801 1817 revisiondata=False,
1802 1818 assumehaveparentrevisions=False,
1803 1819 deltamode=repository.CG_DELTAMODE_STD,
1804 1820 ):
1805 1821 return self._revlog.emitrevisions(
1806 1822 nodes,
1807 1823 nodesorder=nodesorder,
1808 1824 revisiondata=revisiondata,
1809 1825 assumehaveparentrevisions=assumehaveparentrevisions,
1810 1826 deltamode=deltamode,
1811 1827 )
1812 1828
1813 1829 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1814 1830 return self._revlog.addgroup(
1815 1831 deltas, linkmapper, transaction, addrevisioncb=addrevisioncb
1816 1832 )
1817 1833
1818 1834 def rawsize(self, rev):
1819 1835 return self._revlog.rawsize(rev)
1820 1836
1821 1837 def getstrippoint(self, minlink):
1822 1838 return self._revlog.getstrippoint(minlink)
1823 1839
1824 1840 def strip(self, minlink, transaction):
1825 1841 return self._revlog.strip(minlink, transaction)
1826 1842
1827 1843 def files(self):
1828 1844 return self._revlog.files()
1829 1845
1830 1846 def clone(self, tr, destrevlog, **kwargs):
1831 1847 if not isinstance(destrevlog, manifestrevlog):
1832 1848 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1833 1849
1834 1850 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1835 1851
1836 1852 def storageinfo(
1837 1853 self,
1838 1854 exclusivefiles=False,
1839 1855 sharedfiles=False,
1840 1856 revisionscount=False,
1841 1857 trackedsize=False,
1842 1858 storedsize=False,
1843 1859 ):
1844 1860 return self._revlog.storageinfo(
1845 1861 exclusivefiles=exclusivefiles,
1846 1862 sharedfiles=sharedfiles,
1847 1863 revisionscount=revisionscount,
1848 1864 trackedsize=trackedsize,
1849 1865 storedsize=storedsize,
1850 1866 )
1851 1867
1852 1868 @property
1853 1869 def indexfile(self):
1854 1870 return self._revlog.indexfile
1855 1871
1856 1872 @indexfile.setter
1857 1873 def indexfile(self, value):
1858 1874 self._revlog.indexfile = value
1859 1875
1860 1876 @property
1861 1877 def opener(self):
1862 1878 return self._revlog.opener
1863 1879
1864 1880 @opener.setter
1865 1881 def opener(self, value):
1866 1882 self._revlog.opener = value
1867 1883
1868 1884
1869 1885 @interfaceutil.implementer(repository.imanifestlog)
1870 1886 class manifestlog(object):
1871 1887 """A collection class representing the collection of manifest snapshots
1872 1888 referenced by commits in the repository.
1873 1889
1874 1890 In this situation, 'manifest' refers to the abstract concept of a snapshot
1875 1891 of the list of files in the given commit. Consumers of the output of this
1876 1892 class do not care about the implementation details of the actual manifests
1877 1893 they receive (i.e. tree or flat or lazily loaded, etc)."""
1878 1894
1879 1895 def __init__(self, opener, repo, rootstore, narrowmatch):
1880 1896 usetreemanifest = False
1881 1897 cachesize = 4
1882 1898
1883 1899 opts = getattr(opener, 'options', None)
1884 1900 if opts is not None:
1885 1901 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1886 1902 cachesize = opts.get(b'manifestcachesize', cachesize)
1887 1903
1888 1904 self._treemanifests = usetreemanifest
1889 1905
1890 1906 self._rootstore = rootstore
1891 1907 self._rootstore._setupmanifestcachehooks(repo)
1892 1908 self._narrowmatch = narrowmatch
1893 1909
1894 1910 # A cache of the manifestctx or treemanifestctx for each directory
1895 1911 self._dirmancache = {}
1896 1912 self._dirmancache[b''] = util.lrucachedict(cachesize)
1897 1913
1898 1914 self._cachesize = cachesize
1899 1915
1900 1916 def __getitem__(self, node):
1901 1917 """Retrieves the manifest instance for the given node. Throws a
1902 1918 LookupError if not found.
1903 1919 """
1904 1920 return self.get(b'', node)
1905 1921
1906 1922 def get(self, tree, node, verify=True):
1907 1923 """Retrieves the manifest instance for the given node. Throws a
1908 1924 LookupError if not found.
1909 1925
1910 1926 `verify` - if True an exception will be thrown if the node is not in
1911 1927 the revlog
1912 1928 """
1913 1929 if node in self._dirmancache.get(tree, ()):
1914 1930 return self._dirmancache[tree][node]
1915 1931
1916 1932 if not self._narrowmatch.always():
1917 1933 if not self._narrowmatch.visitdir(tree[:-1]):
1918 1934 return excludeddirmanifestctx(tree, node)
1919 1935 if tree:
1920 1936 if self._rootstore._treeondisk:
1921 1937 if verify:
1922 1938 # Side-effect is LookupError is raised if node doesn't
1923 1939 # exist.
1924 1940 self.getstorage(tree).rev(node)
1925 1941
1926 1942 m = treemanifestctx(self, tree, node)
1927 1943 else:
1928 1944 raise error.Abort(
1929 1945 _(
1930 1946 b"cannot ask for manifest directory '%s' in a flat "
1931 1947 b"manifest"
1932 1948 )
1933 1949 % tree
1934 1950 )
1935 1951 else:
1936 1952 if verify:
1937 1953 # Side-effect is LookupError is raised if node doesn't exist.
1938 1954 self._rootstore.rev(node)
1939 1955
1940 1956 if self._treemanifests:
1941 1957 m = treemanifestctx(self, b'', node)
1942 1958 else:
1943 1959 m = manifestctx(self, node)
1944 1960
1945 1961 if node != nullid:
1946 1962 mancache = self._dirmancache.get(tree)
1947 1963 if not mancache:
1948 1964 mancache = util.lrucachedict(self._cachesize)
1949 1965 self._dirmancache[tree] = mancache
1950 1966 mancache[node] = m
1951 1967 return m
1952 1968
1953 1969 def getstorage(self, tree):
1954 1970 return self._rootstore.dirlog(tree)
1955 1971
1956 1972 def clearcaches(self, clear_persisted_data=False):
1957 1973 self._dirmancache.clear()
1958 1974 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
1959 1975
1960 1976 def rev(self, node):
1961 1977 return self._rootstore.rev(node)
1962 1978
1963 1979 def update_caches(self, transaction):
1964 1980 return self._rootstore._revlog.update_caches(transaction=transaction)
1965 1981
1966 1982
1967 1983 @interfaceutil.implementer(repository.imanifestrevisionwritable)
1968 1984 class memmanifestctx(object):
1969 1985 def __init__(self, manifestlog):
1970 1986 self._manifestlog = manifestlog
1971 1987 self._manifestdict = manifestdict()
1972 1988
1973 1989 def _storage(self):
1974 1990 return self._manifestlog.getstorage(b'')
1975 1991
1976 1992 def copy(self):
1977 1993 memmf = memmanifestctx(self._manifestlog)
1978 1994 memmf._manifestdict = self.read().copy()
1979 1995 return memmf
1980 1996
1981 1997 def read(self):
1982 1998 return self._manifestdict
1983 1999
1984 2000 def write(self, transaction, link, p1, p2, added, removed, match=None):
1985 2001 return self._storage().add(
1986 2002 self._manifestdict,
1987 2003 transaction,
1988 2004 link,
1989 2005 p1,
1990 2006 p2,
1991 2007 added,
1992 2008 removed,
1993 2009 match=match,
1994 2010 )
1995 2011
1996 2012
1997 2013 @interfaceutil.implementer(repository.imanifestrevisionstored)
1998 2014 class manifestctx(object):
1999 2015 """A class representing a single revision of a manifest, including its
2000 2016 contents, its parent revs, and its linkrev.
2001 2017 """
2002 2018
2003 2019 def __init__(self, manifestlog, node):
2004 2020 self._manifestlog = manifestlog
2005 2021 self._data = None
2006 2022
2007 2023 self._node = node
2008 2024
2009 2025 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2010 2026 # but let's add it later when something needs it and we can load it
2011 2027 # lazily.
2012 2028 # self.p1, self.p2 = store.parents(node)
2013 2029 # rev = store.rev(node)
2014 2030 # self.linkrev = store.linkrev(rev)
2015 2031
2016 2032 def _storage(self):
2017 2033 return self._manifestlog.getstorage(b'')
2018 2034
2019 2035 def node(self):
2020 2036 return self._node
2021 2037
2022 2038 def copy(self):
2023 2039 memmf = memmanifestctx(self._manifestlog)
2024 2040 memmf._manifestdict = self.read().copy()
2025 2041 return memmf
2026 2042
2027 2043 @propertycache
2028 2044 def parents(self):
2029 2045 return self._storage().parents(self._node)
2030 2046
2031 2047 def read(self):
2032 2048 if self._data is None:
2033 2049 if self._node == nullid:
2034 2050 self._data = manifestdict()
2035 2051 else:
2036 2052 store = self._storage()
2037 2053 if self._node in store.fulltextcache:
2038 2054 text = pycompat.bytestr(store.fulltextcache[self._node])
2039 2055 else:
2040 2056 text = store.revision(self._node)
2041 2057 arraytext = bytearray(text)
2042 2058 store.fulltextcache[self._node] = arraytext
2043 2059 self._data = manifestdict(text)
2044 2060 return self._data
2045 2061
2046 2062 def readfast(self, shallow=False):
2047 2063 '''Calls either readdelta or read, based on which would be less work.
2048 2064 readdelta is called if the delta is against the p1, and therefore can be
2049 2065 read quickly.
2050 2066
2051 2067 If `shallow` is True, nothing changes since this is a flat manifest.
2052 2068 '''
2053 2069 store = self._storage()
2054 2070 r = store.rev(self._node)
2055 2071 deltaparent = store.deltaparent(r)
2056 2072 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2057 2073 return self.readdelta()
2058 2074 return self.read()
2059 2075
2060 2076 def readdelta(self, shallow=False):
2061 2077 '''Returns a manifest containing just the entries that are present
2062 2078 in this manifest, but not in its p1 manifest. This is efficient to read
2063 2079 if the revlog delta is already p1.
2064 2080
2065 2081 Changing the value of `shallow` has no effect on flat manifests.
2066 2082 '''
2067 2083 store = self._storage()
2068 2084 r = store.rev(self._node)
2069 2085 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2070 2086 return manifestdict(d)
2071 2087
2072 2088 def find(self, key):
2073 2089 return self.read().find(key)
2074 2090
2075 2091
2076 2092 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2077 2093 class memtreemanifestctx(object):
2078 2094 def __init__(self, manifestlog, dir=b''):
2079 2095 self._manifestlog = manifestlog
2080 2096 self._dir = dir
2081 2097 self._treemanifest = treemanifest()
2082 2098
2083 2099 def _storage(self):
2084 2100 return self._manifestlog.getstorage(b'')
2085 2101
2086 2102 def copy(self):
2087 2103 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2088 2104 memmf._treemanifest = self._treemanifest.copy()
2089 2105 return memmf
2090 2106
2091 2107 def read(self):
2092 2108 return self._treemanifest
2093 2109
2094 2110 def write(self, transaction, link, p1, p2, added, removed, match=None):
2095 2111 def readtree(dir, node):
2096 2112 return self._manifestlog.get(dir, node).read()
2097 2113
2098 2114 return self._storage().add(
2099 2115 self._treemanifest,
2100 2116 transaction,
2101 2117 link,
2102 2118 p1,
2103 2119 p2,
2104 2120 added,
2105 2121 removed,
2106 2122 readtree=readtree,
2107 2123 match=match,
2108 2124 )
2109 2125
2110 2126
2111 2127 @interfaceutil.implementer(repository.imanifestrevisionstored)
2112 2128 class treemanifestctx(object):
2113 2129 def __init__(self, manifestlog, dir, node):
2114 2130 self._manifestlog = manifestlog
2115 2131 self._dir = dir
2116 2132 self._data = None
2117 2133
2118 2134 self._node = node
2119 2135
2120 2136 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2121 2137 # we can instantiate treemanifestctx objects for directories we don't
2122 2138 # have on disk.
2123 2139 # self.p1, self.p2 = store.parents(node)
2124 2140 # rev = store.rev(node)
2125 2141 # self.linkrev = store.linkrev(rev)
2126 2142
2127 2143 def _storage(self):
2128 2144 narrowmatch = self._manifestlog._narrowmatch
2129 2145 if not narrowmatch.always():
2130 2146 if not narrowmatch.visitdir(self._dir[:-1]):
2131 2147 return excludedmanifestrevlog(self._dir)
2132 2148 return self._manifestlog.getstorage(self._dir)
2133 2149
2134 2150 def read(self):
2135 2151 if self._data is None:
2136 2152 store = self._storage()
2137 2153 if self._node == nullid:
2138 2154 self._data = treemanifest()
2139 2155 # TODO accessing non-public API
2140 2156 elif store._treeondisk:
2141 2157 m = treemanifest(dir=self._dir)
2142 2158
2143 2159 def gettext():
2144 2160 return store.revision(self._node)
2145 2161
2146 2162 def readsubtree(dir, subm):
2147 2163 # Set verify to False since we need to be able to create
2148 2164 # subtrees for trees that don't exist on disk.
2149 2165 return self._manifestlog.get(dir, subm, verify=False).read()
2150 2166
2151 2167 m.read(gettext, readsubtree)
2152 2168 m.setnode(self._node)
2153 2169 self._data = m
2154 2170 else:
2155 2171 if self._node in store.fulltextcache:
2156 2172 text = pycompat.bytestr(store.fulltextcache[self._node])
2157 2173 else:
2158 2174 text = store.revision(self._node)
2159 2175 arraytext = bytearray(text)
2160 2176 store.fulltextcache[self._node] = arraytext
2161 2177 self._data = treemanifest(dir=self._dir, text=text)
2162 2178
2163 2179 return self._data
2164 2180
2165 2181 def node(self):
2166 2182 return self._node
2167 2183
2168 2184 def copy(self):
2169 2185 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2170 2186 memmf._treemanifest = self.read().copy()
2171 2187 return memmf
2172 2188
2173 2189 @propertycache
2174 2190 def parents(self):
2175 2191 return self._storage().parents(self._node)
2176 2192
2177 2193 def readdelta(self, shallow=False):
2178 2194 '''Returns a manifest containing just the entries that are present
2179 2195 in this manifest, but not in its p1 manifest. This is efficient to read
2180 2196 if the revlog delta is already p1.
2181 2197
2182 2198 If `shallow` is True, this will read the delta for this directory,
2183 2199 without recursively reading subdirectory manifests. Instead, any
2184 2200 subdirectory entry will be reported as it appears in the manifest, i.e.
2185 2201 the subdirectory will be reported among files and distinguished only by
2186 2202 its 't' flag.
2187 2203 '''
2188 2204 store = self._storage()
2189 2205 if shallow:
2190 2206 r = store.rev(self._node)
2191 2207 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2192 2208 return manifestdict(d)
2193 2209 else:
2194 2210 # Need to perform a slow delta
2195 2211 r0 = store.deltaparent(store.rev(self._node))
2196 2212 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2197 2213 m1 = self.read()
2198 2214 md = treemanifest(dir=self._dir)
2199 2215 for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
2200 2216 if n1:
2201 2217 md[f] = n1
2202 2218 if fl1:
2203 2219 md.setflag(f, fl1)
2204 2220 return md
2205 2221
2206 2222 def readfast(self, shallow=False):
2207 2223 '''Calls either readdelta or read, based on which would be less work.
2208 2224 readdelta is called if the delta is against the p1, and therefore can be
2209 2225 read quickly.
2210 2226
2211 2227 If `shallow` is True, it only returns the entries from this manifest,
2212 2228 and not any submanifests.
2213 2229 '''
2214 2230 store = self._storage()
2215 2231 r = store.rev(self._node)
2216 2232 deltaparent = store.deltaparent(r)
2217 2233 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2218 2234 return self.readdelta(shallow=shallow)
2219 2235
2220 2236 if shallow:
2221 2237 return manifestdict(store.revision(self._node))
2222 2238 else:
2223 2239 return self.read()
2224 2240
2225 2241 def find(self, key):
2226 2242 return self.read().find(key)
2227 2243
2228 2244
2229 2245 class excludeddir(treemanifest):
2230 2246 """Stand-in for a directory that is excluded from the repository.
2231 2247
2232 2248 With narrowing active on a repository that uses treemanifests,
2233 2249 some of the directory revlogs will be excluded from the resulting
2234 2250 clone. This is a huge storage win for clients, but means we need
2235 2251 some sort of pseudo-manifest to surface to internals so we can
2236 2252 detect a merge conflict outside the narrowspec. That's what this
2237 2253 class is: it stands in for a directory whose node is known, but
2238 2254 whose contents are unknown.
2239 2255 """
2240 2256
2241 2257 def __init__(self, dir, node):
2242 2258 super(excludeddir, self).__init__(dir)
2243 2259 self._node = node
2244 2260 # Add an empty file, which will be included by iterators and such,
2245 2261 # appearing as the directory itself (i.e. something like "dir/")
2246 2262 self._files[b''] = node
2247 2263 self._flags[b''] = b't'
2248 2264
2249 2265 # Manifests outside the narrowspec should never be modified, so avoid
2250 2266 # copying. This makes a noticeable difference when there are very many
2251 2267 # directories outside the narrowspec. Also, it makes sense for the copy to
2252 2268 # be of the same type as the original, which would not happen with the
2253 2269 # super type's copy().
2254 2270 def copy(self):
2255 2271 return self
2256 2272
2257 2273
2258 2274 class excludeddirmanifestctx(treemanifestctx):
2259 2275 """context wrapper for excludeddir - see that docstring for rationale"""
2260 2276
2261 2277 def __init__(self, dir, node):
2262 2278 self._dir = dir
2263 2279 self._node = node
2264 2280
2265 2281 def read(self):
2266 2282 return excludeddir(self._dir, self._node)
2267 2283
2268 2284 def write(self, *args):
2269 2285 raise error.ProgrammingError(
2270 2286 b'attempt to write manifest from excluded dir %s' % self._dir
2271 2287 )
2272 2288
2273 2289
2274 2290 class excludedmanifestrevlog(manifestrevlog):
2275 2291 """Stand-in for excluded treemanifest revlogs.
2276 2292
2277 2293 When narrowing is active on a treemanifest repository, we'll have
2278 2294 references to directories we can't see due to the revlog being
2279 2295 skipped. This class exists to conform to the manifestrevlog
2280 2296 interface for those directories and proactively prevent writes to
2281 2297 outside the narrowspec.
2282 2298 """
2283 2299
2284 2300 def __init__(self, dir):
2285 2301 self._dir = dir
2286 2302
2287 2303 def __len__(self):
2288 2304 raise error.ProgrammingError(
2289 2305 b'attempt to get length of excluded dir %s' % self._dir
2290 2306 )
2291 2307
2292 2308 def rev(self, node):
2293 2309 raise error.ProgrammingError(
2294 2310 b'attempt to get rev from excluded dir %s' % self._dir
2295 2311 )
2296 2312
2297 2313 def linkrev(self, node):
2298 2314 raise error.ProgrammingError(
2299 2315 b'attempt to get linkrev from excluded dir %s' % self._dir
2300 2316 )
2301 2317
2302 2318 def node(self, rev):
2303 2319 raise error.ProgrammingError(
2304 2320 b'attempt to get node from excluded dir %s' % self._dir
2305 2321 )
2306 2322
2307 2323 def add(self, *args, **kwargs):
2308 2324 # We should never write entries in dirlogs outside the narrow clone.
2309 2325 # However, the method still gets called from writesubtree() in
2310 2326 # _addtree(), so we need to handle it. We should possibly make that
2311 2327 # avoid calling add() with a clean manifest (_dirty is always False
2312 2328 # in excludeddir instances).
2313 2329 pass
General Comments 0
You need to be logged in to leave comments. Login now