##// END OF EJS Templates
manifest: leave a TODO where we may have more work for sha1 portability...
Augie Fackler -
r45198:f0d88d20 default draft
parent child Browse files
Show More
@@ -1,2297 +1,2298 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from .pycompat import getattr
23 23 from . import (
24 24 encoding,
25 25 error,
26 26 match as matchmod,
27 27 mdiff,
28 28 pathutil,
29 29 policy,
30 30 pycompat,
31 31 revlog,
32 32 util,
33 33 )
34 34 from .interfaces import (
35 35 repository,
36 36 util as interfaceutil,
37 37 )
38 38
39 39 parsers = policy.importmod('parsers')
40 40 propertycache = util.propertycache
41 41
42 42 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
43 43 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
44 44
45 45
46 46 def _parse(data):
47 47 # This method does a little bit of excessive-looking
48 48 # precondition checking. This is so that the behavior of this
49 49 # class exactly matches its C counterpart to try and help
50 50 # prevent surprise breakage for anyone that develops against
51 51 # the pure version.
52 52 if data and data[-1:] != b'\n':
53 53 raise ValueError(b'Manifest did not end in a newline.')
54 54 prev = None
55 55 for l in data.splitlines():
56 56 if prev is not None and prev > l:
57 57 raise ValueError(b'Manifest lines not in sorted order.')
58 58 prev = l
59 59 f, n = l.split(b'\0')
60 60 nl = len(n)
61 61 if 64 < nl:
62 62 # modern hash, full width
63 63 yield f, bin(n[:64]), n[64:]
64 64 if 40 < nl < 45:
65 65 # legacy hash, always sha1
66 66 yield f, bin(n[:40]), n[40:]
67 67 else:
68 68 yield f, bin(n), b''
69 69
70 70
71 71 def _text(it):
72 72 files = []
73 73 lines = []
74 74 for f, n, fl in it:
75 75 files.append(f)
76 76 # if this is changed to support newlines in filenames,
77 77 # be sure to check the templates/ dir again (especially *-raw.tmpl)
78 78 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
79 79
80 80 _checkforbidden(files)
81 81 return b''.join(lines)
82 82
83 83
84 84 class lazymanifestiter(object):
85 85 def __init__(self, lm):
86 86 self.pos = 0
87 87 self.lm = lm
88 88
89 89 def __iter__(self):
90 90 return self
91 91
92 92 def next(self):
93 93 try:
94 94 data, pos = self.lm._get(self.pos)
95 95 except IndexError:
96 96 raise StopIteration
97 97 if pos == -1:
98 98 self.pos += 1
99 99 return data[0]
100 100 self.pos += 1
101 101 zeropos = data.find(b'\x00', pos)
102 102 return data[pos:zeropos]
103 103
104 104 __next__ = next
105 105
106 106
107 107 class lazymanifestiterentries(object):
108 108 def __init__(self, lm):
109 109 self.lm = lm
110 110 self.pos = 0
111 111
112 112 def __iter__(self):
113 113 return self
114 114
115 115 def next(self):
116 116 try:
117 117 data, pos = self.lm._get(self.pos)
118 118 except IndexError:
119 119 raise StopIteration
120 120 if pos == -1:
121 121 self.pos += 1
122 122 return data
123 123 zeropos = data.find(b'\x00', pos)
124 124 hashval = unhexlify(data, self.lm.extrainfo[self.pos], zeropos + 1, 40)
125 125 flags = self.lm._getflags(data, self.pos, zeropos)
126 126 self.pos += 1
127 127 return (data[pos:zeropos], hashval, flags)
128 128
129 129 __next__ = next
130 130
131 131
132 132 def unhexlify(data, extra, pos, length):
133 133 s = bin(data[pos : pos + length])
134 134 if extra:
135 135 s += chr(extra & 0xFF)
136 136 return s
137 137
138 138
139 139 def _cmp(a, b):
140 140 return (a > b) - (a < b)
141 141
142 142
143 143 class _lazymanifest(object):
144 144 """A pure python manifest backed by a byte string. It is supplimented with
145 145 internal lists as it is modified, until it is compacted back to a pure byte
146 146 string.
147 147
148 148 ``data`` is the initial manifest data.
149 149
150 150 ``positions`` is a list of offsets, one per manifest entry. Positive
151 151 values are offsets into ``data``, negative values are offsets into the
152 152 ``extradata`` list. When an entry is removed, its entry is dropped from
153 153 ``positions``. The values are encoded such that when walking the list and
154 154 indexing into ``data`` or ``extradata`` as appropriate, the entries are
155 155 sorted by filename.
156 156
157 157 ``extradata`` is a list of (key, hash, flags) for entries that were added or
158 158 modified since the manifest was created or compacted.
159 159 """
160 160
161 161 def __init__(
162 162 self,
163 163 data,
164 164 positions=None,
165 165 extrainfo=None,
166 166 extradata=None,
167 167 hasremovals=False,
168 168 ):
169 169 if positions is None:
170 170 self.positions = self.findlines(data)
171 171 self.extrainfo = [0] * len(self.positions)
172 172 self.data = data
173 173 self.extradata = []
174 174 self.hasremovals = False
175 175 else:
176 176 self.positions = positions[:]
177 177 self.extrainfo = extrainfo[:]
178 178 self.extradata = extradata[:]
179 179 self.data = data
180 180 self.hasremovals = hasremovals
181 181
182 182 def findlines(self, data):
183 183 if not data:
184 184 return []
185 185 pos = data.find(b"\n")
186 186 if pos == -1 or data[-1:] != b'\n':
187 187 raise ValueError(b"Manifest did not end in a newline.")
188 188 positions = [0]
189 189 prev = data[: data.find(b'\x00')]
190 190 while pos < len(data) - 1 and pos != -1:
191 191 positions.append(pos + 1)
192 192 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
193 193 if nexts < prev:
194 194 raise ValueError(b"Manifest lines not in sorted order.")
195 195 prev = nexts
196 196 pos = data.find(b"\n", pos + 1)
197 197 return positions
198 198
199 199 def _get(self, index):
200 200 # get the position encoded in pos:
201 201 # positive number is an index in 'data'
202 202 # negative number is in extrapieces
203 203 pos = self.positions[index]
204 204 if pos >= 0:
205 205 return self.data, pos
206 206 return self.extradata[-pos - 1], -1
207 207
208 208 def _getkey(self, pos):
209 209 if pos >= 0:
210 210 return self.data[pos : self.data.find(b'\x00', pos + 1)]
211 211 return self.extradata[-pos - 1][0]
212 212
213 213 def bsearch(self, key):
214 214 first = 0
215 215 last = len(self.positions) - 1
216 216
217 217 while first <= last:
218 218 midpoint = (first + last) // 2
219 219 nextpos = self.positions[midpoint]
220 220 candidate = self._getkey(nextpos)
221 221 r = _cmp(key, candidate)
222 222 if r == 0:
223 223 return midpoint
224 224 else:
225 225 if r < 0:
226 226 last = midpoint - 1
227 227 else:
228 228 first = midpoint + 1
229 229 return -1
230 230
231 231 def bsearch2(self, key):
232 232 # same as the above, but will always return the position
233 233 # done for performance reasons
234 234 first = 0
235 235 last = len(self.positions) - 1
236 236
237 237 while first <= last:
238 238 midpoint = (first + last) // 2
239 239 nextpos = self.positions[midpoint]
240 240 candidate = self._getkey(nextpos)
241 241 r = _cmp(key, candidate)
242 242 if r == 0:
243 243 return (midpoint, True)
244 244 else:
245 245 if r < 0:
246 246 last = midpoint - 1
247 247 else:
248 248 first = midpoint + 1
249 249 return (first, False)
250 250
251 251 def __contains__(self, key):
252 252 return self.bsearch(key) != -1
253 253
254 254 def _getflags(self, data, needle, pos):
255 255 start = pos + 41
256 256 end = data.find(b"\n", start)
257 257 if end == -1:
258 258 end = len(data) - 1
259 259 if start == end:
260 260 return b''
261 261 return self.data[start:end]
262 262
263 263 def __getitem__(self, key):
264 264 if not isinstance(key, bytes):
265 265 raise TypeError(b"getitem: manifest keys must be a bytes.")
266 266 needle = self.bsearch(key)
267 267 if needle == -1:
268 268 raise KeyError
269 269 data, pos = self._get(needle)
270 270 if pos == -1:
271 271 return (data[1], data[2])
272 272 zeropos = data.find(b'\x00', pos)
273 273 nlpos = data.find(b'\n', zeropos)
274 274 assert 0 <= needle <= len(self.positions)
275 275 assert len(self.extrainfo) == len(self.positions)
276 276 hlen = nlpos - zeropos - 1
277 277 # Hashes sometimes have an extra byte tucked on the end, so
278 278 # detect that.
279 279 if hlen % 2:
280 280 hlen -= 1
281 281 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
282 282 flags = self._getflags(data, needle, zeropos)
283 283 return (hashval, flags)
284 284
285 285 def __delitem__(self, key):
286 286 needle, found = self.bsearch2(key)
287 287 if not found:
288 288 raise KeyError
289 289 cur = self.positions[needle]
290 290 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
291 291 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
292 292 if cur >= 0:
293 293 # This does NOT unsort the list as far as the search functions are
294 294 # concerned, as they only examine lines mapped by self.positions.
295 295 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
296 296 self.hasremovals = True
297 297
298 298 def __setitem__(self, key, value):
299 299 if not isinstance(key, bytes):
300 300 raise TypeError(b"setitem: manifest keys must be a byte string.")
301 301 if not isinstance(value, tuple) or len(value) != 2:
302 302 raise TypeError(
303 303 b"Manifest values must be a tuple of (node, flags)."
304 304 )
305 305 hashval = value[0]
306 306 # hashes are either 20 or 32 bytes (sha1 or its replacement),
307 307 # and allow one extra byte taht won't be persisted to disk but
308 308 # is sometimes used in memory.
309 309 if not isinstance(hashval, bytes) or not (
310 310 20 <= len(hashval) <= 22 or 32 <= len(hashval) <= 34
311 311 ):
312 312 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
313 313 flags = value[1]
314 314 if len(hashval) == 22:
315 315 hashval = hashval[:-1]
316 316 if not isinstance(flags, bytes) or len(flags) > 1:
317 317 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
318 318 needle, found = self.bsearch2(key)
319 319 if found:
320 320 # put the item
321 321 pos = self.positions[needle]
322 322 if pos < 0:
323 323 self.extradata[-pos - 1] = (key, hashval, value[1])
324 324 else:
325 325 # just don't bother
326 326 self.extradata.append((key, hashval, value[1]))
327 327 self.positions[needle] = -len(self.extradata)
328 328 else:
329 329 # not found, put it in with extra positions
330 330 self.extradata.append((key, hashval, value[1]))
331 331 self.positions = (
332 332 self.positions[:needle]
333 333 + [-len(self.extradata)]
334 334 + self.positions[needle:]
335 335 )
336 336 self.extrainfo = (
337 337 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
338 338 )
339 339
340 340 def copy(self):
341 341 # XXX call _compact like in C?
342 342 return _lazymanifest(
343 343 self.data,
344 344 self.positions,
345 345 self.extrainfo,
346 346 self.extradata,
347 347 self.hasremovals,
348 348 )
349 349
350 350 def _compact(self):
351 351 # hopefully not called TOO often
352 352 if len(self.extradata) == 0 and not self.hasremovals:
353 353 return
354 354 l = []
355 355 i = 0
356 356 offset = 0
357 357 self.extrainfo = [0] * len(self.positions)
358 358 while i < len(self.positions):
359 359 if self.positions[i] >= 0:
360 360 cur = self.positions[i]
361 361 last_cut = cur
362 362
363 363 # Collect all contiguous entries in the buffer at the current
364 364 # offset, breaking out only for added/modified items held in
365 365 # extradata, or a deleted line prior to the next position.
366 366 while True:
367 367 self.positions[i] = offset
368 368 i += 1
369 369 if i == len(self.positions) or self.positions[i] < 0:
370 370 break
371 371
372 372 # A removed file has no positions[] entry, but does have an
373 373 # overwritten first byte. Break out and find the end of the
374 374 # current good entry/entries if there is a removed file
375 375 # before the next position.
376 376 if (
377 377 self.hasremovals
378 378 and self.data.find(b'\n\x00', cur, self.positions[i])
379 379 != -1
380 380 ):
381 381 break
382 382
383 383 offset += self.positions[i] - cur
384 384 cur = self.positions[i]
385 385 end_cut = self.data.find(b'\n', cur)
386 386 if end_cut != -1:
387 387 end_cut += 1
388 388 offset += end_cut - cur
389 389 l.append(self.data[last_cut:end_cut])
390 390 else:
391 391 while i < len(self.positions) and self.positions[i] < 0:
392 392 cur = self.positions[i]
393 393 t = self.extradata[-cur - 1]
394 394 l.append(self._pack(t))
395 395 self.positions[i] = offset
396 396 # Hashes are either 20 bytes (old sha1s) or 32
397 397 # bytes (new non-sha1).
398 398 hlen = 20
399 399 if len(t[1]) > 25:
400 400 hlen = 32
401 401 if len(t[1]) > hlen:
402 402 self.extrainfo[i] = ord(t[1][hlen + 1])
403 403 offset += len(l[-1])
404 404 i += 1
405 405 self.data = b''.join(l)
406 406 self.hasremovals = False
407 407 self.extradata = []
408 408
409 409 def _pack(self, d):
410 410 n = d[1]
411 411 if len(n) == 21 or len(n) == 33:
412 412 n = n[:-1]
413 413 assert len(n) == 20 or len(n) == 32
414 414 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
415 415
416 416 def text(self):
417 417 self._compact()
418 418 return self.data
419 419
420 420 def diff(self, m2, clean=False):
421 421 '''Finds changes between the current manifest and m2.'''
422 422 # XXX think whether efficiency matters here
423 423 diff = {}
424 424
425 425 for fn, e1, flags in self.iterentries():
426 426 if fn not in m2:
427 427 diff[fn] = (e1, flags), (None, b'')
428 428 else:
429 429 e2 = m2[fn]
430 430 if (e1, flags) != e2:
431 431 diff[fn] = (e1, flags), e2
432 432 elif clean:
433 433 diff[fn] = None
434 434
435 435 for fn, e2, flags in m2.iterentries():
436 436 if fn not in self:
437 437 diff[fn] = (None, b''), (e2, flags)
438 438
439 439 return diff
440 440
441 441 def iterentries(self):
442 442 return lazymanifestiterentries(self)
443 443
444 444 def iterkeys(self):
445 445 return lazymanifestiter(self)
446 446
447 447 def __iter__(self):
448 448 return lazymanifestiter(self)
449 449
450 450 def __len__(self):
451 451 return len(self.positions)
452 452
453 453 def filtercopy(self, filterfn):
454 454 # XXX should be optimized
455 455 c = _lazymanifest(b'')
456 456 for f, n, fl in self.iterentries():
457 457 if filterfn(f):
458 458 c[f] = n, fl
459 459 return c
460 460
461 461
462 462 try:
463 463 _lazymanifest = parsers.lazymanifest
464 464 except AttributeError:
465 465 pass
466 466
467 467
468 468 @interfaceutil.implementer(repository.imanifestdict)
469 469 class manifestdict(object):
470 470 def __init__(self, data=b''):
471 471 self._lm = _lazymanifest(data)
472 472
473 473 def __getitem__(self, key):
474 474 return self._lm[key][0]
475 475
476 476 def find(self, key):
477 477 return self._lm[key]
478 478
479 479 def __len__(self):
480 480 return len(self._lm)
481 481
482 482 def __nonzero__(self):
483 483 # nonzero is covered by the __len__ function, but implementing it here
484 484 # makes it easier for extensions to override.
485 485 return len(self._lm) != 0
486 486
487 487 __bool__ = __nonzero__
488 488
489 489 def __setitem__(self, key, node):
490 490 self._lm[key] = node, self.flags(key)
491 491
492 492 def __contains__(self, key):
493 493 if key is None:
494 494 return False
495 495 return key in self._lm
496 496
497 497 def __delitem__(self, key):
498 498 del self._lm[key]
499 499
500 500 def __iter__(self):
501 501 return self._lm.__iter__()
502 502
503 503 def iterkeys(self):
504 504 return self._lm.iterkeys()
505 505
506 506 def keys(self):
507 507 return list(self.iterkeys())
508 508
509 509 def filesnotin(self, m2, match=None):
510 510 '''Set of files in this manifest that are not in the other'''
511 511 if match is not None:
512 512 match = matchmod.badmatch(match, lambda path, msg: None)
513 513 sm2 = set(m2.walk(match))
514 514 return {f for f in self.walk(match) if f not in sm2}
515 515 return {f for f in self if f not in m2}
516 516
517 517 @propertycache
518 518 def _dirs(self):
519 519 return pathutil.dirs(self)
520 520
521 521 def dirs(self):
522 522 return self._dirs
523 523
524 524 def hasdir(self, dir):
525 525 return dir in self._dirs
526 526
527 527 def _filesfastpath(self, match):
528 528 '''Checks whether we can correctly and quickly iterate over matcher
529 529 files instead of over manifest files.'''
530 530 files = match.files()
531 531 return len(files) < 100 and (
532 532 match.isexact()
533 533 or (match.prefix() and all(fn in self for fn in files))
534 534 )
535 535
536 536 def walk(self, match):
537 537 '''Generates matching file names.
538 538
539 539 Equivalent to manifest.matches(match).iterkeys(), but without creating
540 540 an entirely new manifest.
541 541
542 542 It also reports nonexistent files by marking them bad with match.bad().
543 543 '''
544 544 if match.always():
545 545 for f in iter(self):
546 546 yield f
547 547 return
548 548
549 549 fset = set(match.files())
550 550
551 551 # avoid the entire walk if we're only looking for specific files
552 552 if self._filesfastpath(match):
553 553 for fn in sorted(fset):
554 554 if fn in self:
555 555 yield fn
556 556 return
557 557
558 558 for fn in self:
559 559 if fn in fset:
560 560 # specified pattern is the exact name
561 561 fset.remove(fn)
562 562 if match(fn):
563 563 yield fn
564 564
565 565 # for dirstate.walk, files=[''] means "walk the whole tree".
566 566 # follow that here, too
567 567 fset.discard(b'')
568 568
569 569 for fn in sorted(fset):
570 570 if not self.hasdir(fn):
571 571 match.bad(fn, None)
572 572
573 573 def _matches(self, match):
574 574 '''generate a new manifest filtered by the match argument'''
575 575 if match.always():
576 576 return self.copy()
577 577
578 578 if self._filesfastpath(match):
579 579 m = manifestdict()
580 580 lm = self._lm
581 581 for fn in match.files():
582 582 if fn in lm:
583 583 m._lm[fn] = lm[fn]
584 584 return m
585 585
586 586 m = manifestdict()
587 587 m._lm = self._lm.filtercopy(match)
588 588 return m
589 589
590 590 def diff(self, m2, match=None, clean=False):
591 591 '''Finds changes between the current manifest and m2.
592 592
593 593 Args:
594 594 m2: the manifest to which this manifest should be compared.
595 595 clean: if true, include files unchanged between these manifests
596 596 with a None value in the returned dictionary.
597 597
598 598 The result is returned as a dict with filename as key and
599 599 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
600 600 nodeid in the current/other manifest and fl1/fl2 is the flag
601 601 in the current/other manifest. Where the file does not exist,
602 602 the nodeid will be None and the flags will be the empty
603 603 string.
604 604 '''
605 605 if match:
606 606 m1 = self._matches(match)
607 607 m2 = m2._matches(match)
608 608 return m1.diff(m2, clean=clean)
609 609 return self._lm.diff(m2._lm, clean)
610 610
611 611 def setflag(self, key, flag):
612 612 self._lm[key] = self[key], flag
613 613
614 614 def get(self, key, default=None):
615 615 try:
616 616 return self._lm[key][0]
617 617 except KeyError:
618 618 return default
619 619
620 620 def flags(self, key):
621 621 try:
622 622 return self._lm[key][1]
623 623 except KeyError:
624 624 return b''
625 625
626 626 def copy(self):
627 627 c = manifestdict()
628 628 c._lm = self._lm.copy()
629 629 return c
630 630
631 631 def items(self):
632 632 return (x[:2] for x in self._lm.iterentries())
633 633
634 634 def iteritems(self):
635 635 return (x[:2] for x in self._lm.iterentries())
636 636
637 637 def iterentries(self):
638 638 return self._lm.iterentries()
639 639
640 640 def text(self):
641 641 # most likely uses native version
642 642 return self._lm.text()
643 643
644 644 def fastdelta(self, base, changes):
645 645 """Given a base manifest text as a bytearray and a list of changes
646 646 relative to that text, compute a delta that can be used by revlog.
647 647 """
648 648 delta = []
649 649 dstart = None
650 650 dend = None
651 651 dline = [b""]
652 652 start = 0
653 653 # zero copy representation of base as a buffer
654 654 addbuf = util.buffer(base)
655 655
656 656 changes = list(changes)
657 657 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
658 658 # start with a readonly loop that finds the offset of
659 659 # each line and creates the deltas
660 660 for f, todelete in changes:
661 661 # bs will either be the index of the item or the insert point
662 662 start, end = _msearch(addbuf, f, start)
663 663 if not todelete:
664 664 h, fl = self._lm[f]
665 665 l = b"%s\0%s%s\n" % (f, hex(h), fl)
666 666 else:
667 667 if start == end:
668 668 # item we want to delete was not found, error out
669 669 raise AssertionError(
670 670 _(b"failed to remove %s from manifest") % f
671 671 )
672 672 l = b""
673 673 if dstart is not None and dstart <= start and dend >= start:
674 674 if dend < end:
675 675 dend = end
676 676 if l:
677 677 dline.append(l)
678 678 else:
679 679 if dstart is not None:
680 680 delta.append([dstart, dend, b"".join(dline)])
681 681 dstart = start
682 682 dend = end
683 683 dline = [l]
684 684
685 685 if dstart is not None:
686 686 delta.append([dstart, dend, b"".join(dline)])
687 687 # apply the delta to the base, and get a delta for addrevision
688 688 deltatext, arraytext = _addlistdelta(base, delta)
689 689 else:
690 690 # For large changes, it's much cheaper to just build the text and
691 691 # diff it.
692 692 arraytext = bytearray(self.text())
693 693 deltatext = mdiff.textdiff(
694 694 util.buffer(base), util.buffer(arraytext)
695 695 )
696 696
697 697 return arraytext, deltatext
698 698
699 699
700 700 def _msearch(m, s, lo=0, hi=None):
701 701 '''return a tuple (start, end) that says where to find s within m.
702 702
703 703 If the string is found m[start:end] are the line containing
704 704 that string. If start == end the string was not found and
705 705 they indicate the proper sorted insertion point.
706 706
707 707 m should be a buffer, a memoryview or a byte string.
708 708 s is a byte string'''
709 709
710 710 def advance(i, c):
711 711 while i < lenm and m[i : i + 1] != c:
712 712 i += 1
713 713 return i
714 714
715 715 if not s:
716 716 return (lo, lo)
717 717 lenm = len(m)
718 718 if not hi:
719 719 hi = lenm
720 720 while lo < hi:
721 721 mid = (lo + hi) // 2
722 722 start = mid
723 723 while start > 0 and m[start - 1 : start] != b'\n':
724 724 start -= 1
725 725 end = advance(start, b'\0')
726 726 if bytes(m[start:end]) < s:
727 727 # we know that after the null there are 40 bytes of sha1
728 728 # this translates to the bisect lo = mid + 1
729 729 lo = advance(end + 40, b'\n') + 1
730 730 else:
731 731 # this translates to the bisect hi = mid
732 732 hi = start
733 733 end = advance(lo, b'\0')
734 734 found = m[lo:end]
735 735 if s == found:
736 736 # we know that after the null there are 40 bytes of sha1
737 737 end = advance(end + 40, b'\n')
738 738 return (lo, end + 1)
739 739 else:
740 740 return (lo, lo)
741 741
742 742
743 743 def _checkforbidden(l):
744 744 """Check filenames for illegal characters."""
745 745 for f in l:
746 746 if b'\n' in f or b'\r' in f:
747 747 raise error.StorageError(
748 748 _(b"'\\n' and '\\r' disallowed in filenames: %r")
749 749 % pycompat.bytestr(f)
750 750 )
751 751
752 752
753 753 # apply the changes collected during the bisect loop to our addlist
754 754 # return a delta suitable for addrevision
755 755 def _addlistdelta(addlist, x):
756 756 # for large addlist arrays, building a new array is cheaper
757 757 # than repeatedly modifying the existing one
758 758 currentposition = 0
759 759 newaddlist = bytearray()
760 760
761 761 for start, end, content in x:
762 762 newaddlist += addlist[currentposition:start]
763 763 if content:
764 764 newaddlist += bytearray(content)
765 765
766 766 currentposition = end
767 767
768 768 newaddlist += addlist[currentposition:]
769 769
770 770 deltatext = b"".join(
771 771 struct.pack(b">lll", start, end, len(content)) + content
772 772 for start, end, content in x
773 773 )
774 774 return deltatext, newaddlist
775 775
776 776
777 777 def _splittopdir(f):
778 778 if b'/' in f:
779 779 dir, subpath = f.split(b'/', 1)
780 780 return dir + b'/', subpath
781 781 else:
782 782 return b'', f
783 783
784 784
785 785 _noop = lambda s: None
786 786
787 787
788 788 @interfaceutil.implementer(repository.imanifestdict)
789 789 class treemanifest(object):
790 790 def __init__(self, dir=b'', text=b''):
791 791 self._dir = dir
792 792 self._node = nullid
793 793 self._loadfunc = _noop
794 794 self._copyfunc = _noop
795 795 self._dirty = False
796 796 self._dirs = {}
797 797 self._lazydirs = {}
798 798 # Using _lazymanifest here is a little slower than plain old dicts
799 799 self._files = {}
800 800 self._flags = {}
801 801 if text:
802 802
803 803 def readsubtree(subdir, subm):
804 804 raise AssertionError(
805 805 b'treemanifest constructor only accepts flat manifests'
806 806 )
807 807
808 808 self.parse(text, readsubtree)
809 809 self._dirty = True # Mark flat manifest dirty after parsing
810 810
811 811 def _subpath(self, path):
812 812 return self._dir + path
813 813
814 814 def _loadalllazy(self):
815 815 selfdirs = self._dirs
816 816 for d, (path, node, readsubtree, docopy) in pycompat.iteritems(
817 817 self._lazydirs
818 818 ):
819 819 if docopy:
820 820 selfdirs[d] = readsubtree(path, node).copy()
821 821 else:
822 822 selfdirs[d] = readsubtree(path, node)
823 823 self._lazydirs = {}
824 824
825 825 def _loadlazy(self, d):
826 826 v = self._lazydirs.get(d)
827 827 if v:
828 828 path, node, readsubtree, docopy = v
829 829 if docopy:
830 830 self._dirs[d] = readsubtree(path, node).copy()
831 831 else:
832 832 self._dirs[d] = readsubtree(path, node)
833 833 del self._lazydirs[d]
834 834
835 835 def _loadchildrensetlazy(self, visit):
836 836 if not visit:
837 837 return None
838 838 if visit == b'all' or visit == b'this':
839 839 self._loadalllazy()
840 840 return None
841 841
842 842 loadlazy = self._loadlazy
843 843 for k in visit:
844 844 loadlazy(k + b'/')
845 845 return visit
846 846
847 847 def _loaddifflazy(self, t1, t2):
848 848 """load items in t1 and t2 if they're needed for diffing.
849 849
850 850 The criteria currently is:
851 851 - if it's not present in _lazydirs in either t1 or t2, load it in the
852 852 other (it may already be loaded or it may not exist, doesn't matter)
853 853 - if it's present in _lazydirs in both, compare the nodeid; if it
854 854 differs, load it in both
855 855 """
856 856 toloadlazy = []
857 857 for d, v1 in pycompat.iteritems(t1._lazydirs):
858 858 v2 = t2._lazydirs.get(d)
859 859 if not v2 or v2[1] != v1[1]:
860 860 toloadlazy.append(d)
861 861 for d, v1 in pycompat.iteritems(t2._lazydirs):
862 862 if d not in t1._lazydirs:
863 863 toloadlazy.append(d)
864 864
865 865 for d in toloadlazy:
866 866 t1._loadlazy(d)
867 867 t2._loadlazy(d)
868 868
869 869 def __len__(self):
870 870 self._load()
871 871 size = len(self._files)
872 872 self._loadalllazy()
873 873 for m in self._dirs.values():
874 874 size += m.__len__()
875 875 return size
876 876
877 877 def __nonzero__(self):
878 878 # Faster than "__len() != 0" since it avoids loading sub-manifests
879 879 return not self._isempty()
880 880
881 881 __bool__ = __nonzero__
882 882
883 883 def _isempty(self):
884 884 self._load() # for consistency; already loaded by all callers
885 885 # See if we can skip loading everything.
886 886 if self._files or (
887 887 self._dirs and any(not m._isempty() for m in self._dirs.values())
888 888 ):
889 889 return False
890 890 self._loadalllazy()
891 891 return not self._dirs or all(m._isempty() for m in self._dirs.values())
892 892
893 893 @encoding.strmethod
894 894 def __repr__(self):
895 895 return (
896 896 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
897 897 % (
898 898 self._dir,
899 899 hex(self._node),
900 900 bool(self._loadfunc is _noop),
901 901 self._dirty,
902 902 id(self),
903 903 )
904 904 )
905 905
906 906 def dir(self):
907 907 '''The directory that this tree manifest represents, including a
908 908 trailing '/'. Empty string for the repo root directory.'''
909 909 return self._dir
910 910
911 911 def node(self):
912 912 '''This node of this instance. nullid for unsaved instances. Should
913 913 be updated when the instance is read or written from a revlog.
914 914 '''
915 915 assert not self._dirty
916 916 return self._node
917 917
918 918 def setnode(self, node):
919 919 self._node = node
920 920 self._dirty = False
921 921
922 922 def iterentries(self):
923 923 self._load()
924 924 self._loadalllazy()
925 925 for p, n in sorted(
926 926 itertools.chain(self._dirs.items(), self._files.items())
927 927 ):
928 928 if p in self._files:
929 929 yield self._subpath(p), n, self._flags.get(p, b'')
930 930 else:
931 931 for x in n.iterentries():
932 932 yield x
933 933
934 934 def items(self):
935 935 self._load()
936 936 self._loadalllazy()
937 937 for p, n in sorted(
938 938 itertools.chain(self._dirs.items(), self._files.items())
939 939 ):
940 940 if p in self._files:
941 941 yield self._subpath(p), n
942 942 else:
943 943 for f, sn in pycompat.iteritems(n):
944 944 yield f, sn
945 945
946 946 iteritems = items
947 947
948 948 def iterkeys(self):
949 949 self._load()
950 950 self._loadalllazy()
951 951 for p in sorted(itertools.chain(self._dirs, self._files)):
952 952 if p in self._files:
953 953 yield self._subpath(p)
954 954 else:
955 955 for f in self._dirs[p]:
956 956 yield f
957 957
958 958 def keys(self):
959 959 return list(self.iterkeys())
960 960
961 961 def __iter__(self):
962 962 return self.iterkeys()
963 963
964 964 def __contains__(self, f):
965 965 if f is None:
966 966 return False
967 967 self._load()
968 968 dir, subpath = _splittopdir(f)
969 969 if dir:
970 970 self._loadlazy(dir)
971 971
972 972 if dir not in self._dirs:
973 973 return False
974 974
975 975 return self._dirs[dir].__contains__(subpath)
976 976 else:
977 977 return f in self._files
978 978
979 979 def get(self, f, default=None):
980 980 self._load()
981 981 dir, subpath = _splittopdir(f)
982 982 if dir:
983 983 self._loadlazy(dir)
984 984
985 985 if dir not in self._dirs:
986 986 return default
987 987 return self._dirs[dir].get(subpath, default)
988 988 else:
989 989 return self._files.get(f, default)
990 990
991 991 def __getitem__(self, f):
992 992 self._load()
993 993 dir, subpath = _splittopdir(f)
994 994 if dir:
995 995 self._loadlazy(dir)
996 996
997 997 return self._dirs[dir].__getitem__(subpath)
998 998 else:
999 999 return self._files[f]
1000 1000
1001 1001 def flags(self, f):
1002 1002 self._load()
1003 1003 dir, subpath = _splittopdir(f)
1004 1004 if dir:
1005 1005 self._loadlazy(dir)
1006 1006
1007 1007 if dir not in self._dirs:
1008 1008 return b''
1009 1009 return self._dirs[dir].flags(subpath)
1010 1010 else:
1011 1011 if f in self._lazydirs or f in self._dirs:
1012 1012 return b''
1013 1013 return self._flags.get(f, b'')
1014 1014
1015 1015 def find(self, f):
1016 1016 self._load()
1017 1017 dir, subpath = _splittopdir(f)
1018 1018 if dir:
1019 1019 self._loadlazy(dir)
1020 1020
1021 1021 return self._dirs[dir].find(subpath)
1022 1022 else:
1023 1023 return self._files[f], self._flags.get(f, b'')
1024 1024
1025 1025 def __delitem__(self, f):
1026 1026 self._load()
1027 1027 dir, subpath = _splittopdir(f)
1028 1028 if dir:
1029 1029 self._loadlazy(dir)
1030 1030
1031 1031 self._dirs[dir].__delitem__(subpath)
1032 1032 # If the directory is now empty, remove it
1033 1033 if self._dirs[dir]._isempty():
1034 1034 del self._dirs[dir]
1035 1035 else:
1036 1036 del self._files[f]
1037 1037 if f in self._flags:
1038 1038 del self._flags[f]
1039 1039 self._dirty = True
1040 1040
1041 1041 def __setitem__(self, f, n):
1042 1042 assert n is not None
1043 1043 self._load()
1044 1044 dir, subpath = _splittopdir(f)
1045 1045 if dir:
1046 1046 self._loadlazy(dir)
1047 1047 if dir not in self._dirs:
1048 1048 self._dirs[dir] = treemanifest(self._subpath(dir))
1049 1049 self._dirs[dir].__setitem__(subpath, n)
1050 1050 else:
1051 1051 # manifest nodes are either 20 bytes or 32 bytes,
1052 1052 # depending on the hash in use. An extra byte is
1053 1053 # occasionally used by hg, but won't ever be
1054 1054 # persisted. Trim to 21 or 33 bytes as appropriate.
1055 1055 trim = 21 if len(n) < 25 else 33
1056 1056 self._files[f] = n[:trim] # to match manifestdict's behavior
1057 1057 self._dirty = True
1058 1058
1059 1059 def _load(self):
1060 1060 if self._loadfunc is not _noop:
1061 1061 lf, self._loadfunc = self._loadfunc, _noop
1062 1062 lf(self)
1063 1063 elif self._copyfunc is not _noop:
1064 1064 cf, self._copyfunc = self._copyfunc, _noop
1065 1065 cf(self)
1066 1066
1067 1067 def setflag(self, f, flags):
1068 1068 """Set the flags (symlink, executable) for path f."""
1069 1069 self._load()
1070 1070 dir, subpath = _splittopdir(f)
1071 1071 if dir:
1072 1072 self._loadlazy(dir)
1073 1073 if dir not in self._dirs:
1074 1074 self._dirs[dir] = treemanifest(self._subpath(dir))
1075 1075 self._dirs[dir].setflag(subpath, flags)
1076 1076 else:
1077 1077 self._flags[f] = flags
1078 1078 self._dirty = True
1079 1079
1080 1080 def copy(self):
1081 1081 copy = treemanifest(self._dir)
1082 1082 copy._node = self._node
1083 1083 copy._dirty = self._dirty
1084 1084 if self._copyfunc is _noop:
1085 1085
1086 1086 def _copyfunc(s):
1087 1087 self._load()
1088 1088 s._lazydirs = {
1089 1089 d: (p, n, r, True)
1090 1090 for d, (p, n, r, c) in pycompat.iteritems(self._lazydirs)
1091 1091 }
1092 1092 sdirs = s._dirs
1093 1093 for d, v in pycompat.iteritems(self._dirs):
1094 1094 sdirs[d] = v.copy()
1095 1095 s._files = dict.copy(self._files)
1096 1096 s._flags = dict.copy(self._flags)
1097 1097
1098 1098 if self._loadfunc is _noop:
1099 1099 _copyfunc(copy)
1100 1100 else:
1101 1101 copy._copyfunc = _copyfunc
1102 1102 else:
1103 1103 copy._copyfunc = self._copyfunc
1104 1104 return copy
1105 1105
1106 1106 def filesnotin(self, m2, match=None):
1107 1107 '''Set of files in this manifest that are not in the other'''
1108 1108 if match and not match.always():
1109 1109 m1 = self._matches(match)
1110 1110 m2 = m2._matches(match)
1111 1111 return m1.filesnotin(m2)
1112 1112
1113 1113 files = set()
1114 1114
1115 1115 def _filesnotin(t1, t2):
1116 1116 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1117 1117 return
1118 1118 t1._load()
1119 1119 t2._load()
1120 1120 self._loaddifflazy(t1, t2)
1121 1121 for d, m1 in pycompat.iteritems(t1._dirs):
1122 1122 if d in t2._dirs:
1123 1123 m2 = t2._dirs[d]
1124 1124 _filesnotin(m1, m2)
1125 1125 else:
1126 1126 files.update(m1.iterkeys())
1127 1127
1128 1128 for fn in t1._files:
1129 1129 if fn not in t2._files:
1130 1130 files.add(t1._subpath(fn))
1131 1131
1132 1132 _filesnotin(self, m2)
1133 1133 return files
1134 1134
1135 1135 @propertycache
1136 1136 def _alldirs(self):
1137 1137 return pathutil.dirs(self)
1138 1138
1139 1139 def dirs(self):
1140 1140 return self._alldirs
1141 1141
1142 1142 def hasdir(self, dir):
1143 1143 self._load()
1144 1144 topdir, subdir = _splittopdir(dir)
1145 1145 if topdir:
1146 1146 self._loadlazy(topdir)
1147 1147 if topdir in self._dirs:
1148 1148 return self._dirs[topdir].hasdir(subdir)
1149 1149 return False
1150 1150 dirslash = dir + b'/'
1151 1151 return dirslash in self._dirs or dirslash in self._lazydirs
1152 1152
1153 1153 def walk(self, match):
1154 1154 '''Generates matching file names.
1155 1155
1156 1156 It also reports nonexistent files by marking them bad with match.bad().
1157 1157 '''
1158 1158 if match.always():
1159 1159 for f in iter(self):
1160 1160 yield f
1161 1161 return
1162 1162
1163 1163 fset = set(match.files())
1164 1164
1165 1165 for fn in self._walk(match):
1166 1166 if fn in fset:
1167 1167 # specified pattern is the exact name
1168 1168 fset.remove(fn)
1169 1169 yield fn
1170 1170
1171 1171 # for dirstate.walk, files=[''] means "walk the whole tree".
1172 1172 # follow that here, too
1173 1173 fset.discard(b'')
1174 1174
1175 1175 for fn in sorted(fset):
1176 1176 if not self.hasdir(fn):
1177 1177 match.bad(fn, None)
1178 1178
1179 1179 def _walk(self, match):
1180 1180 '''Recursively generates matching file names for walk().'''
1181 1181 visit = match.visitchildrenset(self._dir[:-1])
1182 1182 if not visit:
1183 1183 return
1184 1184
1185 1185 # yield this dir's files and walk its submanifests
1186 1186 self._load()
1187 1187 visit = self._loadchildrensetlazy(visit)
1188 1188 for p in sorted(list(self._dirs) + list(self._files)):
1189 1189 if p in self._files:
1190 1190 fullp = self._subpath(p)
1191 1191 if match(fullp):
1192 1192 yield fullp
1193 1193 else:
1194 1194 if not visit or p[:-1] in visit:
1195 1195 for f in self._dirs[p]._walk(match):
1196 1196 yield f
1197 1197
1198 1198 def _matches(self, match):
1199 1199 '''recursively generate a new manifest filtered by the match argument.
1200 1200 '''
1201 1201 if match.always():
1202 1202 return self.copy()
1203 1203 return self._matches_inner(match)
1204 1204
1205 1205 def _matches_inner(self, match):
1206 1206 if match.always():
1207 1207 return self.copy()
1208 1208
1209 1209 visit = match.visitchildrenset(self._dir[:-1])
1210 1210 if visit == b'all':
1211 1211 return self.copy()
1212 1212 ret = treemanifest(self._dir)
1213 1213 if not visit:
1214 1214 return ret
1215 1215
1216 1216 self._load()
1217 1217 for fn in self._files:
1218 1218 # While visitchildrenset *usually* lists only subdirs, this is
1219 1219 # actually up to the matcher and may have some files in the set().
1220 1220 # If visit == 'this', we should obviously look at the files in this
1221 1221 # directory; if visit is a set, and fn is in it, we should inspect
1222 1222 # fn (but no need to inspect things not in the set).
1223 1223 if visit != b'this' and fn not in visit:
1224 1224 continue
1225 1225 fullp = self._subpath(fn)
1226 1226 # visitchildrenset isn't perfect, we still need to call the regular
1227 1227 # matcher code to further filter results.
1228 1228 if not match(fullp):
1229 1229 continue
1230 1230 ret._files[fn] = self._files[fn]
1231 1231 if fn in self._flags:
1232 1232 ret._flags[fn] = self._flags[fn]
1233 1233
1234 1234 visit = self._loadchildrensetlazy(visit)
1235 1235 for dir, subm in pycompat.iteritems(self._dirs):
1236 1236 if visit and dir[:-1] not in visit:
1237 1237 continue
1238 1238 m = subm._matches_inner(match)
1239 1239 if not m._isempty():
1240 1240 ret._dirs[dir] = m
1241 1241
1242 1242 if not ret._isempty():
1243 1243 ret._dirty = True
1244 1244 return ret
1245 1245
1246 1246 def fastdelta(self, base, changes):
1247 1247 raise FastdeltaUnavailable()
1248 1248
1249 1249 def diff(self, m2, match=None, clean=False):
1250 1250 '''Finds changes between the current manifest and m2.
1251 1251
1252 1252 Args:
1253 1253 m2: the manifest to which this manifest should be compared.
1254 1254 clean: if true, include files unchanged between these manifests
1255 1255 with a None value in the returned dictionary.
1256 1256
1257 1257 The result is returned as a dict with filename as key and
1258 1258 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1259 1259 nodeid in the current/other manifest and fl1/fl2 is the flag
1260 1260 in the current/other manifest. Where the file does not exist,
1261 1261 the nodeid will be None and the flags will be the empty
1262 1262 string.
1263 1263 '''
1264 1264 if match and not match.always():
1265 1265 m1 = self._matches(match)
1266 1266 m2 = m2._matches(match)
1267 1267 return m1.diff(m2, clean=clean)
1268 1268 result = {}
1269 1269 emptytree = treemanifest()
1270 1270
1271 1271 def _iterativediff(t1, t2, stack):
1272 1272 """compares two tree manifests and append new tree-manifests which
1273 1273 needs to be compared to stack"""
1274 1274 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1275 1275 return
1276 1276 t1._load()
1277 1277 t2._load()
1278 1278 self._loaddifflazy(t1, t2)
1279 1279
1280 1280 for d, m1 in pycompat.iteritems(t1._dirs):
1281 1281 m2 = t2._dirs.get(d, emptytree)
1282 1282 stack.append((m1, m2))
1283 1283
1284 1284 for d, m2 in pycompat.iteritems(t2._dirs):
1285 1285 if d not in t1._dirs:
1286 1286 stack.append((emptytree, m2))
1287 1287
1288 1288 for fn, n1 in pycompat.iteritems(t1._files):
1289 1289 fl1 = t1._flags.get(fn, b'')
1290 1290 n2 = t2._files.get(fn, None)
1291 1291 fl2 = t2._flags.get(fn, b'')
1292 1292 if n1 != n2 or fl1 != fl2:
1293 1293 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1294 1294 elif clean:
1295 1295 result[t1._subpath(fn)] = None
1296 1296
1297 1297 for fn, n2 in pycompat.iteritems(t2._files):
1298 1298 if fn not in t1._files:
1299 1299 fl2 = t2._flags.get(fn, b'')
1300 1300 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1301 1301
1302 1302 stackls = []
1303 1303 _iterativediff(self, m2, stackls)
1304 1304 while stackls:
1305 1305 t1, t2 = stackls.pop()
1306 1306 # stackls is populated in the function call
1307 1307 _iterativediff(t1, t2, stackls)
1308 1308 return result
1309 1309
1310 1310 def unmodifiedsince(self, m2):
1311 1311 return not self._dirty and not m2._dirty and self._node == m2._node
1312 1312
1313 1313 def parse(self, text, readsubtree):
1314 1314 selflazy = self._lazydirs
1315 1315 subpath = self._subpath
1316 1316 for f, n, fl in _parse(text):
1317 1317 if fl == b't':
1318 1318 f = f + b'/'
1319 1319 # False below means "doesn't need to be copied" and can use the
1320 1320 # cached value from readsubtree directly.
1321 1321 selflazy[f] = (subpath(f), n, readsubtree, False)
1322 1322 elif b'/' in f:
1323 1323 # This is a flat manifest, so use __setitem__ and setflag rather
1324 1324 # than assigning directly to _files and _flags, so we can
1325 1325 # assign a path in a subdirectory, and to mark dirty (compared
1326 1326 # to nullid).
1327 1327 self[f] = n
1328 1328 if fl:
1329 1329 self.setflag(f, fl)
1330 1330 else:
1331 1331 # Assigning to _files and _flags avoids marking as dirty,
1332 1332 # and should be a little faster.
1333 1333 self._files[f] = n
1334 1334 if fl:
1335 1335 self._flags[f] = fl
1336 1336
1337 1337 def text(self):
1338 1338 """Get the full data of this manifest as a bytestring."""
1339 1339 self._load()
1340 1340 return _text(self.iterentries())
1341 1341
1342 1342 def dirtext(self):
1343 1343 """Get the full data of this directory as a bytestring. Make sure that
1344 1344 any submanifests have been written first, so their nodeids are correct.
1345 1345 """
1346 1346 self._load()
1347 1347 flags = self.flags
1348 1348 lazydirs = [
1349 1349 (d[:-1], v[1], b't') for d, v in pycompat.iteritems(self._lazydirs)
1350 1350 ]
1351 1351 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1352 1352 files = [(f, self._files[f], flags(f)) for f in self._files]
1353 1353 return _text(sorted(dirs + files + lazydirs))
1354 1354
1355 1355 def read(self, gettext, readsubtree):
1356 1356 def _load_for_read(s):
1357 1357 s.parse(gettext(), readsubtree)
1358 1358 s._dirty = False
1359 1359
1360 1360 self._loadfunc = _load_for_read
1361 1361
1362 1362 def writesubtrees(self, m1, m2, writesubtree, match):
1363 1363 self._load() # for consistency; should never have any effect here
1364 1364 m1._load()
1365 1365 m2._load()
1366 1366 emptytree = treemanifest()
1367 1367
1368 1368 def getnode(m, d):
1369 1369 ld = m._lazydirs.get(d)
1370 1370 if ld:
1371 1371 return ld[1]
1372 1372 return m._dirs.get(d, emptytree)._node
1373 1373
1374 1374 # let's skip investigating things that `match` says we do not need.
1375 1375 visit = match.visitchildrenset(self._dir[:-1])
1376 1376 visit = self._loadchildrensetlazy(visit)
1377 1377 if visit == b'this' or visit == b'all':
1378 1378 visit = None
1379 1379 for d, subm in pycompat.iteritems(self._dirs):
1380 1380 if visit and d[:-1] not in visit:
1381 1381 continue
1382 1382 subp1 = getnode(m1, d)
1383 1383 subp2 = getnode(m2, d)
1384 1384 if subp1 == nullid:
1385 1385 subp1, subp2 = subp2, subp1
1386 1386 writesubtree(subm, subp1, subp2, match)
1387 1387
1388 1388 def walksubtrees(self, matcher=None):
1389 1389 """Returns an iterator of the subtrees of this manifest, including this
1390 1390 manifest itself.
1391 1391
1392 1392 If `matcher` is provided, it only returns subtrees that match.
1393 1393 """
1394 1394 if matcher and not matcher.visitdir(self._dir[:-1]):
1395 1395 return
1396 1396 if not matcher or matcher(self._dir[:-1]):
1397 1397 yield self
1398 1398
1399 1399 self._load()
1400 1400 # OPT: use visitchildrenset to avoid loading everything.
1401 1401 self._loadalllazy()
1402 1402 for d, subm in pycompat.iteritems(self._dirs):
1403 1403 for subtree in subm.walksubtrees(matcher=matcher):
1404 1404 yield subtree
1405 1405
1406 1406
1407 1407 class manifestfulltextcache(util.lrucachedict):
1408 1408 """File-backed LRU cache for the manifest cache
1409 1409
1410 1410 File consists of entries, up to EOF:
1411 1411
1412 1412 - 20 bytes node, 4 bytes length, <length> manifest data
1413 1413
1414 1414 These are written in reverse cache order (oldest to newest).
1415 1415
1416 1416 """
1417 1417
1418 1418 _file = b'manifestfulltextcache'
1419 1419
1420 1420 def __init__(self, max):
1421 1421 super(manifestfulltextcache, self).__init__(max)
1422 1422 self._dirty = False
1423 1423 self._read = False
1424 1424 self._opener = None
1425 1425
1426 1426 def read(self):
1427 1427 if self._read or self._opener is None:
1428 1428 return
1429 1429
1430 1430 try:
1431 1431 with self._opener(self._file) as fp:
1432 1432 set = super(manifestfulltextcache, self).__setitem__
1433 1433 # ignore trailing data, this is a cache, corruption is skipped
1434 1434 while True:
1435 # TODO do we need to do work here for sha1 portability?
1435 1436 node = fp.read(20)
1436 1437 if len(node) < 20:
1437 1438 break
1438 1439 try:
1439 1440 size = struct.unpack(b'>L', fp.read(4))[0]
1440 1441 except struct.error:
1441 1442 break
1442 1443 value = bytearray(fp.read(size))
1443 1444 if len(value) != size:
1444 1445 break
1445 1446 set(node, value)
1446 1447 except IOError:
1447 1448 # the file is allowed to be missing
1448 1449 pass
1449 1450
1450 1451 self._read = True
1451 1452 self._dirty = False
1452 1453
1453 1454 def write(self):
1454 1455 if not self._dirty or self._opener is None:
1455 1456 return
1456 1457 # rotate backwards to the first used node
1457 1458 with self._opener(
1458 1459 self._file, b'w', atomictemp=True, checkambig=True
1459 1460 ) as fp:
1460 1461 node = self._head.prev
1461 1462 while True:
1462 1463 if node.key in self._cache:
1463 1464 fp.write(node.key)
1464 1465 fp.write(struct.pack(b'>L', len(node.value)))
1465 1466 fp.write(node.value)
1466 1467 if node is self._head:
1467 1468 break
1468 1469 node = node.prev
1469 1470
1470 1471 def __len__(self):
1471 1472 if not self._read:
1472 1473 self.read()
1473 1474 return super(manifestfulltextcache, self).__len__()
1474 1475
1475 1476 def __contains__(self, k):
1476 1477 if not self._read:
1477 1478 self.read()
1478 1479 return super(manifestfulltextcache, self).__contains__(k)
1479 1480
1480 1481 def __iter__(self):
1481 1482 if not self._read:
1482 1483 self.read()
1483 1484 return super(manifestfulltextcache, self).__iter__()
1484 1485
1485 1486 def __getitem__(self, k):
1486 1487 if not self._read:
1487 1488 self.read()
1488 1489 # the cache lru order can change on read
1489 1490 setdirty = self._cache.get(k) is not self._head
1490 1491 value = super(manifestfulltextcache, self).__getitem__(k)
1491 1492 if setdirty:
1492 1493 self._dirty = True
1493 1494 return value
1494 1495
1495 1496 def __setitem__(self, k, v):
1496 1497 if not self._read:
1497 1498 self.read()
1498 1499 super(manifestfulltextcache, self).__setitem__(k, v)
1499 1500 self._dirty = True
1500 1501
1501 1502 def __delitem__(self, k):
1502 1503 if not self._read:
1503 1504 self.read()
1504 1505 super(manifestfulltextcache, self).__delitem__(k)
1505 1506 self._dirty = True
1506 1507
1507 1508 def get(self, k, default=None):
1508 1509 if not self._read:
1509 1510 self.read()
1510 1511 return super(manifestfulltextcache, self).get(k, default=default)
1511 1512
1512 1513 def clear(self, clear_persisted_data=False):
1513 1514 super(manifestfulltextcache, self).clear()
1514 1515 if clear_persisted_data:
1515 1516 self._dirty = True
1516 1517 self.write()
1517 1518 self._read = False
1518 1519
1519 1520
1520 1521 # and upper bound of what we expect from compression
1521 1522 # (real live value seems to be "3")
1522 1523 MAXCOMPRESSION = 3
1523 1524
1524 1525
1525 1526 class FastdeltaUnavailable(Exception):
1526 1527 """Exception raised when fastdelta isn't usable on a manifest."""
1527 1528
1528 1529
1529 1530 @interfaceutil.implementer(repository.imanifeststorage)
1530 1531 class manifestrevlog(object):
1531 1532 '''A revlog that stores manifest texts. This is responsible for caching the
1532 1533 full-text manifest contents.
1533 1534 '''
1534 1535
1535 1536 def __init__(
1536 1537 self,
1537 1538 opener,
1538 1539 tree=b'',
1539 1540 dirlogcache=None,
1540 1541 indexfile=None,
1541 1542 treemanifest=False,
1542 1543 ):
1543 1544 """Constructs a new manifest revlog
1544 1545
1545 1546 `indexfile` - used by extensions to have two manifests at once, like
1546 1547 when transitioning between flatmanifeset and treemanifests.
1547 1548
1548 1549 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1549 1550 options can also be used to make this a tree manifest revlog. The opener
1550 1551 option takes precedence, so if it is set to True, we ignore whatever
1551 1552 value is passed in to the constructor.
1552 1553 """
1553 1554 # During normal operations, we expect to deal with not more than four
1554 1555 # revs at a time (such as during commit --amend). When rebasing large
1555 1556 # stacks of commits, the number can go up, hence the config knob below.
1556 1557 cachesize = 4
1557 1558 optiontreemanifest = False
1558 1559 opts = getattr(opener, 'options', None)
1559 1560 if opts is not None:
1560 1561 cachesize = opts.get(b'manifestcachesize', cachesize)
1561 1562 optiontreemanifest = opts.get(b'treemanifest', False)
1562 1563
1563 1564 self._treeondisk = optiontreemanifest or treemanifest
1564 1565
1565 1566 self._fulltextcache = manifestfulltextcache(cachesize)
1566 1567
1567 1568 if tree:
1568 1569 assert self._treeondisk, b'opts is %r' % opts
1569 1570
1570 1571 if indexfile is None:
1571 1572 indexfile = b'00manifest.i'
1572 1573 if tree:
1573 1574 indexfile = b"meta/" + tree + indexfile
1574 1575
1575 1576 self.tree = tree
1576 1577
1577 1578 # The dirlogcache is kept on the root manifest log
1578 1579 if tree:
1579 1580 self._dirlogcache = dirlogcache
1580 1581 else:
1581 1582 self._dirlogcache = {b'': self}
1582 1583
1583 1584 self._revlog = revlog.revlog(
1584 1585 opener,
1585 1586 indexfile,
1586 1587 # only root indexfile is cached
1587 1588 checkambig=not bool(tree),
1588 1589 mmaplargeindex=True,
1589 1590 upperboundcomp=MAXCOMPRESSION,
1590 1591 )
1591 1592
1592 1593 self.index = self._revlog.index
1593 1594 self.version = self._revlog.version
1594 1595 self._generaldelta = self._revlog._generaldelta
1595 1596
1596 1597 def _setupmanifestcachehooks(self, repo):
1597 1598 """Persist the manifestfulltextcache on lock release"""
1598 1599 if not util.safehasattr(repo, b'_wlockref'):
1599 1600 return
1600 1601
1601 1602 self._fulltextcache._opener = repo.wcachevfs
1602 1603 if repo._currentlock(repo._wlockref) is None:
1603 1604 return
1604 1605
1605 1606 reporef = weakref.ref(repo)
1606 1607 manifestrevlogref = weakref.ref(self)
1607 1608
1608 1609 def persistmanifestcache(success):
1609 1610 # Repo is in an unknown state, do not persist.
1610 1611 if not success:
1611 1612 return
1612 1613
1613 1614 repo = reporef()
1614 1615 self = manifestrevlogref()
1615 1616 if repo is None or self is None:
1616 1617 return
1617 1618 if repo.manifestlog.getstorage(b'') is not self:
1618 1619 # there's a different manifest in play now, abort
1619 1620 return
1620 1621 self._fulltextcache.write()
1621 1622
1622 1623 repo._afterlock(persistmanifestcache)
1623 1624
1624 1625 @property
1625 1626 def fulltextcache(self):
1626 1627 return self._fulltextcache
1627 1628
1628 1629 def clearcaches(self, clear_persisted_data=False):
1629 1630 self._revlog.clearcaches()
1630 1631 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1631 1632 self._dirlogcache = {self.tree: self}
1632 1633
1633 1634 def dirlog(self, d):
1634 1635 if d:
1635 1636 assert self._treeondisk
1636 1637 if d not in self._dirlogcache:
1637 1638 mfrevlog = manifestrevlog(
1638 1639 self.opener, d, self._dirlogcache, treemanifest=self._treeondisk
1639 1640 )
1640 1641 self._dirlogcache[d] = mfrevlog
1641 1642 return self._dirlogcache[d]
1642 1643
1643 1644 def add(
1644 1645 self,
1645 1646 m,
1646 1647 transaction,
1647 1648 link,
1648 1649 p1,
1649 1650 p2,
1650 1651 added,
1651 1652 removed,
1652 1653 readtree=None,
1653 1654 match=None,
1654 1655 ):
1655 1656 try:
1656 1657 if p1 not in self.fulltextcache:
1657 1658 raise FastdeltaUnavailable()
1658 1659 # If our first parent is in the manifest cache, we can
1659 1660 # compute a delta here using properties we know about the
1660 1661 # manifest up-front, which may save time later for the
1661 1662 # revlog layer.
1662 1663
1663 1664 _checkforbidden(added)
1664 1665 # combine the changed lists into one sorted iterator
1665 1666 work = heapq.merge(
1666 1667 [(x, False) for x in sorted(added)],
1667 1668 [(x, True) for x in sorted(removed)],
1668 1669 )
1669 1670
1670 1671 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1671 1672 cachedelta = self._revlog.rev(p1), deltatext
1672 1673 text = util.buffer(arraytext)
1673 1674 n = self._revlog.addrevision(
1674 1675 text, transaction, link, p1, p2, cachedelta
1675 1676 )
1676 1677 except FastdeltaUnavailable:
1677 1678 # The first parent manifest isn't already loaded or the
1678 1679 # manifest implementation doesn't support fastdelta, so
1679 1680 # we'll just encode a fulltext of the manifest and pass
1680 1681 # that through to the revlog layer, and let it handle the
1681 1682 # delta process.
1682 1683 if self._treeondisk:
1683 1684 assert readtree, b"readtree must be set for treemanifest writes"
1684 1685 assert match, b"match must be specified for treemanifest writes"
1685 1686 m1 = readtree(self.tree, p1)
1686 1687 m2 = readtree(self.tree, p2)
1687 1688 n = self._addtree(
1688 1689 m, transaction, link, m1, m2, readtree, match=match
1689 1690 )
1690 1691 arraytext = None
1691 1692 else:
1692 1693 text = m.text()
1693 1694 n = self._revlog.addrevision(text, transaction, link, p1, p2)
1694 1695 arraytext = bytearray(text)
1695 1696
1696 1697 if arraytext is not None:
1697 1698 self.fulltextcache[n] = arraytext
1698 1699
1699 1700 return n
1700 1701
1701 1702 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1702 1703 # If the manifest is unchanged compared to one parent,
1703 1704 # don't write a new revision
1704 1705 if self.tree != b'' and (
1705 1706 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1706 1707 ):
1707 1708 return m.node()
1708 1709
1709 1710 def writesubtree(subm, subp1, subp2, match):
1710 1711 sublog = self.dirlog(subm.dir())
1711 1712 sublog.add(
1712 1713 subm,
1713 1714 transaction,
1714 1715 link,
1715 1716 subp1,
1716 1717 subp2,
1717 1718 None,
1718 1719 None,
1719 1720 readtree=readtree,
1720 1721 match=match,
1721 1722 )
1722 1723
1723 1724 m.writesubtrees(m1, m2, writesubtree, match)
1724 1725 text = m.dirtext()
1725 1726 n = None
1726 1727 if self.tree != b'':
1727 1728 # Double-check whether contents are unchanged to one parent
1728 1729 if text == m1.dirtext():
1729 1730 n = m1.node()
1730 1731 elif text == m2.dirtext():
1731 1732 n = m2.node()
1732 1733
1733 1734 if not n:
1734 1735 n = self._revlog.addrevision(
1735 1736 text, transaction, link, m1.node(), m2.node()
1736 1737 )
1737 1738
1738 1739 # Save nodeid so parent manifest can calculate its nodeid
1739 1740 m.setnode(n)
1740 1741 return n
1741 1742
1742 1743 def __len__(self):
1743 1744 return len(self._revlog)
1744 1745
1745 1746 def __iter__(self):
1746 1747 return self._revlog.__iter__()
1747 1748
1748 1749 def rev(self, node):
1749 1750 return self._revlog.rev(node)
1750 1751
1751 1752 def node(self, rev):
1752 1753 return self._revlog.node(rev)
1753 1754
1754 1755 def lookup(self, value):
1755 1756 return self._revlog.lookup(value)
1756 1757
1757 1758 def parentrevs(self, rev):
1758 1759 return self._revlog.parentrevs(rev)
1759 1760
1760 1761 def parents(self, node):
1761 1762 return self._revlog.parents(node)
1762 1763
1763 1764 def linkrev(self, rev):
1764 1765 return self._revlog.linkrev(rev)
1765 1766
1766 1767 def checksize(self):
1767 1768 return self._revlog.checksize()
1768 1769
1769 1770 def revision(self, node, _df=None, raw=False):
1770 1771 return self._revlog.revision(node, _df=_df, raw=raw)
1771 1772
1772 1773 def rawdata(self, node, _df=None):
1773 1774 return self._revlog.rawdata(node, _df=_df)
1774 1775
1775 1776 def revdiff(self, rev1, rev2):
1776 1777 return self._revlog.revdiff(rev1, rev2)
1777 1778
1778 1779 def cmp(self, node, text):
1779 1780 return self._revlog.cmp(node, text)
1780 1781
1781 1782 def deltaparent(self, rev):
1782 1783 return self._revlog.deltaparent(rev)
1783 1784
1784 1785 def emitrevisions(
1785 1786 self,
1786 1787 nodes,
1787 1788 nodesorder=None,
1788 1789 revisiondata=False,
1789 1790 assumehaveparentrevisions=False,
1790 1791 deltamode=repository.CG_DELTAMODE_STD,
1791 1792 ):
1792 1793 return self._revlog.emitrevisions(
1793 1794 nodes,
1794 1795 nodesorder=nodesorder,
1795 1796 revisiondata=revisiondata,
1796 1797 assumehaveparentrevisions=assumehaveparentrevisions,
1797 1798 deltamode=deltamode,
1798 1799 )
1799 1800
1800 1801 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1801 1802 return self._revlog.addgroup(
1802 1803 deltas, linkmapper, transaction, addrevisioncb=addrevisioncb
1803 1804 )
1804 1805
1805 1806 def rawsize(self, rev):
1806 1807 return self._revlog.rawsize(rev)
1807 1808
1808 1809 def getstrippoint(self, minlink):
1809 1810 return self._revlog.getstrippoint(minlink)
1810 1811
1811 1812 def strip(self, minlink, transaction):
1812 1813 return self._revlog.strip(minlink, transaction)
1813 1814
1814 1815 def files(self):
1815 1816 return self._revlog.files()
1816 1817
1817 1818 def clone(self, tr, destrevlog, **kwargs):
1818 1819 if not isinstance(destrevlog, manifestrevlog):
1819 1820 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1820 1821
1821 1822 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1822 1823
1823 1824 def storageinfo(
1824 1825 self,
1825 1826 exclusivefiles=False,
1826 1827 sharedfiles=False,
1827 1828 revisionscount=False,
1828 1829 trackedsize=False,
1829 1830 storedsize=False,
1830 1831 ):
1831 1832 return self._revlog.storageinfo(
1832 1833 exclusivefiles=exclusivefiles,
1833 1834 sharedfiles=sharedfiles,
1834 1835 revisionscount=revisionscount,
1835 1836 trackedsize=trackedsize,
1836 1837 storedsize=storedsize,
1837 1838 )
1838 1839
1839 1840 @property
1840 1841 def indexfile(self):
1841 1842 return self._revlog.indexfile
1842 1843
1843 1844 @indexfile.setter
1844 1845 def indexfile(self, value):
1845 1846 self._revlog.indexfile = value
1846 1847
1847 1848 @property
1848 1849 def opener(self):
1849 1850 return self._revlog.opener
1850 1851
1851 1852 @opener.setter
1852 1853 def opener(self, value):
1853 1854 self._revlog.opener = value
1854 1855
1855 1856
1856 1857 @interfaceutil.implementer(repository.imanifestlog)
1857 1858 class manifestlog(object):
1858 1859 """A collection class representing the collection of manifest snapshots
1859 1860 referenced by commits in the repository.
1860 1861
1861 1862 In this situation, 'manifest' refers to the abstract concept of a snapshot
1862 1863 of the list of files in the given commit. Consumers of the output of this
1863 1864 class do not care about the implementation details of the actual manifests
1864 1865 they receive (i.e. tree or flat or lazily loaded, etc)."""
1865 1866
1866 1867 def __init__(self, opener, repo, rootstore, narrowmatch):
1867 1868 usetreemanifest = False
1868 1869 cachesize = 4
1869 1870
1870 1871 opts = getattr(opener, 'options', None)
1871 1872 if opts is not None:
1872 1873 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1873 1874 cachesize = opts.get(b'manifestcachesize', cachesize)
1874 1875
1875 1876 self._treemanifests = usetreemanifest
1876 1877
1877 1878 self._rootstore = rootstore
1878 1879 self._rootstore._setupmanifestcachehooks(repo)
1879 1880 self._narrowmatch = narrowmatch
1880 1881
1881 1882 # A cache of the manifestctx or treemanifestctx for each directory
1882 1883 self._dirmancache = {}
1883 1884 self._dirmancache[b''] = util.lrucachedict(cachesize)
1884 1885
1885 1886 self._cachesize = cachesize
1886 1887
1887 1888 def __getitem__(self, node):
1888 1889 """Retrieves the manifest instance for the given node. Throws a
1889 1890 LookupError if not found.
1890 1891 """
1891 1892 return self.get(b'', node)
1892 1893
1893 1894 def get(self, tree, node, verify=True):
1894 1895 """Retrieves the manifest instance for the given node. Throws a
1895 1896 LookupError if not found.
1896 1897
1897 1898 `verify` - if True an exception will be thrown if the node is not in
1898 1899 the revlog
1899 1900 """
1900 1901 if node in self._dirmancache.get(tree, ()):
1901 1902 return self._dirmancache[tree][node]
1902 1903
1903 1904 if not self._narrowmatch.always():
1904 1905 if not self._narrowmatch.visitdir(tree[:-1]):
1905 1906 return excludeddirmanifestctx(tree, node)
1906 1907 if tree:
1907 1908 if self._rootstore._treeondisk:
1908 1909 if verify:
1909 1910 # Side-effect is LookupError is raised if node doesn't
1910 1911 # exist.
1911 1912 self.getstorage(tree).rev(node)
1912 1913
1913 1914 m = treemanifestctx(self, tree, node)
1914 1915 else:
1915 1916 raise error.Abort(
1916 1917 _(
1917 1918 b"cannot ask for manifest directory '%s' in a flat "
1918 1919 b"manifest"
1919 1920 )
1920 1921 % tree
1921 1922 )
1922 1923 else:
1923 1924 if verify:
1924 1925 # Side-effect is LookupError is raised if node doesn't exist.
1925 1926 self._rootstore.rev(node)
1926 1927
1927 1928 if self._treemanifests:
1928 1929 m = treemanifestctx(self, b'', node)
1929 1930 else:
1930 1931 m = manifestctx(self, node)
1931 1932
1932 1933 if node != nullid:
1933 1934 mancache = self._dirmancache.get(tree)
1934 1935 if not mancache:
1935 1936 mancache = util.lrucachedict(self._cachesize)
1936 1937 self._dirmancache[tree] = mancache
1937 1938 mancache[node] = m
1938 1939 return m
1939 1940
1940 1941 def getstorage(self, tree):
1941 1942 return self._rootstore.dirlog(tree)
1942 1943
1943 1944 def clearcaches(self, clear_persisted_data=False):
1944 1945 self._dirmancache.clear()
1945 1946 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
1946 1947
1947 1948 def rev(self, node):
1948 1949 return self._rootstore.rev(node)
1949 1950
1950 1951
1951 1952 @interfaceutil.implementer(repository.imanifestrevisionwritable)
1952 1953 class memmanifestctx(object):
1953 1954 def __init__(self, manifestlog):
1954 1955 self._manifestlog = manifestlog
1955 1956 self._manifestdict = manifestdict()
1956 1957
1957 1958 def _storage(self):
1958 1959 return self._manifestlog.getstorage(b'')
1959 1960
1960 1961 def copy(self):
1961 1962 memmf = memmanifestctx(self._manifestlog)
1962 1963 memmf._manifestdict = self.read().copy()
1963 1964 return memmf
1964 1965
1965 1966 def read(self):
1966 1967 return self._manifestdict
1967 1968
1968 1969 def write(self, transaction, link, p1, p2, added, removed, match=None):
1969 1970 return self._storage().add(
1970 1971 self._manifestdict,
1971 1972 transaction,
1972 1973 link,
1973 1974 p1,
1974 1975 p2,
1975 1976 added,
1976 1977 removed,
1977 1978 match=match,
1978 1979 )
1979 1980
1980 1981
1981 1982 @interfaceutil.implementer(repository.imanifestrevisionstored)
1982 1983 class manifestctx(object):
1983 1984 """A class representing a single revision of a manifest, including its
1984 1985 contents, its parent revs, and its linkrev.
1985 1986 """
1986 1987
1987 1988 def __init__(self, manifestlog, node):
1988 1989 self._manifestlog = manifestlog
1989 1990 self._data = None
1990 1991
1991 1992 self._node = node
1992 1993
1993 1994 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1994 1995 # but let's add it later when something needs it and we can load it
1995 1996 # lazily.
1996 1997 # self.p1, self.p2 = store.parents(node)
1997 1998 # rev = store.rev(node)
1998 1999 # self.linkrev = store.linkrev(rev)
1999 2000
2000 2001 def _storage(self):
2001 2002 return self._manifestlog.getstorage(b'')
2002 2003
2003 2004 def node(self):
2004 2005 return self._node
2005 2006
2006 2007 def copy(self):
2007 2008 memmf = memmanifestctx(self._manifestlog)
2008 2009 memmf._manifestdict = self.read().copy()
2009 2010 return memmf
2010 2011
2011 2012 @propertycache
2012 2013 def parents(self):
2013 2014 return self._storage().parents(self._node)
2014 2015
2015 2016 def read(self):
2016 2017 if self._data is None:
2017 2018 if self._node == nullid:
2018 2019 self._data = manifestdict()
2019 2020 else:
2020 2021 store = self._storage()
2021 2022 if self._node in store.fulltextcache:
2022 2023 text = pycompat.bytestr(store.fulltextcache[self._node])
2023 2024 else:
2024 2025 text = store.revision(self._node)
2025 2026 arraytext = bytearray(text)
2026 2027 store.fulltextcache[self._node] = arraytext
2027 2028 self._data = manifestdict(text)
2028 2029 return self._data
2029 2030
2030 2031 def readfast(self, shallow=False):
2031 2032 '''Calls either readdelta or read, based on which would be less work.
2032 2033 readdelta is called if the delta is against the p1, and therefore can be
2033 2034 read quickly.
2034 2035
2035 2036 If `shallow` is True, nothing changes since this is a flat manifest.
2036 2037 '''
2037 2038 store = self._storage()
2038 2039 r = store.rev(self._node)
2039 2040 deltaparent = store.deltaparent(r)
2040 2041 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2041 2042 return self.readdelta()
2042 2043 return self.read()
2043 2044
2044 2045 def readdelta(self, shallow=False):
2045 2046 '''Returns a manifest containing just the entries that are present
2046 2047 in this manifest, but not in its p1 manifest. This is efficient to read
2047 2048 if the revlog delta is already p1.
2048 2049
2049 2050 Changing the value of `shallow` has no effect on flat manifests.
2050 2051 '''
2051 2052 store = self._storage()
2052 2053 r = store.rev(self._node)
2053 2054 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2054 2055 return manifestdict(d)
2055 2056
2056 2057 def find(self, key):
2057 2058 return self.read().find(key)
2058 2059
2059 2060
2060 2061 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2061 2062 class memtreemanifestctx(object):
2062 2063 def __init__(self, manifestlog, dir=b''):
2063 2064 self._manifestlog = manifestlog
2064 2065 self._dir = dir
2065 2066 self._treemanifest = treemanifest()
2066 2067
2067 2068 def _storage(self):
2068 2069 return self._manifestlog.getstorage(b'')
2069 2070
2070 2071 def copy(self):
2071 2072 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2072 2073 memmf._treemanifest = self._treemanifest.copy()
2073 2074 return memmf
2074 2075
2075 2076 def read(self):
2076 2077 return self._treemanifest
2077 2078
2078 2079 def write(self, transaction, link, p1, p2, added, removed, match=None):
2079 2080 def readtree(dir, node):
2080 2081 return self._manifestlog.get(dir, node).read()
2081 2082
2082 2083 return self._storage().add(
2083 2084 self._treemanifest,
2084 2085 transaction,
2085 2086 link,
2086 2087 p1,
2087 2088 p2,
2088 2089 added,
2089 2090 removed,
2090 2091 readtree=readtree,
2091 2092 match=match,
2092 2093 )
2093 2094
2094 2095
2095 2096 @interfaceutil.implementer(repository.imanifestrevisionstored)
2096 2097 class treemanifestctx(object):
2097 2098 def __init__(self, manifestlog, dir, node):
2098 2099 self._manifestlog = manifestlog
2099 2100 self._dir = dir
2100 2101 self._data = None
2101 2102
2102 2103 self._node = node
2103 2104
2104 2105 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2105 2106 # we can instantiate treemanifestctx objects for directories we don't
2106 2107 # have on disk.
2107 2108 # self.p1, self.p2 = store.parents(node)
2108 2109 # rev = store.rev(node)
2109 2110 # self.linkrev = store.linkrev(rev)
2110 2111
2111 2112 def _storage(self):
2112 2113 narrowmatch = self._manifestlog._narrowmatch
2113 2114 if not narrowmatch.always():
2114 2115 if not narrowmatch.visitdir(self._dir[:-1]):
2115 2116 return excludedmanifestrevlog(self._dir)
2116 2117 return self._manifestlog.getstorage(self._dir)
2117 2118
2118 2119 def read(self):
2119 2120 if self._data is None:
2120 2121 store = self._storage()
2121 2122 if self._node == nullid:
2122 2123 self._data = treemanifest()
2123 2124 # TODO accessing non-public API
2124 2125 elif store._treeondisk:
2125 2126 m = treemanifest(dir=self._dir)
2126 2127
2127 2128 def gettext():
2128 2129 return store.revision(self._node)
2129 2130
2130 2131 def readsubtree(dir, subm):
2131 2132 # Set verify to False since we need to be able to create
2132 2133 # subtrees for trees that don't exist on disk.
2133 2134 return self._manifestlog.get(dir, subm, verify=False).read()
2134 2135
2135 2136 m.read(gettext, readsubtree)
2136 2137 m.setnode(self._node)
2137 2138 self._data = m
2138 2139 else:
2139 2140 if self._node in store.fulltextcache:
2140 2141 text = pycompat.bytestr(store.fulltextcache[self._node])
2141 2142 else:
2142 2143 text = store.revision(self._node)
2143 2144 arraytext = bytearray(text)
2144 2145 store.fulltextcache[self._node] = arraytext
2145 2146 self._data = treemanifest(dir=self._dir, text=text)
2146 2147
2147 2148 return self._data
2148 2149
2149 2150 def node(self):
2150 2151 return self._node
2151 2152
2152 2153 def copy(self):
2153 2154 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2154 2155 memmf._treemanifest = self.read().copy()
2155 2156 return memmf
2156 2157
2157 2158 @propertycache
2158 2159 def parents(self):
2159 2160 return self._storage().parents(self._node)
2160 2161
2161 2162 def readdelta(self, shallow=False):
2162 2163 '''Returns a manifest containing just the entries that are present
2163 2164 in this manifest, but not in its p1 manifest. This is efficient to read
2164 2165 if the revlog delta is already p1.
2165 2166
2166 2167 If `shallow` is True, this will read the delta for this directory,
2167 2168 without recursively reading subdirectory manifests. Instead, any
2168 2169 subdirectory entry will be reported as it appears in the manifest, i.e.
2169 2170 the subdirectory will be reported among files and distinguished only by
2170 2171 its 't' flag.
2171 2172 '''
2172 2173 store = self._storage()
2173 2174 if shallow:
2174 2175 r = store.rev(self._node)
2175 2176 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2176 2177 return manifestdict(d)
2177 2178 else:
2178 2179 # Need to perform a slow delta
2179 2180 r0 = store.deltaparent(store.rev(self._node))
2180 2181 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2181 2182 m1 = self.read()
2182 2183 md = treemanifest(dir=self._dir)
2183 2184 for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
2184 2185 if n1:
2185 2186 md[f] = n1
2186 2187 if fl1:
2187 2188 md.setflag(f, fl1)
2188 2189 return md
2189 2190
2190 2191 def readfast(self, shallow=False):
2191 2192 '''Calls either readdelta or read, based on which would be less work.
2192 2193 readdelta is called if the delta is against the p1, and therefore can be
2193 2194 read quickly.
2194 2195
2195 2196 If `shallow` is True, it only returns the entries from this manifest,
2196 2197 and not any submanifests.
2197 2198 '''
2198 2199 store = self._storage()
2199 2200 r = store.rev(self._node)
2200 2201 deltaparent = store.deltaparent(r)
2201 2202 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2202 2203 return self.readdelta(shallow=shallow)
2203 2204
2204 2205 if shallow:
2205 2206 return manifestdict(store.revision(self._node))
2206 2207 else:
2207 2208 return self.read()
2208 2209
2209 2210 def find(self, key):
2210 2211 return self.read().find(key)
2211 2212
2212 2213
2213 2214 class excludeddir(treemanifest):
2214 2215 """Stand-in for a directory that is excluded from the repository.
2215 2216
2216 2217 With narrowing active on a repository that uses treemanifests,
2217 2218 some of the directory revlogs will be excluded from the resulting
2218 2219 clone. This is a huge storage win for clients, but means we need
2219 2220 some sort of pseudo-manifest to surface to internals so we can
2220 2221 detect a merge conflict outside the narrowspec. That's what this
2221 2222 class is: it stands in for a directory whose node is known, but
2222 2223 whose contents are unknown.
2223 2224 """
2224 2225
2225 2226 def __init__(self, dir, node):
2226 2227 super(excludeddir, self).__init__(dir)
2227 2228 self._node = node
2228 2229 # Add an empty file, which will be included by iterators and such,
2229 2230 # appearing as the directory itself (i.e. something like "dir/")
2230 2231 self._files[b''] = node
2231 2232 self._flags[b''] = b't'
2232 2233
2233 2234 # Manifests outside the narrowspec should never be modified, so avoid
2234 2235 # copying. This makes a noticeable difference when there are very many
2235 2236 # directories outside the narrowspec. Also, it makes sense for the copy to
2236 2237 # be of the same type as the original, which would not happen with the
2237 2238 # super type's copy().
2238 2239 def copy(self):
2239 2240 return self
2240 2241
2241 2242
2242 2243 class excludeddirmanifestctx(treemanifestctx):
2243 2244 """context wrapper for excludeddir - see that docstring for rationale"""
2244 2245
2245 2246 def __init__(self, dir, node):
2246 2247 self._dir = dir
2247 2248 self._node = node
2248 2249
2249 2250 def read(self):
2250 2251 return excludeddir(self._dir, self._node)
2251 2252
2252 2253 def write(self, *args):
2253 2254 raise error.ProgrammingError(
2254 2255 b'attempt to write manifest from excluded dir %s' % self._dir
2255 2256 )
2256 2257
2257 2258
2258 2259 class excludedmanifestrevlog(manifestrevlog):
2259 2260 """Stand-in for excluded treemanifest revlogs.
2260 2261
2261 2262 When narrowing is active on a treemanifest repository, we'll have
2262 2263 references to directories we can't see due to the revlog being
2263 2264 skipped. This class exists to conform to the manifestrevlog
2264 2265 interface for those directories and proactively prevent writes to
2265 2266 outside the narrowspec.
2266 2267 """
2267 2268
2268 2269 def __init__(self, dir):
2269 2270 self._dir = dir
2270 2271
2271 2272 def __len__(self):
2272 2273 raise error.ProgrammingError(
2273 2274 b'attempt to get length of excluded dir %s' % self._dir
2274 2275 )
2275 2276
2276 2277 def rev(self, node):
2277 2278 raise error.ProgrammingError(
2278 2279 b'attempt to get rev from excluded dir %s' % self._dir
2279 2280 )
2280 2281
2281 2282 def linkrev(self, node):
2282 2283 raise error.ProgrammingError(
2283 2284 b'attempt to get linkrev from excluded dir %s' % self._dir
2284 2285 )
2285 2286
2286 2287 def node(self, rev):
2287 2288 raise error.ProgrammingError(
2288 2289 b'attempt to get node from excluded dir %s' % self._dir
2289 2290 )
2290 2291
2291 2292 def add(self, *args, **kwargs):
2292 2293 # We should never write entries in dirlogs outside the narrow clone.
2293 2294 # However, the method still gets called from writesubtree() in
2294 2295 # _addtree(), so we need to handle it. We should possibly make that
2295 2296 # avoid calling add() with a clean manifest (_dirty is always False
2296 2297 # in excludeddir instances).
2297 2298 pass
General Comments 0
You need to be logged in to leave comments. Login now