##// END OF EJS Templates
manifest: remove pycompat.iteritems()...
Gregory Szorc -
r49781:3d35e748 default
parent child Browse files
Show More
@@ -1,2370 +1,2368 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import heapq
10 10 import itertools
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 bin,
17 17 hex,
18 18 nullrev,
19 19 )
20 20 from .pycompat import getattr
21 21 from . import (
22 22 encoding,
23 23 error,
24 24 match as matchmod,
25 25 mdiff,
26 26 pathutil,
27 27 policy,
28 28 pycompat,
29 29 revlog,
30 30 util,
31 31 )
32 32 from .interfaces import (
33 33 repository,
34 34 util as interfaceutil,
35 35 )
36 36 from .revlogutils import (
37 37 constants as revlog_constants,
38 38 )
39 39
40 40 parsers = policy.importmod('parsers')
41 41 propertycache = util.propertycache
42 42
43 43 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
44 44 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
45 45
46 46
47 47 def _parse(nodelen, data):
48 48 # This method does a little bit of excessive-looking
49 49 # precondition checking. This is so that the behavior of this
50 50 # class exactly matches its C counterpart to try and help
51 51 # prevent surprise breakage for anyone that develops against
52 52 # the pure version.
53 53 if data and data[-1:] != b'\n':
54 54 raise ValueError(b'Manifest did not end in a newline.')
55 55 prev = None
56 56 for l in data.splitlines():
57 57 if prev is not None and prev > l:
58 58 raise ValueError(b'Manifest lines not in sorted order.')
59 59 prev = l
60 60 f, n = l.split(b'\0')
61 61 nl = len(n)
62 62 flags = n[-1:]
63 63 if flags in _manifestflags:
64 64 n = n[:-1]
65 65 nl -= 1
66 66 else:
67 67 flags = b''
68 68 if nl != 2 * nodelen:
69 69 raise ValueError(b'Invalid manifest line')
70 70
71 71 yield f, bin(n), flags
72 72
73 73
74 74 def _text(it):
75 75 files = []
76 76 lines = []
77 77 for f, n, fl in it:
78 78 files.append(f)
79 79 # if this is changed to support newlines in filenames,
80 80 # be sure to check the templates/ dir again (especially *-raw.tmpl)
81 81 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
82 82
83 83 _checkforbidden(files)
84 84 return b''.join(lines)
85 85
86 86
87 87 class lazymanifestiter(object):
88 88 def __init__(self, lm):
89 89 self.pos = 0
90 90 self.lm = lm
91 91
92 92 def __iter__(self):
93 93 return self
94 94
95 95 def next(self):
96 96 try:
97 97 data, pos = self.lm._get(self.pos)
98 98 except IndexError:
99 99 raise StopIteration
100 100 if pos == -1:
101 101 self.pos += 1
102 102 return data[0]
103 103 self.pos += 1
104 104 zeropos = data.find(b'\x00', pos)
105 105 return data[pos:zeropos]
106 106
107 107 __next__ = next
108 108
109 109
110 110 class lazymanifestiterentries(object):
111 111 def __init__(self, lm):
112 112 self.lm = lm
113 113 self.pos = 0
114 114
115 115 def __iter__(self):
116 116 return self
117 117
118 118 def next(self):
119 119 try:
120 120 data, pos = self.lm._get(self.pos)
121 121 except IndexError:
122 122 raise StopIteration
123 123 if pos == -1:
124 124 self.pos += 1
125 125 return data
126 126 zeropos = data.find(b'\x00', pos)
127 127 nlpos = data.find(b'\n', pos)
128 128 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
129 129 raise error.StorageError(b'Invalid manifest line')
130 130 flags = data[nlpos - 1 : nlpos]
131 131 if flags in _manifestflags:
132 132 hlen = nlpos - zeropos - 2
133 133 else:
134 134 hlen = nlpos - zeropos - 1
135 135 flags = b''
136 136 if hlen != 2 * self.lm._nodelen:
137 137 raise error.StorageError(b'Invalid manifest line')
138 138 hashval = unhexlify(
139 139 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
140 140 )
141 141 self.pos += 1
142 142 return (data[pos:zeropos], hashval, flags)
143 143
144 144 __next__ = next
145 145
146 146
147 147 def unhexlify(data, extra, pos, length):
148 148 s = bin(data[pos : pos + length])
149 149 if extra:
150 150 s += chr(extra & 0xFF)
151 151 return s
152 152
153 153
154 154 def _cmp(a, b):
155 155 return (a > b) - (a < b)
156 156
157 157
158 158 _manifestflags = {b'', b'l', b't', b'x'}
159 159
160 160
161 161 class _lazymanifest(object):
162 162 """A pure python manifest backed by a byte string. It is supplimented with
163 163 internal lists as it is modified, until it is compacted back to a pure byte
164 164 string.
165 165
166 166 ``data`` is the initial manifest data.
167 167
168 168 ``positions`` is a list of offsets, one per manifest entry. Positive
169 169 values are offsets into ``data``, negative values are offsets into the
170 170 ``extradata`` list. When an entry is removed, its entry is dropped from
171 171 ``positions``. The values are encoded such that when walking the list and
172 172 indexing into ``data`` or ``extradata`` as appropriate, the entries are
173 173 sorted by filename.
174 174
175 175 ``extradata`` is a list of (key, hash, flags) for entries that were added or
176 176 modified since the manifest was created or compacted.
177 177 """
178 178
179 179 def __init__(
180 180 self,
181 181 nodelen,
182 182 data,
183 183 positions=None,
184 184 extrainfo=None,
185 185 extradata=None,
186 186 hasremovals=False,
187 187 ):
188 188 self._nodelen = nodelen
189 189 if positions is None:
190 190 self.positions = self.findlines(data)
191 191 self.extrainfo = [0] * len(self.positions)
192 192 self.data = data
193 193 self.extradata = []
194 194 self.hasremovals = False
195 195 else:
196 196 self.positions = positions[:]
197 197 self.extrainfo = extrainfo[:]
198 198 self.extradata = extradata[:]
199 199 self.data = data
200 200 self.hasremovals = hasremovals
201 201
202 202 def findlines(self, data):
203 203 if not data:
204 204 return []
205 205 pos = data.find(b"\n")
206 206 if pos == -1 or data[-1:] != b'\n':
207 207 raise ValueError(b"Manifest did not end in a newline.")
208 208 positions = [0]
209 209 prev = data[: data.find(b'\x00')]
210 210 while pos < len(data) - 1 and pos != -1:
211 211 positions.append(pos + 1)
212 212 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
213 213 if nexts < prev:
214 214 raise ValueError(b"Manifest lines not in sorted order.")
215 215 prev = nexts
216 216 pos = data.find(b"\n", pos + 1)
217 217 return positions
218 218
219 219 def _get(self, index):
220 220 # get the position encoded in pos:
221 221 # positive number is an index in 'data'
222 222 # negative number is in extrapieces
223 223 pos = self.positions[index]
224 224 if pos >= 0:
225 225 return self.data, pos
226 226 return self.extradata[-pos - 1], -1
227 227
228 228 def _getkey(self, pos):
229 229 if pos >= 0:
230 230 return self.data[pos : self.data.find(b'\x00', pos + 1)]
231 231 return self.extradata[-pos - 1][0]
232 232
233 233 def bsearch(self, key):
234 234 first = 0
235 235 last = len(self.positions) - 1
236 236
237 237 while first <= last:
238 238 midpoint = (first + last) // 2
239 239 nextpos = self.positions[midpoint]
240 240 candidate = self._getkey(nextpos)
241 241 r = _cmp(key, candidate)
242 242 if r == 0:
243 243 return midpoint
244 244 else:
245 245 if r < 0:
246 246 last = midpoint - 1
247 247 else:
248 248 first = midpoint + 1
249 249 return -1
250 250
251 251 def bsearch2(self, key):
252 252 # same as the above, but will always return the position
253 253 # done for performance reasons
254 254 first = 0
255 255 last = len(self.positions) - 1
256 256
257 257 while first <= last:
258 258 midpoint = (first + last) // 2
259 259 nextpos = self.positions[midpoint]
260 260 candidate = self._getkey(nextpos)
261 261 r = _cmp(key, candidate)
262 262 if r == 0:
263 263 return (midpoint, True)
264 264 else:
265 265 if r < 0:
266 266 last = midpoint - 1
267 267 else:
268 268 first = midpoint + 1
269 269 return (first, False)
270 270
271 271 def __contains__(self, key):
272 272 return self.bsearch(key) != -1
273 273
274 274 def __getitem__(self, key):
275 275 if not isinstance(key, bytes):
276 276 raise TypeError(b"getitem: manifest keys must be a bytes.")
277 277 needle = self.bsearch(key)
278 278 if needle == -1:
279 279 raise KeyError
280 280 data, pos = self._get(needle)
281 281 if pos == -1:
282 282 return (data[1], data[2])
283 283 zeropos = data.find(b'\x00', pos)
284 284 nlpos = data.find(b'\n', zeropos)
285 285 assert 0 <= needle <= len(self.positions)
286 286 assert len(self.extrainfo) == len(self.positions)
287 287 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
288 288 raise error.StorageError(b'Invalid manifest line')
289 289 hlen = nlpos - zeropos - 1
290 290 flags = data[nlpos - 1 : nlpos]
291 291 if flags in _manifestflags:
292 292 hlen -= 1
293 293 else:
294 294 flags = b''
295 295 if hlen != 2 * self._nodelen:
296 296 raise error.StorageError(b'Invalid manifest line')
297 297 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
298 298 return (hashval, flags)
299 299
300 300 def __delitem__(self, key):
301 301 needle, found = self.bsearch2(key)
302 302 if not found:
303 303 raise KeyError
304 304 cur = self.positions[needle]
305 305 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
306 306 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
307 307 if cur >= 0:
308 308 # This does NOT unsort the list as far as the search functions are
309 309 # concerned, as they only examine lines mapped by self.positions.
310 310 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
311 311 self.hasremovals = True
312 312
313 313 def __setitem__(self, key, value):
314 314 if not isinstance(key, bytes):
315 315 raise TypeError(b"setitem: manifest keys must be a byte string.")
316 316 if not isinstance(value, tuple) or len(value) != 2:
317 317 raise TypeError(
318 318 b"Manifest values must be a tuple of (node, flags)."
319 319 )
320 320 hashval = value[0]
321 321 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
322 322 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
323 323 flags = value[1]
324 324 if not isinstance(flags, bytes) or len(flags) > 1:
325 325 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
326 326 needle, found = self.bsearch2(key)
327 327 if found:
328 328 # put the item
329 329 pos = self.positions[needle]
330 330 if pos < 0:
331 331 self.extradata[-pos - 1] = (key, hashval, value[1])
332 332 else:
333 333 # just don't bother
334 334 self.extradata.append((key, hashval, value[1]))
335 335 self.positions[needle] = -len(self.extradata)
336 336 else:
337 337 # not found, put it in with extra positions
338 338 self.extradata.append((key, hashval, value[1]))
339 339 self.positions = (
340 340 self.positions[:needle]
341 341 + [-len(self.extradata)]
342 342 + self.positions[needle:]
343 343 )
344 344 self.extrainfo = (
345 345 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
346 346 )
347 347
348 348 def copy(self):
349 349 # XXX call _compact like in C?
350 350 return _lazymanifest(
351 351 self._nodelen,
352 352 self.data,
353 353 self.positions,
354 354 self.extrainfo,
355 355 self.extradata,
356 356 self.hasremovals,
357 357 )
358 358
359 359 def _compact(self):
360 360 # hopefully not called TOO often
361 361 if len(self.extradata) == 0 and not self.hasremovals:
362 362 return
363 363 l = []
364 364 i = 0
365 365 offset = 0
366 366 self.extrainfo = [0] * len(self.positions)
367 367 while i < len(self.positions):
368 368 if self.positions[i] >= 0:
369 369 cur = self.positions[i]
370 370 last_cut = cur
371 371
372 372 # Collect all contiguous entries in the buffer at the current
373 373 # offset, breaking out only for added/modified items held in
374 374 # extradata, or a deleted line prior to the next position.
375 375 while True:
376 376 self.positions[i] = offset
377 377 i += 1
378 378 if i == len(self.positions) or self.positions[i] < 0:
379 379 break
380 380
381 381 # A removed file has no positions[] entry, but does have an
382 382 # overwritten first byte. Break out and find the end of the
383 383 # current good entry/entries if there is a removed file
384 384 # before the next position.
385 385 if (
386 386 self.hasremovals
387 387 and self.data.find(b'\n\x00', cur, self.positions[i])
388 388 != -1
389 389 ):
390 390 break
391 391
392 392 offset += self.positions[i] - cur
393 393 cur = self.positions[i]
394 394 end_cut = self.data.find(b'\n', cur)
395 395 if end_cut != -1:
396 396 end_cut += 1
397 397 offset += end_cut - cur
398 398 l.append(self.data[last_cut:end_cut])
399 399 else:
400 400 while i < len(self.positions) and self.positions[i] < 0:
401 401 cur = self.positions[i]
402 402 t = self.extradata[-cur - 1]
403 403 l.append(self._pack(t))
404 404 self.positions[i] = offset
405 405 # Hashes are either 20 bytes (old sha1s) or 32
406 406 # bytes (new non-sha1).
407 407 hlen = 20
408 408 if len(t[1]) > 25:
409 409 hlen = 32
410 410 if len(t[1]) > hlen:
411 411 self.extrainfo[i] = ord(t[1][hlen + 1])
412 412 offset += len(l[-1])
413 413 i += 1
414 414 self.data = b''.join(l)
415 415 self.hasremovals = False
416 416 self.extradata = []
417 417
418 418 def _pack(self, d):
419 419 n = d[1]
420 420 assert len(n) in (20, 32)
421 421 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
422 422
423 423 def text(self):
424 424 self._compact()
425 425 return self.data
426 426
427 427 def diff(self, m2, clean=False):
428 428 '''Finds changes between the current manifest and m2.'''
429 429 # XXX think whether efficiency matters here
430 430 diff = {}
431 431
432 432 for fn, e1, flags in self.iterentries():
433 433 if fn not in m2:
434 434 diff[fn] = (e1, flags), (None, b'')
435 435 else:
436 436 e2 = m2[fn]
437 437 if (e1, flags) != e2:
438 438 diff[fn] = (e1, flags), e2
439 439 elif clean:
440 440 diff[fn] = None
441 441
442 442 for fn, e2, flags in m2.iterentries():
443 443 if fn not in self:
444 444 diff[fn] = (None, b''), (e2, flags)
445 445
446 446 return diff
447 447
448 448 def iterentries(self):
449 449 return lazymanifestiterentries(self)
450 450
451 451 def iterkeys(self):
452 452 return lazymanifestiter(self)
453 453
454 454 def __iter__(self):
455 455 return lazymanifestiter(self)
456 456
457 457 def __len__(self):
458 458 return len(self.positions)
459 459
460 460 def filtercopy(self, filterfn):
461 461 # XXX should be optimized
462 462 c = _lazymanifest(self._nodelen, b'')
463 463 for f, n, fl in self.iterentries():
464 464 if filterfn(f):
465 465 c[f] = n, fl
466 466 return c
467 467
468 468
469 469 try:
470 470 _lazymanifest = parsers.lazymanifest
471 471 except AttributeError:
472 472 pass
473 473
474 474
475 475 @interfaceutil.implementer(repository.imanifestdict)
476 476 class manifestdict(object):
477 477 def __init__(self, nodelen, data=b''):
478 478 self._nodelen = nodelen
479 479 self._lm = _lazymanifest(nodelen, data)
480 480
481 481 def __getitem__(self, key):
482 482 return self._lm[key][0]
483 483
484 484 def find(self, key):
485 485 return self._lm[key]
486 486
487 487 def __len__(self):
488 488 return len(self._lm)
489 489
490 490 def __nonzero__(self):
491 491 # nonzero is covered by the __len__ function, but implementing it here
492 492 # makes it easier for extensions to override.
493 493 return len(self._lm) != 0
494 494
495 495 __bool__ = __nonzero__
496 496
497 497 def __setitem__(self, key, node):
498 498 self._lm[key] = node, self.flags(key)
499 499
500 500 def __contains__(self, key):
501 501 if key is None:
502 502 return False
503 503 return key in self._lm
504 504
505 505 def __delitem__(self, key):
506 506 del self._lm[key]
507 507
508 508 def __iter__(self):
509 509 return self._lm.__iter__()
510 510
511 511 def iterkeys(self):
512 512 return self._lm.iterkeys()
513 513
514 514 def keys(self):
515 515 return list(self.iterkeys())
516 516
517 517 def filesnotin(self, m2, match=None):
518 518 '''Set of files in this manifest that are not in the other'''
519 519 if match is not None:
520 520 match = matchmod.badmatch(match, lambda path, msg: None)
521 521 sm2 = set(m2.walk(match))
522 522 return {f for f in self.walk(match) if f not in sm2}
523 523 return {f for f in self if f not in m2}
524 524
525 525 @propertycache
526 526 def _dirs(self):
527 527 return pathutil.dirs(self)
528 528
529 529 def dirs(self):
530 530 return self._dirs
531 531
532 532 def hasdir(self, dir):
533 533 return dir in self._dirs
534 534
535 535 def _filesfastpath(self, match):
536 536 """Checks whether we can correctly and quickly iterate over matcher
537 537 files instead of over manifest files."""
538 538 files = match.files()
539 539 return len(files) < 100 and (
540 540 match.isexact()
541 541 or (match.prefix() and all(fn in self for fn in files))
542 542 )
543 543
544 544 def walk(self, match):
545 545 """Generates matching file names.
546 546
547 547 Equivalent to manifest.matches(match).iterkeys(), but without creating
548 548 an entirely new manifest.
549 549
550 550 It also reports nonexistent files by marking them bad with match.bad().
551 551 """
552 552 if match.always():
553 553 for f in iter(self):
554 554 yield f
555 555 return
556 556
557 557 fset = set(match.files())
558 558
559 559 # avoid the entire walk if we're only looking for specific files
560 560 if self._filesfastpath(match):
561 561 for fn in sorted(fset):
562 562 if fn in self:
563 563 yield fn
564 564 return
565 565
566 566 for fn in self:
567 567 if fn in fset:
568 568 # specified pattern is the exact name
569 569 fset.remove(fn)
570 570 if match(fn):
571 571 yield fn
572 572
573 573 # for dirstate.walk, files=[''] means "walk the whole tree".
574 574 # follow that here, too
575 575 fset.discard(b'')
576 576
577 577 for fn in sorted(fset):
578 578 if not self.hasdir(fn):
579 579 match.bad(fn, None)
580 580
581 581 def _matches(self, match):
582 582 '''generate a new manifest filtered by the match argument'''
583 583 if match.always():
584 584 return self.copy()
585 585
586 586 if self._filesfastpath(match):
587 587 m = manifestdict(self._nodelen)
588 588 lm = self._lm
589 589 for fn in match.files():
590 590 if fn in lm:
591 591 m._lm[fn] = lm[fn]
592 592 return m
593 593
594 594 m = manifestdict(self._nodelen)
595 595 m._lm = self._lm.filtercopy(match)
596 596 return m
597 597
598 598 def diff(self, m2, match=None, clean=False):
599 599 """Finds changes between the current manifest and m2.
600 600
601 601 Args:
602 602 m2: the manifest to which this manifest should be compared.
603 603 clean: if true, include files unchanged between these manifests
604 604 with a None value in the returned dictionary.
605 605
606 606 The result is returned as a dict with filename as key and
607 607 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
608 608 nodeid in the current/other manifest and fl1/fl2 is the flag
609 609 in the current/other manifest. Where the file does not exist,
610 610 the nodeid will be None and the flags will be the empty
611 611 string.
612 612 """
613 613 if match:
614 614 m1 = self._matches(match)
615 615 m2 = m2._matches(match)
616 616 return m1.diff(m2, clean=clean)
617 617 return self._lm.diff(m2._lm, clean)
618 618
619 619 def setflag(self, key, flag):
620 620 if flag not in _manifestflags:
621 621 raise TypeError(b"Invalid manifest flag set.")
622 622 self._lm[key] = self[key], flag
623 623
624 624 def get(self, key, default=None):
625 625 try:
626 626 return self._lm[key][0]
627 627 except KeyError:
628 628 return default
629 629
630 630 def flags(self, key):
631 631 try:
632 632 return self._lm[key][1]
633 633 except KeyError:
634 634 return b''
635 635
636 636 def copy(self):
637 637 c = manifestdict(self._nodelen)
638 638 c._lm = self._lm.copy()
639 639 return c
640 640
641 641 def items(self):
642 642 return (x[:2] for x in self._lm.iterentries())
643 643
644 644 def iteritems(self):
645 645 return (x[:2] for x in self._lm.iterentries())
646 646
647 647 def iterentries(self):
648 648 return self._lm.iterentries()
649 649
650 650 def text(self):
651 651 # most likely uses native version
652 652 return self._lm.text()
653 653
654 654 def fastdelta(self, base, changes):
655 655 """Given a base manifest text as a bytearray and a list of changes
656 656 relative to that text, compute a delta that can be used by revlog.
657 657 """
658 658 delta = []
659 659 dstart = None
660 660 dend = None
661 661 dline = [b""]
662 662 start = 0
663 663 # zero copy representation of base as a buffer
664 664 addbuf = util.buffer(base)
665 665
666 666 changes = list(changes)
667 667 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
668 668 # start with a readonly loop that finds the offset of
669 669 # each line and creates the deltas
670 670 for f, todelete in changes:
671 671 # bs will either be the index of the item or the insert point
672 672 start, end = _msearch(addbuf, f, start)
673 673 if not todelete:
674 674 h, fl = self._lm[f]
675 675 l = b"%s\0%s%s\n" % (f, hex(h), fl)
676 676 else:
677 677 if start == end:
678 678 # item we want to delete was not found, error out
679 679 raise AssertionError(
680 680 _(b"failed to remove %s from manifest") % f
681 681 )
682 682 l = b""
683 683 if dstart is not None and dstart <= start and dend >= start:
684 684 if dend < end:
685 685 dend = end
686 686 if l:
687 687 dline.append(l)
688 688 else:
689 689 if dstart is not None:
690 690 delta.append([dstart, dend, b"".join(dline)])
691 691 dstart = start
692 692 dend = end
693 693 dline = [l]
694 694
695 695 if dstart is not None:
696 696 delta.append([dstart, dend, b"".join(dline)])
697 697 # apply the delta to the base, and get a delta for addrevision
698 698 deltatext, arraytext = _addlistdelta(base, delta)
699 699 else:
700 700 # For large changes, it's much cheaper to just build the text and
701 701 # diff it.
702 702 arraytext = bytearray(self.text())
703 703 deltatext = mdiff.textdiff(
704 704 util.buffer(base), util.buffer(arraytext)
705 705 )
706 706
707 707 return arraytext, deltatext
708 708
709 709
710 710 def _msearch(m, s, lo=0, hi=None):
711 711 """return a tuple (start, end) that says where to find s within m.
712 712
713 713 If the string is found m[start:end] are the line containing
714 714 that string. If start == end the string was not found and
715 715 they indicate the proper sorted insertion point.
716 716
717 717 m should be a buffer, a memoryview or a byte string.
718 718 s is a byte string"""
719 719
720 720 def advance(i, c):
721 721 while i < lenm and m[i : i + 1] != c:
722 722 i += 1
723 723 return i
724 724
725 725 if not s:
726 726 return (lo, lo)
727 727 lenm = len(m)
728 728 if not hi:
729 729 hi = lenm
730 730 while lo < hi:
731 731 mid = (lo + hi) // 2
732 732 start = mid
733 733 while start > 0 and m[start - 1 : start] != b'\n':
734 734 start -= 1
735 735 end = advance(start, b'\0')
736 736 if bytes(m[start:end]) < s:
737 737 # we know that after the null there are 40 bytes of sha1
738 738 # this translates to the bisect lo = mid + 1
739 739 lo = advance(end + 40, b'\n') + 1
740 740 else:
741 741 # this translates to the bisect hi = mid
742 742 hi = start
743 743 end = advance(lo, b'\0')
744 744 found = m[lo:end]
745 745 if s == found:
746 746 # we know that after the null there are 40 bytes of sha1
747 747 end = advance(end + 40, b'\n')
748 748 return (lo, end + 1)
749 749 else:
750 750 return (lo, lo)
751 751
752 752
753 753 def _checkforbidden(l):
754 754 """Check filenames for illegal characters."""
755 755 for f in l:
756 756 if b'\n' in f or b'\r' in f:
757 757 raise error.StorageError(
758 758 _(b"'\\n' and '\\r' disallowed in filenames: %r")
759 759 % pycompat.bytestr(f)
760 760 )
761 761
762 762
763 763 # apply the changes collected during the bisect loop to our addlist
764 764 # return a delta suitable for addrevision
765 765 def _addlistdelta(addlist, x):
766 766 # for large addlist arrays, building a new array is cheaper
767 767 # than repeatedly modifying the existing one
768 768 currentposition = 0
769 769 newaddlist = bytearray()
770 770
771 771 for start, end, content in x:
772 772 newaddlist += addlist[currentposition:start]
773 773 if content:
774 774 newaddlist += bytearray(content)
775 775
776 776 currentposition = end
777 777
778 778 newaddlist += addlist[currentposition:]
779 779
780 780 deltatext = b"".join(
781 781 struct.pack(b">lll", start, end, len(content)) + content
782 782 for start, end, content in x
783 783 )
784 784 return deltatext, newaddlist
785 785
786 786
787 787 def _splittopdir(f):
788 788 if b'/' in f:
789 789 dir, subpath = f.split(b'/', 1)
790 790 return dir + b'/', subpath
791 791 else:
792 792 return b'', f
793 793
794 794
795 795 _noop = lambda s: None
796 796
797 797
798 798 @interfaceutil.implementer(repository.imanifestdict)
799 799 class treemanifest(object):
800 800 def __init__(self, nodeconstants, dir=b'', text=b''):
801 801 self._dir = dir
802 802 self.nodeconstants = nodeconstants
803 803 self._node = self.nodeconstants.nullid
804 804 self._nodelen = self.nodeconstants.nodelen
805 805 self._loadfunc = _noop
806 806 self._copyfunc = _noop
807 807 self._dirty = False
808 808 self._dirs = {}
809 809 self._lazydirs = {}
810 810 # Using _lazymanifest here is a little slower than plain old dicts
811 811 self._files = {}
812 812 self._flags = {}
813 813 if text:
814 814
815 815 def readsubtree(subdir, subm):
816 816 raise AssertionError(
817 817 b'treemanifest constructor only accepts flat manifests'
818 818 )
819 819
820 820 self.parse(text, readsubtree)
821 821 self._dirty = True # Mark flat manifest dirty after parsing
822 822
823 823 def _subpath(self, path):
824 824 return self._dir + path
825 825
826 826 def _loadalllazy(self):
827 827 selfdirs = self._dirs
828 828 subpath = self._subpath
829 for d, (node, readsubtree, docopy) in pycompat.iteritems(
830 self._lazydirs
831 ):
829 for d, (node, readsubtree, docopy) in self._lazydirs.items():
832 830 if docopy:
833 831 selfdirs[d] = readsubtree(subpath(d), node).copy()
834 832 else:
835 833 selfdirs[d] = readsubtree(subpath(d), node)
836 834 self._lazydirs = {}
837 835
838 836 def _loadlazy(self, d):
839 837 v = self._lazydirs.get(d)
840 838 if v:
841 839 node, readsubtree, docopy = v
842 840 if docopy:
843 841 self._dirs[d] = readsubtree(self._subpath(d), node).copy()
844 842 else:
845 843 self._dirs[d] = readsubtree(self._subpath(d), node)
846 844 del self._lazydirs[d]
847 845
848 846 def _loadchildrensetlazy(self, visit):
849 847 if not visit:
850 848 return None
851 849 if visit == b'all' or visit == b'this':
852 850 self._loadalllazy()
853 851 return None
854 852
855 853 loadlazy = self._loadlazy
856 854 for k in visit:
857 855 loadlazy(k + b'/')
858 856 return visit
859 857
860 858 def _loaddifflazy(self, t1, t2):
861 859 """load items in t1 and t2 if they're needed for diffing.
862 860
863 861 The criteria currently is:
864 862 - if it's not present in _lazydirs in either t1 or t2, load it in the
865 863 other (it may already be loaded or it may not exist, doesn't matter)
866 864 - if it's present in _lazydirs in both, compare the nodeid; if it
867 865 differs, load it in both
868 866 """
869 867 toloadlazy = []
870 868 for d, v1 in t1._lazydirs.items():
871 869 v2 = t2._lazydirs.get(d)
872 870 if not v2 or v2[0] != v1[0]:
873 871 toloadlazy.append(d)
874 872 for d, v1 in t2._lazydirs.items():
875 873 if d not in t1._lazydirs:
876 874 toloadlazy.append(d)
877 875
878 876 for d in toloadlazy:
879 877 t1._loadlazy(d)
880 878 t2._loadlazy(d)
881 879
882 880 def __len__(self):
883 881 self._load()
884 882 size = len(self._files)
885 883 self._loadalllazy()
886 884 for m in self._dirs.values():
887 885 size += m.__len__()
888 886 return size
889 887
890 888 def __nonzero__(self):
891 889 # Faster than "__len() != 0" since it avoids loading sub-manifests
892 890 return not self._isempty()
893 891
894 892 __bool__ = __nonzero__
895 893
896 894 def _isempty(self):
897 895 self._load() # for consistency; already loaded by all callers
898 896 # See if we can skip loading everything.
899 897 if self._files or (
900 898 self._dirs and any(not m._isempty() for m in self._dirs.values())
901 899 ):
902 900 return False
903 901 self._loadalllazy()
904 902 return not self._dirs or all(m._isempty() for m in self._dirs.values())
905 903
906 904 @encoding.strmethod
907 905 def __repr__(self):
908 906 return (
909 907 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
910 908 % (
911 909 self._dir,
912 910 hex(self._node),
913 911 bool(self._loadfunc is _noop),
914 912 self._dirty,
915 913 id(self),
916 914 )
917 915 )
918 916
919 917 def dir(self):
920 918 """The directory that this tree manifest represents, including a
921 919 trailing '/'. Empty string for the repo root directory."""
922 920 return self._dir
923 921
924 922 def node(self):
925 923 """This node of this instance. nullid for unsaved instances. Should
926 924 be updated when the instance is read or written from a revlog.
927 925 """
928 926 assert not self._dirty
929 927 return self._node
930 928
931 929 def setnode(self, node):
932 930 self._node = node
933 931 self._dirty = False
934 932
935 933 def iterentries(self):
936 934 self._load()
937 935 self._loadalllazy()
938 936 for p, n in sorted(
939 937 itertools.chain(self._dirs.items(), self._files.items())
940 938 ):
941 939 if p in self._files:
942 940 yield self._subpath(p), n, self._flags.get(p, b'')
943 941 else:
944 942 for x in n.iterentries():
945 943 yield x
946 944
947 945 def items(self):
948 946 self._load()
949 947 self._loadalllazy()
950 948 for p, n in sorted(
951 949 itertools.chain(self._dirs.items(), self._files.items())
952 950 ):
953 951 if p in self._files:
954 952 yield self._subpath(p), n
955 953 else:
956 954 for f, sn in n.items():
957 955 yield f, sn
958 956
959 957 iteritems = items
960 958
961 959 def iterkeys(self):
962 960 self._load()
963 961 self._loadalllazy()
964 962 for p in sorted(itertools.chain(self._dirs, self._files)):
965 963 if p in self._files:
966 964 yield self._subpath(p)
967 965 else:
968 966 for f in self._dirs[p]:
969 967 yield f
970 968
971 969 def keys(self):
972 970 return list(self.iterkeys())
973 971
974 972 def __iter__(self):
975 973 return self.iterkeys()
976 974
977 975 def __contains__(self, f):
978 976 if f is None:
979 977 return False
980 978 self._load()
981 979 dir, subpath = _splittopdir(f)
982 980 if dir:
983 981 self._loadlazy(dir)
984 982
985 983 if dir not in self._dirs:
986 984 return False
987 985
988 986 return self._dirs[dir].__contains__(subpath)
989 987 else:
990 988 return f in self._files
991 989
992 990 def get(self, f, default=None):
993 991 self._load()
994 992 dir, subpath = _splittopdir(f)
995 993 if dir:
996 994 self._loadlazy(dir)
997 995
998 996 if dir not in self._dirs:
999 997 return default
1000 998 return self._dirs[dir].get(subpath, default)
1001 999 else:
1002 1000 return self._files.get(f, default)
1003 1001
1004 1002 def __getitem__(self, f):
1005 1003 self._load()
1006 1004 dir, subpath = _splittopdir(f)
1007 1005 if dir:
1008 1006 self._loadlazy(dir)
1009 1007
1010 1008 return self._dirs[dir].__getitem__(subpath)
1011 1009 else:
1012 1010 return self._files[f]
1013 1011
1014 1012 def flags(self, f):
1015 1013 self._load()
1016 1014 dir, subpath = _splittopdir(f)
1017 1015 if dir:
1018 1016 self._loadlazy(dir)
1019 1017
1020 1018 if dir not in self._dirs:
1021 1019 return b''
1022 1020 return self._dirs[dir].flags(subpath)
1023 1021 else:
1024 1022 if f in self._lazydirs or f in self._dirs:
1025 1023 return b''
1026 1024 return self._flags.get(f, b'')
1027 1025
1028 1026 def find(self, f):
1029 1027 self._load()
1030 1028 dir, subpath = _splittopdir(f)
1031 1029 if dir:
1032 1030 self._loadlazy(dir)
1033 1031
1034 1032 return self._dirs[dir].find(subpath)
1035 1033 else:
1036 1034 return self._files[f], self._flags.get(f, b'')
1037 1035
1038 1036 def __delitem__(self, f):
1039 1037 self._load()
1040 1038 dir, subpath = _splittopdir(f)
1041 1039 if dir:
1042 1040 self._loadlazy(dir)
1043 1041
1044 1042 self._dirs[dir].__delitem__(subpath)
1045 1043 # If the directory is now empty, remove it
1046 1044 if self._dirs[dir]._isempty():
1047 1045 del self._dirs[dir]
1048 1046 else:
1049 1047 del self._files[f]
1050 1048 if f in self._flags:
1051 1049 del self._flags[f]
1052 1050 self._dirty = True
1053 1051
1054 1052 def __setitem__(self, f, n):
1055 1053 assert n is not None
1056 1054 self._load()
1057 1055 dir, subpath = _splittopdir(f)
1058 1056 if dir:
1059 1057 self._loadlazy(dir)
1060 1058 if dir not in self._dirs:
1061 1059 self._dirs[dir] = treemanifest(
1062 1060 self.nodeconstants, self._subpath(dir)
1063 1061 )
1064 1062 self._dirs[dir].__setitem__(subpath, n)
1065 1063 else:
1066 1064 # manifest nodes are either 20 bytes or 32 bytes,
1067 1065 # depending on the hash in use. Assert this as historically
1068 1066 # sometimes extra bytes were added.
1069 1067 assert len(n) in (20, 32)
1070 1068 self._files[f] = n
1071 1069 self._dirty = True
1072 1070
1073 1071 def _load(self):
1074 1072 if self._loadfunc is not _noop:
1075 1073 lf, self._loadfunc = self._loadfunc, _noop
1076 1074 lf(self)
1077 1075 elif self._copyfunc is not _noop:
1078 1076 cf, self._copyfunc = self._copyfunc, _noop
1079 1077 cf(self)
1080 1078
1081 1079 def setflag(self, f, flags):
1082 1080 """Set the flags (symlink, executable) for path f."""
1083 1081 if flags not in _manifestflags:
1084 1082 raise TypeError(b"Invalid manifest flag set.")
1085 1083 self._load()
1086 1084 dir, subpath = _splittopdir(f)
1087 1085 if dir:
1088 1086 self._loadlazy(dir)
1089 1087 if dir not in self._dirs:
1090 1088 self._dirs[dir] = treemanifest(
1091 1089 self.nodeconstants, self._subpath(dir)
1092 1090 )
1093 1091 self._dirs[dir].setflag(subpath, flags)
1094 1092 else:
1095 1093 self._flags[f] = flags
1096 1094 self._dirty = True
1097 1095
1098 1096 def copy(self):
1099 1097 copy = treemanifest(self.nodeconstants, self._dir)
1100 1098 copy._node = self._node
1101 1099 copy._dirty = self._dirty
1102 1100 if self._copyfunc is _noop:
1103 1101
1104 1102 def _copyfunc(s):
1105 1103 self._load()
1106 1104 s._lazydirs = {
1107 1105 d: (n, r, True) for d, (n, r, c) in self._lazydirs.items()
1108 1106 }
1109 1107 sdirs = s._dirs
1110 1108 for d, v in self._dirs.items():
1111 1109 sdirs[d] = v.copy()
1112 1110 s._files = dict.copy(self._files)
1113 1111 s._flags = dict.copy(self._flags)
1114 1112
1115 1113 if self._loadfunc is _noop:
1116 1114 _copyfunc(copy)
1117 1115 else:
1118 1116 copy._copyfunc = _copyfunc
1119 1117 else:
1120 1118 copy._copyfunc = self._copyfunc
1121 1119 return copy
1122 1120
1123 1121 def filesnotin(self, m2, match=None):
1124 1122 '''Set of files in this manifest that are not in the other'''
1125 1123 if match and not match.always():
1126 1124 m1 = self._matches(match)
1127 1125 m2 = m2._matches(match)
1128 1126 return m1.filesnotin(m2)
1129 1127
1130 1128 files = set()
1131 1129
1132 1130 def _filesnotin(t1, t2):
1133 1131 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1134 1132 return
1135 1133 t1._load()
1136 1134 t2._load()
1137 1135 self._loaddifflazy(t1, t2)
1138 1136 for d, m1 in t1._dirs.items():
1139 1137 if d in t2._dirs:
1140 1138 m2 = t2._dirs[d]
1141 1139 _filesnotin(m1, m2)
1142 1140 else:
1143 1141 files.update(m1.iterkeys())
1144 1142
1145 1143 for fn in t1._files:
1146 1144 if fn not in t2._files:
1147 1145 files.add(t1._subpath(fn))
1148 1146
1149 1147 _filesnotin(self, m2)
1150 1148 return files
1151 1149
1152 1150 @propertycache
1153 1151 def _alldirs(self):
1154 1152 return pathutil.dirs(self)
1155 1153
1156 1154 def dirs(self):
1157 1155 return self._alldirs
1158 1156
1159 1157 def hasdir(self, dir):
1160 1158 self._load()
1161 1159 topdir, subdir = _splittopdir(dir)
1162 1160 if topdir:
1163 1161 self._loadlazy(topdir)
1164 1162 if topdir in self._dirs:
1165 1163 return self._dirs[topdir].hasdir(subdir)
1166 1164 return False
1167 1165 dirslash = dir + b'/'
1168 1166 return dirslash in self._dirs or dirslash in self._lazydirs
1169 1167
1170 1168 def walk(self, match):
1171 1169 """Generates matching file names.
1172 1170
1173 1171 It also reports nonexistent files by marking them bad with match.bad().
1174 1172 """
1175 1173 if match.always():
1176 1174 for f in iter(self):
1177 1175 yield f
1178 1176 return
1179 1177
1180 1178 fset = set(match.files())
1181 1179
1182 1180 for fn in self._walk(match):
1183 1181 if fn in fset:
1184 1182 # specified pattern is the exact name
1185 1183 fset.remove(fn)
1186 1184 yield fn
1187 1185
1188 1186 # for dirstate.walk, files=[''] means "walk the whole tree".
1189 1187 # follow that here, too
1190 1188 fset.discard(b'')
1191 1189
1192 1190 for fn in sorted(fset):
1193 1191 if not self.hasdir(fn):
1194 1192 match.bad(fn, None)
1195 1193
1196 1194 def _walk(self, match):
1197 1195 '''Recursively generates matching file names for walk().'''
1198 1196 visit = match.visitchildrenset(self._dir[:-1])
1199 1197 if not visit:
1200 1198 return
1201 1199
1202 1200 # yield this dir's files and walk its submanifests
1203 1201 self._load()
1204 1202 visit = self._loadchildrensetlazy(visit)
1205 1203 for p in sorted(list(self._dirs) + list(self._files)):
1206 1204 if p in self._files:
1207 1205 fullp = self._subpath(p)
1208 1206 if match(fullp):
1209 1207 yield fullp
1210 1208 else:
1211 1209 if not visit or p[:-1] in visit:
1212 1210 for f in self._dirs[p]._walk(match):
1213 1211 yield f
1214 1212
1215 1213 def _matches(self, match):
1216 1214 """recursively generate a new manifest filtered by the match argument."""
1217 1215 if match.always():
1218 1216 return self.copy()
1219 1217 return self._matches_inner(match)
1220 1218
1221 1219 def _matches_inner(self, match):
1222 1220 if match.always():
1223 1221 return self.copy()
1224 1222
1225 1223 visit = match.visitchildrenset(self._dir[:-1])
1226 1224 if visit == b'all':
1227 1225 return self.copy()
1228 1226 ret = treemanifest(self.nodeconstants, self._dir)
1229 1227 if not visit:
1230 1228 return ret
1231 1229
1232 1230 self._load()
1233 1231 for fn in self._files:
1234 1232 # While visitchildrenset *usually* lists only subdirs, this is
1235 1233 # actually up to the matcher and may have some files in the set().
1236 1234 # If visit == 'this', we should obviously look at the files in this
1237 1235 # directory; if visit is a set, and fn is in it, we should inspect
1238 1236 # fn (but no need to inspect things not in the set).
1239 1237 if visit != b'this' and fn not in visit:
1240 1238 continue
1241 1239 fullp = self._subpath(fn)
1242 1240 # visitchildrenset isn't perfect, we still need to call the regular
1243 1241 # matcher code to further filter results.
1244 1242 if not match(fullp):
1245 1243 continue
1246 1244 ret._files[fn] = self._files[fn]
1247 1245 if fn in self._flags:
1248 1246 ret._flags[fn] = self._flags[fn]
1249 1247
1250 1248 visit = self._loadchildrensetlazy(visit)
1251 1249 for dir, subm in self._dirs.items():
1252 1250 if visit and dir[:-1] not in visit:
1253 1251 continue
1254 1252 m = subm._matches_inner(match)
1255 1253 if not m._isempty():
1256 1254 ret._dirs[dir] = m
1257 1255
1258 1256 if not ret._isempty():
1259 1257 ret._dirty = True
1260 1258 return ret
1261 1259
1262 1260 def fastdelta(self, base, changes):
1263 1261 raise FastdeltaUnavailable()
1264 1262
1265 1263 def diff(self, m2, match=None, clean=False):
1266 1264 """Finds changes between the current manifest and m2.
1267 1265
1268 1266 Args:
1269 1267 m2: the manifest to which this manifest should be compared.
1270 1268 clean: if true, include files unchanged between these manifests
1271 1269 with a None value in the returned dictionary.
1272 1270
1273 1271 The result is returned as a dict with filename as key and
1274 1272 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1275 1273 nodeid in the current/other manifest and fl1/fl2 is the flag
1276 1274 in the current/other manifest. Where the file does not exist,
1277 1275 the nodeid will be None and the flags will be the empty
1278 1276 string.
1279 1277 """
1280 1278 if match and not match.always():
1281 1279 m1 = self._matches(match)
1282 1280 m2 = m2._matches(match)
1283 1281 return m1.diff(m2, clean=clean)
1284 1282 result = {}
1285 1283 emptytree = treemanifest(self.nodeconstants)
1286 1284
1287 1285 def _iterativediff(t1, t2, stack):
1288 1286 """compares two tree manifests and append new tree-manifests which
1289 1287 needs to be compared to stack"""
1290 1288 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1291 1289 return
1292 1290 t1._load()
1293 1291 t2._load()
1294 1292 self._loaddifflazy(t1, t2)
1295 1293
1296 1294 for d, m1 in t1._dirs.items():
1297 1295 m2 = t2._dirs.get(d, emptytree)
1298 1296 stack.append((m1, m2))
1299 1297
1300 1298 for d, m2 in t2._dirs.items():
1301 1299 if d not in t1._dirs:
1302 1300 stack.append((emptytree, m2))
1303 1301
1304 1302 for fn, n1 in t1._files.items():
1305 1303 fl1 = t1._flags.get(fn, b'')
1306 1304 n2 = t2._files.get(fn, None)
1307 1305 fl2 = t2._flags.get(fn, b'')
1308 1306 if n1 != n2 or fl1 != fl2:
1309 1307 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1310 1308 elif clean:
1311 1309 result[t1._subpath(fn)] = None
1312 1310
1313 1311 for fn, n2 in t2._files.items():
1314 1312 if fn not in t1._files:
1315 1313 fl2 = t2._flags.get(fn, b'')
1316 1314 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1317 1315
1318 1316 stackls = []
1319 1317 _iterativediff(self, m2, stackls)
1320 1318 while stackls:
1321 1319 t1, t2 = stackls.pop()
1322 1320 # stackls is populated in the function call
1323 1321 _iterativediff(t1, t2, stackls)
1324 1322 return result
1325 1323
1326 1324 def unmodifiedsince(self, m2):
1327 1325 return not self._dirty and not m2._dirty and self._node == m2._node
1328 1326
1329 1327 def parse(self, text, readsubtree):
1330 1328 selflazy = self._lazydirs
1331 1329 for f, n, fl in _parse(self._nodelen, text):
1332 1330 if fl == b't':
1333 1331 f = f + b'/'
1334 1332 # False below means "doesn't need to be copied" and can use the
1335 1333 # cached value from readsubtree directly.
1336 1334 selflazy[f] = (n, readsubtree, False)
1337 1335 elif b'/' in f:
1338 1336 # This is a flat manifest, so use __setitem__ and setflag rather
1339 1337 # than assigning directly to _files and _flags, so we can
1340 1338 # assign a path in a subdirectory, and to mark dirty (compared
1341 1339 # to nullid).
1342 1340 self[f] = n
1343 1341 if fl:
1344 1342 self.setflag(f, fl)
1345 1343 else:
1346 1344 # Assigning to _files and _flags avoids marking as dirty,
1347 1345 # and should be a little faster.
1348 1346 self._files[f] = n
1349 1347 if fl:
1350 1348 self._flags[f] = fl
1351 1349
1352 1350 def text(self):
1353 1351 """Get the full data of this manifest as a bytestring."""
1354 1352 self._load()
1355 1353 return _text(self.iterentries())
1356 1354
1357 1355 def dirtext(self):
1358 1356 """Get the full data of this directory as a bytestring. Make sure that
1359 1357 any submanifests have been written first, so their nodeids are correct.
1360 1358 """
1361 1359 self._load()
1362 1360 flags = self.flags
1363 1361 lazydirs = [(d[:-1], v[0], b't') for d, v in self._lazydirs.items()]
1364 1362 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1365 1363 files = [(f, self._files[f], flags(f)) for f in self._files]
1366 1364 return _text(sorted(dirs + files + lazydirs))
1367 1365
1368 1366 def read(self, gettext, readsubtree):
1369 1367 def _load_for_read(s):
1370 1368 s.parse(gettext(), readsubtree)
1371 1369 s._dirty = False
1372 1370
1373 1371 self._loadfunc = _load_for_read
1374 1372
1375 1373 def writesubtrees(self, m1, m2, writesubtree, match):
1376 1374 self._load() # for consistency; should never have any effect here
1377 1375 m1._load()
1378 1376 m2._load()
1379 1377 emptytree = treemanifest(self.nodeconstants)
1380 1378
1381 1379 def getnode(m, d):
1382 1380 ld = m._lazydirs.get(d)
1383 1381 if ld:
1384 1382 return ld[0]
1385 1383 return m._dirs.get(d, emptytree)._node
1386 1384
1387 1385 # let's skip investigating things that `match` says we do not need.
1388 1386 visit = match.visitchildrenset(self._dir[:-1])
1389 1387 visit = self._loadchildrensetlazy(visit)
1390 1388 if visit == b'this' or visit == b'all':
1391 1389 visit = None
1392 1390 for d, subm in self._dirs.items():
1393 1391 if visit and d[:-1] not in visit:
1394 1392 continue
1395 1393 subp1 = getnode(m1, d)
1396 1394 subp2 = getnode(m2, d)
1397 1395 if subp1 == self.nodeconstants.nullid:
1398 1396 subp1, subp2 = subp2, subp1
1399 1397 writesubtree(subm, subp1, subp2, match)
1400 1398
1401 1399 def walksubtrees(self, matcher=None):
1402 1400 """Returns an iterator of the subtrees of this manifest, including this
1403 1401 manifest itself.
1404 1402
1405 1403 If `matcher` is provided, it only returns subtrees that match.
1406 1404 """
1407 1405 if matcher and not matcher.visitdir(self._dir[:-1]):
1408 1406 return
1409 1407 if not matcher or matcher(self._dir[:-1]):
1410 1408 yield self
1411 1409
1412 1410 self._load()
1413 1411 # OPT: use visitchildrenset to avoid loading everything.
1414 1412 self._loadalllazy()
1415 1413 for d, subm in self._dirs.items():
1416 1414 for subtree in subm.walksubtrees(matcher=matcher):
1417 1415 yield subtree
1418 1416
1419 1417
1420 1418 class manifestfulltextcache(util.lrucachedict):
1421 1419 """File-backed LRU cache for the manifest cache
1422 1420
1423 1421 File consists of entries, up to EOF:
1424 1422
1425 1423 - 20 bytes node, 4 bytes length, <length> manifest data
1426 1424
1427 1425 These are written in reverse cache order (oldest to newest).
1428 1426
1429 1427 """
1430 1428
1431 1429 _file = b'manifestfulltextcache'
1432 1430
1433 1431 def __init__(self, max):
1434 1432 super(manifestfulltextcache, self).__init__(max)
1435 1433 self._dirty = False
1436 1434 self._read = False
1437 1435 self._opener = None
1438 1436
1439 1437 def read(self):
1440 1438 if self._read or self._opener is None:
1441 1439 return
1442 1440
1443 1441 try:
1444 1442 with self._opener(self._file) as fp:
1445 1443 set = super(manifestfulltextcache, self).__setitem__
1446 1444 # ignore trailing data, this is a cache, corruption is skipped
1447 1445 while True:
1448 1446 # TODO do we need to do work here for sha1 portability?
1449 1447 node = fp.read(20)
1450 1448 if len(node) < 20:
1451 1449 break
1452 1450 try:
1453 1451 size = struct.unpack(b'>L', fp.read(4))[0]
1454 1452 except struct.error:
1455 1453 break
1456 1454 value = bytearray(fp.read(size))
1457 1455 if len(value) != size:
1458 1456 break
1459 1457 set(node, value)
1460 1458 except IOError:
1461 1459 # the file is allowed to be missing
1462 1460 pass
1463 1461
1464 1462 self._read = True
1465 1463 self._dirty = False
1466 1464
1467 1465 def write(self):
1468 1466 if not self._dirty or self._opener is None:
1469 1467 return
1470 1468 # rotate backwards to the first used node
1471 1469 try:
1472 1470 with self._opener(
1473 1471 self._file, b'w', atomictemp=True, checkambig=True
1474 1472 ) as fp:
1475 1473 node = self._head.prev
1476 1474 while True:
1477 1475 if node.key in self._cache:
1478 1476 fp.write(node.key)
1479 1477 fp.write(struct.pack(b'>L', len(node.value)))
1480 1478 fp.write(node.value)
1481 1479 if node is self._head:
1482 1480 break
1483 1481 node = node.prev
1484 1482 except IOError:
1485 1483 # We could not write the cache (eg: permission error)
1486 1484 # the content can be missing.
1487 1485 #
1488 1486 # We could try harder and see if we could recreate a wcache
1489 1487 # directory were we coudl write too.
1490 1488 #
1491 1489 # XXX the error pass silently, having some way to issue an error
1492 1490 # log `ui.log` would be nice.
1493 1491 pass
1494 1492
1495 1493 def __len__(self):
1496 1494 if not self._read:
1497 1495 self.read()
1498 1496 return super(manifestfulltextcache, self).__len__()
1499 1497
1500 1498 def __contains__(self, k):
1501 1499 if not self._read:
1502 1500 self.read()
1503 1501 return super(manifestfulltextcache, self).__contains__(k)
1504 1502
1505 1503 def __iter__(self):
1506 1504 if not self._read:
1507 1505 self.read()
1508 1506 return super(manifestfulltextcache, self).__iter__()
1509 1507
1510 1508 def __getitem__(self, k):
1511 1509 if not self._read:
1512 1510 self.read()
1513 1511 # the cache lru order can change on read
1514 1512 setdirty = self._cache.get(k) is not self._head
1515 1513 value = super(manifestfulltextcache, self).__getitem__(k)
1516 1514 if setdirty:
1517 1515 self._dirty = True
1518 1516 return value
1519 1517
1520 1518 def __setitem__(self, k, v):
1521 1519 if not self._read:
1522 1520 self.read()
1523 1521 super(manifestfulltextcache, self).__setitem__(k, v)
1524 1522 self._dirty = True
1525 1523
1526 1524 def __delitem__(self, k):
1527 1525 if not self._read:
1528 1526 self.read()
1529 1527 super(manifestfulltextcache, self).__delitem__(k)
1530 1528 self._dirty = True
1531 1529
1532 1530 def get(self, k, default=None):
1533 1531 if not self._read:
1534 1532 self.read()
1535 1533 return super(manifestfulltextcache, self).get(k, default=default)
1536 1534
1537 1535 def clear(self, clear_persisted_data=False):
1538 1536 super(manifestfulltextcache, self).clear()
1539 1537 if clear_persisted_data:
1540 1538 self._dirty = True
1541 1539 self.write()
1542 1540 self._read = False
1543 1541
1544 1542
1545 1543 # and upper bound of what we expect from compression
1546 1544 # (real live value seems to be "3")
1547 1545 MAXCOMPRESSION = 3
1548 1546
1549 1547
1550 1548 class FastdeltaUnavailable(Exception):
1551 1549 """Exception raised when fastdelta isn't usable on a manifest."""
1552 1550
1553 1551
1554 1552 @interfaceutil.implementer(repository.imanifeststorage)
1555 1553 class manifestrevlog(object):
1556 1554 """A revlog that stores manifest texts. This is responsible for caching the
1557 1555 full-text manifest contents.
1558 1556 """
1559 1557
1560 1558 def __init__(
1561 1559 self,
1562 1560 nodeconstants,
1563 1561 opener,
1564 1562 tree=b'',
1565 1563 dirlogcache=None,
1566 1564 treemanifest=False,
1567 1565 ):
1568 1566 """Constructs a new manifest revlog
1569 1567
1570 1568 `indexfile` - used by extensions to have two manifests at once, like
1571 1569 when transitioning between flatmanifeset and treemanifests.
1572 1570
1573 1571 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1574 1572 options can also be used to make this a tree manifest revlog. The opener
1575 1573 option takes precedence, so if it is set to True, we ignore whatever
1576 1574 value is passed in to the constructor.
1577 1575 """
1578 1576 self.nodeconstants = nodeconstants
1579 1577 # During normal operations, we expect to deal with not more than four
1580 1578 # revs at a time (such as during commit --amend). When rebasing large
1581 1579 # stacks of commits, the number can go up, hence the config knob below.
1582 1580 cachesize = 4
1583 1581 optiontreemanifest = False
1584 1582 opts = getattr(opener, 'options', None)
1585 1583 if opts is not None:
1586 1584 cachesize = opts.get(b'manifestcachesize', cachesize)
1587 1585 optiontreemanifest = opts.get(b'treemanifest', False)
1588 1586
1589 1587 self._treeondisk = optiontreemanifest or treemanifest
1590 1588
1591 1589 self._fulltextcache = manifestfulltextcache(cachesize)
1592 1590
1593 1591 if tree:
1594 1592 assert self._treeondisk, b'opts is %r' % opts
1595 1593
1596 1594 radix = b'00manifest'
1597 1595 if tree:
1598 1596 radix = b"meta/" + tree + radix
1599 1597
1600 1598 self.tree = tree
1601 1599
1602 1600 # The dirlogcache is kept on the root manifest log
1603 1601 if tree:
1604 1602 self._dirlogcache = dirlogcache
1605 1603 else:
1606 1604 self._dirlogcache = {b'': self}
1607 1605
1608 1606 self._revlog = revlog.revlog(
1609 1607 opener,
1610 1608 target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
1611 1609 radix=radix,
1612 1610 # only root indexfile is cached
1613 1611 checkambig=not bool(tree),
1614 1612 mmaplargeindex=True,
1615 1613 upperboundcomp=MAXCOMPRESSION,
1616 1614 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
1617 1615 )
1618 1616
1619 1617 self.index = self._revlog.index
1620 1618 self._generaldelta = self._revlog._generaldelta
1621 1619
1622 1620 def _setupmanifestcachehooks(self, repo):
1623 1621 """Persist the manifestfulltextcache on lock release"""
1624 1622 if not util.safehasattr(repo, b'_wlockref'):
1625 1623 return
1626 1624
1627 1625 self._fulltextcache._opener = repo.wcachevfs
1628 1626 if repo._currentlock(repo._wlockref) is None:
1629 1627 return
1630 1628
1631 1629 reporef = weakref.ref(repo)
1632 1630 manifestrevlogref = weakref.ref(self)
1633 1631
1634 1632 def persistmanifestcache(success):
1635 1633 # Repo is in an unknown state, do not persist.
1636 1634 if not success:
1637 1635 return
1638 1636
1639 1637 repo = reporef()
1640 1638 self = manifestrevlogref()
1641 1639 if repo is None or self is None:
1642 1640 return
1643 1641 if repo.manifestlog.getstorage(b'') is not self:
1644 1642 # there's a different manifest in play now, abort
1645 1643 return
1646 1644 self._fulltextcache.write()
1647 1645
1648 1646 repo._afterlock(persistmanifestcache)
1649 1647
1650 1648 @property
1651 1649 def fulltextcache(self):
1652 1650 return self._fulltextcache
1653 1651
1654 1652 def clearcaches(self, clear_persisted_data=False):
1655 1653 self._revlog.clearcaches()
1656 1654 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1657 1655 self._dirlogcache = {self.tree: self}
1658 1656
1659 1657 def dirlog(self, d):
1660 1658 if d:
1661 1659 assert self._treeondisk
1662 1660 if d not in self._dirlogcache:
1663 1661 mfrevlog = manifestrevlog(
1664 1662 self.nodeconstants,
1665 1663 self.opener,
1666 1664 d,
1667 1665 self._dirlogcache,
1668 1666 treemanifest=self._treeondisk,
1669 1667 )
1670 1668 self._dirlogcache[d] = mfrevlog
1671 1669 return self._dirlogcache[d]
1672 1670
1673 1671 def add(
1674 1672 self,
1675 1673 m,
1676 1674 transaction,
1677 1675 link,
1678 1676 p1,
1679 1677 p2,
1680 1678 added,
1681 1679 removed,
1682 1680 readtree=None,
1683 1681 match=None,
1684 1682 ):
1685 1683 """add some manifest entry in to the manifest log
1686 1684
1687 1685 input:
1688 1686
1689 1687 m: the manifest dict we want to store
1690 1688 transaction: the open transaction
1691 1689 p1: manifest-node of p1
1692 1690 p2: manifest-node of p2
1693 1691 added: file added/changed compared to parent
1694 1692 removed: file removed compared to parent
1695 1693
1696 1694 tree manifest input:
1697 1695
1698 1696 readtree: a function to read a subtree
1699 1697 match: a filematcher for the subpart of the tree manifest
1700 1698 """
1701 1699 try:
1702 1700 if p1 not in self.fulltextcache:
1703 1701 raise FastdeltaUnavailable()
1704 1702 # If our first parent is in the manifest cache, we can
1705 1703 # compute a delta here using properties we know about the
1706 1704 # manifest up-front, which may save time later for the
1707 1705 # revlog layer.
1708 1706
1709 1707 _checkforbidden(added)
1710 1708 # combine the changed lists into one sorted iterator
1711 1709 work = heapq.merge(
1712 1710 [(x, False) for x in sorted(added)],
1713 1711 [(x, True) for x in sorted(removed)],
1714 1712 )
1715 1713
1716 1714 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1717 1715 cachedelta = self._revlog.rev(p1), deltatext
1718 1716 text = util.buffer(arraytext)
1719 1717 rev = self._revlog.addrevision(
1720 1718 text, transaction, link, p1, p2, cachedelta
1721 1719 )
1722 1720 n = self._revlog.node(rev)
1723 1721 except FastdeltaUnavailable:
1724 1722 # The first parent manifest isn't already loaded or the
1725 1723 # manifest implementation doesn't support fastdelta, so
1726 1724 # we'll just encode a fulltext of the manifest and pass
1727 1725 # that through to the revlog layer, and let it handle the
1728 1726 # delta process.
1729 1727 if self._treeondisk:
1730 1728 assert readtree, b"readtree must be set for treemanifest writes"
1731 1729 assert match, b"match must be specified for treemanifest writes"
1732 1730 m1 = readtree(self.tree, p1)
1733 1731 m2 = readtree(self.tree, p2)
1734 1732 n = self._addtree(
1735 1733 m, transaction, link, m1, m2, readtree, match=match
1736 1734 )
1737 1735 arraytext = None
1738 1736 else:
1739 1737 text = m.text()
1740 1738 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1741 1739 n = self._revlog.node(rev)
1742 1740 arraytext = bytearray(text)
1743 1741
1744 1742 if arraytext is not None:
1745 1743 self.fulltextcache[n] = arraytext
1746 1744
1747 1745 return n
1748 1746
1749 1747 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1750 1748 # If the manifest is unchanged compared to one parent,
1751 1749 # don't write a new revision
1752 1750 if self.tree != b'' and (
1753 1751 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1754 1752 ):
1755 1753 return m.node()
1756 1754
1757 1755 def writesubtree(subm, subp1, subp2, match):
1758 1756 sublog = self.dirlog(subm.dir())
1759 1757 sublog.add(
1760 1758 subm,
1761 1759 transaction,
1762 1760 link,
1763 1761 subp1,
1764 1762 subp2,
1765 1763 None,
1766 1764 None,
1767 1765 readtree=readtree,
1768 1766 match=match,
1769 1767 )
1770 1768
1771 1769 m.writesubtrees(m1, m2, writesubtree, match)
1772 1770 text = m.dirtext()
1773 1771 n = None
1774 1772 if self.tree != b'':
1775 1773 # Double-check whether contents are unchanged to one parent
1776 1774 if text == m1.dirtext():
1777 1775 n = m1.node()
1778 1776 elif text == m2.dirtext():
1779 1777 n = m2.node()
1780 1778
1781 1779 if not n:
1782 1780 rev = self._revlog.addrevision(
1783 1781 text, transaction, link, m1.node(), m2.node()
1784 1782 )
1785 1783 n = self._revlog.node(rev)
1786 1784
1787 1785 # Save nodeid so parent manifest can calculate its nodeid
1788 1786 m.setnode(n)
1789 1787 return n
1790 1788
1791 1789 def __len__(self):
1792 1790 return len(self._revlog)
1793 1791
1794 1792 def __iter__(self):
1795 1793 return self._revlog.__iter__()
1796 1794
1797 1795 def rev(self, node):
1798 1796 return self._revlog.rev(node)
1799 1797
1800 1798 def node(self, rev):
1801 1799 return self._revlog.node(rev)
1802 1800
1803 1801 def lookup(self, value):
1804 1802 return self._revlog.lookup(value)
1805 1803
1806 1804 def parentrevs(self, rev):
1807 1805 return self._revlog.parentrevs(rev)
1808 1806
1809 1807 def parents(self, node):
1810 1808 return self._revlog.parents(node)
1811 1809
1812 1810 def linkrev(self, rev):
1813 1811 return self._revlog.linkrev(rev)
1814 1812
1815 1813 def checksize(self):
1816 1814 return self._revlog.checksize()
1817 1815
1818 1816 def revision(self, node, _df=None):
1819 1817 return self._revlog.revision(node, _df=_df)
1820 1818
1821 1819 def rawdata(self, node, _df=None):
1822 1820 return self._revlog.rawdata(node, _df=_df)
1823 1821
1824 1822 def revdiff(self, rev1, rev2):
1825 1823 return self._revlog.revdiff(rev1, rev2)
1826 1824
1827 1825 def cmp(self, node, text):
1828 1826 return self._revlog.cmp(node, text)
1829 1827
1830 1828 def deltaparent(self, rev):
1831 1829 return self._revlog.deltaparent(rev)
1832 1830
1833 1831 def emitrevisions(
1834 1832 self,
1835 1833 nodes,
1836 1834 nodesorder=None,
1837 1835 revisiondata=False,
1838 1836 assumehaveparentrevisions=False,
1839 1837 deltamode=repository.CG_DELTAMODE_STD,
1840 1838 sidedata_helpers=None,
1841 1839 ):
1842 1840 return self._revlog.emitrevisions(
1843 1841 nodes,
1844 1842 nodesorder=nodesorder,
1845 1843 revisiondata=revisiondata,
1846 1844 assumehaveparentrevisions=assumehaveparentrevisions,
1847 1845 deltamode=deltamode,
1848 1846 sidedata_helpers=sidedata_helpers,
1849 1847 )
1850 1848
1851 1849 def addgroup(
1852 1850 self,
1853 1851 deltas,
1854 1852 linkmapper,
1855 1853 transaction,
1856 1854 alwayscache=False,
1857 1855 addrevisioncb=None,
1858 1856 duplicaterevisioncb=None,
1859 1857 ):
1860 1858 return self._revlog.addgroup(
1861 1859 deltas,
1862 1860 linkmapper,
1863 1861 transaction,
1864 1862 alwayscache=alwayscache,
1865 1863 addrevisioncb=addrevisioncb,
1866 1864 duplicaterevisioncb=duplicaterevisioncb,
1867 1865 )
1868 1866
1869 1867 def rawsize(self, rev):
1870 1868 return self._revlog.rawsize(rev)
1871 1869
1872 1870 def getstrippoint(self, minlink):
1873 1871 return self._revlog.getstrippoint(minlink)
1874 1872
1875 1873 def strip(self, minlink, transaction):
1876 1874 return self._revlog.strip(minlink, transaction)
1877 1875
1878 1876 def files(self):
1879 1877 return self._revlog.files()
1880 1878
1881 1879 def clone(self, tr, destrevlog, **kwargs):
1882 1880 if not isinstance(destrevlog, manifestrevlog):
1883 1881 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1884 1882
1885 1883 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1886 1884
1887 1885 def storageinfo(
1888 1886 self,
1889 1887 exclusivefiles=False,
1890 1888 sharedfiles=False,
1891 1889 revisionscount=False,
1892 1890 trackedsize=False,
1893 1891 storedsize=False,
1894 1892 ):
1895 1893 return self._revlog.storageinfo(
1896 1894 exclusivefiles=exclusivefiles,
1897 1895 sharedfiles=sharedfiles,
1898 1896 revisionscount=revisionscount,
1899 1897 trackedsize=trackedsize,
1900 1898 storedsize=storedsize,
1901 1899 )
1902 1900
1903 1901 @property
1904 1902 def opener(self):
1905 1903 return self._revlog.opener
1906 1904
1907 1905 @opener.setter
1908 1906 def opener(self, value):
1909 1907 self._revlog.opener = value
1910 1908
1911 1909
1912 1910 @interfaceutil.implementer(repository.imanifestlog)
1913 1911 class manifestlog(object):
1914 1912 """A collection class representing the collection of manifest snapshots
1915 1913 referenced by commits in the repository.
1916 1914
1917 1915 In this situation, 'manifest' refers to the abstract concept of a snapshot
1918 1916 of the list of files in the given commit. Consumers of the output of this
1919 1917 class do not care about the implementation details of the actual manifests
1920 1918 they receive (i.e. tree or flat or lazily loaded, etc)."""
1921 1919
1922 1920 def __init__(self, opener, repo, rootstore, narrowmatch):
1923 1921 self.nodeconstants = repo.nodeconstants
1924 1922 usetreemanifest = False
1925 1923 cachesize = 4
1926 1924
1927 1925 opts = getattr(opener, 'options', None)
1928 1926 if opts is not None:
1929 1927 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1930 1928 cachesize = opts.get(b'manifestcachesize', cachesize)
1931 1929
1932 1930 self._treemanifests = usetreemanifest
1933 1931
1934 1932 self._rootstore = rootstore
1935 1933 self._rootstore._setupmanifestcachehooks(repo)
1936 1934 self._narrowmatch = narrowmatch
1937 1935
1938 1936 # A cache of the manifestctx or treemanifestctx for each directory
1939 1937 self._dirmancache = {}
1940 1938 self._dirmancache[b''] = util.lrucachedict(cachesize)
1941 1939
1942 1940 self._cachesize = cachesize
1943 1941
1944 1942 def __getitem__(self, node):
1945 1943 """Retrieves the manifest instance for the given node. Throws a
1946 1944 LookupError if not found.
1947 1945 """
1948 1946 return self.get(b'', node)
1949 1947
1950 1948 def get(self, tree, node, verify=True):
1951 1949 """Retrieves the manifest instance for the given node. Throws a
1952 1950 LookupError if not found.
1953 1951
1954 1952 `verify` - if True an exception will be thrown if the node is not in
1955 1953 the revlog
1956 1954 """
1957 1955 if node in self._dirmancache.get(tree, ()):
1958 1956 return self._dirmancache[tree][node]
1959 1957
1960 1958 if not self._narrowmatch.always():
1961 1959 if not self._narrowmatch.visitdir(tree[:-1]):
1962 1960 return excludeddirmanifestctx(self.nodeconstants, tree, node)
1963 1961 if tree:
1964 1962 if self._rootstore._treeondisk:
1965 1963 if verify:
1966 1964 # Side-effect is LookupError is raised if node doesn't
1967 1965 # exist.
1968 1966 self.getstorage(tree).rev(node)
1969 1967
1970 1968 m = treemanifestctx(self, tree, node)
1971 1969 else:
1972 1970 raise error.Abort(
1973 1971 _(
1974 1972 b"cannot ask for manifest directory '%s' in a flat "
1975 1973 b"manifest"
1976 1974 )
1977 1975 % tree
1978 1976 )
1979 1977 else:
1980 1978 if verify:
1981 1979 # Side-effect is LookupError is raised if node doesn't exist.
1982 1980 self._rootstore.rev(node)
1983 1981
1984 1982 if self._treemanifests:
1985 1983 m = treemanifestctx(self, b'', node)
1986 1984 else:
1987 1985 m = manifestctx(self, node)
1988 1986
1989 1987 if node != self.nodeconstants.nullid:
1990 1988 mancache = self._dirmancache.get(tree)
1991 1989 if not mancache:
1992 1990 mancache = util.lrucachedict(self._cachesize)
1993 1991 self._dirmancache[tree] = mancache
1994 1992 mancache[node] = m
1995 1993 return m
1996 1994
1997 1995 def getstorage(self, tree):
1998 1996 return self._rootstore.dirlog(tree)
1999 1997
2000 1998 def clearcaches(self, clear_persisted_data=False):
2001 1999 self._dirmancache.clear()
2002 2000 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2003 2001
2004 2002 def rev(self, node):
2005 2003 return self._rootstore.rev(node)
2006 2004
2007 2005 def update_caches(self, transaction):
2008 2006 return self._rootstore._revlog.update_caches(transaction=transaction)
2009 2007
2010 2008
2011 2009 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2012 2010 class memmanifestctx(object):
2013 2011 def __init__(self, manifestlog):
2014 2012 self._manifestlog = manifestlog
2015 2013 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2016 2014
2017 2015 def _storage(self):
2018 2016 return self._manifestlog.getstorage(b'')
2019 2017
2020 2018 def copy(self):
2021 2019 memmf = memmanifestctx(self._manifestlog)
2022 2020 memmf._manifestdict = self.read().copy()
2023 2021 return memmf
2024 2022
2025 2023 def read(self):
2026 2024 return self._manifestdict
2027 2025
2028 2026 def write(self, transaction, link, p1, p2, added, removed, match=None):
2029 2027 return self._storage().add(
2030 2028 self._manifestdict,
2031 2029 transaction,
2032 2030 link,
2033 2031 p1,
2034 2032 p2,
2035 2033 added,
2036 2034 removed,
2037 2035 match=match,
2038 2036 )
2039 2037
2040 2038
2041 2039 @interfaceutil.implementer(repository.imanifestrevisionstored)
2042 2040 class manifestctx(object):
2043 2041 """A class representing a single revision of a manifest, including its
2044 2042 contents, its parent revs, and its linkrev.
2045 2043 """
2046 2044
2047 2045 def __init__(self, manifestlog, node):
2048 2046 self._manifestlog = manifestlog
2049 2047 self._data = None
2050 2048
2051 2049 self._node = node
2052 2050
2053 2051 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2054 2052 # but let's add it later when something needs it and we can load it
2055 2053 # lazily.
2056 2054 # self.p1, self.p2 = store.parents(node)
2057 2055 # rev = store.rev(node)
2058 2056 # self.linkrev = store.linkrev(rev)
2059 2057
2060 2058 def _storage(self):
2061 2059 return self._manifestlog.getstorage(b'')
2062 2060
2063 2061 def node(self):
2064 2062 return self._node
2065 2063
2066 2064 def copy(self):
2067 2065 memmf = memmanifestctx(self._manifestlog)
2068 2066 memmf._manifestdict = self.read().copy()
2069 2067 return memmf
2070 2068
2071 2069 @propertycache
2072 2070 def parents(self):
2073 2071 return self._storage().parents(self._node)
2074 2072
2075 2073 def read(self):
2076 2074 if self._data is None:
2077 2075 nc = self._manifestlog.nodeconstants
2078 2076 if self._node == nc.nullid:
2079 2077 self._data = manifestdict(nc.nodelen)
2080 2078 else:
2081 2079 store = self._storage()
2082 2080 if self._node in store.fulltextcache:
2083 2081 text = pycompat.bytestr(store.fulltextcache[self._node])
2084 2082 else:
2085 2083 text = store.revision(self._node)
2086 2084 arraytext = bytearray(text)
2087 2085 store.fulltextcache[self._node] = arraytext
2088 2086 self._data = manifestdict(nc.nodelen, text)
2089 2087 return self._data
2090 2088
2091 2089 def readfast(self, shallow=False):
2092 2090 """Calls either readdelta or read, based on which would be less work.
2093 2091 readdelta is called if the delta is against the p1, and therefore can be
2094 2092 read quickly.
2095 2093
2096 2094 If `shallow` is True, nothing changes since this is a flat manifest.
2097 2095 """
2098 2096 store = self._storage()
2099 2097 r = store.rev(self._node)
2100 2098 deltaparent = store.deltaparent(r)
2101 2099 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2102 2100 return self.readdelta()
2103 2101 return self.read()
2104 2102
2105 2103 def readdelta(self, shallow=False):
2106 2104 """Returns a manifest containing just the entries that are present
2107 2105 in this manifest, but not in its p1 manifest. This is efficient to read
2108 2106 if the revlog delta is already p1.
2109 2107
2110 2108 Changing the value of `shallow` has no effect on flat manifests.
2111 2109 """
2112 2110 store = self._storage()
2113 2111 r = store.rev(self._node)
2114 2112 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2115 2113 return manifestdict(store.nodeconstants.nodelen, d)
2116 2114
2117 2115 def find(self, key):
2118 2116 return self.read().find(key)
2119 2117
2120 2118
2121 2119 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2122 2120 class memtreemanifestctx(object):
2123 2121 def __init__(self, manifestlog, dir=b''):
2124 2122 self._manifestlog = manifestlog
2125 2123 self._dir = dir
2126 2124 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2127 2125
2128 2126 def _storage(self):
2129 2127 return self._manifestlog.getstorage(b'')
2130 2128
2131 2129 def copy(self):
2132 2130 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2133 2131 memmf._treemanifest = self._treemanifest.copy()
2134 2132 return memmf
2135 2133
2136 2134 def read(self):
2137 2135 return self._treemanifest
2138 2136
2139 2137 def write(self, transaction, link, p1, p2, added, removed, match=None):
2140 2138 def readtree(dir, node):
2141 2139 return self._manifestlog.get(dir, node).read()
2142 2140
2143 2141 return self._storage().add(
2144 2142 self._treemanifest,
2145 2143 transaction,
2146 2144 link,
2147 2145 p1,
2148 2146 p2,
2149 2147 added,
2150 2148 removed,
2151 2149 readtree=readtree,
2152 2150 match=match,
2153 2151 )
2154 2152
2155 2153
2156 2154 @interfaceutil.implementer(repository.imanifestrevisionstored)
2157 2155 class treemanifestctx(object):
2158 2156 def __init__(self, manifestlog, dir, node):
2159 2157 self._manifestlog = manifestlog
2160 2158 self._dir = dir
2161 2159 self._data = None
2162 2160
2163 2161 self._node = node
2164 2162
2165 2163 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2166 2164 # we can instantiate treemanifestctx objects for directories we don't
2167 2165 # have on disk.
2168 2166 # self.p1, self.p2 = store.parents(node)
2169 2167 # rev = store.rev(node)
2170 2168 # self.linkrev = store.linkrev(rev)
2171 2169
2172 2170 def _storage(self):
2173 2171 narrowmatch = self._manifestlog._narrowmatch
2174 2172 if not narrowmatch.always():
2175 2173 if not narrowmatch.visitdir(self._dir[:-1]):
2176 2174 return excludedmanifestrevlog(
2177 2175 self._manifestlog.nodeconstants, self._dir
2178 2176 )
2179 2177 return self._manifestlog.getstorage(self._dir)
2180 2178
2181 2179 def read(self):
2182 2180 if self._data is None:
2183 2181 store = self._storage()
2184 2182 if self._node == self._manifestlog.nodeconstants.nullid:
2185 2183 self._data = treemanifest(self._manifestlog.nodeconstants)
2186 2184 # TODO accessing non-public API
2187 2185 elif store._treeondisk:
2188 2186 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2189 2187
2190 2188 def gettext():
2191 2189 return store.revision(self._node)
2192 2190
2193 2191 def readsubtree(dir, subm):
2194 2192 # Set verify to False since we need to be able to create
2195 2193 # subtrees for trees that don't exist on disk.
2196 2194 return self._manifestlog.get(dir, subm, verify=False).read()
2197 2195
2198 2196 m.read(gettext, readsubtree)
2199 2197 m.setnode(self._node)
2200 2198 self._data = m
2201 2199 else:
2202 2200 if self._node in store.fulltextcache:
2203 2201 text = pycompat.bytestr(store.fulltextcache[self._node])
2204 2202 else:
2205 2203 text = store.revision(self._node)
2206 2204 arraytext = bytearray(text)
2207 2205 store.fulltextcache[self._node] = arraytext
2208 2206 self._data = treemanifest(
2209 2207 self._manifestlog.nodeconstants, dir=self._dir, text=text
2210 2208 )
2211 2209
2212 2210 return self._data
2213 2211
2214 2212 def node(self):
2215 2213 return self._node
2216 2214
2217 2215 def copy(self):
2218 2216 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2219 2217 memmf._treemanifest = self.read().copy()
2220 2218 return memmf
2221 2219
2222 2220 @propertycache
2223 2221 def parents(self):
2224 2222 return self._storage().parents(self._node)
2225 2223
2226 2224 def readdelta(self, shallow=False):
2227 2225 """Returns a manifest containing just the entries that are present
2228 2226 in this manifest, but not in its p1 manifest. This is efficient to read
2229 2227 if the revlog delta is already p1.
2230 2228
2231 2229 If `shallow` is True, this will read the delta for this directory,
2232 2230 without recursively reading subdirectory manifests. Instead, any
2233 2231 subdirectory entry will be reported as it appears in the manifest, i.e.
2234 2232 the subdirectory will be reported among files and distinguished only by
2235 2233 its 't' flag.
2236 2234 """
2237 2235 store = self._storage()
2238 2236 if shallow:
2239 2237 r = store.rev(self._node)
2240 2238 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2241 2239 return manifestdict(store.nodeconstants.nodelen, d)
2242 2240 else:
2243 2241 # Need to perform a slow delta
2244 2242 r0 = store.deltaparent(store.rev(self._node))
2245 2243 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2246 2244 m1 = self.read()
2247 2245 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2248 for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
2246 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2249 2247 if n1:
2250 2248 md[f] = n1
2251 2249 if fl1:
2252 2250 md.setflag(f, fl1)
2253 2251 return md
2254 2252
2255 2253 def readfast(self, shallow=False):
2256 2254 """Calls either readdelta or read, based on which would be less work.
2257 2255 readdelta is called if the delta is against the p1, and therefore can be
2258 2256 read quickly.
2259 2257
2260 2258 If `shallow` is True, it only returns the entries from this manifest,
2261 2259 and not any submanifests.
2262 2260 """
2263 2261 store = self._storage()
2264 2262 r = store.rev(self._node)
2265 2263 deltaparent = store.deltaparent(r)
2266 2264 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2267 2265 return self.readdelta(shallow=shallow)
2268 2266
2269 2267 if shallow:
2270 2268 return manifestdict(
2271 2269 store.nodeconstants.nodelen, store.revision(self._node)
2272 2270 )
2273 2271 else:
2274 2272 return self.read()
2275 2273
2276 2274 def find(self, key):
2277 2275 return self.read().find(key)
2278 2276
2279 2277
2280 2278 class excludeddir(treemanifest):
2281 2279 """Stand-in for a directory that is excluded from the repository.
2282 2280
2283 2281 With narrowing active on a repository that uses treemanifests,
2284 2282 some of the directory revlogs will be excluded from the resulting
2285 2283 clone. This is a huge storage win for clients, but means we need
2286 2284 some sort of pseudo-manifest to surface to internals so we can
2287 2285 detect a merge conflict outside the narrowspec. That's what this
2288 2286 class is: it stands in for a directory whose node is known, but
2289 2287 whose contents are unknown.
2290 2288 """
2291 2289
2292 2290 def __init__(self, nodeconstants, dir, node):
2293 2291 super(excludeddir, self).__init__(nodeconstants, dir)
2294 2292 self._node = node
2295 2293 # Add an empty file, which will be included by iterators and such,
2296 2294 # appearing as the directory itself (i.e. something like "dir/")
2297 2295 self._files[b''] = node
2298 2296 self._flags[b''] = b't'
2299 2297
2300 2298 # Manifests outside the narrowspec should never be modified, so avoid
2301 2299 # copying. This makes a noticeable difference when there are very many
2302 2300 # directories outside the narrowspec. Also, it makes sense for the copy to
2303 2301 # be of the same type as the original, which would not happen with the
2304 2302 # super type's copy().
2305 2303 def copy(self):
2306 2304 return self
2307 2305
2308 2306
2309 2307 class excludeddirmanifestctx(treemanifestctx):
2310 2308 """context wrapper for excludeddir - see that docstring for rationale"""
2311 2309
2312 2310 def __init__(self, nodeconstants, dir, node):
2313 2311 self.nodeconstants = nodeconstants
2314 2312 self._dir = dir
2315 2313 self._node = node
2316 2314
2317 2315 def read(self):
2318 2316 return excludeddir(self.nodeconstants, self._dir, self._node)
2319 2317
2320 2318 def readfast(self, shallow=False):
2321 2319 # special version of readfast since we don't have underlying storage
2322 2320 return self.read()
2323 2321
2324 2322 def write(self, *args):
2325 2323 raise error.ProgrammingError(
2326 2324 b'attempt to write manifest from excluded dir %s' % self._dir
2327 2325 )
2328 2326
2329 2327
2330 2328 class excludedmanifestrevlog(manifestrevlog):
2331 2329 """Stand-in for excluded treemanifest revlogs.
2332 2330
2333 2331 When narrowing is active on a treemanifest repository, we'll have
2334 2332 references to directories we can't see due to the revlog being
2335 2333 skipped. This class exists to conform to the manifestrevlog
2336 2334 interface for those directories and proactively prevent writes to
2337 2335 outside the narrowspec.
2338 2336 """
2339 2337
2340 2338 def __init__(self, nodeconstants, dir):
2341 2339 self.nodeconstants = nodeconstants
2342 2340 self._dir = dir
2343 2341
2344 2342 def __len__(self):
2345 2343 raise error.ProgrammingError(
2346 2344 b'attempt to get length of excluded dir %s' % self._dir
2347 2345 )
2348 2346
2349 2347 def rev(self, node):
2350 2348 raise error.ProgrammingError(
2351 2349 b'attempt to get rev from excluded dir %s' % self._dir
2352 2350 )
2353 2351
2354 2352 def linkrev(self, node):
2355 2353 raise error.ProgrammingError(
2356 2354 b'attempt to get linkrev from excluded dir %s' % self._dir
2357 2355 )
2358 2356
2359 2357 def node(self, rev):
2360 2358 raise error.ProgrammingError(
2361 2359 b'attempt to get node from excluded dir %s' % self._dir
2362 2360 )
2363 2361
2364 2362 def add(self, *args, **kwargs):
2365 2363 # We should never write entries in dirlogs outside the narrow clone.
2366 2364 # However, the method still gets called from writesubtree() in
2367 2365 # _addtree(), so we need to handle it. We should possibly make that
2368 2366 # avoid calling add() with a clean manifest (_dirty is always False
2369 2367 # in excludeddir instances).
2370 2368 pass
General Comments 0
You need to be logged in to leave comments. Login now