##// END OF EJS Templates
manifest: fix yet another 20-byte-hash assumption...
Augie Fackler -
r45195:75f1197d default
parent child Browse files
Show More
@@ -1,2281 +1,2286 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from .pycompat import getattr
23 23 from . import (
24 24 encoding,
25 25 error,
26 26 match as matchmod,
27 27 mdiff,
28 28 pathutil,
29 29 policy,
30 30 pycompat,
31 31 revlog,
32 32 util,
33 33 )
34 34 from .interfaces import (
35 35 repository,
36 36 util as interfaceutil,
37 37 )
38 38
39 39 parsers = policy.importmod('parsers')
40 40 propertycache = util.propertycache
41 41
42 42 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
43 43 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
44 44
45 45
46 46 def _parse(data):
47 47 # This method does a little bit of excessive-looking
48 48 # precondition checking. This is so that the behavior of this
49 49 # class exactly matches its C counterpart to try and help
50 50 # prevent surprise breakage for anyone that develops against
51 51 # the pure version.
52 52 if data and data[-1:] != b'\n':
53 53 raise ValueError(b'Manifest did not end in a newline.')
54 54 prev = None
55 55 for l in data.splitlines():
56 56 if prev is not None and prev > l:
57 57 raise ValueError(b'Manifest lines not in sorted order.')
58 58 prev = l
59 59 f, n = l.split(b'\0')
60 60 if len(n) > 40:
61 61 yield f, bin(n[:40]), n[40:]
62 62 else:
63 63 yield f, bin(n), b''
64 64
65 65
66 66 def _text(it):
67 67 files = []
68 68 lines = []
69 69 for f, n, fl in it:
70 70 files.append(f)
71 71 # if this is changed to support newlines in filenames,
72 72 # be sure to check the templates/ dir again (especially *-raw.tmpl)
73 73 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
74 74
75 75 _checkforbidden(files)
76 76 return b''.join(lines)
77 77
78 78
79 79 class lazymanifestiter(object):
80 80 def __init__(self, lm):
81 81 self.pos = 0
82 82 self.lm = lm
83 83
84 84 def __iter__(self):
85 85 return self
86 86
87 87 def next(self):
88 88 try:
89 89 data, pos = self.lm._get(self.pos)
90 90 except IndexError:
91 91 raise StopIteration
92 92 if pos == -1:
93 93 self.pos += 1
94 94 return data[0]
95 95 self.pos += 1
96 96 zeropos = data.find(b'\x00', pos)
97 97 return data[pos:zeropos]
98 98
99 99 __next__ = next
100 100
101 101
102 102 class lazymanifestiterentries(object):
103 103 def __init__(self, lm):
104 104 self.lm = lm
105 105 self.pos = 0
106 106
107 107 def __iter__(self):
108 108 return self
109 109
110 110 def next(self):
111 111 try:
112 112 data, pos = self.lm._get(self.pos)
113 113 except IndexError:
114 114 raise StopIteration
115 115 if pos == -1:
116 116 self.pos += 1
117 117 return data
118 118 zeropos = data.find(b'\x00', pos)
119 119 hashval = unhexlify(data, self.lm.extrainfo[self.pos], zeropos + 1, 40)
120 120 flags = self.lm._getflags(data, self.pos, zeropos)
121 121 self.pos += 1
122 122 return (data[pos:zeropos], hashval, flags)
123 123
124 124 __next__ = next
125 125
126 126
127 127 def unhexlify(data, extra, pos, length):
128 128 s = bin(data[pos : pos + length])
129 129 if extra:
130 130 s += chr(extra & 0xFF)
131 131 return s
132 132
133 133
134 134 def _cmp(a, b):
135 135 return (a > b) - (a < b)
136 136
137 137
138 138 class _lazymanifest(object):
139 139 """A pure python manifest backed by a byte string. It is supplimented with
140 140 internal lists as it is modified, until it is compacted back to a pure byte
141 141 string.
142 142
143 143 ``data`` is the initial manifest data.
144 144
145 145 ``positions`` is a list of offsets, one per manifest entry. Positive
146 146 values are offsets into ``data``, negative values are offsets into the
147 147 ``extradata`` list. When an entry is removed, its entry is dropped from
148 148 ``positions``. The values are encoded such that when walking the list and
149 149 indexing into ``data`` or ``extradata`` as appropriate, the entries are
150 150 sorted by filename.
151 151
152 152 ``extradata`` is a list of (key, hash, flags) for entries that were added or
153 153 modified since the manifest was created or compacted.
154 154 """
155 155
156 156 def __init__(
157 157 self,
158 158 data,
159 159 positions=None,
160 160 extrainfo=None,
161 161 extradata=None,
162 162 hasremovals=False,
163 163 ):
164 164 if positions is None:
165 165 self.positions = self.findlines(data)
166 166 self.extrainfo = [0] * len(self.positions)
167 167 self.data = data
168 168 self.extradata = []
169 169 self.hasremovals = False
170 170 else:
171 171 self.positions = positions[:]
172 172 self.extrainfo = extrainfo[:]
173 173 self.extradata = extradata[:]
174 174 self.data = data
175 175 self.hasremovals = hasremovals
176 176
177 177 def findlines(self, data):
178 178 if not data:
179 179 return []
180 180 pos = data.find(b"\n")
181 181 if pos == -1 or data[-1:] != b'\n':
182 182 raise ValueError(b"Manifest did not end in a newline.")
183 183 positions = [0]
184 184 prev = data[: data.find(b'\x00')]
185 185 while pos < len(data) - 1 and pos != -1:
186 186 positions.append(pos + 1)
187 187 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
188 188 if nexts < prev:
189 189 raise ValueError(b"Manifest lines not in sorted order.")
190 190 prev = nexts
191 191 pos = data.find(b"\n", pos + 1)
192 192 return positions
193 193
194 194 def _get(self, index):
195 195 # get the position encoded in pos:
196 196 # positive number is an index in 'data'
197 197 # negative number is in extrapieces
198 198 pos = self.positions[index]
199 199 if pos >= 0:
200 200 return self.data, pos
201 201 return self.extradata[-pos - 1], -1
202 202
203 203 def _getkey(self, pos):
204 204 if pos >= 0:
205 205 return self.data[pos : self.data.find(b'\x00', pos + 1)]
206 206 return self.extradata[-pos - 1][0]
207 207
208 208 def bsearch(self, key):
209 209 first = 0
210 210 last = len(self.positions) - 1
211 211
212 212 while first <= last:
213 213 midpoint = (first + last) // 2
214 214 nextpos = self.positions[midpoint]
215 215 candidate = self._getkey(nextpos)
216 216 r = _cmp(key, candidate)
217 217 if r == 0:
218 218 return midpoint
219 219 else:
220 220 if r < 0:
221 221 last = midpoint - 1
222 222 else:
223 223 first = midpoint + 1
224 224 return -1
225 225
226 226 def bsearch2(self, key):
227 227 # same as the above, but will always return the position
228 228 # done for performance reasons
229 229 first = 0
230 230 last = len(self.positions) - 1
231 231
232 232 while first <= last:
233 233 midpoint = (first + last) // 2
234 234 nextpos = self.positions[midpoint]
235 235 candidate = self._getkey(nextpos)
236 236 r = _cmp(key, candidate)
237 237 if r == 0:
238 238 return (midpoint, True)
239 239 else:
240 240 if r < 0:
241 241 last = midpoint - 1
242 242 else:
243 243 first = midpoint + 1
244 244 return (first, False)
245 245
246 246 def __contains__(self, key):
247 247 return self.bsearch(key) != -1
248 248
249 249 def _getflags(self, data, needle, pos):
250 250 start = pos + 41
251 251 end = data.find(b"\n", start)
252 252 if end == -1:
253 253 end = len(data) - 1
254 254 if start == end:
255 255 return b''
256 256 return self.data[start:end]
257 257
258 258 def __getitem__(self, key):
259 259 if not isinstance(key, bytes):
260 260 raise TypeError(b"getitem: manifest keys must be a bytes.")
261 261 needle = self.bsearch(key)
262 262 if needle == -1:
263 263 raise KeyError
264 264 data, pos = self._get(needle)
265 265 if pos == -1:
266 266 return (data[1], data[2])
267 267 zeropos = data.find(b'\x00', pos)
268 268 assert 0 <= needle <= len(self.positions)
269 269 assert len(self.extrainfo) == len(self.positions)
270 270 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
271 271 flags = self._getflags(data, needle, zeropos)
272 272 return (hashval, flags)
273 273
274 274 def __delitem__(self, key):
275 275 needle, found = self.bsearch2(key)
276 276 if not found:
277 277 raise KeyError
278 278 cur = self.positions[needle]
279 279 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
280 280 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
281 281 if cur >= 0:
282 282 # This does NOT unsort the list as far as the search functions are
283 283 # concerned, as they only examine lines mapped by self.positions.
284 284 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
285 285 self.hasremovals = True
286 286
287 287 def __setitem__(self, key, value):
288 288 if not isinstance(key, bytes):
289 289 raise TypeError(b"setitem: manifest keys must be a byte string.")
290 290 if not isinstance(value, tuple) or len(value) != 2:
291 291 raise TypeError(
292 292 b"Manifest values must be a tuple of (node, flags)."
293 293 )
294 294 hashval = value[0]
295 if not isinstance(hashval, bytes) or not 20 <= len(hashval) <= 22:
296 raise TypeError(b"node must be a 20-byte byte string")
295 # hashes are either 20 or 32 bytes (sha1 or its replacement),
296 # and allow one extra byte taht won't be persisted to disk but
297 # is sometimes used in memory.
298 if not isinstance(hashval, bytes) or not (
299 20 <= len(hashval) <= 22 or 32 <= len(hashval) <= 34
300 ):
301 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
297 302 flags = value[1]
298 303 if len(hashval) == 22:
299 304 hashval = hashval[:-1]
300 305 if not isinstance(flags, bytes) or len(flags) > 1:
301 306 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
302 307 needle, found = self.bsearch2(key)
303 308 if found:
304 309 # put the item
305 310 pos = self.positions[needle]
306 311 if pos < 0:
307 312 self.extradata[-pos - 1] = (key, hashval, value[1])
308 313 else:
309 314 # just don't bother
310 315 self.extradata.append((key, hashval, value[1]))
311 316 self.positions[needle] = -len(self.extradata)
312 317 else:
313 318 # not found, put it in with extra positions
314 319 self.extradata.append((key, hashval, value[1]))
315 320 self.positions = (
316 321 self.positions[:needle]
317 322 + [-len(self.extradata)]
318 323 + self.positions[needle:]
319 324 )
320 325 self.extrainfo = (
321 326 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
322 327 )
323 328
324 329 def copy(self):
325 330 # XXX call _compact like in C?
326 331 return _lazymanifest(
327 332 self.data,
328 333 self.positions,
329 334 self.extrainfo,
330 335 self.extradata,
331 336 self.hasremovals,
332 337 )
333 338
334 339 def _compact(self):
335 340 # hopefully not called TOO often
336 341 if len(self.extradata) == 0 and not self.hasremovals:
337 342 return
338 343 l = []
339 344 i = 0
340 345 offset = 0
341 346 self.extrainfo = [0] * len(self.positions)
342 347 while i < len(self.positions):
343 348 if self.positions[i] >= 0:
344 349 cur = self.positions[i]
345 350 last_cut = cur
346 351
347 352 # Collect all contiguous entries in the buffer at the current
348 353 # offset, breaking out only for added/modified items held in
349 354 # extradata, or a deleted line prior to the next position.
350 355 while True:
351 356 self.positions[i] = offset
352 357 i += 1
353 358 if i == len(self.positions) or self.positions[i] < 0:
354 359 break
355 360
356 361 # A removed file has no positions[] entry, but does have an
357 362 # overwritten first byte. Break out and find the end of the
358 363 # current good entry/entries if there is a removed file
359 364 # before the next position.
360 365 if (
361 366 self.hasremovals
362 367 and self.data.find(b'\n\x00', cur, self.positions[i])
363 368 != -1
364 369 ):
365 370 break
366 371
367 372 offset += self.positions[i] - cur
368 373 cur = self.positions[i]
369 374 end_cut = self.data.find(b'\n', cur)
370 375 if end_cut != -1:
371 376 end_cut += 1
372 377 offset += end_cut - cur
373 378 l.append(self.data[last_cut:end_cut])
374 379 else:
375 380 while i < len(self.positions) and self.positions[i] < 0:
376 381 cur = self.positions[i]
377 382 t = self.extradata[-cur - 1]
378 383 l.append(self._pack(t))
379 384 self.positions[i] = offset
380 385 # Hashes are either 20 bytes (old sha1s) or 32
381 386 # bytes (new non-sha1).
382 387 hlen = 20
383 388 if len(t[1]) > 25:
384 389 hlen = 32
385 390 if len(t[1]) > hlen:
386 391 self.extrainfo[i] = ord(t[1][hlen + 1])
387 392 offset += len(l[-1])
388 393 i += 1
389 394 self.data = b''.join(l)
390 395 self.hasremovals = False
391 396 self.extradata = []
392 397
393 398 def _pack(self, d):
394 399 n = d[1]
395 400 if len(n) == 21 or len(n) == 33:
396 401 n = n[:-1]
397 402 assert len(n) == 20 or len(n) == 32
398 403 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
399 404
400 405 def text(self):
401 406 self._compact()
402 407 return self.data
403 408
404 409 def diff(self, m2, clean=False):
405 410 '''Finds changes between the current manifest and m2.'''
406 411 # XXX think whether efficiency matters here
407 412 diff = {}
408 413
409 414 for fn, e1, flags in self.iterentries():
410 415 if fn not in m2:
411 416 diff[fn] = (e1, flags), (None, b'')
412 417 else:
413 418 e2 = m2[fn]
414 419 if (e1, flags) != e2:
415 420 diff[fn] = (e1, flags), e2
416 421 elif clean:
417 422 diff[fn] = None
418 423
419 424 for fn, e2, flags in m2.iterentries():
420 425 if fn not in self:
421 426 diff[fn] = (None, b''), (e2, flags)
422 427
423 428 return diff
424 429
425 430 def iterentries(self):
426 431 return lazymanifestiterentries(self)
427 432
428 433 def iterkeys(self):
429 434 return lazymanifestiter(self)
430 435
431 436 def __iter__(self):
432 437 return lazymanifestiter(self)
433 438
434 439 def __len__(self):
435 440 return len(self.positions)
436 441
437 442 def filtercopy(self, filterfn):
438 443 # XXX should be optimized
439 444 c = _lazymanifest(b'')
440 445 for f, n, fl in self.iterentries():
441 446 if filterfn(f):
442 447 c[f] = n, fl
443 448 return c
444 449
445 450
446 451 try:
447 452 _lazymanifest = parsers.lazymanifest
448 453 except AttributeError:
449 454 pass
450 455
451 456
452 457 @interfaceutil.implementer(repository.imanifestdict)
453 458 class manifestdict(object):
454 459 def __init__(self, data=b''):
455 460 self._lm = _lazymanifest(data)
456 461
457 462 def __getitem__(self, key):
458 463 return self._lm[key][0]
459 464
460 465 def find(self, key):
461 466 return self._lm[key]
462 467
463 468 def __len__(self):
464 469 return len(self._lm)
465 470
466 471 def __nonzero__(self):
467 472 # nonzero is covered by the __len__ function, but implementing it here
468 473 # makes it easier for extensions to override.
469 474 return len(self._lm) != 0
470 475
471 476 __bool__ = __nonzero__
472 477
473 478 def __setitem__(self, key, node):
474 479 self._lm[key] = node, self.flags(key)
475 480
476 481 def __contains__(self, key):
477 482 if key is None:
478 483 return False
479 484 return key in self._lm
480 485
481 486 def __delitem__(self, key):
482 487 del self._lm[key]
483 488
484 489 def __iter__(self):
485 490 return self._lm.__iter__()
486 491
487 492 def iterkeys(self):
488 493 return self._lm.iterkeys()
489 494
490 495 def keys(self):
491 496 return list(self.iterkeys())
492 497
493 498 def filesnotin(self, m2, match=None):
494 499 '''Set of files in this manifest that are not in the other'''
495 500 if match is not None:
496 501 match = matchmod.badmatch(match, lambda path, msg: None)
497 502 sm2 = set(m2.walk(match))
498 503 return {f for f in self.walk(match) if f not in sm2}
499 504 return {f for f in self if f not in m2}
500 505
501 506 @propertycache
502 507 def _dirs(self):
503 508 return pathutil.dirs(self)
504 509
505 510 def dirs(self):
506 511 return self._dirs
507 512
508 513 def hasdir(self, dir):
509 514 return dir in self._dirs
510 515
511 516 def _filesfastpath(self, match):
512 517 '''Checks whether we can correctly and quickly iterate over matcher
513 518 files instead of over manifest files.'''
514 519 files = match.files()
515 520 return len(files) < 100 and (
516 521 match.isexact()
517 522 or (match.prefix() and all(fn in self for fn in files))
518 523 )
519 524
520 525 def walk(self, match):
521 526 '''Generates matching file names.
522 527
523 528 Equivalent to manifest.matches(match).iterkeys(), but without creating
524 529 an entirely new manifest.
525 530
526 531 It also reports nonexistent files by marking them bad with match.bad().
527 532 '''
528 533 if match.always():
529 534 for f in iter(self):
530 535 yield f
531 536 return
532 537
533 538 fset = set(match.files())
534 539
535 540 # avoid the entire walk if we're only looking for specific files
536 541 if self._filesfastpath(match):
537 542 for fn in sorted(fset):
538 543 if fn in self:
539 544 yield fn
540 545 return
541 546
542 547 for fn in self:
543 548 if fn in fset:
544 549 # specified pattern is the exact name
545 550 fset.remove(fn)
546 551 if match(fn):
547 552 yield fn
548 553
549 554 # for dirstate.walk, files=[''] means "walk the whole tree".
550 555 # follow that here, too
551 556 fset.discard(b'')
552 557
553 558 for fn in sorted(fset):
554 559 if not self.hasdir(fn):
555 560 match.bad(fn, None)
556 561
557 562 def _matches(self, match):
558 563 '''generate a new manifest filtered by the match argument'''
559 564 if match.always():
560 565 return self.copy()
561 566
562 567 if self._filesfastpath(match):
563 568 m = manifestdict()
564 569 lm = self._lm
565 570 for fn in match.files():
566 571 if fn in lm:
567 572 m._lm[fn] = lm[fn]
568 573 return m
569 574
570 575 m = manifestdict()
571 576 m._lm = self._lm.filtercopy(match)
572 577 return m
573 578
574 579 def diff(self, m2, match=None, clean=False):
575 580 '''Finds changes between the current manifest and m2.
576 581
577 582 Args:
578 583 m2: the manifest to which this manifest should be compared.
579 584 clean: if true, include files unchanged between these manifests
580 585 with a None value in the returned dictionary.
581 586
582 587 The result is returned as a dict with filename as key and
583 588 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
584 589 nodeid in the current/other manifest and fl1/fl2 is the flag
585 590 in the current/other manifest. Where the file does not exist,
586 591 the nodeid will be None and the flags will be the empty
587 592 string.
588 593 '''
589 594 if match:
590 595 m1 = self._matches(match)
591 596 m2 = m2._matches(match)
592 597 return m1.diff(m2, clean=clean)
593 598 return self._lm.diff(m2._lm, clean)
594 599
595 600 def setflag(self, key, flag):
596 601 self._lm[key] = self[key], flag
597 602
598 603 def get(self, key, default=None):
599 604 try:
600 605 return self._lm[key][0]
601 606 except KeyError:
602 607 return default
603 608
604 609 def flags(self, key):
605 610 try:
606 611 return self._lm[key][1]
607 612 except KeyError:
608 613 return b''
609 614
610 615 def copy(self):
611 616 c = manifestdict()
612 617 c._lm = self._lm.copy()
613 618 return c
614 619
615 620 def items(self):
616 621 return (x[:2] for x in self._lm.iterentries())
617 622
618 623 def iteritems(self):
619 624 return (x[:2] for x in self._lm.iterentries())
620 625
621 626 def iterentries(self):
622 627 return self._lm.iterentries()
623 628
624 629 def text(self):
625 630 # most likely uses native version
626 631 return self._lm.text()
627 632
628 633 def fastdelta(self, base, changes):
629 634 """Given a base manifest text as a bytearray and a list of changes
630 635 relative to that text, compute a delta that can be used by revlog.
631 636 """
632 637 delta = []
633 638 dstart = None
634 639 dend = None
635 640 dline = [b""]
636 641 start = 0
637 642 # zero copy representation of base as a buffer
638 643 addbuf = util.buffer(base)
639 644
640 645 changes = list(changes)
641 646 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
642 647 # start with a readonly loop that finds the offset of
643 648 # each line and creates the deltas
644 649 for f, todelete in changes:
645 650 # bs will either be the index of the item or the insert point
646 651 start, end = _msearch(addbuf, f, start)
647 652 if not todelete:
648 653 h, fl = self._lm[f]
649 654 l = b"%s\0%s%s\n" % (f, hex(h), fl)
650 655 else:
651 656 if start == end:
652 657 # item we want to delete was not found, error out
653 658 raise AssertionError(
654 659 _(b"failed to remove %s from manifest") % f
655 660 )
656 661 l = b""
657 662 if dstart is not None and dstart <= start and dend >= start:
658 663 if dend < end:
659 664 dend = end
660 665 if l:
661 666 dline.append(l)
662 667 else:
663 668 if dstart is not None:
664 669 delta.append([dstart, dend, b"".join(dline)])
665 670 dstart = start
666 671 dend = end
667 672 dline = [l]
668 673
669 674 if dstart is not None:
670 675 delta.append([dstart, dend, b"".join(dline)])
671 676 # apply the delta to the base, and get a delta for addrevision
672 677 deltatext, arraytext = _addlistdelta(base, delta)
673 678 else:
674 679 # For large changes, it's much cheaper to just build the text and
675 680 # diff it.
676 681 arraytext = bytearray(self.text())
677 682 deltatext = mdiff.textdiff(
678 683 util.buffer(base), util.buffer(arraytext)
679 684 )
680 685
681 686 return arraytext, deltatext
682 687
683 688
684 689 def _msearch(m, s, lo=0, hi=None):
685 690 '''return a tuple (start, end) that says where to find s within m.
686 691
687 692 If the string is found m[start:end] are the line containing
688 693 that string. If start == end the string was not found and
689 694 they indicate the proper sorted insertion point.
690 695
691 696 m should be a buffer, a memoryview or a byte string.
692 697 s is a byte string'''
693 698
694 699 def advance(i, c):
695 700 while i < lenm and m[i : i + 1] != c:
696 701 i += 1
697 702 return i
698 703
699 704 if not s:
700 705 return (lo, lo)
701 706 lenm = len(m)
702 707 if not hi:
703 708 hi = lenm
704 709 while lo < hi:
705 710 mid = (lo + hi) // 2
706 711 start = mid
707 712 while start > 0 and m[start - 1 : start] != b'\n':
708 713 start -= 1
709 714 end = advance(start, b'\0')
710 715 if bytes(m[start:end]) < s:
711 716 # we know that after the null there are 40 bytes of sha1
712 717 # this translates to the bisect lo = mid + 1
713 718 lo = advance(end + 40, b'\n') + 1
714 719 else:
715 720 # this translates to the bisect hi = mid
716 721 hi = start
717 722 end = advance(lo, b'\0')
718 723 found = m[lo:end]
719 724 if s == found:
720 725 # we know that after the null there are 40 bytes of sha1
721 726 end = advance(end + 40, b'\n')
722 727 return (lo, end + 1)
723 728 else:
724 729 return (lo, lo)
725 730
726 731
727 732 def _checkforbidden(l):
728 733 """Check filenames for illegal characters."""
729 734 for f in l:
730 735 if b'\n' in f or b'\r' in f:
731 736 raise error.StorageError(
732 737 _(b"'\\n' and '\\r' disallowed in filenames: %r")
733 738 % pycompat.bytestr(f)
734 739 )
735 740
736 741
737 742 # apply the changes collected during the bisect loop to our addlist
738 743 # return a delta suitable for addrevision
739 744 def _addlistdelta(addlist, x):
740 745 # for large addlist arrays, building a new array is cheaper
741 746 # than repeatedly modifying the existing one
742 747 currentposition = 0
743 748 newaddlist = bytearray()
744 749
745 750 for start, end, content in x:
746 751 newaddlist += addlist[currentposition:start]
747 752 if content:
748 753 newaddlist += bytearray(content)
749 754
750 755 currentposition = end
751 756
752 757 newaddlist += addlist[currentposition:]
753 758
754 759 deltatext = b"".join(
755 760 struct.pack(b">lll", start, end, len(content)) + content
756 761 for start, end, content in x
757 762 )
758 763 return deltatext, newaddlist
759 764
760 765
761 766 def _splittopdir(f):
762 767 if b'/' in f:
763 768 dir, subpath = f.split(b'/', 1)
764 769 return dir + b'/', subpath
765 770 else:
766 771 return b'', f
767 772
768 773
769 774 _noop = lambda s: None
770 775
771 776
772 777 @interfaceutil.implementer(repository.imanifestdict)
773 778 class treemanifest(object):
774 779 def __init__(self, dir=b'', text=b''):
775 780 self._dir = dir
776 781 self._node = nullid
777 782 self._loadfunc = _noop
778 783 self._copyfunc = _noop
779 784 self._dirty = False
780 785 self._dirs = {}
781 786 self._lazydirs = {}
782 787 # Using _lazymanifest here is a little slower than plain old dicts
783 788 self._files = {}
784 789 self._flags = {}
785 790 if text:
786 791
787 792 def readsubtree(subdir, subm):
788 793 raise AssertionError(
789 794 b'treemanifest constructor only accepts flat manifests'
790 795 )
791 796
792 797 self.parse(text, readsubtree)
793 798 self._dirty = True # Mark flat manifest dirty after parsing
794 799
795 800 def _subpath(self, path):
796 801 return self._dir + path
797 802
798 803 def _loadalllazy(self):
799 804 selfdirs = self._dirs
800 805 for d, (path, node, readsubtree, docopy) in pycompat.iteritems(
801 806 self._lazydirs
802 807 ):
803 808 if docopy:
804 809 selfdirs[d] = readsubtree(path, node).copy()
805 810 else:
806 811 selfdirs[d] = readsubtree(path, node)
807 812 self._lazydirs = {}
808 813
809 814 def _loadlazy(self, d):
810 815 v = self._lazydirs.get(d)
811 816 if v:
812 817 path, node, readsubtree, docopy = v
813 818 if docopy:
814 819 self._dirs[d] = readsubtree(path, node).copy()
815 820 else:
816 821 self._dirs[d] = readsubtree(path, node)
817 822 del self._lazydirs[d]
818 823
819 824 def _loadchildrensetlazy(self, visit):
820 825 if not visit:
821 826 return None
822 827 if visit == b'all' or visit == b'this':
823 828 self._loadalllazy()
824 829 return None
825 830
826 831 loadlazy = self._loadlazy
827 832 for k in visit:
828 833 loadlazy(k + b'/')
829 834 return visit
830 835
831 836 def _loaddifflazy(self, t1, t2):
832 837 """load items in t1 and t2 if they're needed for diffing.
833 838
834 839 The criteria currently is:
835 840 - if it's not present in _lazydirs in either t1 or t2, load it in the
836 841 other (it may already be loaded or it may not exist, doesn't matter)
837 842 - if it's present in _lazydirs in both, compare the nodeid; if it
838 843 differs, load it in both
839 844 """
840 845 toloadlazy = []
841 846 for d, v1 in pycompat.iteritems(t1._lazydirs):
842 847 v2 = t2._lazydirs.get(d)
843 848 if not v2 or v2[1] != v1[1]:
844 849 toloadlazy.append(d)
845 850 for d, v1 in pycompat.iteritems(t2._lazydirs):
846 851 if d not in t1._lazydirs:
847 852 toloadlazy.append(d)
848 853
849 854 for d in toloadlazy:
850 855 t1._loadlazy(d)
851 856 t2._loadlazy(d)
852 857
853 858 def __len__(self):
854 859 self._load()
855 860 size = len(self._files)
856 861 self._loadalllazy()
857 862 for m in self._dirs.values():
858 863 size += m.__len__()
859 864 return size
860 865
861 866 def __nonzero__(self):
862 867 # Faster than "__len() != 0" since it avoids loading sub-manifests
863 868 return not self._isempty()
864 869
865 870 __bool__ = __nonzero__
866 871
867 872 def _isempty(self):
868 873 self._load() # for consistency; already loaded by all callers
869 874 # See if we can skip loading everything.
870 875 if self._files or (
871 876 self._dirs and any(not m._isempty() for m in self._dirs.values())
872 877 ):
873 878 return False
874 879 self._loadalllazy()
875 880 return not self._dirs or all(m._isempty() for m in self._dirs.values())
876 881
877 882 @encoding.strmethod
878 883 def __repr__(self):
879 884 return (
880 885 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
881 886 % (
882 887 self._dir,
883 888 hex(self._node),
884 889 bool(self._loadfunc is _noop),
885 890 self._dirty,
886 891 id(self),
887 892 )
888 893 )
889 894
890 895 def dir(self):
891 896 '''The directory that this tree manifest represents, including a
892 897 trailing '/'. Empty string for the repo root directory.'''
893 898 return self._dir
894 899
895 900 def node(self):
896 901 '''This node of this instance. nullid for unsaved instances. Should
897 902 be updated when the instance is read or written from a revlog.
898 903 '''
899 904 assert not self._dirty
900 905 return self._node
901 906
902 907 def setnode(self, node):
903 908 self._node = node
904 909 self._dirty = False
905 910
906 911 def iterentries(self):
907 912 self._load()
908 913 self._loadalllazy()
909 914 for p, n in sorted(
910 915 itertools.chain(self._dirs.items(), self._files.items())
911 916 ):
912 917 if p in self._files:
913 918 yield self._subpath(p), n, self._flags.get(p, b'')
914 919 else:
915 920 for x in n.iterentries():
916 921 yield x
917 922
918 923 def items(self):
919 924 self._load()
920 925 self._loadalllazy()
921 926 for p, n in sorted(
922 927 itertools.chain(self._dirs.items(), self._files.items())
923 928 ):
924 929 if p in self._files:
925 930 yield self._subpath(p), n
926 931 else:
927 932 for f, sn in pycompat.iteritems(n):
928 933 yield f, sn
929 934
930 935 iteritems = items
931 936
932 937 def iterkeys(self):
933 938 self._load()
934 939 self._loadalllazy()
935 940 for p in sorted(itertools.chain(self._dirs, self._files)):
936 941 if p in self._files:
937 942 yield self._subpath(p)
938 943 else:
939 944 for f in self._dirs[p]:
940 945 yield f
941 946
942 947 def keys(self):
943 948 return list(self.iterkeys())
944 949
945 950 def __iter__(self):
946 951 return self.iterkeys()
947 952
948 953 def __contains__(self, f):
949 954 if f is None:
950 955 return False
951 956 self._load()
952 957 dir, subpath = _splittopdir(f)
953 958 if dir:
954 959 self._loadlazy(dir)
955 960
956 961 if dir not in self._dirs:
957 962 return False
958 963
959 964 return self._dirs[dir].__contains__(subpath)
960 965 else:
961 966 return f in self._files
962 967
963 968 def get(self, f, default=None):
964 969 self._load()
965 970 dir, subpath = _splittopdir(f)
966 971 if dir:
967 972 self._loadlazy(dir)
968 973
969 974 if dir not in self._dirs:
970 975 return default
971 976 return self._dirs[dir].get(subpath, default)
972 977 else:
973 978 return self._files.get(f, default)
974 979
975 980 def __getitem__(self, f):
976 981 self._load()
977 982 dir, subpath = _splittopdir(f)
978 983 if dir:
979 984 self._loadlazy(dir)
980 985
981 986 return self._dirs[dir].__getitem__(subpath)
982 987 else:
983 988 return self._files[f]
984 989
985 990 def flags(self, f):
986 991 self._load()
987 992 dir, subpath = _splittopdir(f)
988 993 if dir:
989 994 self._loadlazy(dir)
990 995
991 996 if dir not in self._dirs:
992 997 return b''
993 998 return self._dirs[dir].flags(subpath)
994 999 else:
995 1000 if f in self._lazydirs or f in self._dirs:
996 1001 return b''
997 1002 return self._flags.get(f, b'')
998 1003
999 1004 def find(self, f):
1000 1005 self._load()
1001 1006 dir, subpath = _splittopdir(f)
1002 1007 if dir:
1003 1008 self._loadlazy(dir)
1004 1009
1005 1010 return self._dirs[dir].find(subpath)
1006 1011 else:
1007 1012 return self._files[f], self._flags.get(f, b'')
1008 1013
1009 1014 def __delitem__(self, f):
1010 1015 self._load()
1011 1016 dir, subpath = _splittopdir(f)
1012 1017 if dir:
1013 1018 self._loadlazy(dir)
1014 1019
1015 1020 self._dirs[dir].__delitem__(subpath)
1016 1021 # If the directory is now empty, remove it
1017 1022 if self._dirs[dir]._isempty():
1018 1023 del self._dirs[dir]
1019 1024 else:
1020 1025 del self._files[f]
1021 1026 if f in self._flags:
1022 1027 del self._flags[f]
1023 1028 self._dirty = True
1024 1029
1025 1030 def __setitem__(self, f, n):
1026 1031 assert n is not None
1027 1032 self._load()
1028 1033 dir, subpath = _splittopdir(f)
1029 1034 if dir:
1030 1035 self._loadlazy(dir)
1031 1036 if dir not in self._dirs:
1032 1037 self._dirs[dir] = treemanifest(self._subpath(dir))
1033 1038 self._dirs[dir].__setitem__(subpath, n)
1034 1039 else:
1035 1040 # manifest nodes are either 20 bytes or 32 bytes,
1036 1041 # depending on the hash in use. An extra byte is
1037 1042 # occasionally used by hg, but won't ever be
1038 1043 # persisted. Trim to 21 or 33 bytes as appropriate.
1039 1044 trim = 21 if len(n) < 25 else 33
1040 1045 self._files[f] = n[:trim] # to match manifestdict's behavior
1041 1046 self._dirty = True
1042 1047
1043 1048 def _load(self):
1044 1049 if self._loadfunc is not _noop:
1045 1050 lf, self._loadfunc = self._loadfunc, _noop
1046 1051 lf(self)
1047 1052 elif self._copyfunc is not _noop:
1048 1053 cf, self._copyfunc = self._copyfunc, _noop
1049 1054 cf(self)
1050 1055
1051 1056 def setflag(self, f, flags):
1052 1057 """Set the flags (symlink, executable) for path f."""
1053 1058 self._load()
1054 1059 dir, subpath = _splittopdir(f)
1055 1060 if dir:
1056 1061 self._loadlazy(dir)
1057 1062 if dir not in self._dirs:
1058 1063 self._dirs[dir] = treemanifest(self._subpath(dir))
1059 1064 self._dirs[dir].setflag(subpath, flags)
1060 1065 else:
1061 1066 self._flags[f] = flags
1062 1067 self._dirty = True
1063 1068
1064 1069 def copy(self):
1065 1070 copy = treemanifest(self._dir)
1066 1071 copy._node = self._node
1067 1072 copy._dirty = self._dirty
1068 1073 if self._copyfunc is _noop:
1069 1074
1070 1075 def _copyfunc(s):
1071 1076 self._load()
1072 1077 s._lazydirs = {
1073 1078 d: (p, n, r, True)
1074 1079 for d, (p, n, r, c) in pycompat.iteritems(self._lazydirs)
1075 1080 }
1076 1081 sdirs = s._dirs
1077 1082 for d, v in pycompat.iteritems(self._dirs):
1078 1083 sdirs[d] = v.copy()
1079 1084 s._files = dict.copy(self._files)
1080 1085 s._flags = dict.copy(self._flags)
1081 1086
1082 1087 if self._loadfunc is _noop:
1083 1088 _copyfunc(copy)
1084 1089 else:
1085 1090 copy._copyfunc = _copyfunc
1086 1091 else:
1087 1092 copy._copyfunc = self._copyfunc
1088 1093 return copy
1089 1094
1090 1095 def filesnotin(self, m2, match=None):
1091 1096 '''Set of files in this manifest that are not in the other'''
1092 1097 if match and not match.always():
1093 1098 m1 = self._matches(match)
1094 1099 m2 = m2._matches(match)
1095 1100 return m1.filesnotin(m2)
1096 1101
1097 1102 files = set()
1098 1103
1099 1104 def _filesnotin(t1, t2):
1100 1105 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1101 1106 return
1102 1107 t1._load()
1103 1108 t2._load()
1104 1109 self._loaddifflazy(t1, t2)
1105 1110 for d, m1 in pycompat.iteritems(t1._dirs):
1106 1111 if d in t2._dirs:
1107 1112 m2 = t2._dirs[d]
1108 1113 _filesnotin(m1, m2)
1109 1114 else:
1110 1115 files.update(m1.iterkeys())
1111 1116
1112 1117 for fn in t1._files:
1113 1118 if fn not in t2._files:
1114 1119 files.add(t1._subpath(fn))
1115 1120
1116 1121 _filesnotin(self, m2)
1117 1122 return files
1118 1123
1119 1124 @propertycache
1120 1125 def _alldirs(self):
1121 1126 return pathutil.dirs(self)
1122 1127
1123 1128 def dirs(self):
1124 1129 return self._alldirs
1125 1130
1126 1131 def hasdir(self, dir):
1127 1132 self._load()
1128 1133 topdir, subdir = _splittopdir(dir)
1129 1134 if topdir:
1130 1135 self._loadlazy(topdir)
1131 1136 if topdir in self._dirs:
1132 1137 return self._dirs[topdir].hasdir(subdir)
1133 1138 return False
1134 1139 dirslash = dir + b'/'
1135 1140 return dirslash in self._dirs or dirslash in self._lazydirs
1136 1141
1137 1142 def walk(self, match):
1138 1143 '''Generates matching file names.
1139 1144
1140 1145 It also reports nonexistent files by marking them bad with match.bad().
1141 1146 '''
1142 1147 if match.always():
1143 1148 for f in iter(self):
1144 1149 yield f
1145 1150 return
1146 1151
1147 1152 fset = set(match.files())
1148 1153
1149 1154 for fn in self._walk(match):
1150 1155 if fn in fset:
1151 1156 # specified pattern is the exact name
1152 1157 fset.remove(fn)
1153 1158 yield fn
1154 1159
1155 1160 # for dirstate.walk, files=[''] means "walk the whole tree".
1156 1161 # follow that here, too
1157 1162 fset.discard(b'')
1158 1163
1159 1164 for fn in sorted(fset):
1160 1165 if not self.hasdir(fn):
1161 1166 match.bad(fn, None)
1162 1167
1163 1168 def _walk(self, match):
1164 1169 '''Recursively generates matching file names for walk().'''
1165 1170 visit = match.visitchildrenset(self._dir[:-1])
1166 1171 if not visit:
1167 1172 return
1168 1173
1169 1174 # yield this dir's files and walk its submanifests
1170 1175 self._load()
1171 1176 visit = self._loadchildrensetlazy(visit)
1172 1177 for p in sorted(list(self._dirs) + list(self._files)):
1173 1178 if p in self._files:
1174 1179 fullp = self._subpath(p)
1175 1180 if match(fullp):
1176 1181 yield fullp
1177 1182 else:
1178 1183 if not visit or p[:-1] in visit:
1179 1184 for f in self._dirs[p]._walk(match):
1180 1185 yield f
1181 1186
1182 1187 def _matches(self, match):
1183 1188 '''recursively generate a new manifest filtered by the match argument.
1184 1189 '''
1185 1190 if match.always():
1186 1191 return self.copy()
1187 1192 return self._matches_inner(match)
1188 1193
1189 1194 def _matches_inner(self, match):
1190 1195 if match.always():
1191 1196 return self.copy()
1192 1197
1193 1198 visit = match.visitchildrenset(self._dir[:-1])
1194 1199 if visit == b'all':
1195 1200 return self.copy()
1196 1201 ret = treemanifest(self._dir)
1197 1202 if not visit:
1198 1203 return ret
1199 1204
1200 1205 self._load()
1201 1206 for fn in self._files:
1202 1207 # While visitchildrenset *usually* lists only subdirs, this is
1203 1208 # actually up to the matcher and may have some files in the set().
1204 1209 # If visit == 'this', we should obviously look at the files in this
1205 1210 # directory; if visit is a set, and fn is in it, we should inspect
1206 1211 # fn (but no need to inspect things not in the set).
1207 1212 if visit != b'this' and fn not in visit:
1208 1213 continue
1209 1214 fullp = self._subpath(fn)
1210 1215 # visitchildrenset isn't perfect, we still need to call the regular
1211 1216 # matcher code to further filter results.
1212 1217 if not match(fullp):
1213 1218 continue
1214 1219 ret._files[fn] = self._files[fn]
1215 1220 if fn in self._flags:
1216 1221 ret._flags[fn] = self._flags[fn]
1217 1222
1218 1223 visit = self._loadchildrensetlazy(visit)
1219 1224 for dir, subm in pycompat.iteritems(self._dirs):
1220 1225 if visit and dir[:-1] not in visit:
1221 1226 continue
1222 1227 m = subm._matches_inner(match)
1223 1228 if not m._isempty():
1224 1229 ret._dirs[dir] = m
1225 1230
1226 1231 if not ret._isempty():
1227 1232 ret._dirty = True
1228 1233 return ret
1229 1234
1230 1235 def fastdelta(self, base, changes):
1231 1236 raise FastdeltaUnavailable()
1232 1237
1233 1238 def diff(self, m2, match=None, clean=False):
1234 1239 '''Finds changes between the current manifest and m2.
1235 1240
1236 1241 Args:
1237 1242 m2: the manifest to which this manifest should be compared.
1238 1243 clean: if true, include files unchanged between these manifests
1239 1244 with a None value in the returned dictionary.
1240 1245
1241 1246 The result is returned as a dict with filename as key and
1242 1247 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1243 1248 nodeid in the current/other manifest and fl1/fl2 is the flag
1244 1249 in the current/other manifest. Where the file does not exist,
1245 1250 the nodeid will be None and the flags will be the empty
1246 1251 string.
1247 1252 '''
1248 1253 if match and not match.always():
1249 1254 m1 = self._matches(match)
1250 1255 m2 = m2._matches(match)
1251 1256 return m1.diff(m2, clean=clean)
1252 1257 result = {}
1253 1258 emptytree = treemanifest()
1254 1259
1255 1260 def _iterativediff(t1, t2, stack):
1256 1261 """compares two tree manifests and append new tree-manifests which
1257 1262 needs to be compared to stack"""
1258 1263 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1259 1264 return
1260 1265 t1._load()
1261 1266 t2._load()
1262 1267 self._loaddifflazy(t1, t2)
1263 1268
1264 1269 for d, m1 in pycompat.iteritems(t1._dirs):
1265 1270 m2 = t2._dirs.get(d, emptytree)
1266 1271 stack.append((m1, m2))
1267 1272
1268 1273 for d, m2 in pycompat.iteritems(t2._dirs):
1269 1274 if d not in t1._dirs:
1270 1275 stack.append((emptytree, m2))
1271 1276
1272 1277 for fn, n1 in pycompat.iteritems(t1._files):
1273 1278 fl1 = t1._flags.get(fn, b'')
1274 1279 n2 = t2._files.get(fn, None)
1275 1280 fl2 = t2._flags.get(fn, b'')
1276 1281 if n1 != n2 or fl1 != fl2:
1277 1282 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1278 1283 elif clean:
1279 1284 result[t1._subpath(fn)] = None
1280 1285
1281 1286 for fn, n2 in pycompat.iteritems(t2._files):
1282 1287 if fn not in t1._files:
1283 1288 fl2 = t2._flags.get(fn, b'')
1284 1289 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1285 1290
1286 1291 stackls = []
1287 1292 _iterativediff(self, m2, stackls)
1288 1293 while stackls:
1289 1294 t1, t2 = stackls.pop()
1290 1295 # stackls is populated in the function call
1291 1296 _iterativediff(t1, t2, stackls)
1292 1297 return result
1293 1298
1294 1299 def unmodifiedsince(self, m2):
1295 1300 return not self._dirty and not m2._dirty and self._node == m2._node
1296 1301
1297 1302 def parse(self, text, readsubtree):
1298 1303 selflazy = self._lazydirs
1299 1304 subpath = self._subpath
1300 1305 for f, n, fl in _parse(text):
1301 1306 if fl == b't':
1302 1307 f = f + b'/'
1303 1308 # False below means "doesn't need to be copied" and can use the
1304 1309 # cached value from readsubtree directly.
1305 1310 selflazy[f] = (subpath(f), n, readsubtree, False)
1306 1311 elif b'/' in f:
1307 1312 # This is a flat manifest, so use __setitem__ and setflag rather
1308 1313 # than assigning directly to _files and _flags, so we can
1309 1314 # assign a path in a subdirectory, and to mark dirty (compared
1310 1315 # to nullid).
1311 1316 self[f] = n
1312 1317 if fl:
1313 1318 self.setflag(f, fl)
1314 1319 else:
1315 1320 # Assigning to _files and _flags avoids marking as dirty,
1316 1321 # and should be a little faster.
1317 1322 self._files[f] = n
1318 1323 if fl:
1319 1324 self._flags[f] = fl
1320 1325
1321 1326 def text(self):
1322 1327 """Get the full data of this manifest as a bytestring."""
1323 1328 self._load()
1324 1329 return _text(self.iterentries())
1325 1330
1326 1331 def dirtext(self):
1327 1332 """Get the full data of this directory as a bytestring. Make sure that
1328 1333 any submanifests have been written first, so their nodeids are correct.
1329 1334 """
1330 1335 self._load()
1331 1336 flags = self.flags
1332 1337 lazydirs = [
1333 1338 (d[:-1], v[1], b't') for d, v in pycompat.iteritems(self._lazydirs)
1334 1339 ]
1335 1340 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1336 1341 files = [(f, self._files[f], flags(f)) for f in self._files]
1337 1342 return _text(sorted(dirs + files + lazydirs))
1338 1343
1339 1344 def read(self, gettext, readsubtree):
1340 1345 def _load_for_read(s):
1341 1346 s.parse(gettext(), readsubtree)
1342 1347 s._dirty = False
1343 1348
1344 1349 self._loadfunc = _load_for_read
1345 1350
1346 1351 def writesubtrees(self, m1, m2, writesubtree, match):
1347 1352 self._load() # for consistency; should never have any effect here
1348 1353 m1._load()
1349 1354 m2._load()
1350 1355 emptytree = treemanifest()
1351 1356
1352 1357 def getnode(m, d):
1353 1358 ld = m._lazydirs.get(d)
1354 1359 if ld:
1355 1360 return ld[1]
1356 1361 return m._dirs.get(d, emptytree)._node
1357 1362
1358 1363 # let's skip investigating things that `match` says we do not need.
1359 1364 visit = match.visitchildrenset(self._dir[:-1])
1360 1365 visit = self._loadchildrensetlazy(visit)
1361 1366 if visit == b'this' or visit == b'all':
1362 1367 visit = None
1363 1368 for d, subm in pycompat.iteritems(self._dirs):
1364 1369 if visit and d[:-1] not in visit:
1365 1370 continue
1366 1371 subp1 = getnode(m1, d)
1367 1372 subp2 = getnode(m2, d)
1368 1373 if subp1 == nullid:
1369 1374 subp1, subp2 = subp2, subp1
1370 1375 writesubtree(subm, subp1, subp2, match)
1371 1376
1372 1377 def walksubtrees(self, matcher=None):
1373 1378 """Returns an iterator of the subtrees of this manifest, including this
1374 1379 manifest itself.
1375 1380
1376 1381 If `matcher` is provided, it only returns subtrees that match.
1377 1382 """
1378 1383 if matcher and not matcher.visitdir(self._dir[:-1]):
1379 1384 return
1380 1385 if not matcher or matcher(self._dir[:-1]):
1381 1386 yield self
1382 1387
1383 1388 self._load()
1384 1389 # OPT: use visitchildrenset to avoid loading everything.
1385 1390 self._loadalllazy()
1386 1391 for d, subm in pycompat.iteritems(self._dirs):
1387 1392 for subtree in subm.walksubtrees(matcher=matcher):
1388 1393 yield subtree
1389 1394
1390 1395
1391 1396 class manifestfulltextcache(util.lrucachedict):
1392 1397 """File-backed LRU cache for the manifest cache
1393 1398
1394 1399 File consists of entries, up to EOF:
1395 1400
1396 1401 - 20 bytes node, 4 bytes length, <length> manifest data
1397 1402
1398 1403 These are written in reverse cache order (oldest to newest).
1399 1404
1400 1405 """
1401 1406
1402 1407 _file = b'manifestfulltextcache'
1403 1408
1404 1409 def __init__(self, max):
1405 1410 super(manifestfulltextcache, self).__init__(max)
1406 1411 self._dirty = False
1407 1412 self._read = False
1408 1413 self._opener = None
1409 1414
1410 1415 def read(self):
1411 1416 if self._read or self._opener is None:
1412 1417 return
1413 1418
1414 1419 try:
1415 1420 with self._opener(self._file) as fp:
1416 1421 set = super(manifestfulltextcache, self).__setitem__
1417 1422 # ignore trailing data, this is a cache, corruption is skipped
1418 1423 while True:
1419 1424 node = fp.read(20)
1420 1425 if len(node) < 20:
1421 1426 break
1422 1427 try:
1423 1428 size = struct.unpack(b'>L', fp.read(4))[0]
1424 1429 except struct.error:
1425 1430 break
1426 1431 value = bytearray(fp.read(size))
1427 1432 if len(value) != size:
1428 1433 break
1429 1434 set(node, value)
1430 1435 except IOError:
1431 1436 # the file is allowed to be missing
1432 1437 pass
1433 1438
1434 1439 self._read = True
1435 1440 self._dirty = False
1436 1441
1437 1442 def write(self):
1438 1443 if not self._dirty or self._opener is None:
1439 1444 return
1440 1445 # rotate backwards to the first used node
1441 1446 with self._opener(
1442 1447 self._file, b'w', atomictemp=True, checkambig=True
1443 1448 ) as fp:
1444 1449 node = self._head.prev
1445 1450 while True:
1446 1451 if node.key in self._cache:
1447 1452 fp.write(node.key)
1448 1453 fp.write(struct.pack(b'>L', len(node.value)))
1449 1454 fp.write(node.value)
1450 1455 if node is self._head:
1451 1456 break
1452 1457 node = node.prev
1453 1458
1454 1459 def __len__(self):
1455 1460 if not self._read:
1456 1461 self.read()
1457 1462 return super(manifestfulltextcache, self).__len__()
1458 1463
1459 1464 def __contains__(self, k):
1460 1465 if not self._read:
1461 1466 self.read()
1462 1467 return super(manifestfulltextcache, self).__contains__(k)
1463 1468
1464 1469 def __iter__(self):
1465 1470 if not self._read:
1466 1471 self.read()
1467 1472 return super(manifestfulltextcache, self).__iter__()
1468 1473
1469 1474 def __getitem__(self, k):
1470 1475 if not self._read:
1471 1476 self.read()
1472 1477 # the cache lru order can change on read
1473 1478 setdirty = self._cache.get(k) is not self._head
1474 1479 value = super(manifestfulltextcache, self).__getitem__(k)
1475 1480 if setdirty:
1476 1481 self._dirty = True
1477 1482 return value
1478 1483
1479 1484 def __setitem__(self, k, v):
1480 1485 if not self._read:
1481 1486 self.read()
1482 1487 super(manifestfulltextcache, self).__setitem__(k, v)
1483 1488 self._dirty = True
1484 1489
1485 1490 def __delitem__(self, k):
1486 1491 if not self._read:
1487 1492 self.read()
1488 1493 super(manifestfulltextcache, self).__delitem__(k)
1489 1494 self._dirty = True
1490 1495
1491 1496 def get(self, k, default=None):
1492 1497 if not self._read:
1493 1498 self.read()
1494 1499 return super(manifestfulltextcache, self).get(k, default=default)
1495 1500
1496 1501 def clear(self, clear_persisted_data=False):
1497 1502 super(manifestfulltextcache, self).clear()
1498 1503 if clear_persisted_data:
1499 1504 self._dirty = True
1500 1505 self.write()
1501 1506 self._read = False
1502 1507
1503 1508
1504 1509 # and upper bound of what we expect from compression
1505 1510 # (real live value seems to be "3")
1506 1511 MAXCOMPRESSION = 3
1507 1512
1508 1513
1509 1514 class FastdeltaUnavailable(Exception):
1510 1515 """Exception raised when fastdelta isn't usable on a manifest."""
1511 1516
1512 1517
1513 1518 @interfaceutil.implementer(repository.imanifeststorage)
1514 1519 class manifestrevlog(object):
1515 1520 '''A revlog that stores manifest texts. This is responsible for caching the
1516 1521 full-text manifest contents.
1517 1522 '''
1518 1523
1519 1524 def __init__(
1520 1525 self,
1521 1526 opener,
1522 1527 tree=b'',
1523 1528 dirlogcache=None,
1524 1529 indexfile=None,
1525 1530 treemanifest=False,
1526 1531 ):
1527 1532 """Constructs a new manifest revlog
1528 1533
1529 1534 `indexfile` - used by extensions to have two manifests at once, like
1530 1535 when transitioning between flatmanifeset and treemanifests.
1531 1536
1532 1537 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1533 1538 options can also be used to make this a tree manifest revlog. The opener
1534 1539 option takes precedence, so if it is set to True, we ignore whatever
1535 1540 value is passed in to the constructor.
1536 1541 """
1537 1542 # During normal operations, we expect to deal with not more than four
1538 1543 # revs at a time (such as during commit --amend). When rebasing large
1539 1544 # stacks of commits, the number can go up, hence the config knob below.
1540 1545 cachesize = 4
1541 1546 optiontreemanifest = False
1542 1547 opts = getattr(opener, 'options', None)
1543 1548 if opts is not None:
1544 1549 cachesize = opts.get(b'manifestcachesize', cachesize)
1545 1550 optiontreemanifest = opts.get(b'treemanifest', False)
1546 1551
1547 1552 self._treeondisk = optiontreemanifest or treemanifest
1548 1553
1549 1554 self._fulltextcache = manifestfulltextcache(cachesize)
1550 1555
1551 1556 if tree:
1552 1557 assert self._treeondisk, b'opts is %r' % opts
1553 1558
1554 1559 if indexfile is None:
1555 1560 indexfile = b'00manifest.i'
1556 1561 if tree:
1557 1562 indexfile = b"meta/" + tree + indexfile
1558 1563
1559 1564 self.tree = tree
1560 1565
1561 1566 # The dirlogcache is kept on the root manifest log
1562 1567 if tree:
1563 1568 self._dirlogcache = dirlogcache
1564 1569 else:
1565 1570 self._dirlogcache = {b'': self}
1566 1571
1567 1572 self._revlog = revlog.revlog(
1568 1573 opener,
1569 1574 indexfile,
1570 1575 # only root indexfile is cached
1571 1576 checkambig=not bool(tree),
1572 1577 mmaplargeindex=True,
1573 1578 upperboundcomp=MAXCOMPRESSION,
1574 1579 )
1575 1580
1576 1581 self.index = self._revlog.index
1577 1582 self.version = self._revlog.version
1578 1583 self._generaldelta = self._revlog._generaldelta
1579 1584
1580 1585 def _setupmanifestcachehooks(self, repo):
1581 1586 """Persist the manifestfulltextcache on lock release"""
1582 1587 if not util.safehasattr(repo, b'_wlockref'):
1583 1588 return
1584 1589
1585 1590 self._fulltextcache._opener = repo.wcachevfs
1586 1591 if repo._currentlock(repo._wlockref) is None:
1587 1592 return
1588 1593
1589 1594 reporef = weakref.ref(repo)
1590 1595 manifestrevlogref = weakref.ref(self)
1591 1596
1592 1597 def persistmanifestcache(success):
1593 1598 # Repo is in an unknown state, do not persist.
1594 1599 if not success:
1595 1600 return
1596 1601
1597 1602 repo = reporef()
1598 1603 self = manifestrevlogref()
1599 1604 if repo is None or self is None:
1600 1605 return
1601 1606 if repo.manifestlog.getstorage(b'') is not self:
1602 1607 # there's a different manifest in play now, abort
1603 1608 return
1604 1609 self._fulltextcache.write()
1605 1610
1606 1611 repo._afterlock(persistmanifestcache)
1607 1612
1608 1613 @property
1609 1614 def fulltextcache(self):
1610 1615 return self._fulltextcache
1611 1616
1612 1617 def clearcaches(self, clear_persisted_data=False):
1613 1618 self._revlog.clearcaches()
1614 1619 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1615 1620 self._dirlogcache = {self.tree: self}
1616 1621
1617 1622 def dirlog(self, d):
1618 1623 if d:
1619 1624 assert self._treeondisk
1620 1625 if d not in self._dirlogcache:
1621 1626 mfrevlog = manifestrevlog(
1622 1627 self.opener, d, self._dirlogcache, treemanifest=self._treeondisk
1623 1628 )
1624 1629 self._dirlogcache[d] = mfrevlog
1625 1630 return self._dirlogcache[d]
1626 1631
1627 1632 def add(
1628 1633 self,
1629 1634 m,
1630 1635 transaction,
1631 1636 link,
1632 1637 p1,
1633 1638 p2,
1634 1639 added,
1635 1640 removed,
1636 1641 readtree=None,
1637 1642 match=None,
1638 1643 ):
1639 1644 try:
1640 1645 if p1 not in self.fulltextcache:
1641 1646 raise FastdeltaUnavailable()
1642 1647 # If our first parent is in the manifest cache, we can
1643 1648 # compute a delta here using properties we know about the
1644 1649 # manifest up-front, which may save time later for the
1645 1650 # revlog layer.
1646 1651
1647 1652 _checkforbidden(added)
1648 1653 # combine the changed lists into one sorted iterator
1649 1654 work = heapq.merge(
1650 1655 [(x, False) for x in sorted(added)],
1651 1656 [(x, True) for x in sorted(removed)],
1652 1657 )
1653 1658
1654 1659 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1655 1660 cachedelta = self._revlog.rev(p1), deltatext
1656 1661 text = util.buffer(arraytext)
1657 1662 n = self._revlog.addrevision(
1658 1663 text, transaction, link, p1, p2, cachedelta
1659 1664 )
1660 1665 except FastdeltaUnavailable:
1661 1666 # The first parent manifest isn't already loaded or the
1662 1667 # manifest implementation doesn't support fastdelta, so
1663 1668 # we'll just encode a fulltext of the manifest and pass
1664 1669 # that through to the revlog layer, and let it handle the
1665 1670 # delta process.
1666 1671 if self._treeondisk:
1667 1672 assert readtree, b"readtree must be set for treemanifest writes"
1668 1673 assert match, b"match must be specified for treemanifest writes"
1669 1674 m1 = readtree(self.tree, p1)
1670 1675 m2 = readtree(self.tree, p2)
1671 1676 n = self._addtree(
1672 1677 m, transaction, link, m1, m2, readtree, match=match
1673 1678 )
1674 1679 arraytext = None
1675 1680 else:
1676 1681 text = m.text()
1677 1682 n = self._revlog.addrevision(text, transaction, link, p1, p2)
1678 1683 arraytext = bytearray(text)
1679 1684
1680 1685 if arraytext is not None:
1681 1686 self.fulltextcache[n] = arraytext
1682 1687
1683 1688 return n
1684 1689
1685 1690 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1686 1691 # If the manifest is unchanged compared to one parent,
1687 1692 # don't write a new revision
1688 1693 if self.tree != b'' and (
1689 1694 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1690 1695 ):
1691 1696 return m.node()
1692 1697
1693 1698 def writesubtree(subm, subp1, subp2, match):
1694 1699 sublog = self.dirlog(subm.dir())
1695 1700 sublog.add(
1696 1701 subm,
1697 1702 transaction,
1698 1703 link,
1699 1704 subp1,
1700 1705 subp2,
1701 1706 None,
1702 1707 None,
1703 1708 readtree=readtree,
1704 1709 match=match,
1705 1710 )
1706 1711
1707 1712 m.writesubtrees(m1, m2, writesubtree, match)
1708 1713 text = m.dirtext()
1709 1714 n = None
1710 1715 if self.tree != b'':
1711 1716 # Double-check whether contents are unchanged to one parent
1712 1717 if text == m1.dirtext():
1713 1718 n = m1.node()
1714 1719 elif text == m2.dirtext():
1715 1720 n = m2.node()
1716 1721
1717 1722 if not n:
1718 1723 n = self._revlog.addrevision(
1719 1724 text, transaction, link, m1.node(), m2.node()
1720 1725 )
1721 1726
1722 1727 # Save nodeid so parent manifest can calculate its nodeid
1723 1728 m.setnode(n)
1724 1729 return n
1725 1730
1726 1731 def __len__(self):
1727 1732 return len(self._revlog)
1728 1733
1729 1734 def __iter__(self):
1730 1735 return self._revlog.__iter__()
1731 1736
1732 1737 def rev(self, node):
1733 1738 return self._revlog.rev(node)
1734 1739
1735 1740 def node(self, rev):
1736 1741 return self._revlog.node(rev)
1737 1742
1738 1743 def lookup(self, value):
1739 1744 return self._revlog.lookup(value)
1740 1745
1741 1746 def parentrevs(self, rev):
1742 1747 return self._revlog.parentrevs(rev)
1743 1748
1744 1749 def parents(self, node):
1745 1750 return self._revlog.parents(node)
1746 1751
1747 1752 def linkrev(self, rev):
1748 1753 return self._revlog.linkrev(rev)
1749 1754
1750 1755 def checksize(self):
1751 1756 return self._revlog.checksize()
1752 1757
1753 1758 def revision(self, node, _df=None, raw=False):
1754 1759 return self._revlog.revision(node, _df=_df, raw=raw)
1755 1760
1756 1761 def rawdata(self, node, _df=None):
1757 1762 return self._revlog.rawdata(node, _df=_df)
1758 1763
1759 1764 def revdiff(self, rev1, rev2):
1760 1765 return self._revlog.revdiff(rev1, rev2)
1761 1766
1762 1767 def cmp(self, node, text):
1763 1768 return self._revlog.cmp(node, text)
1764 1769
1765 1770 def deltaparent(self, rev):
1766 1771 return self._revlog.deltaparent(rev)
1767 1772
1768 1773 def emitrevisions(
1769 1774 self,
1770 1775 nodes,
1771 1776 nodesorder=None,
1772 1777 revisiondata=False,
1773 1778 assumehaveparentrevisions=False,
1774 1779 deltamode=repository.CG_DELTAMODE_STD,
1775 1780 ):
1776 1781 return self._revlog.emitrevisions(
1777 1782 nodes,
1778 1783 nodesorder=nodesorder,
1779 1784 revisiondata=revisiondata,
1780 1785 assumehaveparentrevisions=assumehaveparentrevisions,
1781 1786 deltamode=deltamode,
1782 1787 )
1783 1788
1784 1789 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1785 1790 return self._revlog.addgroup(
1786 1791 deltas, linkmapper, transaction, addrevisioncb=addrevisioncb
1787 1792 )
1788 1793
1789 1794 def rawsize(self, rev):
1790 1795 return self._revlog.rawsize(rev)
1791 1796
1792 1797 def getstrippoint(self, minlink):
1793 1798 return self._revlog.getstrippoint(minlink)
1794 1799
1795 1800 def strip(self, minlink, transaction):
1796 1801 return self._revlog.strip(minlink, transaction)
1797 1802
1798 1803 def files(self):
1799 1804 return self._revlog.files()
1800 1805
1801 1806 def clone(self, tr, destrevlog, **kwargs):
1802 1807 if not isinstance(destrevlog, manifestrevlog):
1803 1808 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1804 1809
1805 1810 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1806 1811
1807 1812 def storageinfo(
1808 1813 self,
1809 1814 exclusivefiles=False,
1810 1815 sharedfiles=False,
1811 1816 revisionscount=False,
1812 1817 trackedsize=False,
1813 1818 storedsize=False,
1814 1819 ):
1815 1820 return self._revlog.storageinfo(
1816 1821 exclusivefiles=exclusivefiles,
1817 1822 sharedfiles=sharedfiles,
1818 1823 revisionscount=revisionscount,
1819 1824 trackedsize=trackedsize,
1820 1825 storedsize=storedsize,
1821 1826 )
1822 1827
1823 1828 @property
1824 1829 def indexfile(self):
1825 1830 return self._revlog.indexfile
1826 1831
1827 1832 @indexfile.setter
1828 1833 def indexfile(self, value):
1829 1834 self._revlog.indexfile = value
1830 1835
1831 1836 @property
1832 1837 def opener(self):
1833 1838 return self._revlog.opener
1834 1839
1835 1840 @opener.setter
1836 1841 def opener(self, value):
1837 1842 self._revlog.opener = value
1838 1843
1839 1844
1840 1845 @interfaceutil.implementer(repository.imanifestlog)
1841 1846 class manifestlog(object):
1842 1847 """A collection class representing the collection of manifest snapshots
1843 1848 referenced by commits in the repository.
1844 1849
1845 1850 In this situation, 'manifest' refers to the abstract concept of a snapshot
1846 1851 of the list of files in the given commit. Consumers of the output of this
1847 1852 class do not care about the implementation details of the actual manifests
1848 1853 they receive (i.e. tree or flat or lazily loaded, etc)."""
1849 1854
1850 1855 def __init__(self, opener, repo, rootstore, narrowmatch):
1851 1856 usetreemanifest = False
1852 1857 cachesize = 4
1853 1858
1854 1859 opts = getattr(opener, 'options', None)
1855 1860 if opts is not None:
1856 1861 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1857 1862 cachesize = opts.get(b'manifestcachesize', cachesize)
1858 1863
1859 1864 self._treemanifests = usetreemanifest
1860 1865
1861 1866 self._rootstore = rootstore
1862 1867 self._rootstore._setupmanifestcachehooks(repo)
1863 1868 self._narrowmatch = narrowmatch
1864 1869
1865 1870 # A cache of the manifestctx or treemanifestctx for each directory
1866 1871 self._dirmancache = {}
1867 1872 self._dirmancache[b''] = util.lrucachedict(cachesize)
1868 1873
1869 1874 self._cachesize = cachesize
1870 1875
1871 1876 def __getitem__(self, node):
1872 1877 """Retrieves the manifest instance for the given node. Throws a
1873 1878 LookupError if not found.
1874 1879 """
1875 1880 return self.get(b'', node)
1876 1881
1877 1882 def get(self, tree, node, verify=True):
1878 1883 """Retrieves the manifest instance for the given node. Throws a
1879 1884 LookupError if not found.
1880 1885
1881 1886 `verify` - if True an exception will be thrown if the node is not in
1882 1887 the revlog
1883 1888 """
1884 1889 if node in self._dirmancache.get(tree, ()):
1885 1890 return self._dirmancache[tree][node]
1886 1891
1887 1892 if not self._narrowmatch.always():
1888 1893 if not self._narrowmatch.visitdir(tree[:-1]):
1889 1894 return excludeddirmanifestctx(tree, node)
1890 1895 if tree:
1891 1896 if self._rootstore._treeondisk:
1892 1897 if verify:
1893 1898 # Side-effect is LookupError is raised if node doesn't
1894 1899 # exist.
1895 1900 self.getstorage(tree).rev(node)
1896 1901
1897 1902 m = treemanifestctx(self, tree, node)
1898 1903 else:
1899 1904 raise error.Abort(
1900 1905 _(
1901 1906 b"cannot ask for manifest directory '%s' in a flat "
1902 1907 b"manifest"
1903 1908 )
1904 1909 % tree
1905 1910 )
1906 1911 else:
1907 1912 if verify:
1908 1913 # Side-effect is LookupError is raised if node doesn't exist.
1909 1914 self._rootstore.rev(node)
1910 1915
1911 1916 if self._treemanifests:
1912 1917 m = treemanifestctx(self, b'', node)
1913 1918 else:
1914 1919 m = manifestctx(self, node)
1915 1920
1916 1921 if node != nullid:
1917 1922 mancache = self._dirmancache.get(tree)
1918 1923 if not mancache:
1919 1924 mancache = util.lrucachedict(self._cachesize)
1920 1925 self._dirmancache[tree] = mancache
1921 1926 mancache[node] = m
1922 1927 return m
1923 1928
1924 1929 def getstorage(self, tree):
1925 1930 return self._rootstore.dirlog(tree)
1926 1931
1927 1932 def clearcaches(self, clear_persisted_data=False):
1928 1933 self._dirmancache.clear()
1929 1934 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
1930 1935
1931 1936 def rev(self, node):
1932 1937 return self._rootstore.rev(node)
1933 1938
1934 1939
1935 1940 @interfaceutil.implementer(repository.imanifestrevisionwritable)
1936 1941 class memmanifestctx(object):
1937 1942 def __init__(self, manifestlog):
1938 1943 self._manifestlog = manifestlog
1939 1944 self._manifestdict = manifestdict()
1940 1945
1941 1946 def _storage(self):
1942 1947 return self._manifestlog.getstorage(b'')
1943 1948
1944 1949 def copy(self):
1945 1950 memmf = memmanifestctx(self._manifestlog)
1946 1951 memmf._manifestdict = self.read().copy()
1947 1952 return memmf
1948 1953
1949 1954 def read(self):
1950 1955 return self._manifestdict
1951 1956
1952 1957 def write(self, transaction, link, p1, p2, added, removed, match=None):
1953 1958 return self._storage().add(
1954 1959 self._manifestdict,
1955 1960 transaction,
1956 1961 link,
1957 1962 p1,
1958 1963 p2,
1959 1964 added,
1960 1965 removed,
1961 1966 match=match,
1962 1967 )
1963 1968
1964 1969
1965 1970 @interfaceutil.implementer(repository.imanifestrevisionstored)
1966 1971 class manifestctx(object):
1967 1972 """A class representing a single revision of a manifest, including its
1968 1973 contents, its parent revs, and its linkrev.
1969 1974 """
1970 1975
1971 1976 def __init__(self, manifestlog, node):
1972 1977 self._manifestlog = manifestlog
1973 1978 self._data = None
1974 1979
1975 1980 self._node = node
1976 1981
1977 1982 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1978 1983 # but let's add it later when something needs it and we can load it
1979 1984 # lazily.
1980 1985 # self.p1, self.p2 = store.parents(node)
1981 1986 # rev = store.rev(node)
1982 1987 # self.linkrev = store.linkrev(rev)
1983 1988
1984 1989 def _storage(self):
1985 1990 return self._manifestlog.getstorage(b'')
1986 1991
1987 1992 def node(self):
1988 1993 return self._node
1989 1994
1990 1995 def copy(self):
1991 1996 memmf = memmanifestctx(self._manifestlog)
1992 1997 memmf._manifestdict = self.read().copy()
1993 1998 return memmf
1994 1999
1995 2000 @propertycache
1996 2001 def parents(self):
1997 2002 return self._storage().parents(self._node)
1998 2003
1999 2004 def read(self):
2000 2005 if self._data is None:
2001 2006 if self._node == nullid:
2002 2007 self._data = manifestdict()
2003 2008 else:
2004 2009 store = self._storage()
2005 2010 if self._node in store.fulltextcache:
2006 2011 text = pycompat.bytestr(store.fulltextcache[self._node])
2007 2012 else:
2008 2013 text = store.revision(self._node)
2009 2014 arraytext = bytearray(text)
2010 2015 store.fulltextcache[self._node] = arraytext
2011 2016 self._data = manifestdict(text)
2012 2017 return self._data
2013 2018
2014 2019 def readfast(self, shallow=False):
2015 2020 '''Calls either readdelta or read, based on which would be less work.
2016 2021 readdelta is called if the delta is against the p1, and therefore can be
2017 2022 read quickly.
2018 2023
2019 2024 If `shallow` is True, nothing changes since this is a flat manifest.
2020 2025 '''
2021 2026 store = self._storage()
2022 2027 r = store.rev(self._node)
2023 2028 deltaparent = store.deltaparent(r)
2024 2029 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2025 2030 return self.readdelta()
2026 2031 return self.read()
2027 2032
2028 2033 def readdelta(self, shallow=False):
2029 2034 '''Returns a manifest containing just the entries that are present
2030 2035 in this manifest, but not in its p1 manifest. This is efficient to read
2031 2036 if the revlog delta is already p1.
2032 2037
2033 2038 Changing the value of `shallow` has no effect on flat manifests.
2034 2039 '''
2035 2040 store = self._storage()
2036 2041 r = store.rev(self._node)
2037 2042 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2038 2043 return manifestdict(d)
2039 2044
2040 2045 def find(self, key):
2041 2046 return self.read().find(key)
2042 2047
2043 2048
2044 2049 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2045 2050 class memtreemanifestctx(object):
2046 2051 def __init__(self, manifestlog, dir=b''):
2047 2052 self._manifestlog = manifestlog
2048 2053 self._dir = dir
2049 2054 self._treemanifest = treemanifest()
2050 2055
2051 2056 def _storage(self):
2052 2057 return self._manifestlog.getstorage(b'')
2053 2058
2054 2059 def copy(self):
2055 2060 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2056 2061 memmf._treemanifest = self._treemanifest.copy()
2057 2062 return memmf
2058 2063
2059 2064 def read(self):
2060 2065 return self._treemanifest
2061 2066
2062 2067 def write(self, transaction, link, p1, p2, added, removed, match=None):
2063 2068 def readtree(dir, node):
2064 2069 return self._manifestlog.get(dir, node).read()
2065 2070
2066 2071 return self._storage().add(
2067 2072 self._treemanifest,
2068 2073 transaction,
2069 2074 link,
2070 2075 p1,
2071 2076 p2,
2072 2077 added,
2073 2078 removed,
2074 2079 readtree=readtree,
2075 2080 match=match,
2076 2081 )
2077 2082
2078 2083
2079 2084 @interfaceutil.implementer(repository.imanifestrevisionstored)
2080 2085 class treemanifestctx(object):
2081 2086 def __init__(self, manifestlog, dir, node):
2082 2087 self._manifestlog = manifestlog
2083 2088 self._dir = dir
2084 2089 self._data = None
2085 2090
2086 2091 self._node = node
2087 2092
2088 2093 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2089 2094 # we can instantiate treemanifestctx objects for directories we don't
2090 2095 # have on disk.
2091 2096 # self.p1, self.p2 = store.parents(node)
2092 2097 # rev = store.rev(node)
2093 2098 # self.linkrev = store.linkrev(rev)
2094 2099
2095 2100 def _storage(self):
2096 2101 narrowmatch = self._manifestlog._narrowmatch
2097 2102 if not narrowmatch.always():
2098 2103 if not narrowmatch.visitdir(self._dir[:-1]):
2099 2104 return excludedmanifestrevlog(self._dir)
2100 2105 return self._manifestlog.getstorage(self._dir)
2101 2106
2102 2107 def read(self):
2103 2108 if self._data is None:
2104 2109 store = self._storage()
2105 2110 if self._node == nullid:
2106 2111 self._data = treemanifest()
2107 2112 # TODO accessing non-public API
2108 2113 elif store._treeondisk:
2109 2114 m = treemanifest(dir=self._dir)
2110 2115
2111 2116 def gettext():
2112 2117 return store.revision(self._node)
2113 2118
2114 2119 def readsubtree(dir, subm):
2115 2120 # Set verify to False since we need to be able to create
2116 2121 # subtrees for trees that don't exist on disk.
2117 2122 return self._manifestlog.get(dir, subm, verify=False).read()
2118 2123
2119 2124 m.read(gettext, readsubtree)
2120 2125 m.setnode(self._node)
2121 2126 self._data = m
2122 2127 else:
2123 2128 if self._node in store.fulltextcache:
2124 2129 text = pycompat.bytestr(store.fulltextcache[self._node])
2125 2130 else:
2126 2131 text = store.revision(self._node)
2127 2132 arraytext = bytearray(text)
2128 2133 store.fulltextcache[self._node] = arraytext
2129 2134 self._data = treemanifest(dir=self._dir, text=text)
2130 2135
2131 2136 return self._data
2132 2137
2133 2138 def node(self):
2134 2139 return self._node
2135 2140
2136 2141 def copy(self):
2137 2142 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2138 2143 memmf._treemanifest = self.read().copy()
2139 2144 return memmf
2140 2145
2141 2146 @propertycache
2142 2147 def parents(self):
2143 2148 return self._storage().parents(self._node)
2144 2149
2145 2150 def readdelta(self, shallow=False):
2146 2151 '''Returns a manifest containing just the entries that are present
2147 2152 in this manifest, but not in its p1 manifest. This is efficient to read
2148 2153 if the revlog delta is already p1.
2149 2154
2150 2155 If `shallow` is True, this will read the delta for this directory,
2151 2156 without recursively reading subdirectory manifests. Instead, any
2152 2157 subdirectory entry will be reported as it appears in the manifest, i.e.
2153 2158 the subdirectory will be reported among files and distinguished only by
2154 2159 its 't' flag.
2155 2160 '''
2156 2161 store = self._storage()
2157 2162 if shallow:
2158 2163 r = store.rev(self._node)
2159 2164 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2160 2165 return manifestdict(d)
2161 2166 else:
2162 2167 # Need to perform a slow delta
2163 2168 r0 = store.deltaparent(store.rev(self._node))
2164 2169 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2165 2170 m1 = self.read()
2166 2171 md = treemanifest(dir=self._dir)
2167 2172 for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
2168 2173 if n1:
2169 2174 md[f] = n1
2170 2175 if fl1:
2171 2176 md.setflag(f, fl1)
2172 2177 return md
2173 2178
2174 2179 def readfast(self, shallow=False):
2175 2180 '''Calls either readdelta or read, based on which would be less work.
2176 2181 readdelta is called if the delta is against the p1, and therefore can be
2177 2182 read quickly.
2178 2183
2179 2184 If `shallow` is True, it only returns the entries from this manifest,
2180 2185 and not any submanifests.
2181 2186 '''
2182 2187 store = self._storage()
2183 2188 r = store.rev(self._node)
2184 2189 deltaparent = store.deltaparent(r)
2185 2190 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2186 2191 return self.readdelta(shallow=shallow)
2187 2192
2188 2193 if shallow:
2189 2194 return manifestdict(store.revision(self._node))
2190 2195 else:
2191 2196 return self.read()
2192 2197
2193 2198 def find(self, key):
2194 2199 return self.read().find(key)
2195 2200
2196 2201
2197 2202 class excludeddir(treemanifest):
2198 2203 """Stand-in for a directory that is excluded from the repository.
2199 2204
2200 2205 With narrowing active on a repository that uses treemanifests,
2201 2206 some of the directory revlogs will be excluded from the resulting
2202 2207 clone. This is a huge storage win for clients, but means we need
2203 2208 some sort of pseudo-manifest to surface to internals so we can
2204 2209 detect a merge conflict outside the narrowspec. That's what this
2205 2210 class is: it stands in for a directory whose node is known, but
2206 2211 whose contents are unknown.
2207 2212 """
2208 2213
2209 2214 def __init__(self, dir, node):
2210 2215 super(excludeddir, self).__init__(dir)
2211 2216 self._node = node
2212 2217 # Add an empty file, which will be included by iterators and such,
2213 2218 # appearing as the directory itself (i.e. something like "dir/")
2214 2219 self._files[b''] = node
2215 2220 self._flags[b''] = b't'
2216 2221
2217 2222 # Manifests outside the narrowspec should never be modified, so avoid
2218 2223 # copying. This makes a noticeable difference when there are very many
2219 2224 # directories outside the narrowspec. Also, it makes sense for the copy to
2220 2225 # be of the same type as the original, which would not happen with the
2221 2226 # super type's copy().
2222 2227 def copy(self):
2223 2228 return self
2224 2229
2225 2230
2226 2231 class excludeddirmanifestctx(treemanifestctx):
2227 2232 """context wrapper for excludeddir - see that docstring for rationale"""
2228 2233
2229 2234 def __init__(self, dir, node):
2230 2235 self._dir = dir
2231 2236 self._node = node
2232 2237
2233 2238 def read(self):
2234 2239 return excludeddir(self._dir, self._node)
2235 2240
2236 2241 def write(self, *args):
2237 2242 raise error.ProgrammingError(
2238 2243 b'attempt to write manifest from excluded dir %s' % self._dir
2239 2244 )
2240 2245
2241 2246
2242 2247 class excludedmanifestrevlog(manifestrevlog):
2243 2248 """Stand-in for excluded treemanifest revlogs.
2244 2249
2245 2250 When narrowing is active on a treemanifest repository, we'll have
2246 2251 references to directories we can't see due to the revlog being
2247 2252 skipped. This class exists to conform to the manifestrevlog
2248 2253 interface for those directories and proactively prevent writes to
2249 2254 outside the narrowspec.
2250 2255 """
2251 2256
2252 2257 def __init__(self, dir):
2253 2258 self._dir = dir
2254 2259
2255 2260 def __len__(self):
2256 2261 raise error.ProgrammingError(
2257 2262 b'attempt to get length of excluded dir %s' % self._dir
2258 2263 )
2259 2264
2260 2265 def rev(self, node):
2261 2266 raise error.ProgrammingError(
2262 2267 b'attempt to get rev from excluded dir %s' % self._dir
2263 2268 )
2264 2269
2265 2270 def linkrev(self, node):
2266 2271 raise error.ProgrammingError(
2267 2272 b'attempt to get linkrev from excluded dir %s' % self._dir
2268 2273 )
2269 2274
2270 2275 def node(self, rev):
2271 2276 raise error.ProgrammingError(
2272 2277 b'attempt to get node from excluded dir %s' % self._dir
2273 2278 )
2274 2279
2275 2280 def add(self, *args, **kwargs):
2276 2281 # We should never write entries in dirlogs outside the narrow clone.
2277 2282 # However, the method still gets called from writesubtree() in
2278 2283 # _addtree(), so we need to handle it. We should possibly make that
2279 2284 # avoid calling add() with a clean manifest (_dirty is always False
2280 2285 # in excludeddir instances).
2281 2286 pass
General Comments 0
You need to be logged in to leave comments. Login now