revlog: split functionality related to deltas computation in a new module...
Boris Feld
r39366:655b5b46 default
@@ -88,6 +88,10 @@ from .utils import (
     stringutil,
 )
 
+from .revlogutils import (
+    deltas as deltautil
+)
+
 release = lockmod.release
 
 command = registrar.command()
@@ -706,7 +710,7 @@ def debugdeltachain(ui, repo, file_=None
             largestblock = 0
             srchunks = 0
 
-            for revschunk in revlog._slicechunk(r, chain):
+            for revschunk in deltautil.slicechunk(r, chain):
                 srchunks += 1
                 blkend = start(revschunk[-1]) + length(revschunk[-1])
                 blksize = blkend - start(revschunk[0])
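For orientation, a small usage sketch (not part of the commit) of the relocated helper: the call site above switches from the private revlog method to the module-level function in mercurial.revlogutils.deltas. The summarize_read_chunks helper is hypothetical.

    from mercurial.revlogutils import deltas as deltautil

    def summarize_read_chunks(revlog, chain):
        """Count the read chunks a delta chain needs, as debugdeltachain does."""
        # slicechunk yields groups of revisions that can be read in one go
        srchunks = 0
        for revschunk in deltautil.slicechunk(revlog, chain):
            srchunks += 1
        return srchunks

The remaining hunks remove the moved code from revlog.py.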
@@ -17,7 +17,6 @@ import collections
 import contextlib
 import errno
 import hashlib
-import heapq
 import os
 import re
 import struct
@@ -39,7 +38,6 @@ from .i18n import _
 from .revlogutils.constants import (
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
-    LIMIT_DELTA2TEXT,
     REVIDX_DEFAULT_FLAGS,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
@@ -69,6 +67,9 @@ from . import (
     templatefilters,
     util,
 )
+from .revlogutils import (
+    deltas as deltautil,
+)
 from .utils import (
     interfaceutil,
     stringutil,
@@ -211,610 +212,6 @@ def hash(text, p1, p2):
     s.update(text)
     return s.digest()
 
-class _testrevlog(object):
-    """minimalist fake revlog to use in doctests"""
-
-    def __init__(self, data, density=0.5, mingap=0):
-        """data is a list of revision payload boundaries"""
-        self._data = data
-        self._srdensitythreshold = density
-        self._srmingapsize = mingap
-
-    def start(self, rev):
-        if rev == 0:
-            return 0
-        return self._data[rev - 1]
-
-    def end(self, rev):
-        return self._data[rev]
-
-    def length(self, rev):
-        return self.end(rev) - self.start(rev)
-
-    def __len__(self):
-        return len(self._data)
-
-def _trimchunk(revlog, revs, startidx, endidx=None):
-    """returns revs[startidx:endidx] without empty trailing revs
-
-    Doctest Setup
-    >>> revlog = _testrevlog([
-    ... 5, #0
-    ... 10, #1
-    ... 12, #2
-    ... 12, #3 (empty)
-    ... 17, #4
-    ... 21, #5
-    ... 21, #6 (empty)
-    ... ])
-
-    Contiguous cases:
-    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
-    [0, 1, 2, 3, 4, 5]
-    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
-    [0, 1, 2, 3, 4]
-    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
-    [0, 1, 2]
-    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
-    [2]
-    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
-    [3, 4, 5]
-    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
-    [3, 4]
-
-    Discontiguous cases:
-    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
-    [1, 3, 5]
-    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
-    [1]
-    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
-    [3, 5]
-    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
-    [3, 5]
-    """
-    length = revlog.length
-
-    if endidx is None:
-        endidx = len(revs)
-
-    # If we have a non-empty delta candidate, there is nothing to trim
-    if revs[endidx - 1] < len(revlog):
-        # Trim empty revs at the end, except the very first revision of a chain
-        while (endidx > 1
-               and endidx > startidx
-               and length(revs[endidx - 1]) == 0):
-            endidx -= 1
-
-    return revs[startidx:endidx]
-
-def _segmentspan(revlog, revs, deltainfo=None):
-    """Get the byte span of a segment of revisions
-
-    revs is a sorted array of revision numbers
-
-    >>> revlog = _testrevlog([
-    ... 5, #0
-    ... 10, #1
-    ... 12, #2
-    ... 12, #3 (empty)
-    ... 17, #4
-    ... ])
-
-    >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
-    17
-    >>> _segmentspan(revlog, [0, 4])
-    17
-    >>> _segmentspan(revlog, [3, 4])
-    5
-    >>> _segmentspan(revlog, [1, 2, 3,])
-    7
-    >>> _segmentspan(revlog, [1, 3])
-    7
-    """
-    if not revs:
-        return 0
-    if deltainfo is not None and len(revlog) <= revs[-1]:
-        if len(revs) == 1:
-            return deltainfo.deltalen
-        offset = revlog.end(len(revlog) - 1)
-        end = deltainfo.deltalen + offset
-    else:
-        end = revlog.end(revs[-1])
-    return end - revlog.start(revs[0])
-
-def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
-    """slice revs to reduce the amount of unrelated data to be read from disk.
-
-    ``revs`` is sliced into groups that should be read in one time.
-    Assume that revs are sorted.
-
-    The initial chunk is sliced until the overall density (payload/chunks-span
-    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
-    `revlog._srmingapsize` is skipped.
-
-    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
-    For consistency with other slicing choices, this limit won't go lower than
-    `revlog._srmingapsize`.
-
-    If individual revision chunks are larger than this limit, they will still
-    be raised individually.
-
-    >>> revlog = _testrevlog([
-    ... 5, #00 (5)
-    ... 10, #01 (5)
-    ... 12, #02 (2)
-    ... 12, #03 (empty)
-    ... 27, #04 (15)
-    ... 31, #05 (4)
-    ... 31, #06 (empty)
-    ... 42, #07 (11)
-    ... 47, #08 (5)
-    ... 47, #09 (empty)
-    ... 48, #10 (1)
-    ... 51, #11 (3)
-    ... 74, #12 (23)
-    ... 85, #13 (11)
-    ... 86, #14 (1)
-    ... 91, #15 (5)
-    ... ])
-
-    >>> list(_slicechunk(revlog, list(range(16))))
-    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
-    >>> list(_slicechunk(revlog, [0, 15]))
-    [[0], [15]]
-    >>> list(_slicechunk(revlog, [0, 11, 15]))
-    [[0], [11], [15]]
-    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
-    [[0], [11, 13, 15]]
-    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
-    [[1, 2], [5, 8, 10, 11], [14]]
-
-    Slicing with a maximum chunk size
-    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
-    [[0], [11], [13], [15]]
-    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
-    [[0], [11], [13, 15]]
-    """
-    if targetsize is not None:
-        targetsize = max(targetsize, revlog._srmingapsize)
-    # targetsize should not be specified when evaluating delta candidates:
-    # * targetsize is used to ensure we stay within specification when reading,
-    # * deltainfo is used to pick a good delta chain when writing.
-    if not (deltainfo is None or targetsize is None):
-        msg = 'cannot use `targetsize` with a `deltainfo`'
-        raise error.ProgrammingError(msg)
-    for chunk in _slicechunktodensity(revlog, revs,
-                                      deltainfo,
-                                      revlog._srdensitythreshold,
-                                      revlog._srmingapsize):
-        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
-            yield subchunk
-
-def _slicechunktosize(revlog, revs, targetsize=None):
-    """slice revs to match the target size
-
-    This is intended to be used on chunks that density slicing selected but
-    that are still too large compared to the read guarantee of revlogs. This
-    might happen when the "minimal gap size" interrupted the slicing or when
-    chains are built in a way that creates large blocks next to each other.
-
-    >>> revlog = _testrevlog([
-    ... 3, #0 (3)
-    ... 5, #1 (2)
-    ... 6, #2 (1)
-    ... 8, #3 (2)
-    ... 8, #4 (empty)
-    ... 11, #5 (3)
-    ... 12, #6 (1)
-    ... 13, #7 (1)
-    ... 14, #8 (1)
-    ... ])
-
-    Cases where chunk is already small enough
-    >>> list(_slicechunktosize(revlog, [0], 3))
-    [[0]]
-    >>> list(_slicechunktosize(revlog, [6, 7], 3))
-    [[6, 7]]
-    >>> list(_slicechunktosize(revlog, [0], None))
-    [[0]]
-    >>> list(_slicechunktosize(revlog, [6, 7], None))
-    [[6, 7]]
-
-    cases where we need actual slicing
-    >>> list(_slicechunktosize(revlog, [0, 1], 3))
-    [[0], [1]]
-    >>> list(_slicechunktosize(revlog, [1, 3], 3))
-    [[1], [3]]
-    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
-    [[1, 2], [3]]
-    >>> list(_slicechunktosize(revlog, [3, 5], 3))
-    [[3], [5]]
-    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
-    [[3], [5]]
-    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
-    [[5], [6, 7, 8]]
-    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
-    [[0], [1, 2], [3], [5], [6, 7, 8]]
-
-    Case with too large individual chunk (must return valid chunk)
-    >>> list(_slicechunktosize(revlog, [0, 1], 2))
-    [[0], [1]]
-    >>> list(_slicechunktosize(revlog, [1, 3], 1))
-    [[1], [3]]
-    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
-    [[3], [5]]
-    """
-    assert targetsize is None or 0 <= targetsize
-    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
-        yield revs
-        return
-
-    startrevidx = 0
-    startdata = revlog.start(revs[0])
-    endrevidx = 0
-    iterrevs = enumerate(revs)
-    next(iterrevs) # skip first rev.
-    for idx, r in iterrevs:
-        span = revlog.end(r) - startdata
-        if span <= targetsize:
-            endrevidx = idx
-        else:
-            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
-            if chunk:
-                yield chunk
-            startrevidx = idx
-            startdata = revlog.start(r)
-            endrevidx = idx
-    yield _trimchunk(revlog, revs, startrevidx)
-
-def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
-                         mingapsize=0):
-    """slice revs to reduce the amount of unrelated data to be read from disk.
-
-    ``revs`` is sliced into groups that should be read in one time.
-    Assume that revs are sorted.
-
-    ``deltainfo`` is a _deltainfo instance of a revision that we would append
-    to the top of the revlog.
-
-    The initial chunk is sliced until the overall density (payload/chunks-span
-    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
-    skipped.
-
-    >>> revlog = _testrevlog([
-    ... 5, #00 (5)
-    ... 10, #01 (5)
-    ... 12, #02 (2)
-    ... 12, #03 (empty)
-    ... 27, #04 (15)
-    ... 31, #05 (4)
-    ... 31, #06 (empty)
-    ... 42, #07 (11)
-    ... 47, #08 (5)
-    ... 47, #09 (empty)
-    ... 48, #10 (1)
-    ... 51, #11 (3)
-    ... 74, #12 (23)
-    ... 85, #13 (11)
-    ... 86, #14 (1)
-    ... 91, #15 (5)
-    ... ])
-
-    >>> list(_slicechunktodensity(revlog, list(range(16))))
-    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
-    >>> list(_slicechunktodensity(revlog, [0, 15]))
-    [[0], [15]]
-    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
-    [[0], [11], [15]]
-    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
-    [[0], [11, 13, 15]]
-    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
-    [[1, 2], [5, 8, 10, 11], [14]]
-    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
-    ...                           mingapsize=20))
-    [[1, 2, 3, 5, 8, 10, 11], [14]]
-    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
-    ...                           targetdensity=0.95))
-    [[1, 2], [5], [8, 10, 11], [14]]
-    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
-    ...                           targetdensity=0.95, mingapsize=12))
-    [[1, 2], [5, 8, 10, 11], [14]]
-    """
-    start = revlog.start
-    length = revlog.length
-
-    if len(revs) <= 1:
-        yield revs
-        return
-
-    nextrev = len(revlog)
-    nextoffset = revlog.end(nextrev - 1)
-
-    if deltainfo is None:
-        deltachainspan = _segmentspan(revlog, revs)
-        chainpayload = sum(length(r) for r in revs)
-    else:
-        deltachainspan = deltainfo.distance
-        chainpayload = deltainfo.compresseddeltalen
-
-    if deltachainspan < mingapsize:
-        yield revs
-        return
-
-    readdata = deltachainspan
-
-    if deltachainspan:
-        density = chainpayload / float(deltachainspan)
-    else:
-        density = 1.0
-
-    if density >= targetdensity:
-        yield revs
-        return
-
-    if deltainfo is not None and deltainfo.deltalen:
-        revs = list(revs)
-        revs.append(nextrev)
-
-    # Store the gaps in a heap to have them sorted by decreasing size
-    gapsheap = []
-    heapq.heapify(gapsheap)
-    prevend = None
-    for i, rev in enumerate(revs):
-        if rev < nextrev:
-            revstart = start(rev)
-            revlen = length(rev)
-        else:
-            revstart = nextoffset
-            revlen = deltainfo.deltalen
-
-        # Skip empty revisions to form larger holes
-        if revlen == 0:
-            continue
-
-        if prevend is not None:
-            gapsize = revstart - prevend
-            # only consider holes that are large enough
-            if gapsize > mingapsize:
-                heapq.heappush(gapsheap, (-gapsize, i))
-
-        prevend = revstart + revlen
-
-    # Collect the indices of the largest holes until the density is acceptable
-    indicesheap = []
-    heapq.heapify(indicesheap)
-    while gapsheap and density < targetdensity:
-        oppgapsize, gapidx = heapq.heappop(gapsheap)
-
-        heapq.heappush(indicesheap, gapidx)
-
-        # the gap sizes are stored as negatives to be sorted decreasingly
-        # by the heap
-        readdata -= (-oppgapsize)
-        if readdata > 0:
-            density = chainpayload / float(readdata)
-        else:
-            density = 1.0
-
-    # Cut the revs at collected indices
-    previdx = 0
-    while indicesheap:
-        idx = heapq.heappop(indicesheap)
-
-        chunk = _trimchunk(revlog, revs, previdx, idx)
-        if chunk:
-            yield chunk
-
-        previdx = idx
-
-    chunk = _trimchunk(revlog, revs, previdx)
-    if chunk:
-        yield chunk
-
-@attr.s(slots=True, frozen=True)
-class _deltainfo(object):
-    distance = attr.ib()
-    deltalen = attr.ib()
-    data = attr.ib()
-    base = attr.ib()
-    chainbase = attr.ib()
-    chainlen = attr.ib()
-    compresseddeltalen = attr.ib()
-    snapshotdepth = attr.ib()
-
-class _deltacomputer(object):
-    def __init__(self, revlog):
-        self.revlog = revlog
-
-    def _getcandidaterevs(self, p1, p2, cachedelta):
-        """
-        Provides revisions that present an interest to be diffed against,
-        grouped by level of easiness.
-        """
-        revlog = self.revlog
-        gdelta = revlog._generaldelta
-        curr = len(revlog)
-        prev = curr - 1
-        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
-
-        # should we try to build a delta?
-        if prev != nullrev and revlog._storedeltachains:
-            tested = set()
-            # This condition is true most of the time when processing
-            # changegroup data into a generaldelta repo. The only time it
-            # isn't true is if this is the first revision in a delta chain
-            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
-            if cachedelta and gdelta and revlog._lazydeltabase:
-                # Assume what we received from the server is a good choice
-                # build delta will reuse the cache
-                yield (cachedelta[0],)
-                tested.add(cachedelta[0])
-
-            if gdelta:
-                # exclude already lazy tested base if any
-                parents = [p for p in (p1r, p2r)
-                           if p != nullrev and p not in tested]
-
-                if not revlog._deltabothparents and len(parents) == 2:
-                    parents.sort()
-                    # To minimize the chance of having to build a fulltext,
-                    # pick first whichever parent is closest to us (max rev)
-                    yield (parents[1],)
-                    # then the other one (min rev) if the first did not fit
-                    yield (parents[0],)
-                    tested.update(parents)
-                elif len(parents) > 0:
-                    # Test all parents (1 or 2), and keep the best candidate
-                    yield parents
-                    tested.update(parents)
-
-            if prev not in tested:
-                # other approaches failed; try against prev to hopefully
-                # save us a fulltext.
-                yield (prev,)
-                tested.add(prev)
-
-    def buildtext(self, revinfo, fh):
-        """Builds a fulltext version of a revision
-
-        revinfo: _revisioninfo instance that contains all needed info
-        fh:      file handle to either the .i or the .d revlog file,
-                 depending on whether it is inlined or not
-        """
-        btext = revinfo.btext
-        if btext[0] is not None:
-            return btext[0]
-
-        revlog = self.revlog
-        cachedelta = revinfo.cachedelta
-        flags = revinfo.flags
-        node = revinfo.node
-
-        baserev = cachedelta[0]
-        delta = cachedelta[1]
-        # special case deltas which replace entire base; no need to decode
-        # base revision. this neatly avoids censored bases, which throw when
-        # they're decoded.
-        hlen = struct.calcsize(">lll")
-        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
-                                                   len(delta) - hlen):
-            btext[0] = delta[hlen:]
-        else:
-            # deltabase is rawtext before changed by flag processors, which is
-            # equivalent to non-raw text
-            basetext = revlog.revision(baserev, _df=fh, raw=False)
-            btext[0] = mdiff.patch(basetext, delta)
-
-        try:
-            res = revlog._processflags(btext[0], flags, 'read', raw=True)
-            btext[0], validatehash = res
-            if validatehash:
-                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
-            if flags & REVIDX_ISCENSORED:
-                raise RevlogError(_('node %s is not censored') % node)
-        except CensoredNodeError:
-            # must pass the censored index flag to add censored revisions
-            if not flags & REVIDX_ISCENSORED:
-                raise
-        return btext[0]
-
-    def _builddeltadiff(self, base, revinfo, fh):
-        revlog = self.revlog
-        t = self.buildtext(revinfo, fh)
-        if revlog.iscensored(base):
-            # deltas based on a censored revision must replace the
-            # full content in one patch, so delta works everywhere
-            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
-            delta = header + t
-        else:
-            ptext = revlog.revision(base, _df=fh, raw=True)
-            delta = mdiff.textdiff(ptext, t)
-
-        return delta
-
-    def _builddeltainfo(self, revinfo, base, fh):
-        # can we use the cached delta?
-        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
-            delta = revinfo.cachedelta[1]
-        else:
-            delta = self._builddeltadiff(base, revinfo, fh)
-        revlog = self.revlog
-        header, data = revlog.compress(delta)
-        deltalen = len(header) + len(data)
-        chainbase = revlog.chainbase(base)
-        offset = revlog.end(len(revlog) - 1)
-        dist = deltalen + offset - revlog.start(chainbase)
-        if revlog._generaldelta:
-            deltabase = base
-        else:
-            deltabase = chainbase
-        chainlen, compresseddeltalen = revlog._chaininfo(base)
-        chainlen += 1
-        compresseddeltalen += deltalen
-
-        revlog = self.revlog
-        snapshotdepth = None
-        if deltabase == nullrev:
-            snapshotdepth = 0
-        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
-            # A delta chain should always be one full snapshot,
-            # zero or more semi-snapshots, and zero or more deltas
-            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
-            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
-                snapshotdepth = len(revlog._deltachain(deltabase)[0])
-
-        return _deltainfo(dist, deltalen, (header, data), deltabase,
-                          chainbase, chainlen, compresseddeltalen,
-                          snapshotdepth)
-
-    def finddeltainfo(self, revinfo, fh):
-        """Find an acceptable delta against a candidate revision
-
-        revinfo: information about the revision (instance of _revisioninfo)
-        fh:      file handle to either the .i or the .d revlog file,
-                 depending on whether it is inlined or not
-
-        Returns the first acceptable candidate revision, as ordered by
-        _getcandidaterevs
-        """
-        if not revinfo.textlen:
-            return None # an empty file does not need a delta
-
-        cachedelta = revinfo.cachedelta
-        p1 = revinfo.p1
-        p2 = revinfo.p2
-        revlog = self.revlog
-
-        deltalength = self.revlog.length
-        deltaparent = self.revlog.deltaparent
-
-        deltainfo = None
-        deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
-        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
-            # filter out delta bases that will never produce a good delta
-            candidaterevs = [r for r in candidaterevs
-                             if self.revlog.length(r) <= deltas_limit]
-            nominateddeltas = []
-            for candidaterev in candidaterevs:
-                # skip over empty deltas (no need to include them in a chain)
-                while candidaterev != nullrev and not deltalength(candidaterev):
-                    candidaterev = deltaparent(candidaterev)
-                # no need to try a delta against nullid, this will be handled
-                # by fulltext later.
-                if candidaterev == nullrev:
-                    continue
-                # no delta for rawtext-changing revs (see "candelta" for why)
-                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
-                    continue
-                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
-                if revlog._isgooddeltainfo(candidatedelta, revinfo):
-                    nominateddeltas.append(candidatedelta)
-            if nominateddeltas:
-                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
-                break
-
-        return deltainfo
-
 @attr.s(slots=True, frozen=True)
 class _revisioninfo(object):
     """Information about a revision that allows building its fulltext
@@ -2095,7 +1492,8 @@ class revlog(object):
         if not self._withsparseread:
             slicedchunks = (revs,)
         else:
-            slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
+            slicedchunks = deltautil.slicechunk(self, revs,
+                                                targetsize=targetsize)
 
         for revschunk in slicedchunks:
             firstrev = revschunk[0]
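A hedged sketch of the targetsize interplay shown above: when sparse reads are enabled, _chunks now delegates slicing to the new module, and targetsize caps the byte span of each read. This assumes the _testrevlog doctest fixture from this diff is importable; the expected output mirrors the doctests.

    from mercurial.revlogutils import deltas as deltautil

    revlog = deltautil._testrevlog([5, 10, 12, 12, 27, 31, 31, 42, 47, 47,
                                    48, 51, 74, 85, 86, 91])
    # Without a cap, [11, 13, 15] stays one 43-byte read (91 - 48); with
    # targetsize=20 the group is re-split so reads stay near the cap:
    print(list(deltautil.slicechunk(revlog, [0, 11, 13, 15], targetsize=20)))
    # expected, per the doctests: [[0], [11], [13, 15]]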
@@ -2393,7 +1791,7 @@ class revlog(object):
         computed by default as hash(text, p1, p2), however subclasses might
         use a different hashing method (and override checkhash() in such a
         case)
         flags - the known flags to set on the revision
-        deltacomputer - an optional _deltacomputer instance shared between
+        deltacomputer - an optional deltacomputer instance shared between
             multiple calls
         """
         if link == nullrev:
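The docstring tweak above tracks the rename; here is a brief sketch (assumed, not from the commit) of the sharing pattern it describes, with one deltacomputer instance reused across multiple addrevision() calls:

    from mercurial.revlogutils import deltas as deltautil

    def addmany(revlog, transaction, entries):
        # build one deltacomputer and share it between multiple calls
        dc = deltautil.deltacomputer(revlog)
        for text, link, p1, p2 in entries:
            revlog.addrevision(text, transaction, link, p1, p2,
                               deltacomputer=dc)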
@@ -2516,95 +1914,6 @@ class revlog(object):
 
         return compressor.decompress(data)
 
-    def _isgooddeltainfo(self, deltainfo, revinfo):
-        """Returns True if the given delta is good. Good means that it is
-        within the disk span, disk size, and chain length bounds that we know
-        to be performant."""
-        if deltainfo is None:
-            return False
-
-        # - 'deltainfo.distance' is the distance from the base revision --
-        #   bounding it limits the amount of I/O we need to do.
-        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
-        #   deltas we need to apply -- bounding it limits the amount of CPU
-        #   we consume.
-
-        if self._sparserevlog:
-            # As sparse-read will be used, we can consider that the distance,
-            # instead of being the span of the whole chunk,
-            # is the span of the largest read chunk
-            base = deltainfo.base
-
-            if base != nullrev:
-                deltachain = self._deltachain(base)[0]
-            else:
-                deltachain = []
-
-            # search for the first non-snapshot revision
-            for idx, r in enumerate(deltachain):
-                if not self.issnapshot(r):
-                    break
-            deltachain = deltachain[idx:]
-            chunks = _slicechunk(self, deltachain, deltainfo)
-            all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
-            distance = max(all_span)
-        else:
-            distance = deltainfo.distance
-
-        textlen = revinfo.textlen
-        defaultmax = textlen * 4
-        maxdist = self._maxdeltachainspan
-        if not maxdist:
-            maxdist = distance # ensure the conditional pass
-        maxdist = max(maxdist, defaultmax)
-        if self._sparserevlog and maxdist < self._srmingapsize:
-            # In multiple places, we are ignoring irrelevant data ranges
-            # below a certain size. We also apply this tradeoff here and
-            # relax the span constraint for small enough content.
-            maxdist = self._srmingapsize
-
-        # Bad delta from read span:
-        #
-        #   If the span of data read is larger than the maximum allowed.
-        if maxdist < distance:
-            return False
-
-        # Bad delta from new delta size:
-        #
-        #   If the delta size is larger than the target text, storing the
-        #   delta will be inefficient.
-        if textlen < deltainfo.deltalen:
-            return False
-
-        # Bad delta from cumulated payload size:
-        #
-        #   If the sum of deltas gets larger than K * target text length.
-        if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
-            return False
-
-        # Bad delta from chain length:
-        #
-        #   If the number of deltas in the chain gets too high.
-        if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
-            return False
-
-        # bad delta from intermediate snapshot size limit
-        #
-        #   If an intermediate snapshot size is higher than the limit. The
-        #   limit exists to prevent endless chains of intermediate deltas
-        #   from being created.
-        if (deltainfo.snapshotdepth is not None and
-                (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
-            return False
-
-        # bad delta if new intermediate snapshot is larger than the previous
-        # snapshot
-        if (deltainfo.snapshotdepth
-                and self.length(deltainfo.base) < deltainfo.deltalen):
-            return False
-
-        return True
-
     def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                      cachedelta, ifh, dfh, alwayscache=False,
                      deltacomputer=None):
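Worked numbers (assumed, for illustration only) for the intermediate-snapshot bound in the method removed above and re-added in revlogutils/deltas.py below: a snapshot at depth d may not exceed textlen >> d.

    textlen = 4096
    snapshotdepth = 3
    limit = textlen >> snapshotdepth   # 4096 // 2**3 == 512
    deltalen = 600
    assert limit < deltalen            # a 600-byte snapshot at depth 3: rejected
    deltalen = 400
    assert not limit < deltalen        # a 400-byte snapshot passes this check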
@@ -2652,7 +1961,7 @@ class revlog(object):
         textlen = len(rawtext)
 
         if deltacomputer is None:
-            deltacomputer = _deltacomputer(self)
+            deltacomputer = deltautil.deltacomputer(self)
 
         revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
 
@@ -2754,7 +2063,7 @@ class revlog(object):
             dfh.flush()
             ifh.flush()
         try:
-            deltacomputer = _deltacomputer(self)
+            deltacomputer = deltautil.deltacomputer(self)
             # loop through our set of deltas
             for data in deltas:
                 node, p1, p2, linknode, deltabase, delta, flags = data
@@ -3127,7 +2436,7 @@ class revlog(object):
         populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                             self.DELTAREUSESAMEREVS)
 
-        deltacomputer = _deltacomputer(destrevlog)
+        deltacomputer = deltautil.deltacomputer(destrevlog)
         index = self.index
         for rev in self:
             entry = index[rev]
@@ -41,6 +41,3 @@ REVIDX_FLAGS_ORDER = [
 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
 # bitmask for flags that could cause rawdata content change
 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
-
-# maximum <delta-chain-data>/<revision-text-length> ratio
-LIMIT_DELTA2TEXT = 2
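The constant is removed from revlogutils/constants.py and re-added in the new revlogutils/deltas.py module below. Its role, mirroring finddeltainfo in this diff, with an assumed fulltext size:

    LIMIT_DELTA2TEXT = 2    # maximum <delta-chain-data>/<revision-text-length>
    textlen = 1000          # hypothetical revision fulltext length
    deltas_limit = textlen * LIMIT_DELTA2TEXT   # 2000 bytes
    # candidate delta bases whose own stored delta already exceeds this
    # limit are filtered out before any diffing work happens.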
@@ -1,215 +1,42 @@
-# revlog.py - storage back-end for mercurial
+# revlogdeltas.py - Logic around delta computation for revlog
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
+# Copyright 2018 Octobus <contact@octobus.net>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
-
-"""Storage back-end for Mercurial.
-
-This provides efficient delta storage with O(1) retrieve and append
-and O(changes) merge between branches.
-"""
+"""Helper class to compute deltas stored inside revlogs"""
 
 from __future__ import absolute_import
 
-import collections
-import contextlib
-import errno
-import hashlib
 import heapq
-import os
-import re
 import struct
-import zlib
 
 # import stuff from node for others to import from revlog
-from .node import (
-    bin,
-    hex,
-    nullhex,
-    nullid,
+from ..node import (
     nullrev,
-    wdirfilenodeids,
-    wdirhex,
-    wdirid,
-    wdirrev,
 )
-from .i18n import _
-from .revlogutils.constants import (
-    FLAG_GENERALDELTA,
-    FLAG_INLINE_DATA,
-    LIMIT_DELTA2TEXT,
-    REVIDX_DEFAULT_FLAGS,
-    REVIDX_ELLIPSIS,
-    REVIDX_EXTSTORED,
-    REVIDX_FLAGS_ORDER,
+from ..i18n import _
+
+from .constants import (
     REVIDX_ISCENSORED,
-    REVIDX_KNOWN_FLAGS,
     REVIDX_RAWTEXT_CHANGING_FLAGS,
-    REVLOGV0,
-    REVLOGV1,
-    REVLOGV1_FLAGS,
-    REVLOGV2,
-    REVLOGV2_FLAGS,
-    REVLOG_DEFAULT_FLAGS,
-    REVLOG_DEFAULT_FORMAT,
-    REVLOG_DEFAULT_VERSION,
 )
-from .thirdparty import (
+
+from ..thirdparty import (
     attr,
 )
-from . import (
-    ancestor,
+
+from .. import (
     error,
     mdiff,
-    policy,
-    pycompat,
-    repository,
-    templatefilters,
-    util,
 )
-from .utils import (
-    interfaceutil,
-    stringutil,
-)
-
-# blanked usage of all the names to prevent pyflakes constraints
-# We need these names available in the module for extensions.
-REVLOGV0
-REVLOGV1
-REVLOGV2
-FLAG_INLINE_DATA
-FLAG_GENERALDELTA
-REVLOG_DEFAULT_FLAGS
-REVLOG_DEFAULT_FORMAT
-REVLOG_DEFAULT_VERSION
-REVLOGV1_FLAGS
-REVLOGV2_FLAGS
-REVIDX_ISCENSORED
-REVIDX_ELLIPSIS
-REVIDX_EXTSTORED
-REVIDX_DEFAULT_FLAGS
-REVIDX_FLAGS_ORDER
-REVIDX_KNOWN_FLAGS
-REVIDX_RAWTEXT_CHANGING_FLAGS
-
-parsers = policy.importmod(r'parsers')
-
-# Aliased for performance.
-_zlibdecompress = zlib.decompress
-
-# max size of revlog with inline data
-_maxinline = 131072
-_chunksize = 1048576
 
 RevlogError = error.RevlogError
-LookupError = error.LookupError
-AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
 CensoredNodeError = error.CensoredNodeError
-ProgrammingError = error.ProgrammingError
-
-# Store flag processors (cf. 'addflagprocessor()' to register)
-_flagprocessors = {
-    REVIDX_ISCENSORED: None,
-}
-
-_mdre = re.compile('\1\n')
-def parsemeta(text):
-    """return (metadatadict, metadatasize)"""
-    # text can be buffer, so we can't use .startswith or .index
-    if text[:2] != '\1\n':
-        return None, None
-    s = _mdre.search(text, 2).start()
-    mtext = text[2:s]
-    meta = {}
-    for l in mtext.splitlines():
-        k, v = l.split(": ", 1)
-        meta[k] = v
-    return meta, (s + 2)
-
-def packmeta(meta, text):
-    keys = sorted(meta)
-    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
-    return "\1\n%s\1\n%s" % (metatext, text)
-
-def _censoredtext(text):
-    m, offs = parsemeta(text)
-    return m and "censored" in m
-
-def addflagprocessor(flag, processor):
-    """Register a flag processor on a revision data flag.
-
-    Invariant:
-    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
-      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
-    - Only one flag processor can be registered on a specific flag.
-    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
-      following signatures:
-          - (read)  f(self, rawtext) -> text, bool
-          - (write) f(self, text) -> rawtext, bool
-          - (raw)   f(self, rawtext) -> bool
-      "text" is presented to the user. "rawtext" is stored in revlog data, not
-      directly visible to the user.
-      The boolean returned by these transforms is used to determine whether
-      the returned text can be used for hash integrity checking. For example,
-      if "write" returns False, then "text" is used to generate hash. If
-      "write" returns True, that basically means "rawtext" returned by "write"
-      should be used to generate hash. Usually, "write" and "read" return
-      different booleans. And "raw" returns the same boolean as "write".
-
-    Note: The 'raw' transform is used for changegroup generation and in some
-    debug commands. In this case the transform only indicates whether the
-    contents can be used for hash integrity checks.
-    """
-    if not flag & REVIDX_KNOWN_FLAGS:
-        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
-        raise ProgrammingError(msg)
-    if flag not in REVIDX_FLAGS_ORDER:
-        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
-        raise ProgrammingError(msg)
-    if flag in _flagprocessors:
-        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
-        raise error.Abort(msg)
-    _flagprocessors[flag] = processor
-
-def getoffset(q):
-    return int(q >> 16)
-
-def gettype(q):
-    return int(q & 0xFFFF)
-
-def offset_type(offset, type):
-    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
-        raise ValueError('unknown revlog index flags')
-    return int(int(offset) << 16 | type)
-
-_nullhash = hashlib.sha1(nullid)
-
-def hash(text, p1, p2):
-    """generate a hash from the given text and its parent hashes
-
-    This hash combines both the current file contents and its history
-    in a manner that makes it easy to distinguish nodes with the same
-    content in the revision graph.
-    """
-    # As of now, if one of the parent nodes is null, p2 is null
-    if p2 == nullid:
-        # deep copy of a hash is faster than creating one
-        s = _nullhash.copy()
-        s.update(p1)
-    else:
-        # none of the parent nodes are nullid
-        if p1 < p2:
-            a = p1
-            b = p2
-        else:
-            a = p2
-            b = p1
-        s = hashlib.sha1(a)
-        s.update(b)
-    s.update(text)
-    return s.digest()
+
+# maximum <delta-chain-data>/<revision-text-length> ratio
+LIMIT_DELTA2TEXT = 2
 
 class _testrevlog(object):
     """minimalist fake revlog to use in doctests"""
@@ -234,95 +61,7 @@ class _testrevlog(object):
     def __len__(self):
         return len(self._data)
 
-def _trimchunk(revlog, revs, startidx, endidx=None):
-    """returns revs[startidx:endidx] without empty trailing revs
-
-    Doctest Setup
-    >>> revlog = _testrevlog([
-    ... 5, #0
-    ... 10, #1
-    ... 12, #2
-    ... 12, #3 (empty)
-    ... 17, #4
-    ... 21, #5
-    ... 21, #6 (empty)
-    ... ])
-
-    Contiguous cases:
-    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
-    [0, 1, 2, 3, 4, 5]
-    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
-    [0, 1, 2, 3, 4]
-    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
-    [0, 1, 2]
-    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
-    [2]
-    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
-    [3, 4, 5]
-    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
-    [3, 4]
-
-    Discontiguous cases:
-    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
-    [1, 3, 5]
-    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
-    [1]
-    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
-    [3, 5]
-    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
-    [3, 5]
-    """
-    length = revlog.length
-
-    if endidx is None:
-        endidx = len(revs)
-
-    # If we have a non-empty delta candidate, there is nothing to trim
-    if revs[endidx - 1] < len(revlog):
-        # Trim empty revs at the end, except the very first revision of a chain
-        while (endidx > 1
-               and endidx > startidx
-               and length(revs[endidx - 1]) == 0):
-            endidx -= 1
-
-    return revs[startidx:endidx]
-
-def _segmentspan(revlog, revs, deltainfo=None):
-    """Get the byte span of a segment of revisions
-
-    revs is a sorted array of revision numbers
-
-    >>> revlog = _testrevlog([
-    ... 5, #0
-    ... 10, #1
-    ... 12, #2
-    ... 12, #3 (empty)
-    ... 17, #4
-    ... ])
-
-    >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
-    17
-    >>> _segmentspan(revlog, [0, 4])
-    17
-    >>> _segmentspan(revlog, [3, 4])
-    5
-    >>> _segmentspan(revlog, [1, 2, 3,])
-    7
-    >>> _segmentspan(revlog, [1, 3])
-    7
-    """
-    if not revs:
-        return 0
-    if deltainfo is not None and len(revlog) <= revs[-1]:
-        if len(revs) == 1:
-            return deltainfo.deltalen
-        offset = revlog.end(len(revlog) - 1)
-        end = deltainfo.deltalen + offset
-    else:
-        end = revlog.end(revs[-1])
-    return end - revlog.start(revs[0])
-
-def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
+def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
     """slice revs to reduce the amount of unrelated data to be read from disk.
 
     ``revs`` is sliced into groups that should be read in one time.
@@ -358,21 +97,21 @@ def _slicechunk(revlog, revs, deltainfo=
     ... 91, #15 (5)
     ... ])
 
-    >>> list(_slicechunk(revlog, list(range(16))))
+    >>> list(slicechunk(revlog, list(range(16))))
     [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
-    >>> list(_slicechunk(revlog, [0, 15]))
+    >>> list(slicechunk(revlog, [0, 15]))
     [[0], [15]]
-    >>> list(_slicechunk(revlog, [0, 11, 15]))
+    >>> list(slicechunk(revlog, [0, 11, 15]))
     [[0], [11], [15]]
-    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
+    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
     [[0], [11, 13, 15]]
-    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
+    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
     [[1, 2], [5, 8, 10, 11], [14]]
 
     Slicing with a maximum chunk size
-    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
+    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
     [[0], [11], [13], [15]]
-    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
+    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
     [[0], [11], [13, 15]]
     """
     if targetsize is not None:
@@ -445,7 +184,7 @@ def _slicechunktosize(revlog, revs, targ
     [[3], [5]]
     """
     assert targetsize is None or 0 <= targetsize
-    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
+    if targetsize is None or segmentspan(revlog, revs) <= targetsize:
         yield revs
         return
 
@@ -531,7 +270,7 @@ def _slicechunktodensity(revlog, revs, d
     nextoffset = revlog.end(nextrev - 1)
 
     if deltainfo is None:
-        deltachainspan = _segmentspan(revlog, revs)
+        deltachainspan = segmentspan(revlog, revs)
         chainpayload = sum(length(r) for r in revs)
     else:
         deltachainspan = deltainfo.distance
@@ -611,6 +350,94 @@ def _slicechunktodensity(revlog, revs, d
     if chunk:
         yield chunk
 
+def _trimchunk(revlog, revs, startidx, endidx=None):
+    """returns revs[startidx:endidx] without empty trailing revs
+
+    Doctest Setup
+    >>> revlog = _testrevlog([
+    ... 5, #0
+    ... 10, #1
+    ... 12, #2
+    ... 12, #3 (empty)
+    ... 17, #4
+    ... 21, #5
+    ... 21, #6 (empty)
+    ... ])
+
+    Contiguous cases:
+    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
+    [0, 1, 2, 3, 4, 5]
+    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
+    [0, 1, 2, 3, 4]
+    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
+    [0, 1, 2]
+    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
+    [2]
+    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
+    [3, 4, 5]
+    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
+    [3, 4]
+
+    Discontiguous cases:
+    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
+    [1, 3, 5]
+    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
+    [1]
+    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
+    [3, 5]
+    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
+    [3, 5]
+    """
+    length = revlog.length
+
+    if endidx is None:
+        endidx = len(revs)
+
+    # If we have a non-empty delta candidate, there is nothing to trim
+    if revs[endidx - 1] < len(revlog):
+        # Trim empty revs at the end, except the very first revision of a chain
+        while (endidx > 1
+               and endidx > startidx
+               and length(revs[endidx - 1]) == 0):
+            endidx -= 1
+
+    return revs[startidx:endidx]
+
+def segmentspan(revlog, revs, deltainfo=None):
+    """Get the byte span of a segment of revisions
+
+    revs is a sorted array of revision numbers
+
+    >>> revlog = _testrevlog([
+    ... 5, #0
+    ... 10, #1
+    ... 12, #2
+    ... 12, #3 (empty)
+    ... 17, #4
+    ... ])
+
+    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
+    17
+    >>> segmentspan(revlog, [0, 4])
+    17
+    >>> segmentspan(revlog, [3, 4])
+    5
+    >>> segmentspan(revlog, [1, 2, 3,])
+    7
+    >>> segmentspan(revlog, [1, 3])
+    7
+    """
+    if not revs:
+        return 0
+    if deltainfo is not None and len(revlog) <= revs[-1]:
+        if len(revs) == 1:
+            return deltainfo.deltalen
+        offset = revlog.end(len(revlog) - 1)
+        end = deltainfo.deltalen + offset
+    else:
+        end = revlog.end(revs[-1])
+    return end - revlog.start(revs[0])
+
 @attr.s(slots=True, frozen=True)
 class _deltainfo(object):
     distance = attr.ib()
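Illustrative arithmetic (assumed values, not from the diff) for the deltainfo branch of segmentspan above: a pending revision, with rev == len(revlog), is treated as if its delta were already appended at the end of the data file.

    revlog_end = 17    # revlog.end(len(revlog) - 1), end of last stored rev
    deltalen = 4       # deltainfo.deltalen of the revision being evaluated
    start3 = 12        # revlog.start(3), first rev of the segment
    span = (revlog_end + deltalen) - start3   # == 9 bytes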
@@ -622,7 +449,98 @@ class _deltainfo(object):
     compresseddeltalen = attr.ib()
     snapshotdepth = attr.ib()
 
-class _deltacomputer(object):
+def isgooddeltainfo(revlog, deltainfo, revinfo):
+    """Returns True if the given delta is good. Good means that it is within
+    the disk span, disk size, and chain length bounds that we know to be
+    performant."""
+    if deltainfo is None:
+        return False
+
+    # - 'deltainfo.distance' is the distance from the base revision --
+    #   bounding it limits the amount of I/O we need to do.
+    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
+    #   deltas we need to apply -- bounding it limits the amount of CPU
+    #   we consume.
+
+    if revlog._sparserevlog:
+        # As sparse-read will be used, we can consider that the distance,
+        # instead of being the span of the whole chunk,
+        # is the span of the largest read chunk
+        base = deltainfo.base
+
+        if base != nullrev:
+            deltachain = revlog._deltachain(base)[0]
+        else:
+            deltachain = []
+
+        # search for the first non-snapshot revision
+        for idx, r in enumerate(deltachain):
+            if not revlog.issnapshot(r):
+                break
+        deltachain = deltachain[idx:]
+        chunks = slicechunk(revlog, deltachain, deltainfo)
+        all_span = [segmentspan(revlog, revs, deltainfo)
+                    for revs in chunks]
+        distance = max(all_span)
+    else:
+        distance = deltainfo.distance
+
+    textlen = revinfo.textlen
+    defaultmax = textlen * 4
+    maxdist = revlog._maxdeltachainspan
+    if not maxdist:
+        maxdist = distance # ensure the conditional pass
+    maxdist = max(maxdist, defaultmax)
+    if revlog._sparserevlog and maxdist < revlog._srmingapsize:
+        # In multiple places, we are ignoring irrelevant data ranges below a
+        # certain size. We also apply this tradeoff here and relax the span
+        # constraint for small enough content.
+        maxdist = revlog._srmingapsize
+
+    # Bad delta from read span:
+    #
+    #   If the span of data read is larger than the maximum allowed.
+    if maxdist < distance:
+        return False
+
+    # Bad delta from new delta size:
+    #
+    #   If the delta size is larger than the target text, storing the
+    #   delta will be inefficient.
+    if textlen < deltainfo.deltalen:
+        return False
+
+    # Bad delta from cumulated payload size:
+    #
+    #   If the sum of deltas gets larger than K * target text length.
+    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
+        return False
+
+    # Bad delta from chain length:
+    #
+    #   If the number of deltas in the chain gets too high.
+    if (revlog._maxchainlen
+            and revlog._maxchainlen < deltainfo.chainlen):
+        return False
+
+    # bad delta from intermediate snapshot size limit
+    #
+    #   If an intermediate snapshot size is higher than the limit. The
+    #   limit exists to prevent endless chains of intermediate deltas from
+    #   being created.
+    if (deltainfo.snapshotdepth is not None and
+            (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
+        return False
+
+    # bad delta if new intermediate snapshot is larger than the previous
+    # snapshot
+    if (deltainfo.snapshotdepth
+            and revlog.length(deltainfo.base) < deltainfo.deltalen):
+        return False
+
+    return True
+
+class deltacomputer(object):
     def __init__(self, revlog):
         self.revlog = revlog
 
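Worked numbers (assumed) for the read-span bound computed in isgooddeltainfo above, showing how an unset maxdeltachainspan falls back to the larger of the actual distance and four times the fulltext:

    textlen = 100
    distance = 500
    maxdeltachainspan = 0          # "not maxdist" -> no configured limit
    defaultmax = textlen * 4       # 400
    maxdist = maxdeltachainspan or distance   # ensure the conditional can pass
    maxdist = max(maxdist, defaultmax)        # max(500, 400) == 500
    assert not maxdist < distance  # the span check passes; others still apply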
@@ -807,2374 +725,10 b' class _deltacomputer(object):'
807 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
725 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
808 continue
726 continue
809 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
727 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
810 if revlog._isgooddeltainfo(candidatedelta, revinfo):
728 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
811 nominateddeltas.append(candidatedelta)
729 nominateddeltas.append(candidatedelta)
812 if nominateddeltas:
730 if nominateddeltas:
813 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
731 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
814 break
732 break
815
733
816 return deltainfo
734 return deltainfo
817
818 @attr.s(slots=True, frozen=True)
819 class _revisioninfo(object):
820 """Information about a revision that allows building its fulltext
821 node: expected hash of the revision
822 p1, p2: parent revs of the revision
823 btext: built text cache consisting of a one-element list
824 cachedelta: (baserev, uncompressed_delta) or None
825 flags: flags associated to the revision storage
826
827 One of btext[0] or cachedelta must be set.
828 """
829 node = attr.ib()
830 p1 = attr.ib()
831 p2 = attr.ib()
832 btext = attr.ib()
833 textlen = attr.ib()
834 cachedelta = attr.ib()
835 flags = attr.ib()
836
837 @interfaceutil.implementer(repository.irevisiondelta)
838 @attr.s(slots=True, frozen=True)
839 class revlogrevisiondelta(object):
840 node = attr.ib()
841 p1node = attr.ib()
842 p2node = attr.ib()
843 basenode = attr.ib()
844 linknode = attr.ib()
845 flags = attr.ib()
846 baserevisionsize = attr.ib()
847 revision = attr.ib()
848 delta = attr.ib()
849
850 # index v0:
851 # 4 bytes: offset
852 # 4 bytes: compressed length
853 # 4 bytes: base rev
854 # 4 bytes: link rev
855 # 20 bytes: parent 1 nodeid
856 # 20 bytes: parent 2 nodeid
857 # 20 bytes: nodeid
858 indexformatv0 = struct.Struct(">4l20s20s20s")
859 indexformatv0_pack = indexformatv0.pack
860 indexformatv0_unpack = indexformatv0.unpack
861
862 class revlogoldindex(list):
863 def __getitem__(self, i):
864 if i == -1:
865 return (0, 0, 0, -1, -1, -1, -1, nullid)
866 return list.__getitem__(self, i)
867
868 class revlogoldio(object):
869 def __init__(self):
870 self.size = indexformatv0.size
871
872 def parseindex(self, data, inline):
873 s = self.size
874 index = []
875 nodemap = {nullid: nullrev}
876 n = off = 0
877 l = len(data)
878 while off + s <= l:
879 cur = data[off:off + s]
880 off += s
881 e = indexformatv0_unpack(cur)
882 # transform to revlogv1 format
883 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
884 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
885 index.append(e2)
886 nodemap[e[6]] = n
887 n += 1
888
889 return revlogoldindex(index), nodemap, None
890
891 def packentry(self, entry, node, version, rev):
892 if gettype(entry[0]):
893 raise RevlogError(_('index entry flags need revlog version 1'))
894 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
895 node(entry[5]), node(entry[6]), entry[7])
896 return indexformatv0_pack(*e2)
897
898 # index ng:
899 # 6 bytes: offset
900 # 2 bytes: flags
901 # 4 bytes: compressed length
902 # 4 bytes: uncompressed length
903 # 4 bytes: base rev
904 # 4 bytes: link rev
905 # 4 bytes: parent 1 rev
906 # 4 bytes: parent 2 rev
907 # 32 bytes: nodeid
908 indexformatng = struct.Struct(">Qiiiiii20s12x")
909 indexformatng_pack = indexformatng.pack
910 versionformat = struct.Struct(">I")
911 versionformat_pack = versionformat.pack
912 versionformat_unpack = versionformat.unpack
913
914 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
915 # signed integer)
916 _maxentrysize = 0x7fffffff
917
918 class revlogio(object):
919 def __init__(self):
920 self.size = indexformatng.size
921
922 def parseindex(self, data, inline):
923 # call the C implementation to parse the index data
924 index, cache = parsers.parse_index2(data, inline)
925 return index, getattr(index, 'nodemap', None), cache
926
927 def packentry(self, entry, node, version, rev):
928 p = indexformatng_pack(*entry)
929 if rev == 0:
930 p = versionformat_pack(version) + p[4:]
931 return p
932
933 class revlog(object):
934 """
935 the underlying revision storage object
936
937 A revlog consists of two parts, an index and the revision data.
938
939 The index is a file with a fixed record size containing
940 information on each revision, including its nodeid (hash), the
941 nodeids of its parents, the position and offset of its data within
942 the data file, and the revision it's based on. Finally, each entry
943 contains a linkrev entry that can serve as a pointer to external
944 data.
945
946 The revision data itself is a linear collection of data chunks.
947 Each chunk represents a revision and is usually represented as a
948 delta against the previous chunk. To bound lookup time, runs of
949 deltas are limited to about 2 times the length of the original
950 version data. This makes retrieval of a version proportional to
951 its size, or O(1) relative to the number of revisions.
952
953 Both pieces of the revlog are written to in an append-only
954 fashion, which means we never need to rewrite a file to insert or
955 remove data, and can use some simple techniques to avoid the need
956 for locking while reading.
957
958 If checkambig, indexfile is opened with checkambig=True at
959 writing, to avoid file stat ambiguity.
960
961 If mmaplargeindex is True, and an mmapindexthreshold is set, the
962 index will be mmapped rather than read if it is larger than the
963 configured threshold.
964
965 If censorable is True, the revlog can have censored revisions.
966 """
967 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
968 mmaplargeindex=False, censorable=False):
969 """
970 create a revlog object
971
972 opener is a function that abstracts the file opening operation
973 and can be used to implement COW semantics or the like.
974 """
975 self.indexfile = indexfile
976 self.datafile = datafile or (indexfile[:-2] + ".d")
977 self.opener = opener
978 # When True, indexfile is opened with checkambig=True at writing, to
979 # avoid file stat ambiguity.
980 self._checkambig = checkambig
981 self._censorable = censorable
982 # 3-tuple of (node, rev, text) for a raw revision.
983 self._cache = None
984 # Maps rev to chain base rev.
985 self._chainbasecache = util.lrucachedict(100)
986 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
987 self._chunkcache = (0, '')
988 # How much data to read and cache into the raw revlog data cache.
989 self._chunkcachesize = 65536
990 self._maxchainlen = None
991 self._deltabothparents = True
992 self.index = []
993 # Mapping of partial identifiers to full nodes.
994 self._pcache = {}
995 # Mapping of revision integer to full node.
996 self._nodecache = {nullid: nullrev}
997 self._nodepos = None
998 self._compengine = 'zlib'
999 self._maxdeltachainspan = -1
1000 self._withsparseread = False
1001 self._sparserevlog = False
1002 self._srdensitythreshold = 0.50
1003 self._srmingapsize = 262144
1004
1005 mmapindexthreshold = None
1006 v = REVLOG_DEFAULT_VERSION
1007 opts = getattr(opener, 'options', None)
1008 if opts is not None:
1009 if 'revlogv2' in opts:
1010 # version 2 revlogs always use generaldelta.
1011 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
1012 elif 'revlogv1' in opts:
1013 if 'generaldelta' in opts:
1014 v |= FLAG_GENERALDELTA
1015 else:
1016 v = 0
1017 if 'chunkcachesize' in opts:
1018 self._chunkcachesize = opts['chunkcachesize']
1019 if 'maxchainlen' in opts:
1020 self._maxchainlen = opts['maxchainlen']
1021 if 'deltabothparents' in opts:
1022 self._deltabothparents = opts['deltabothparents']
1023 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
1024 if 'compengine' in opts:
1025 self._compengine = opts['compengine']
1026 if 'maxdeltachainspan' in opts:
1027 self._maxdeltachainspan = opts['maxdeltachainspan']
1028 if mmaplargeindex and 'mmapindexthreshold' in opts:
1029 mmapindexthreshold = opts['mmapindexthreshold']
1030 self._sparserevlog = bool(opts.get('sparse-revlog', False))
1031 withsparseread = bool(opts.get('with-sparse-read', False))
1032 # sparse-revlog forces sparse-read
1033 self._withsparseread = self._sparserevlog or withsparseread
1034 if 'sparse-read-density-threshold' in opts:
1035 self._srdensitythreshold = opts['sparse-read-density-threshold']
1036 if 'sparse-read-min-gap-size' in opts:
1037 self._srmingapsize = opts['sparse-read-min-gap-size']
1038
1039 if self._chunkcachesize <= 0:
1040 raise RevlogError(_('revlog chunk cache size %r is not greater '
1041 'than 0') % self._chunkcachesize)
1042 elif self._chunkcachesize & (self._chunkcachesize - 1):
1043 raise RevlogError(_('revlog chunk cache size %r is not a power '
1044 'of 2') % self._chunkcachesize)
1045
1046 indexdata = ''
1047 self._initempty = True
1048 try:
1049 with self._indexfp() as f:
1050 if (mmapindexthreshold is not None and
1051 self.opener.fstat(f).st_size >= mmapindexthreshold):
1052 indexdata = util.buffer(util.mmapread(f))
1053 else:
1054 indexdata = f.read()
1055 if len(indexdata) > 0:
1056 v = versionformat_unpack(indexdata[:4])[0]
1057 self._initempty = False
1058 except IOError as inst:
1059 if inst.errno != errno.ENOENT:
1060 raise
1061
1062 self.version = v
1063 self._inline = v & FLAG_INLINE_DATA
1064 self._generaldelta = v & FLAG_GENERALDELTA
1065 flags = v & ~0xFFFF
1066 fmt = v & 0xFFFF
1067 if fmt == REVLOGV0:
1068 if flags:
1069 raise RevlogError(_('unknown flags (%#04x) in version %d '
1070 'revlog %s') %
1071 (flags >> 16, fmt, self.indexfile))
1072 elif fmt == REVLOGV1:
1073 if flags & ~REVLOGV1_FLAGS:
1074 raise RevlogError(_('unknown flags (%#04x) in version %d '
1075 'revlog %s') %
1076 (flags >> 16, fmt, self.indexfile))
1077 elif fmt == REVLOGV2:
1078 if flags & ~REVLOGV2_FLAGS:
1079 raise RevlogError(_('unknown flags (%#04x) in version %d '
1080 'revlog %s') %
1081 (flags >> 16, fmt, self.indexfile))
1082 else:
1083 raise RevlogError(_('unknown version (%d) in revlog %s') %
1084 (fmt, self.indexfile))
1085
1086 self._storedeltachains = True
1087
1088 self._io = revlogio()
1089 if self.version == REVLOGV0:
1090 self._io = revlogoldio()
1091 try:
1092 d = self._io.parseindex(indexdata, self._inline)
1093 except (ValueError, IndexError):
1094 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
1095 self.index, nodemap, self._chunkcache = d
1096 if nodemap is not None:
1097 self.nodemap = self._nodecache = nodemap
1098 if not self._chunkcache:
1099 self._chunkclear()
1100 # revnum -> (chain-length, sum-delta-length)
1101 self._chaininfocache = {}
1102 # revlog header -> revlog compressor
1103 self._decompressors = {}
1104
1105 @util.propertycache
1106 def _compressor(self):
1107 return util.compengines[self._compengine].revlogcompressor()
1108
1109 def _indexfp(self, mode='r'):
1110 """file object for the revlog's index file"""
1111 args = {r'mode': mode}
1112 if mode != 'r':
1113 args[r'checkambig'] = self._checkambig
1114 if mode == 'w':
1115 args[r'atomictemp'] = True
1116 return self.opener(self.indexfile, **args)
1117
1118 def _datafp(self, mode='r'):
1119 """file object for the revlog's data file"""
1120 return self.opener(self.datafile, mode=mode)
1121
1122 @contextlib.contextmanager
1123 def _datareadfp(self, existingfp=None):
1124 """file object suitable to read data"""
1125 if existingfp is not None:
1126 yield existingfp
1127 else:
1128 if self._inline:
1129 func = self._indexfp
1130 else:
1131 func = self._datafp
1132 with func() as fp:
1133 yield fp
1134
1135 def tip(self):
1136 return self.node(len(self.index) - 1)
1137 def __contains__(self, rev):
1138 return 0 <= rev < len(self)
1139 def __len__(self):
1140 return len(self.index)
1141 def __iter__(self):
1142 return iter(pycompat.xrange(len(self)))
1143 def revs(self, start=0, stop=None):
1144 """iterate over all rev in this revlog (from start to stop)"""
1145 step = 1
1146 length = len(self)
1147 if stop is not None:
1148 if start > stop:
1149 step = -1
1150 stop += step
1151 if stop > length:
1152 stop = length
1153 else:
1154 stop = length
1155 return pycompat.xrange(start, stop, step)
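# (Illustrative sketch, assuming a 5-revision log: revs() yields 0..4,
# while revs(4, 2) flips the step to -1 and yields 4, 3, 2 because the
# "stop += step" adjustment above makes the stop bound inclusive.)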
1156
1157 @util.propertycache
1158 def nodemap(self):
1159 if self.index:
1160 # populate mapping down to the initial node
1161 node0 = self.index[0][7] # get around changelog filtering
1162 self.rev(node0)
1163 return self._nodecache
1164
1165 def hasnode(self, node):
1166 try:
1167 self.rev(node)
1168 return True
1169 except KeyError:
1170 return False
1171
1172 def candelta(self, baserev, rev):
1173 """whether two revisions (baserev, rev) can be delta-ed or not"""
1174 # Disable delta if either rev requires a content-changing flag
1175 # processor (ex. LFS). This is because such a flag processor can alter
1176 # the rawtext content that the delta will be based on, and two clients
1177 # could have the same revlog node with different flags (i.e. different
1178 # rawtext contents) and the delta could be incompatible.
1179 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1180 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1181 return False
1182 return True
1183
1184 def clearcaches(self):
1185 self._cache = None
1186 self._chainbasecache.clear()
1187 self._chunkcache = (0, '')
1188 self._pcache = {}
1189
1190 try:
1191 self._nodecache.clearcaches()
1192 except AttributeError:
1193 self._nodecache = {nullid: nullrev}
1194 self._nodepos = None
1195
1196 def rev(self, node):
1197 try:
1198 return self._nodecache[node]
1199 except TypeError:
1200 raise
1201 except RevlogError:
1202 # parsers.c radix tree lookup failed
1203 if node == wdirid or node in wdirfilenodeids:
1204 raise error.WdirUnsupported
1205 raise LookupError(node, self.indexfile, _('no node'))
1206 except KeyError:
1207 # pure python cache lookup failed
1208 n = self._nodecache
1209 i = self.index
1210 p = self._nodepos
1211 if p is None:
1212 p = len(i) - 1
1213 else:
1214 assert p < len(i)
1215 for r in pycompat.xrange(p, -1, -1):
1216 v = i[r][7]
1217 n[v] = r
1218 if v == node:
1219 self._nodepos = r - 1
1220 return r
1221 if node == wdirid or node in wdirfilenodeids:
1222 raise error.WdirUnsupported
1223 raise LookupError(node, self.indexfile, _('no node'))
1224
1225 # Accessors for index entries.
1226
1227 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1228 # are flags.
1229 def start(self, rev):
1230 return int(self.index[rev][0] >> 16)
1231
1232 def flags(self, rev):
1233 return self.index[rev][0] & 0xFFFF
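# (Illustrative sketch with hypothetical values: a first index field of
# (1234 << 16) | 0x0001 makes start() return 1234 and flags() return
# 0x0001.)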
1234
1235 def length(self, rev):
1236 return self.index[rev][1]
1237
1238 def rawsize(self, rev):
1239 """return the length of the uncompressed text for a given revision"""
1240 l = self.index[rev][2]
1241 if l >= 0:
1242 return l
1243
1244 t = self.revision(rev, raw=True)
1245 return len(t)
1246
1247 def size(self, rev):
1248 """length of non-raw text (processed by a "read" flag processor)"""
1249 # fast path: if no "read" flag processor could change the content,
1250 # size is rawsize. note: ELLIPSIS is known to not change the content.
1251 flags = self.flags(rev)
1252 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1253 return self.rawsize(rev)
1254
1255 return len(self.revision(rev, raw=False))
1256
1257 def chainbase(self, rev):
1258 base = self._chainbasecache.get(rev)
1259 if base is not None:
1260 return base
1261
1262 index = self.index
1263 iterrev = rev
1264 base = index[iterrev][3]
1265 while base != iterrev:
1266 iterrev = base
1267 base = index[iterrev][3]
1268
1269 self._chainbasecache[rev] = base
1270 return base
1271
1272 def linkrev(self, rev):
1273 return self.index[rev][4]
1274
1275 def parentrevs(self, rev):
1276 try:
1277 entry = self.index[rev]
1278 except IndexError:
1279 if rev == wdirrev:
1280 raise error.WdirUnsupported
1281 raise
1282
1283 return entry[5], entry[6]
1284
1285 def node(self, rev):
1286 try:
1287 return self.index[rev][7]
1288 except IndexError:
1289 if rev == wdirrev:
1290 raise error.WdirUnsupported
1291 raise
1292
1293 # Derived from index values.
1294
1295 def end(self, rev):
1296 return self.start(rev) + self.length(rev)
1297
1298 def parents(self, node):
1299 i = self.index
1300 d = i[self.rev(node)]
1301 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1302
1303 def chainlen(self, rev):
1304 return self._chaininfo(rev)[0]
1305
1306 def _chaininfo(self, rev):
1307 chaininfocache = self._chaininfocache
1308 if rev in chaininfocache:
1309 return chaininfocache[rev]
1310 index = self.index
1311 generaldelta = self._generaldelta
1312 iterrev = rev
1313 e = index[iterrev]
1314 clen = 0
1315 compresseddeltalen = 0
1316 while iterrev != e[3]:
1317 clen += 1
1318 compresseddeltalen += e[1]
1319 if generaldelta:
1320 iterrev = e[3]
1321 else:
1322 iterrev -= 1
1323 if iterrev in chaininfocache:
1324 t = chaininfocache[iterrev]
1325 clen += t[0]
1326 compresseddeltalen += t[1]
1327 break
1328 e = index[iterrev]
1329 else:
1330 # Add text length of base since decompressing that also takes
1331 # work. For cache hits the length is already included.
1332 compresseddeltalen += e[1]
1333 r = (clen, compresseddeltalen)
1334 chaininfocache[rev] = r
1335 return r
1336
1337 def _deltachain(self, rev, stoprev=None):
1338 """Obtain the delta chain for a revision.
1339
1340 ``stoprev`` specifies a revision to stop at. If not specified, we
1341 stop at the base of the chain.
1342
1343 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1344 revs in ascending order and ``stopped`` is a bool indicating whether
1345 ``stoprev`` was hit.
1346 """
1347 # Try C implementation.
1348 try:
1349 return self.index.deltachain(rev, stoprev, self._generaldelta)
1350 except AttributeError:
1351 pass
1352
1353 chain = []
1354
1355 # Alias to prevent attribute lookup in tight loop.
1356 index = self.index
1357 generaldelta = self._generaldelta
1358
1359 iterrev = rev
1360 e = index[iterrev]
1361 while iterrev != e[3] and iterrev != stoprev:
1362 chain.append(iterrev)
1363 if generaldelta:
1364 iterrev = e[3]
1365 else:
1366 iterrev -= 1
1367 e = index[iterrev]
1368
1369 if iterrev == stoprev:
1370 stopped = True
1371 else:
1372 chain.append(iterrev)
1373 stopped = False
1374
1375 chain.reverse()
1376 return chain, stopped
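# (Illustrative sketch: with generaldelta, the walk follows each entry's
# stored base in index[r][3], so a chain for rev 7 might be [2, 5, 7];
# without generaldelta the base is implicitly rev - 1 and the chain is a
# contiguous run such as [5, 6, 7].)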
1377
1378 def ancestors(self, revs, stoprev=0, inclusive=False):
1379 """Generate the ancestors of 'revs' in reverse topological order.
1380 Does not generate revs lower than stoprev.
1381
1382 See the documentation for ancestor.lazyancestors for more details."""
1383
1384 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1385 inclusive=inclusive)
1386
1387 def descendants(self, revs):
1388 """Generate the descendants of 'revs' in revision order.
1389
1390 Yield a sequence of revision numbers starting with a child of
1391 some rev in revs, i.e., each revision is *not* considered a
1392 descendant of itself. Results are ordered by revision number (a
1393 topological sort)."""
1394 first = min(revs)
1395 if first == nullrev:
1396 for i in self:
1397 yield i
1398 return
1399
1400 seen = set(revs)
1401 for i in self.revs(start=first + 1):
1402 for x in self.parentrevs(i):
1403 if x != nullrev and x in seen:
1404 seen.add(i)
1405 yield i
1406 break
1407
1408 def findcommonmissing(self, common=None, heads=None):
1409 """Return a tuple of the ancestors of common and the ancestors of heads
1410 that are not ancestors of common. In revset terminology, we return the
1411 tuple:
1412
1413 ::common, (::heads) - (::common)
1414
1415 The list is sorted by revision number, meaning it is
1416 topologically sorted.
1417
1418 'heads' and 'common' are both lists of node IDs. If heads is
1419 not supplied, uses all of the revlog's heads. If common is not
1420 supplied, uses nullid."""
1421 if common is None:
1422 common = [nullid]
1423 if heads is None:
1424 heads = self.heads()
1425
1426 common = [self.rev(n) for n in common]
1427 heads = [self.rev(n) for n in heads]
1428
1429 # we want the ancestors, but inclusive
1430 class lazyset(object):
1431 def __init__(self, lazyvalues):
1432 self.addedvalues = set()
1433 self.lazyvalues = lazyvalues
1434
1435 def __contains__(self, value):
1436 return value in self.addedvalues or value in self.lazyvalues
1437
1438 def __iter__(self):
1439 added = self.addedvalues
1440 for r in added:
1441 yield r
1442 for r in self.lazyvalues:
1443 if r not in added:
1444 yield r
1445
1446 def add(self, value):
1447 self.addedvalues.add(value)
1448
1449 def update(self, values):
1450 self.addedvalues.update(values)
1451
1452 has = lazyset(self.ancestors(common))
1453 has.add(nullrev)
1454 has.update(common)
1455
1456 # take all ancestors from heads that aren't in has
1457 missing = set()
1458 visit = collections.deque(r for r in heads if r not in has)
1459 while visit:
1460 r = visit.popleft()
1461 if r in missing:
1462 continue
1463 else:
1464 missing.add(r)
1465 for p in self.parentrevs(r):
1466 if p not in has:
1467 visit.append(p)
1468 missing = list(missing)
1469 missing.sort()
1470 return has, [self.node(miss) for miss in missing]
1471
1472 def incrementalmissingrevs(self, common=None):
1473 """Return an object that can be used to incrementally compute the
1474 revision numbers of the ancestors of arbitrary sets that are not
1475 ancestors of common. This is an ancestor.incrementalmissingancestors
1476 object.
1477
1478 'common' is a list of revision numbers. If common is not supplied, uses
1479 nullrev.
1480 """
1481 if common is None:
1482 common = [nullrev]
1483
1484 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1485
1486 def findmissingrevs(self, common=None, heads=None):
1487 """Return the revision numbers of the ancestors of heads that
1488 are not ancestors of common.
1489
1490 More specifically, return a list of revision numbers corresponding to
1491 nodes N such that every N satisfies the following constraints:
1492
1493 1. N is an ancestor of some node in 'heads'
1494 2. N is not an ancestor of any node in 'common'
1495
1496 The list is sorted by revision number, meaning it is
1497 topologically sorted.
1498
1499 'heads' and 'common' are both lists of revision numbers. If heads is
1500 not supplied, uses all of the revlog's heads. If common is not
1501 supplied, uses nullid."""
1502 if common is None:
1503 common = [nullrev]
1504 if heads is None:
1505 heads = self.headrevs()
1506
1507 inc = self.incrementalmissingrevs(common=common)
1508 return inc.missingancestors(heads)
1509
1510 def findmissing(self, common=None, heads=None):
1511 """Return the ancestors of heads that are not ancestors of common.
1512
1513 More specifically, return a list of nodes N such that every N
1514 satisfies the following constraints:
1515
1516 1. N is an ancestor of some node in 'heads'
1517 2. N is not an ancestor of any node in 'common'
1518
1519 The list is sorted by revision number, meaning it is
1520 topologically sorted.
1521
1522 'heads' and 'common' are both lists of node IDs. If heads is
1523 not supplied, uses all of the revlog's heads. If common is not
1524 supplied, uses nullid."""
1525 if common is None:
1526 common = [nullid]
1527 if heads is None:
1528 heads = self.heads()
1529
1530 common = [self.rev(n) for n in common]
1531 heads = [self.rev(n) for n in heads]
1532
1533 inc = self.incrementalmissingrevs(common=common)
1534 return [self.node(r) for r in inc.missingancestors(heads)]
1535
1536 def nodesbetween(self, roots=None, heads=None):
1537 """Return a topological path from 'roots' to 'heads'.
1538
1539 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1540 topologically sorted list of all nodes N that satisfy both of
1541 these constraints:
1542
1543 1. N is a descendant of some node in 'roots'
1544 2. N is an ancestor of some node in 'heads'
1545
1546 Every node is considered to be both a descendant and an ancestor
1547 of itself, so every reachable node in 'roots' and 'heads' will be
1548 included in 'nodes'.
1549
1550 'outroots' is the list of reachable nodes in 'roots', i.e., the
1551 subset of 'roots' that is returned in 'nodes'. Likewise,
1552 'outheads' is the subset of 'heads' that is also in 'nodes'.
1553
1554 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1555 unspecified, uses nullid as the only root. If 'heads' is
1556 unspecified, uses list of all of the revlog's heads."""
1557 nonodes = ([], [], [])
1558 if roots is not None:
1559 roots = list(roots)
1560 if not roots:
1561 return nonodes
1562 lowestrev = min([self.rev(n) for n in roots])
1563 else:
1564 roots = [nullid] # Everybody's a descendant of nullid
1565 lowestrev = nullrev
1566 if (lowestrev == nullrev) and (heads is None):
1567 # We want _all_ the nodes!
1568 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1569 if heads is None:
1570 # All nodes are ancestors, so the latest ancestor is the last
1571 # node.
1572 highestrev = len(self) - 1
1573 # Set ancestors to None to signal that every node is an ancestor.
1574 ancestors = None
1575 # Set heads to an empty dictionary for later discovery of heads
1576 heads = {}
1577 else:
1578 heads = list(heads)
1579 if not heads:
1580 return nonodes
1581 ancestors = set()
1582 # Turn heads into a dictionary so we can remove 'fake' heads.
1583 # Also, later we will be using it to filter out the heads we can't
1584 # find from roots.
1585 heads = dict.fromkeys(heads, False)
1586 # Start at the top and keep marking parents until we're done.
1587 nodestotag = set(heads)
1588 # Remember where the top was so we can use it as a limit later.
1589 highestrev = max([self.rev(n) for n in nodestotag])
1590 while nodestotag:
1591 # grab a node to tag
1592 n = nodestotag.pop()
1593 # Never tag nullid
1594 if n == nullid:
1595 continue
1596 # A node's revision number represents its place in a
1597 # topologically sorted list of nodes.
1598 r = self.rev(n)
1599 if r >= lowestrev:
1600 if n not in ancestors:
1601 # If we are possibly a descendant of one of the roots
1602 # and we haven't already been marked as an ancestor
1603 ancestors.add(n) # Mark as ancestor
1604 # Add non-nullid parents to list of nodes to tag.
1605 nodestotag.update([p for p in self.parents(n) if
1606 p != nullid])
1607 elif n in heads: # We've seen it before, is it a fake head?
1608 # So it is, real heads should not be the ancestors of
1609 # any other heads.
1610 heads.pop(n)
1611 if not ancestors:
1612 return nonodes
1613 # Now that we have our set of ancestors, we want to remove any
1614 # roots that are not ancestors.
1615
1616 # If one of the roots was nullid, everything is included anyway.
1617 if lowestrev > nullrev:
1618 # But, since we weren't, let's recompute the lowest rev to not
1619 # include roots that aren't ancestors.
1620
1621 # Filter out roots that aren't ancestors of heads
1622 roots = [root for root in roots if root in ancestors]
1623 # Recompute the lowest revision
1624 if roots:
1625 lowestrev = min([self.rev(root) for root in roots])
1626 else:
1627 # No more roots? Return empty list
1628 return nonodes
1629 else:
1630 # We are descending from nullid, and don't need to care about
1631 # any other roots.
1632 lowestrev = nullrev
1633 roots = [nullid]
1634 # Transform our roots list into a set.
1635 descendants = set(roots)
1636 # Also, keep the original roots so we can filter out roots that aren't
1637 # 'real' roots (i.e. are descended from other roots).
1638 roots = descendants.copy()
1639 # Our topologically sorted list of output nodes.
1640 orderedout = []
1641 # Don't start at nullid since we don't want nullid in our output list,
1642 # and if nullid shows up in descendants, empty parents will look like
1643 # they're descendants.
1644 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1645 n = self.node(r)
1646 isdescendant = False
1647 if lowestrev == nullrev: # Everybody is a descendant of nullid
1648 isdescendant = True
1649 elif n in descendants:
1650 # n is already a descendant
1651 isdescendant = True
1652 # This check only needs to be done here because all the roots
1653 # will start being marked as descendants before the loop.
1654 if n in roots:
1655 # If n was a root, check if it's a 'real' root.
1656 p = tuple(self.parents(n))
1657 # If any of its parents are descendants, it's not a root.
1658 if (p[0] in descendants) or (p[1] in descendants):
1659 roots.remove(n)
1660 else:
1661 p = tuple(self.parents(n))
1662 # A node is a descendant if either of its parents is a
1663 # descendant. (We seeded the descendants set with the roots
1664 # up there, remember?)
1665 if (p[0] in descendants) or (p[1] in descendants):
1666 descendants.add(n)
1667 isdescendant = True
1668 if isdescendant and ((ancestors is None) or (n in ancestors)):
1669 # Only include nodes that are both descendants and ancestors.
1670 orderedout.append(n)
1671 if (ancestors is not None) and (n in heads):
1672 # We're trying to figure out which heads are reachable
1673 # from roots.
1674 # Mark this head as having been reached
1675 heads[n] = True
1676 elif ancestors is None:
1677 # Otherwise, we're trying to discover the heads.
1678 # Assume this is a head because if it isn't, the next step
1679 # will eventually remove it.
1680 heads[n] = True
1681 # But, obviously its parents aren't.
1682 for p in self.parents(n):
1683 heads.pop(p, None)
1684 heads = [head for head, flag in heads.iteritems() if flag]
1685 roots = list(roots)
1686 assert orderedout
1687 assert roots
1688 assert heads
1689 return (orderedout, roots, heads)
1690
1691 def headrevs(self):
1692 try:
1693 return self.index.headrevs()
1694 except AttributeError:
1695 return self._headrevs()
1696
1697 def computephases(self, roots):
1698 return self.index.computephasesmapsets(roots)
1699
1700 def _headrevs(self):
1701 count = len(self)
1702 if not count:
1703 return [nullrev]
1704 # we won't iter over filtered revs so nobody is a head at start
1705 ishead = [0] * (count + 1)
1706 index = self.index
1707 for r in self:
1708 ishead[r] = 1 # I may be a head
1709 e = index[r]
1710 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1711 return [r for r, val in enumerate(ishead) if val]
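# (Illustrative sketch: for parent edges 0<-1, 0<-2, (1,2)<-3 and 3<-4,
# every rev is first flagged as a head and 0, 1, 2 and 3 are cleared when
# a child names them, leaving [4]. The extra slot in "ishead" absorbs
# writes to index -1 when a parent is nullrev.)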
1712
1713 def heads(self, start=None, stop=None):
1714 """return the list of all nodes that have no children
1715
1716 if start is specified, only heads that are descendants of
1717 start will be returned
1718 if stop is specified, it will consider all the revs from stop
1719 as if they had no children
1720 """
1721 if start is None and stop is None:
1722 if not len(self):
1723 return [nullid]
1724 return [self.node(r) for r in self.headrevs()]
1725
1726 if start is None:
1727 start = nullid
1728 if stop is None:
1729 stop = []
1730 stoprevs = set([self.rev(n) for n in stop])
1731 startrev = self.rev(start)
1732 reachable = {startrev}
1733 heads = {startrev}
1734
1735 parentrevs = self.parentrevs
1736 for r in self.revs(start=startrev + 1):
1737 for p in parentrevs(r):
1738 if p in reachable:
1739 if r not in stoprevs:
1740 reachable.add(r)
1741 heads.add(r)
1742 if p in heads and p not in stoprevs:
1743 heads.remove(p)
1744
1745 return [self.node(r) for r in heads]
1746
1747 def children(self, node):
1748 """find the children of a given node"""
1749 c = []
1750 p = self.rev(node)
1751 for r in self.revs(start=p + 1):
1752 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1753 if prevs:
1754 for pr in prevs:
1755 if pr == p:
1756 c.append(self.node(r))
1757 elif p == nullrev:
1758 c.append(self.node(r))
1759 return c
1760
1761 def commonancestorsheads(self, a, b):
1762 """calculate all the heads of the common ancestors of nodes a and b"""
1763 a, b = self.rev(a), self.rev(b)
1764 ancs = self._commonancestorsheads(a, b)
1765 return pycompat.maplist(self.node, ancs)
1766
1767 def _commonancestorsheads(self, *revs):
1768 """calculate all the heads of the common ancestors of revs"""
1769 try:
1770 ancs = self.index.commonancestorsheads(*revs)
1771 except (AttributeError, OverflowError): # C implementation failed
1772 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1773 return ancs
1774
1775 def isancestor(self, a, b):
1776 """return True if node a is an ancestor of node b
1777
1778 A revision is considered an ancestor of itself."""
1779 a, b = self.rev(a), self.rev(b)
1780 return self.isancestorrev(a, b)
1781
1782 def isancestorrev(self, a, b):
1783 """return True if revision a is an ancestor of revision b
1784
1785 A revision is considered an ancestor of itself.
1786
1787 The implementation of this is trivial but the use of
1788 commonancestorsheads is not."""
1789 if a == nullrev:
1790 return True
1791 elif a == b:
1792 return True
1793 elif a > b:
1794 return False
1795 return a in self._commonancestorsheads(a, b)
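# (Why the membership test above works: if a is an ancestor of b, the
# common ancestors of {a, b} are exactly ::a, whose only head is a
# itself; otherwise every head of the common ancestors is a proper
# ancestor of a, so a cannot appear in the result.)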
1796
1797 def ancestor(self, a, b):
1798 """calculate the "best" common ancestor of nodes a and b"""
1799
1800 a, b = self.rev(a), self.rev(b)
1801 try:
1802 ancs = self.index.ancestors(a, b)
1803 except (AttributeError, OverflowError):
1804 ancs = ancestor.ancestors(self.parentrevs, a, b)
1805 if ancs:
1806 # choose a consistent winner when there's a tie
1807 return min(map(self.node, ancs))
1808 return nullid
1809
1810 def _match(self, id):
1811 if isinstance(id, int):
1812 # rev
1813 return self.node(id)
1814 if len(id) == 20:
1815 # possibly a binary node
1816 # odds of a binary node being all hex in ASCII are 1 in 10**25
1817 try:
1818 node = id
1819 self.rev(node) # quick search the index
1820 return node
1821 except LookupError:
1822 pass # may be partial hex id
1823 try:
1824 # str(rev)
1825 rev = int(id)
1826 if "%d" % rev != id:
1827 raise ValueError
1828 if rev < 0:
1829 rev = len(self) + rev
1830 if rev < 0 or rev >= len(self):
1831 raise ValueError
1832 return self.node(rev)
1833 except (ValueError, OverflowError):
1834 pass
1835 if len(id) == 40:
1836 try:
1837 # a full hex nodeid?
1838 node = bin(id)
1839 self.rev(node)
1840 return node
1841 except (TypeError, LookupError):
1842 pass
1843
1844 def _partialmatch(self, id):
1845 # we don't care about wdirfilenodeids as they should always be full hashes
1846 maybewdir = wdirhex.startswith(id)
1847 try:
1848 partial = self.index.partialmatch(id)
1849 if partial and self.hasnode(partial):
1850 if maybewdir:
1851 # single 'ff...' match in radix tree, ambiguous with wdir
1852 raise RevlogError
1853 return partial
1854 if maybewdir:
1855 # no 'ff...' match in radix tree, wdir identified
1856 raise error.WdirUnsupported
1857 return None
1858 except RevlogError:
1859 # parsers.c radix tree lookup gave multiple matches
1860 # fast path: for unfiltered changelog, radix tree is accurate
1861 if not getattr(self, 'filteredrevs', None):
1862 raise AmbiguousPrefixLookupError(id, self.indexfile,
1863 _('ambiguous identifier'))
1864 # fall through to slow path that filters hidden revisions
1865 except (AttributeError, ValueError):
1866 # we are pure python, or key was too short to search radix tree
1867 pass
1868
1869 if id in self._pcache:
1870 return self._pcache[id]
1871
1872 if len(id) <= 40:
1873 try:
1874 # hex(node)[:...]
1875 l = len(id) // 2 # grab an even number of digits
1876 prefix = bin(id[:l * 2])
1877 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1878 nl = [n for n in nl if hex(n).startswith(id) and
1879 self.hasnode(n)]
1880 if nullhex.startswith(id):
1881 nl.append(nullid)
1882 if len(nl) > 0:
1883 if len(nl) == 1 and not maybewdir:
1884 self._pcache[id] = nl[0]
1885 return nl[0]
1886 raise AmbiguousPrefixLookupError(id, self.indexfile,
1887 _('ambiguous identifier'))
1888 if maybewdir:
1889 raise error.WdirUnsupported
1890 return None
1891 except TypeError:
1892 pass
1893
1894 def lookup(self, id):
1895 """locate a node based on:
1896 - revision number or str(revision number)
1897 - nodeid or subset of hex nodeid
1898 """
1899 n = self._match(id)
1900 if n is not None:
1901 return n
1902 n = self._partialmatch(id)
1903 if n:
1904 return n
1905
1906 raise LookupError(id, self.indexfile, _('no match found'))
1907
1908 def shortest(self, node, minlength=1):
1909 """Find the shortest unambiguous prefix that matches node."""
1910 def isvalid(prefix):
1911 try:
1912 node = self._partialmatch(prefix)
1913 except error.RevlogError:
1914 return False
1915 except error.WdirUnsupported:
1916 # single 'ff...' match
1917 return True
1918 if node is None:
1919 raise LookupError(node, self.indexfile, _('no node'))
1920 return True
1921
1922 def maybewdir(prefix):
1923 return all(c == 'f' for c in prefix)
1924
1925 hexnode = hex(node)
1926
1927 def disambiguate(hexnode, minlength):
1928 """Disambiguate against wdirid."""
1929 for length in range(minlength, 41):
1930 prefix = hexnode[:length]
1931 if not maybewdir(prefix):
1932 return prefix
1933
1934 if not getattr(self, 'filteredrevs', None):
1935 try:
1936 length = max(self.index.shortest(node), minlength)
1937 return disambiguate(hexnode, length)
1938 except RevlogError:
1939 if node != wdirid:
1940 raise LookupError(node, self.indexfile, _('no node'))
1941 except AttributeError:
1942 # Fall through to pure code
1943 pass
1944
1945 if node == wdirid:
1946 for length in range(minlength, 41):
1947 prefix = hexnode[:length]
1948 if isvalid(prefix):
1949 return prefix
1950
1951 for length in range(minlength, 41):
1952 prefix = hexnode[:length]
1953 if isvalid(prefix):
1954 return disambiguate(hexnode, length)
1955
1956 def cmp(self, node, text):
1957 """compare text with a given file revision
1958
1959 returns True if text is different than what is stored.
1960 """
1961 p1, p2 = self.parents(node)
1962 return hash(text, p1, p2) != node
1963
1964 def _cachesegment(self, offset, data):
1965 """Add a segment to the revlog cache.
1966
1967 Accepts an absolute offset and the data that is at that location.
1968 """
1969 o, d = self._chunkcache
1970 # try to add to existing cache
1971 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1972 self._chunkcache = o, d + data
1973 else:
1974 self._chunkcache = offset, data
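# (Illustrative sketch: caching 16 bytes at offset 0 and then 16 bytes at
# offset 16 extends the cached segment to one contiguous 32-byte run; a
# non-adjacent read simply replaces the cache.)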
1975
1976 def _readsegment(self, offset, length, df=None):
1977 """Load a segment of raw data from the revlog.
1978
1979 Accepts an absolute offset, length to read, and an optional existing
1980 file handle to read from.
1981
1982 If an existing file handle is passed, it will be seeked and the
1983 original seek position will NOT be restored.
1984
1985 Returns a str or buffer of raw byte data.
1986 """
1987 # Cache data both forward and backward around the requested
1988 # data, in a fixed size window. This helps speed up operations
1989 # involving reading the revlog backwards.
1990 cachesize = self._chunkcachesize
1991 realoffset = offset & ~(cachesize - 1)
1992 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1993 - realoffset)
1994 with self._datareadfp(df) as df:
1995 df.seek(realoffset)
1996 d = df.read(reallength)
1997 self._cachesegment(realoffset, d)
1998 if offset != realoffset or reallength != length:
1999 return util.buffer(d, offset - realoffset, length)
2000 return d
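# (Worked sketch of the alignment above, assuming cachesize = 65536: a
# request for offset 70000 and length 100 gives realoffset = 65536 and
# reallength = ((70100 + 65536) & ~65535) - 65536 = 65536, so one full
# aligned window around the request is read and cached.)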
2001
2002 def _getsegment(self, offset, length, df=None):
2003 """Obtain a segment of raw data from the revlog.
2004
2005 Accepts an absolute offset, length of bytes to obtain, and an
2006 optional file handle to the already-opened revlog. If the file
2007 handle is used, its original seek position will not be preserved.
2008
2009 Requests for data may be returned from a cache.
2010
2011 Returns a str or a buffer instance of raw byte data.
2012 """
2013 o, d = self._chunkcache
2014 l = len(d)
2015
2016 # is it in the cache?
2017 cachestart = offset - o
2018 cacheend = cachestart + length
2019 if cachestart >= 0 and cacheend <= l:
2020 if cachestart == 0 and cacheend == l:
2021 return d # avoid a copy
2022 return util.buffer(d, cachestart, cacheend - cachestart)
2023
2024 return self._readsegment(offset, length, df=df)
2025
2026 def _getsegmentforrevs(self, startrev, endrev, df=None):
2027 """Obtain a segment of raw data corresponding to a range of revisions.
2028
2029 Accepts the start and end revisions and an optional already-open
2030 file handle to be used for reading. If the file handle is read, its
2031 seek position will not be preserved.
2032
2033 Requests for data may be satisfied by a cache.
2034
2035 Returns a 2-tuple of (offset, data) for the requested range of
2036 revisions. Offset is the integer offset from the beginning of the
2037 revlog and data is a str or buffer of the raw byte data.
2038
2039 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2040 to determine where each revision's data begins and ends.
2041 """
2042 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2043 # (functions are expensive).
2044 index = self.index
2045 istart = index[startrev]
2046 start = int(istart[0] >> 16)
2047 if startrev == endrev:
2048 end = start + istart[1]
2049 else:
2050 iend = index[endrev]
2051 end = int(iend[0] >> 16) + iend[1]
2052
2053 if self._inline:
2054 start += (startrev + 1) * self._io.size
2055 end += (endrev + 1) * self._io.size
2056 length = end - start
2057
2058 return start, self._getsegment(start, length, df=df)
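# (Illustrative sketch, assuming a 64-byte index entry size: in an inline
# revlog the data of rev r starts at start(r) + (r + 1) * 64, because
# index entries and revision data interleave in a single file.)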
2059
2060 def _chunk(self, rev, df=None):
2061 """Obtain a single decompressed chunk for a revision.
2062
2063 Accepts an integer revision and an optional already-open file handle
2064 to be used for reading. If used, the seek position of the file will not
2065 be preserved.
2066
2067 Returns a str holding uncompressed data for the requested revision.
2068 """
2069 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2070
2071 def _chunks(self, revs, df=None, targetsize=None):
2072 """Obtain decompressed chunks for the specified revisions.
2073
2074 Accepts an iterable of numeric revisions that are assumed to be in
2075 ascending order. Also accepts an optional already-open file handle
2076 to be used for reading. If used, the seek position of the file will
2077 not be preserved.
2078
2079 This function is similar to calling ``self._chunk()`` multiple times,
2080 but is faster.
2081
2082 Returns a list with decompressed data for each requested revision.
2083 """
2084 if not revs:
2085 return []
2086 start = self.start
2087 length = self.length
2088 inline = self._inline
2089 iosize = self._io.size
2090 buffer = util.buffer
2091
2092 l = []
2093 ladd = l.append
2094
2095 if not self._withsparseread:
2096 slicedchunks = (revs,)
2097 else:
2098 slicedchunks = deltautil.slicechunk(self, revs, targetsize=targetsize)
2099
2100 for revschunk in slicedchunks:
2101 firstrev = revschunk[0]
2102 # Skip trailing revisions with empty diff
2103 for lastrev in revschunk[::-1]:
2104 if length(lastrev) != 0:
2105 break
2106
2107 try:
2108 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2109 except OverflowError:
2110 # issue4215 - we can't cache a run of chunks greater than
2111 # 2G on Windows
2112 return [self._chunk(rev, df=df) for rev in revschunk]
2113
2114 decomp = self.decompress
2115 for rev in revschunk:
2116 chunkstart = start(rev)
2117 if inline:
2118 chunkstart += (rev + 1) * iosize
2119 chunklength = length(rev)
2120 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2121
2122 return l
2123
2124 def _chunkclear(self):
2125 """Clear the raw chunk cache."""
2126 self._chunkcache = (0, '')
2127
2128 def deltaparent(self, rev):
2129 """return deltaparent of the given revision"""
2130 base = self.index[rev][3]
2131 if base == rev:
2132 return nullrev
2133 elif self._generaldelta:
2134 return base
2135 else:
2136 return rev - 1
2137
2138 def issnapshot(self, rev):
2139 """tells whether rev is a snapshot
2140 """
2141 if rev == nullrev:
2142 return True
2143 deltap = self.deltaparent(rev)
2144 if deltap == nullrev:
2145 return True
2146 p1, p2 = self.parentrevs(rev)
2147 if deltap in (p1, p2):
2148 return False
2149 return self.issnapshot(deltap)
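# (Illustrative sketch: a full text, or a delta chained only through
# other snapshots down to a full text, is a snapshot; as soon as some rev
# in the chain deltas against one of its own parents, False propagates
# back up the recursion.)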
2150
2151 def snapshotdepth(self, rev):
2152 """number of snapshot in the chain before this one"""
2153 if not self.issnapshot(rev):
2154 raise ProgrammingError('revision %d not a snapshot')
2155 return len(self._deltachain(rev)[0]) - 1
2156
2157 def revdiff(self, rev1, rev2):
2158 """return or calculate a delta between two revisions
2159
2160 The delta calculated is in binary form and is intended to be written to
2161 revlog data directly. So this function needs raw revision data.
2162 """
2163 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2164 return bytes(self._chunk(rev2))
2165
2166 return mdiff.textdiff(self.revision(rev1, raw=True),
2167 self.revision(rev2, raw=True))
2168
2169 def revision(self, nodeorrev, _df=None, raw=False):
2170 """return an uncompressed revision of a given node or revision
2171 number.
2172
2173 _df - an existing file handle to read from. (internal-only)
2174 raw - an optional argument specifying if the revision data is to be
2175 treated as raw data when applying flag transforms. 'raw' should be set
2176 to True when generating changegroups or in debug commands.
2177 """
2178 if isinstance(nodeorrev, int):
2179 rev = nodeorrev
2180 node = self.node(rev)
2181 else:
2182 node = nodeorrev
2183 rev = None
2184
2185 cachedrev = None
2186 flags = None
2187 rawtext = None
2188 if node == nullid:
2189 return ""
2190 if self._cache:
2191 if self._cache[0] == node:
2192 # _cache only stores rawtext
2193 if raw:
2194 return self._cache[2]
2195 # duplicated, but good for perf
2196 if rev is None:
2197 rev = self.rev(node)
2198 if flags is None:
2199 flags = self.flags(rev)
2200 # no extra flags set, no flag processor runs, text = rawtext
2201 if flags == REVIDX_DEFAULT_FLAGS:
2202 return self._cache[2]
2203 # rawtext is reusable. need to run flag processor
2204 rawtext = self._cache[2]
2205
2206 cachedrev = self._cache[1]
2207
2208 # look up what we need to read
2209 if rawtext is None:
2210 if rev is None:
2211 rev = self.rev(node)
2212
2213 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2214 if stopped:
2215 rawtext = self._cache[2]
2216
2217 # drop cache to save memory
2218 self._cache = None
2219
2220 targetsize = None
2221 rawsize = self.index[rev][2]
2222 if 0 <= rawsize:
2223 targetsize = 4 * rawsize
2224
2225 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2226 if rawtext is None:
2227 rawtext = bytes(bins[0])
2228 bins = bins[1:]
2229
2230 rawtext = mdiff.patches(rawtext, bins)
2231 self._cache = (node, rev, rawtext)
2232
2233 if flags is None:
2234 if rev is None:
2235 rev = self.rev(node)
2236 flags = self.flags(rev)
2237
2238 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2239 if validatehash:
2240 self.checkhash(text, node, rev=rev)
2241
2242 return text
2243
2244 def hash(self, text, p1, p2):
2245 """Compute a node hash.
2246
2247 Available as a function so that subclasses can replace the hash
2248 as needed.
2249 """
2250 return hash(text, p1, p2)
2251
2252 def _processflags(self, text, flags, operation, raw=False):
2253 """Inspect revision data flags and applies transforms defined by
2254 registered flag processors.
2255
2256 ``text`` - the revision data to process
2257 ``flags`` - the revision flags
2258 ``operation`` - the operation being performed (read or write)
2259 ``raw`` - an optional argument describing if the raw transform should be
2260 applied.
2261
2262 This method processes the flags in the order (or reverse order if
2263 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2264 flag processors registered for present flags. The order of flags defined
2265 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2266
2267 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2268 processed text and ``validatehash`` is a bool indicating whether the
2269 returned text should be checked for hash integrity.
2270
2271 Note: If the ``raw`` argument is set, it has precedence over the
2272 operation and will only update the value of ``validatehash``.
2273 """
2274 # fast path: no flag processors will run
2275 if flags == 0:
2276 return text, True
2277 if operation not in ('read', 'write'):
2278 raise ProgrammingError(_("invalid '%s' operation") % operation)
2279 # Check all flags are known.
2280 if flags & ~REVIDX_KNOWN_FLAGS:
2281 raise RevlogError(_("incompatible revision flag '%#x'") %
2282 (flags & ~REVIDX_KNOWN_FLAGS))
2283 validatehash = True
2284 # Depending on the operation (read or write), the order might be
2285 # reversed due to non-commutative transforms.
2286 orderedflags = REVIDX_FLAGS_ORDER
2287 if operation == 'write':
2288 orderedflags = reversed(orderedflags)
2289
2290 for flag in orderedflags:
2291 # If a flagprocessor has been registered for a known flag, apply the
2292 # related operation transform and update result tuple.
2293 if flag & flags:
2294 vhash = True
2295
2296 if flag not in _flagprocessors:
2297 message = _("missing processor for flag '%#x'") % (flag)
2298 raise RevlogError(message)
2299
2300 processor = _flagprocessors[flag]
2301 if processor is not None:
2302 readtransform, writetransform, rawtransform = processor
2303
2304 if raw:
2305 vhash = rawtransform(self, text)
2306 elif operation == 'read':
2307 text, vhash = readtransform(self, text)
2308 else: # write operation
2309 text, vhash = writetransform(self, text)
2310 validatehash = validatehash and vhash
2311
2312 return text, validatehash
2313
2314 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2315 """Check node hash integrity.
2316
2317 Available as a function so that subclasses can extend hash mismatch
2318 behaviors as needed.
2319 """
2320 try:
2321 if p1 is None and p2 is None:
2322 p1, p2 = self.parents(node)
2323 if node != self.hash(text, p1, p2):
2324 revornode = rev
2325 if revornode is None:
2326 revornode = templatefilters.short(hex(node))
2327 raise RevlogError(_("integrity check failed on %s:%s")
2328 % (self.indexfile, pycompat.bytestr(revornode)))
2329 except RevlogError:
2330 if self._censorable and _censoredtext(text):
2331 raise error.CensoredNodeError(self.indexfile, node, text)
2332 raise
2333
2334 def _enforceinlinesize(self, tr, fp=None):
2335 """Check if the revlog is too big for inline and convert if so.
2336
2337 This should be called after revisions are added to the revlog. If the
2338 revlog has grown too large to be an inline revlog, it will convert it
2339 to use multiple index and data files.
2340 """
2341 tiprev = len(self) - 1
2342 if (not self._inline or
2343 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2344 return
2345
2346 trinfo = tr.find(self.indexfile)
2347 if trinfo is None:
2348 raise RevlogError(_("%s not found in the transaction")
2349 % self.indexfile)
2350
2351 trindex = trinfo[2]
2352 if trindex is not None:
2353 dataoff = self.start(trindex)
2354 else:
2355 # revlog was stripped at start of transaction, use all leftover data
2356 trindex = len(self) - 1
2357 dataoff = self.end(tiprev)
2358
2359 tr.add(self.datafile, dataoff)
2360
2361 if fp:
2362 fp.flush()
2363 fp.close()
2364
2365 with self._datafp('w') as df:
2366 for r in self:
2367 df.write(self._getsegmentforrevs(r, r)[1])
2368
2369 with self._indexfp('w') as fp:
2370 self.version &= ~FLAG_INLINE_DATA
2371 self._inline = False
2372 io = self._io
2373 for i in self:
2374 e = io.packentry(self.index[i], self.node, self.version, i)
2375 fp.write(e)
2376
2377 # the temp file replaces the real index when we exit the context
2378 # manager
2379
2380 tr.replace(self.indexfile, trindex * self._io.size)
2381 self._chunkclear()
2382
2383 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2384 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2385 """add a revision to the log
2386
2387 text - the revision data to add
2388 transaction - the transaction object used for rollback
2389 link - the linkrev data to add
2390 p1, p2 - the parent nodeids of the revision
2391 cachedelta - an optional precomputed delta
2392 node - nodeid of revision; typically node is not specified, and it is
2393 computed by default as hash(text, p1, p2), however subclasses might
2394 use different hashing method (and override checkhash() in such case)
2395 flags - the known flags to set on the revision
2396 deltacomputer - an optional deltautil.deltacomputer instance shared
2397 between multiple calls
2398 """
2399 if link == nullrev:
2400 raise RevlogError(_("attempted to add linkrev -1 to %s")
2401 % self.indexfile)
2402
2403 if flags:
2404 node = node or self.hash(text, p1, p2)
2405
2406 rawtext, validatehash = self._processflags(text, flags, 'write')
2407
2408 # If the flag processor modifies the revision data, ignore any provided
2409 # cachedelta.
2410 if rawtext != text:
2411 cachedelta = None
2412
2413 if len(rawtext) > _maxentrysize:
2414 raise RevlogError(
2415 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2416 % (self.indexfile, len(rawtext)))
2417
2418 node = node or self.hash(rawtext, p1, p2)
2419 if node in self.nodemap:
2420 return node
2421
2422 if validatehash:
2423 self.checkhash(rawtext, node, p1=p1, p2=p2)
2424
2425 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2426 flags, cachedelta=cachedelta,
2427 deltacomputer=deltacomputer)
2428
2429 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2430 cachedelta=None, deltacomputer=None):
2431 """add a raw revision with known flags, node and parents
2432 useful when reusing a revision not stored in this revlog (ex: received
2433 over wire, or read from an external bundle).
2434 """
2435 dfh = None
2436 if not self._inline:
2437 dfh = self._datafp("a+")
2438 ifh = self._indexfp("a+")
2439 try:
2440 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2441 flags, cachedelta, ifh, dfh,
2442 deltacomputer=deltacomputer)
2443 finally:
2444 if dfh:
2445 dfh.close()
2446 ifh.close()
2447
2448 def compress(self, data):
2449 """Generate a possibly-compressed representation of data."""
2450 if not data:
2451 return '', data
2452
2453 compressed = self._compressor.compress(data)
2454
2455 if compressed:
2456 # The revlog compressor added the header in the returned data.
2457 return '', compressed
2458
2459 if data[0:1] == '\0':
2460 return '', data
2461 return 'u', data
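# (Sketch of the header convention above: ('', data) means the
# compression engine embedded its own header in data, ('u', data) marks
# literal uncompressed text, and text already starting with '\0' is
# stored bare since decompress() treats a leading NUL as raw data.)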
2462
2463 def decompress(self, data):
2464 """Decompress a revlog chunk.
2465
2466 The chunk is expected to begin with a header identifying the
2467 format type so it can be routed to an appropriate decompressor.
2468 """
2469 if not data:
2470 return data
2471
2472 # Revlogs are read much more frequently than they are written and many
2473 # chunks only take microseconds to decompress, so performance is
2474 # important here.
2475 #
2476 # We can make a few assumptions about revlogs:
2477 #
2478 # 1) the majority of chunks will be compressed (as opposed to inline
2479 # raw data).
2480 # 2) decompressing *any* data will likely be at least 10x slower than
2481 # returning raw inline data.
2482 # 3) we want to prioritize common and officially supported compression
2483 # engines
2484 #
2485 # It follows that we want to optimize for "decompress compressed data
2486 # when encoded with common and officially supported compression engines"
2487 # case over "raw data" and "data encoded by less common or non-official
2488 # compression engines." That is why we have the inline lookup first
2489 # followed by the compengines lookup.
2490 #
2491 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2492 # compressed chunks. And this matters for changelog and manifest reads.
2493 t = data[0:1]
2494
2495 if t == 'x':
2496 try:
2497 return _zlibdecompress(data)
2498 except zlib.error as e:
2499 raise RevlogError(_('revlog decompress error: %s') %
2500 stringutil.forcebytestr(e))
2501 # '\0' is more common than 'u' so it goes first.
2502 elif t == '\0':
2503 return data
2504 elif t == 'u':
2505 return util.buffer(data, 1)
2506
2507 try:
2508 compressor = self._decompressors[t]
2509 except KeyError:
2510 try:
2511 engine = util.compengines.forrevlogheader(t)
2512 compressor = engine.revlogcompressor()
2513 self._decompressors[t] = compressor
2514 except KeyError:
2515 raise RevlogError(_('unknown compression type %r') % t)
2516
2517 return compressor.decompress(data)
2518
2519 def _isgooddeltainfo(self, deltainfo, revinfo):
2520 """Returns True if the given delta is good. Good means that it is within
2521 the disk span, disk size, and chain length bounds that we know to be
2522 performant."""
2523 if deltainfo is None:
2524 return False
2525
2526 # - 'deltainfo.distance' is the distance from the base revision --
2527 # bounding it limits the amount of I/O we need to do.
2528 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2529 # deltas we need to apply -- bounding it limits the amount of CPU
2530 # we consume.
2531
2532 if self._sparserevlog:
2533 # As sparse-read will be used, we can consider that the distance,
2534 # instead of being the span of the whole chunk,
2535 # is the span of the largest read chunk
2536 base = deltainfo.base
2537
2538 if base != nullrev:
2539 deltachain = self._deltachain(base)[0]
2540 else:
2541 deltachain = []
2542
2543 # search for the first non-snapshot revision
2544 for idx, r in enumerate(deltachain):
2545 if not self.issnapshot(r):
2546 break
2547 deltachain = deltachain[idx:]
2548 chunks = deltautil.slicechunk(self, deltachain, deltainfo)
2549 all_span = [deltautil.segmentspan(self, revs, deltainfo) for revs in chunks]
2550 distance = max(all_span)
2551 else:
2552 distance = deltainfo.distance
2553
2554 textlen = revinfo.textlen
2555 defaultmax = textlen * 4
2556 maxdist = self._maxdeltachainspan
2557 if not maxdist:
2558 maxdist = distance # ensure the conditional pass
2559 maxdist = max(maxdist, defaultmax)
2560 if self._sparserevlog and maxdist < self._srmingapsize:
2561 # In multiple places, we are ignoring irrelevant data ranges below a
2562 # certain size. We also apply this tradeoff here and relax the span
2563 # constraint for small enough content.
2564 maxdist = self._srmingapsize
2565
2566 # Bad delta from read span:
2567 #
2568 # If the span of data read is larger than the maximum allowed.
2569 if maxdist < distance:
2570 return False
2571
2572 # Bad delta from new delta size:
2573 #
2574 # If the delta size is larger than the target text, storing the
2575 # delta will be inefficient.
2576 if textlen < deltainfo.deltalen:
2577 return False
2578
2579 # Bad delta from cumulated payload size:
2580 #
2581 # If the sum of the deltas gets larger than K * target text length.
2582 if textlen * deltautil.LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
2583 return False
2584
2585 # Bad delta from chain length:
2586 #
2587 # If the number of deltas in the chain gets too high.
2588 if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
2589 return False
2590
2591 # bad delta from intermediate snapshot size limit
2592 #
2593 # If an intermediate snapshot size is higher than the limit. The
2594 # limit exists to prevent endless chains of intermediate deltas from
2595 # being created.
2596 if (deltainfo.snapshotdepth is not None and
2597 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
2598 return False
2599
2600 # bad delta if new intermediate snapshot is larger than the previous
2601 # snapshot
2602 if (deltainfo.snapshotdepth
2603 and self.length(deltainfo.base) < deltainfo.deltalen):
2604 return False
2605
2606 return True
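# (Worked sketch with hypothetical numbers: textlen = 1000 and
# maxdeltachainspan = 2000 give maxdist = max(2000, 4 * 1000) = 4000, so
# a delta whose read span is 3000 passes the span check while one
# spanning 5000 is rejected.)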
2607
2608 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2609 cachedelta, ifh, dfh, alwayscache=False,
2610 deltacomputer=None):
2611 """internal function to add revisions to the log
2612
2613 see addrevision for argument descriptions.
2614
2615 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2616
2617 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2618 be used.
2619
2620 invariants:
2621 - rawtext is optional (can be None); if not set, cachedelta must be set.
2622 if both are set, they must correspond to each other.
2623 """
2624 if node == nullid:
2625 raise RevlogError(_("%s: attempt to add null revision") %
2626 (self.indexfile))
2627 if node == wdirid or node in wdirfilenodeids:
2628 raise RevlogError(_("%s: attempt to add wdir revision") %
2629 (self.indexfile))
2630
2631 if self._inline:
2632 fh = ifh
2633 else:
2634 fh = dfh
2635
2636 btext = [rawtext]
2637
2638 curr = len(self)
2639 prev = curr - 1
2640 offset = self.end(prev)
2641 p1r, p2r = self.rev(p1), self.rev(p2)
2642
2643 # full versions are inserted when the needed deltas
2644 # become comparable to the uncompressed text
2645 if rawtext is None:
2646 # need rawtext size, before changed by flag processors, which is
2647 # the non-raw size. use revlog explicitly to avoid filelog's extra
2648 # logic that might remove metadata size.
2649 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2650 cachedelta[1])
2651 else:
2652 textlen = len(rawtext)
2653
2654 if deltacomputer is None:
2655 deltacomputer = deltautil.deltacomputer(self)
2656
2657 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2658
2659 # no delta for flag processor revision (see "candelta" for why)
2660 # not calling candelta since only one revision needs test, also to
2661 # avoid overhead fetching flags again.
2662 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2663 deltainfo = None
2664 else:
2665 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2666
2667 if deltainfo is not None:
2668 base = deltainfo.base
2669 chainbase = deltainfo.chainbase
2670 data = deltainfo.data
2671 l = deltainfo.deltalen
2672 else:
2673 rawtext = deltacomputer.buildtext(revinfo, fh)
2674 data = self.compress(rawtext)
2675 l = len(data[1]) + len(data[0])
2676 base = chainbase = curr
2677
2678 e = (offset_type(offset, flags), l, textlen,
2679 base, link, p1r, p2r, node)
2680 self.index.append(e)
2681 self.nodemap[node] = curr
2682
2683 entry = self._io.packentry(e, self.node, self.version, curr)
2684 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2685
2686 if alwayscache and rawtext is None:
2687 rawtext = deltacomputer.buildtext(revinfo, fh)
2688
2689 if type(rawtext) == bytes: # only accept immutable objects
2690 self._cache = (node, curr, rawtext)
2691 self._chainbasecache[curr] = chainbase
2692 return node
2693
2694 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2695 # Files opened in a+ mode have inconsistent behavior on various
2696 # platforms. Windows requires that a file positioning call be made
2697 # when the file handle transitions between reads and writes. See
2698 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2699 # platforms, Python or the platform itself can be buggy. Some versions
2700 # of Solaris have been observed to not append at the end of the file
2701 # if the file was seeked to before the end. See issue4943 for more.
2702 #
2703 # We work around this issue by inserting a seek() before writing.
2704 # Note: This is likely not necessary on Python 3.
2705 ifh.seek(0, os.SEEK_END)
2706 if dfh:
2707 dfh.seek(0, os.SEEK_END)
2708
2709 curr = len(self) - 1
2710 if not self._inline:
2711 transaction.add(self.datafile, offset)
2712 transaction.add(self.indexfile, curr * len(entry))
2713 if data[0]:
2714 dfh.write(data[0])
2715 dfh.write(data[1])
2716 ifh.write(entry)
2717 else:
2718 offset += curr * self._io.size
2719 transaction.add(self.indexfile, offset, curr)
2720 ifh.write(entry)
2721 ifh.write(data[0])
2722 ifh.write(data[1])
2723 self._enforceinlinesize(transaction, ifh)
2724
2725 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2726 """
2727 add a delta group
2728
2729 given a set of deltas, add them to the revision log. the
2730 first delta is against its parent, which should be in our
2731 log, the rest are against the previous delta.
2732
2733 If ``addrevisioncb`` is defined, it will be called with arguments of
2734 this revlog and the node that was added.
2735 """
2736
2737 nodes = []
2738
2739 r = len(self)
2740 end = 0
2741 if r:
2742 end = self.end(r - 1)
2743 ifh = self._indexfp("a+")
2744 isize = r * self._io.size
2745 if self._inline:
2746 transaction.add(self.indexfile, end + isize, r)
2747 dfh = None
2748 else:
2749 transaction.add(self.indexfile, isize, r)
2750 transaction.add(self.datafile, end)
2751 dfh = self._datafp("a+")
2752 def flush():
2753 if dfh:
2754 dfh.flush()
2755 ifh.flush()
2756 try:
2757 deltacomputer = deltautil.deltacomputer(self)
2758 # loop through our set of deltas
2759 for data in deltas:
2760 node, p1, p2, linknode, deltabase, delta, flags = data
2761 link = linkmapper(linknode)
2762 flags = flags or REVIDX_DEFAULT_FLAGS
2763
2764 nodes.append(node)
2765
2766 if node in self.nodemap:
2767 # this can happen if two branches make the same change
2768 continue
2769
2770 for p in (p1, p2):
2771 if p not in self.nodemap:
2772 raise LookupError(p, self.indexfile,
2773 _('unknown parent'))
2774
2775 if deltabase not in self.nodemap:
2776 raise LookupError(deltabase, self.indexfile,
2777 _('unknown delta base'))
2778
2779 baserev = self.rev(deltabase)
2780
2781 if baserev != nullrev and self.iscensored(baserev):
2782 # if base is censored, delta must be full replacement in a
2783 # single patch operation
2784 hlen = struct.calcsize(">lll")
2785 oldlen = self.rawsize(baserev)
2786 newlen = len(delta) - hlen
2787 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2788 raise error.CensoredBaseError(self.indexfile,
2789 self.node(baserev))
2790
2791 if not flags and self._peek_iscensored(baserev, delta, flush):
2792 flags |= REVIDX_ISCENSORED
2793
2794 # We assume consumers of addrevisioncb will want to retrieve
2795 # the added revision, which will require a call to
2796 # revision(). revision() will fast path if there is a cache
2797 # hit. So, we tell _addrevision() to always cache in this case.
2798 # We're only using addgroup() in the context of changegroup
2799 # generation so the revision data can always be handled as raw
2800 # by the flagprocessor.
2801 self._addrevision(node, None, transaction, link,
2802 p1, p2, flags, (baserev, delta),
2803 ifh, dfh,
2804 alwayscache=bool(addrevisioncb),
2805 deltacomputer=deltacomputer)
2806
2807 if addrevisioncb:
2808 addrevisioncb(self, node)
2809
2810 if not dfh and not self._inline:
2811 # addrevision switched from inline to conventional
2812 # reopen the index
2813 ifh.close()
2814 dfh = self._datafp("a+")
2815 ifh = self._indexfp("a+")
2816 finally:
2817 if dfh:
2818 dfh.close()
2819 ifh.close()
2820
2821 return nodes
2822
2823 def iscensored(self, rev):
2824 """Check if a file revision is censored."""
2825 if not self._censorable:
2826 return False
2827
2828 return self.flags(rev) & REVIDX_ISCENSORED
2829
2830 def _peek_iscensored(self, baserev, delta, flush):
2831 """Quickly check if a delta produces a censored revision."""
2832 if not self._censorable:
2833 return False
2834
2835 # Fragile heuristic: unless new file meta keys are added alphabetically
2836 # preceding "censored", all censored revisions are prefixed by
2837 # "\1\ncensored:". A delta producing such a censored revision must be a
2838 # full-replacement delta, so we inspect the first and only patch in the
2839 # delta for this prefix.
2840 hlen = struct.calcsize(">lll")
2841 if len(delta) <= hlen:
2842 return False
2843
2844 oldlen = self.rawsize(baserev)
2845 newlen = len(delta) - hlen
2846 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2847 return False
2848
2849 add = "\1\ncensored:"
2850 addlen = len(add)
2851 return newlen >= addlen and delta[hlen:hlen + addlen] == add
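# (Illustrative sketch: a censored revision's raw text starts with the
# metadata block "\1\ncensored:...", so a qualifying full-replacement
# delta carries that prefix immediately after its 12-byte ">lll" patch
# header.)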
2852
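# Editor's sketch (not part of this changeset): the full-replacement
# delta that _peek_iscensored() looks for is a single bdiff hunk, i.e.
# a ">lll" header (start=0, end=oldlen, length of new text) followed by
# the replacement text; mdiff.replacediffheader() packs exactly that
# header. The metadata framing below is simplified for illustration.
import struct

def makecensoringdelta(oldlen, tombstone):
    newtext = "\1\ncensored: %s\n\1\n" % tombstone
    return struct.pack(">lll", 0, oldlen, len(newtext)) + newtext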
2853 def getstrippoint(self, minlink):
2854 """find the minimum rev that must be stripped to strip the linkrev
2855
2856 Returns a tuple containing the minimum rev and a set of all revs that
2857 have linkrevs that will be broken by this strip.
2858 """
2859 brokenrevs = set()
2860 strippoint = len(self)
2861
2862 heads = {}
2863 futurelargelinkrevs = set()
2864 for head in self.headrevs():
2865 headlinkrev = self.linkrev(head)
2866 heads[head] = headlinkrev
2867 if headlinkrev >= minlink:
2868 futurelargelinkrevs.add(headlinkrev)
2869
2870 # This algorithm involves walking down the rev graph, starting at the
2871 # heads. Since the revs are topologically sorted according to linkrev,
2872 # once all head linkrevs are below minlink, we know there are
2873 # no more revs that could have a linkrev of minlink or greater.
2874 # So we can stop walking.
2875 while futurelargelinkrevs:
2876 strippoint -= 1
2877 linkrev = heads.pop(strippoint)
2878
2879 if linkrev < minlink:
2880 brokenrevs.add(strippoint)
2881 else:
2882 futurelargelinkrevs.remove(linkrev)
2883
2884 for p in self.parentrevs(strippoint):
2885 if p != nullrev:
2886 plinkrev = self.linkrev(p)
2887 heads[p] = plinkrev
2888 if plinkrev >= minlink:
2889 futurelargelinkrevs.add(plinkrev)
2890
2891 return strippoint, brokenrevs
2892
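# Editor's usage sketch (not part of this changeset), assuming a
# hypothetical revlog instance `rl`: to strip everything linked to
# changelog revision 42 and later, first ask where to truncate.
strippoint, brokenrevs = rl.getstrippoint(42)
# brokenrevs holds revs above the strip point whose linkrev is < 42;
# truncation removes them anyway, so the caller must save their
# content and re-add it afterwards (see strip() below).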
2893 def strip(self, minlink, transaction):
2894 """truncate the revlog on the first revision with a linkrev >= minlink
2895
2896 This function is called when we're stripping revision minlink and
2897 its descendants from the repository.
2898
2899 We have to remove all revisions with linkrev >= minlink, because
2900 the equivalent changelog revisions will be renumbered after the
2901 strip.
2902
2903 So we truncate the revlog on the first of these revisions, and
2904 trust that the caller has saved the revisions that shouldn't be
2905 removed and that it'll re-add them after this truncation.
2906 """
2907 if len(self) == 0:
2908 return
2909
2910 rev, _ = self.getstrippoint(minlink)
2911 if rev == len(self):
2912 return
2913
2914 # first truncate the files on disk
2915 end = self.start(rev)
2916 if not self._inline:
2917 transaction.add(self.datafile, end)
2918 end = rev * self._io.size
2919 else:
2920 end += rev * self._io.size
2921
2922 transaction.add(self.indexfile, end)
2923
2924 # then reset internal state in memory to forget those revisions
2925 self._cache = None
2926 self._chaininfocache = {}
2927 self._chunkclear()
2928 for x in pycompat.xrange(rev, len(self)):
2929 del self.nodemap[self.node(x)]
2930
2931 del self.index[rev:-1]
2932 self._nodepos = None
2933
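# Editor's sketch (not part of this changeset): the truncation offsets
# computed above mirror the two on-disk layouts. With the 64-byte v1
# index entry size and placeholder values:
entrysize = 64    # self._io.size for the v1 index format
rev = 10          # first revision being stripped
dataend = 5000    # self.start(rev): offset of rev's data
separate = rev * entrysize            # truncate a standalone index
inline = dataend + rev * entrysize    # truncate an inline revlog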
2934 def checksize(self):
2935 expected = 0
2936 if len(self):
2937 expected = max(0, self.end(len(self) - 1))
2938
2939 try:
2940 with self._datafp() as f:
2941 f.seek(0, 2)
2942 actual = f.tell()
2943 dd = actual - expected
2944 except IOError as inst:
2945 if inst.errno != errno.ENOENT:
2946 raise
2947 dd = 0
2948
2949 try:
2950 f = self.opener(self.indexfile)
2951 f.seek(0, 2)
2952 actual = f.tell()
2953 f.close()
2954 s = self._io.size
2955 i = max(0, actual // s)
2956 di = actual - (i * s)
2957 if self._inline:
2958 databytes = 0
2959 for r in self:
2960 databytes += max(0, self.length(r))
2961 dd = 0
2962 di = actual - len(self) * s - databytes
2963 except IOError as inst:
2964 if inst.errno != errno.ENOENT:
2965 raise
2966 di = 0
2967
2968 return (dd, di)
2969
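# Editor's usage sketch (not part of this changeset), assuming a
# hypothetical revlog instance `rl`: checksize() reports how far the
# data and index files deviate from their expected sizes; nonzero
# values usually mean an interrupted write.
dd, di = rl.checksize()
if dd or di:
    print('revlog size mismatch: data off by %d, index off by %d'
          % (dd, di))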
2970 def files(self):
2971 res = [self.indexfile]
2972 if not self._inline:
2973 res.append(self.datafile)
2974 return res
2975
2976 def emitrevisiondeltas(self, requests):
2977 frev = self.rev
2978
2979 prevrev = None
2980 for request in requests:
2981 node = request.node
2982 rev = frev(node)
2983
2984 if prevrev is None:
2985 prevrev = self.index[rev][5]
2986
2987 # Requesting a full revision.
2988 if request.basenode == nullid:
2989 baserev = nullrev
2990 # Requesting an explicit revision.
2991 elif request.basenode is not None:
2992 baserev = frev(request.basenode)
2993 # Allowing us to choose.
2994 else:
2995 p1rev, p2rev = self.parentrevs(rev)
2996 deltaparentrev = self.deltaparent(rev)
2997
2998 # Avoid sending full revisions when delta parent is null. Pick
2999 # prev in that case. It's tempting to pick p1 in this case, as
3000 # p1 will be smaller in the common case. However, computing a
3001 # delta against p1 may require resolving the raw text of p1,
3002 # which could be expensive. The revlog caches should have prev
3003 # cached, meaning less CPU for delta generation. There is
3004 # likely room to add a flag and/or config option to control this
3005 # behavior.
3006 if deltaparentrev == nullrev and self._storedeltachains:
3007 baserev = prevrev
3008
3009 # Revlog is configured to use full snapshot for a reason.
3010 # Stick to full snapshot.
3011 elif deltaparentrev == nullrev:
3012 baserev = nullrev
3013
3014 # Pick previous when we can't be sure the base is available
3015 # on consumer.
3016 elif deltaparentrev not in (p1rev, p2rev, prevrev):
3017 baserev = prevrev
3018 else:
3019 baserev = deltaparentrev
3020
3021 if baserev != nullrev and not self.candelta(baserev, rev):
3022 baserev = nullrev
3023
3024 revision = None
3025 delta = None
3026 baserevisionsize = None
3027
3028 if self.iscensored(baserev) or self.iscensored(rev):
3029 try:
3030 revision = self.revision(node, raw=True)
3031 except error.CensoredNodeError as e:
3032 revision = e.tombstone
3033
3034 if baserev != nullrev:
3035 baserevisionsize = self.rawsize(baserev)
3036
3037 elif baserev == nullrev:
3038 revision = self.revision(node, raw=True)
3039 else:
3040 delta = self.revdiff(baserev, rev)
3041
3042 extraflags = REVIDX_ELLIPSIS if request.ellipsis else 0
3043
3044 yield revlogrevisiondelta(
3045 node=node,
3046 p1node=request.p1node,
3047 p2node=request.p2node,
3048 linknode=request.linknode,
3049 basenode=self.node(baserev),
3050 flags=self.flags(rev) | extraflags,
3051 baserevisionsize=baserevisionsize,
3052 revision=revision,
3053 delta=delta)
3054
3055 prevrev = rev
3056
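# Editor's condensed restatement (not part of this changeset) of the
# base-selection policy above, with revlog internals passed in as
# plain arguments; the final candelta() sanity check is left out.
nullrev = -1  # mercurial.node.nullrev

def choosebase(deltaparentrev, p1rev, p2rev, prevrev, storedeltachains):
    if deltaparentrev == nullrev and storedeltachains:
        # stored as a full snapshot, but deltas are allowed: send a
        # delta against prev, which the revlog caches keep cheap
        return prevrev
    if deltaparentrev == nullrev:
        # the revlog insists on full snapshots; keep it full
        return nullrev
    if deltaparentrev not in (p1rev, p2rev, prevrev):
        # the stored base may be unknown on the consumer side
        return prevrev
    return deltaparentrev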
3057 DELTAREUSEALWAYS = 'always'
3058 DELTAREUSESAMEREVS = 'samerevs'
3059 DELTAREUSENEVER = 'never'
3060
3061 DELTAREUSEFULLADD = 'fulladd'
3062
3063 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
3064
3065 def clone(self, tr, destrevlog, addrevisioncb=None,
3066 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
3067 """Copy this revlog to another, possibly with format changes.
3068
3069 The destination revlog will contain the same revisions and nodes.
3070 However, it may not be bit-for-bit identical due to e.g. delta encoding
3071 differences.
3072
3073 The ``deltareuse`` argument controls how deltas from the existing revlog
3074 are preserved in the destination revlog. The argument can have the
3075 following values:
3076
3077 DELTAREUSEALWAYS
3078 Deltas will always be reused (if possible), even if the destination
3079 revlog would not select the same revisions for the delta. This is the
3080 fastest mode of operation.
3081 DELTAREUSESAMEREVS
3082 Deltas will be reused if the destination revlog would pick the same
3083 revisions for the delta. This mode strikes a balance between speed
3084 and optimization.
3085 DELTAREUSENEVER
3086 Deltas will never be reused. This is the slowest mode of execution.
3087 This mode can be used to recompute deltas (e.g. if the diff/delta
3088 algorithm changes).
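DELTAREUSEFULLADD
  Revisions will be re-added via the full ``addrevision()`` code path
  rather than the lower-level ``_addrevision()`` used by the other
  modes.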
3089
3090 Delta computation can be slow, so the choice of delta reuse policy can
3091 significantly affect run time.
3092
3093 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3094 two extremes. Deltas will be reused if they are appropriate. But if the
3095 destination could pick a better delta base, it will do so. This means if you
3096 are converting a non-generaldelta revlog to a generaldelta revlog,
3097 deltas will be recomputed if the delta's parent isn't a parent of the
3098 revision.
3099
3100 In addition to the delta policy, the ``deltabothparents`` argument
3101 controls whether to compute deltas against both parents for merges.
3102 When it is not set, the destination revlog's existing setting is kept.
3103 """
3104 if deltareuse not in self.DELTAREUSEALL:
3105 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
3106
3107 if len(destrevlog):
3108 raise ValueError(_('destination revlog is not empty'))
3109
3110 if getattr(self, 'filteredrevs', None):
3111 raise ValueError(_('source revlog has filtered revisions'))
3112 if getattr(destrevlog, 'filteredrevs', None):
3113 raise ValueError(_('destination revlog has filtered revisions'))
3114
3115 # lazydeltabase controls whether to reuse a cached delta, if possible.
3116 oldlazydeltabase = destrevlog._lazydeltabase
3117 oldamd = destrevlog._deltabothparents
3118
3119 try:
3120 if deltareuse == self.DELTAREUSEALWAYS:
3121 destrevlog._lazydeltabase = True
3122 elif deltareuse == self.DELTAREUSESAMEREVS:
3123 destrevlog._lazydeltabase = False
3124
3125 destrevlog._deltabothparents = deltabothparents or oldamd
3126
3127 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
3128 self.DELTAREUSESAMEREVS)
3129
3130 deltacomputer = _deltacomputer(destrevlog)
3131 index = self.index
3132 for rev in self:
3133 entry = index[rev]
3134
3135 # Some classes override linkrev to take filtered revs into
3136 # account. Use raw entry from index.
3137 flags = entry[0] & 0xffff
3138 linkrev = entry[4]
3139 p1 = index[entry[5]][7]
3140 p2 = index[entry[6]][7]
3141 node = entry[7]
3142
3143 # (Possibly) reuse the delta from the revlog if allowed and
3144 # the revlog chunk is a delta.
3145 cachedelta = None
3146 rawtext = None
3147 if populatecachedelta:
3148 dp = self.deltaparent(rev)
3149 if dp != nullrev:
3150 cachedelta = (dp, bytes(self._chunk(rev)))
3151
3152 if not cachedelta:
3153 rawtext = self.revision(rev, raw=True)
3154
3156 if deltareuse == self.DELTAREUSEFULLADD:
3157 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
3158 cachedelta=cachedelta,
3159 node=node, flags=flags,
3160 deltacomputer=deltacomputer)
3161 else:
3162 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
3163 checkambig=False)
3164 dfh = None
3165 if not destrevlog._inline:
3166 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
3167 try:
3168 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
3169 p2, flags, cachedelta, ifh, dfh,
3170 deltacomputer=deltacomputer)
3171 finally:
3172 if dfh:
3173 dfh.close()
3174 ifh.close()
3175
3176 if addrevisioncb:
3177 addrevisioncb(self, rev, node)
3178 finally:
3179 destrevlog._lazydeltabase = oldlazydeltabase
3180 destrevlog._deltabothparents = oldamd
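# Editor's usage sketch (not part of this changeset), assuming a
# source revlog `srcrevlog`, an empty destination `destrevlog`, and
# an open transaction `tr`: forcing every delta to be recomputed
# (e.g. after a delta algorithm change) looks roughly like:
srcrevlog.clone(tr, destrevlog,
                deltareuse=srcrevlog.DELTAREUSENEVER,
                deltabothparents=True)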