delta-find: introduce a _DeltaSearch object...
Author: marmoute
Changeset: r52213:c82e03b1 (default branch)
@@ -675,169 +675,199 @@ def is_good_delta_info(revlog, deltainfo
 LIMIT_BASE2TEXT = 500
 
 
-def _candidategroups(
-    revlog,
-    textlen,
-    p1,
-    p2,
-    cachedelta,
-    excluded_bases=None,
-    target_rev=None,
-    snapshot_cache=None,
-):
-    """Provides group of revision to be tested as delta base
-
-    This top level function focus on emitting groups with unique and worthwhile
-    content. See _raw_candidate_groups for details about the group order.
-    """
-    # should we try to build a delta?
-    if not (len(revlog) and revlog._storedeltachains):
-        yield None
-        return
-
-    if target_rev is None:
-        target_rev = len(revlog)
-
-    if not revlog.delta_config.general_delta:
-        # before general delta, there is only one possible delta base
-        yield (target_rev - 1,)
-        yield None
-        return
-
-    # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
-    # we should never end up asking such question. Adding the assert as a
-    # safe-guard to detect anything that would be fishy in this regard.
-    assert (
-        cachedelta is None
-        or cachedelta[2] != DELTA_BASE_REUSE_FORCE
-        or not revlog.delta_config.general_delta
-    )
-
-    deltalength = revlog.length
-    deltaparent = revlog.deltaparent
-    sparse = revlog.delta_config.sparse_revlog
-    good = None
-
-    deltas_limit = textlen * LIMIT_DELTA2TEXT
-    group_chunk_size = revlog.delta_config.candidate_group_chunk_size
-
-    tested = {nullrev}
-    candidates = _refinedgroups(
-        revlog,
-        p1,
-        p2,
-        cachedelta,
-        snapshot_cache=snapshot_cache,
-    )
-    while True:
-        temptative = candidates.send(good)
-        if temptative is None:
-            break
-        group = []
-        for rev in temptative:
-            # skip over empty delta (no need to include them in a chain)
-            while not (rev == nullrev or rev in tested or deltalength(rev)):
-                tested.add(rev)
-                rev = deltaparent(rev)
-            # no need to try a delta against nullrev, this will be done as a
-            # last resort.
-            if rev == nullrev:
-                continue
-            # filter out revision we tested already
-            if rev in tested:
-                continue
-
-            # an higher authority deamed the base unworthy (e.g. censored)
-            if excluded_bases is not None and rev in excluded_bases:
-                tested.add(rev)
-                continue
-            # We are in some recomputation cases and that rev is too high in
-            # the revlog
-            if target_rev is not None and rev >= target_rev:
-                tested.add(rev)
-                continue
-            # filter out delta base that will never produce good delta
-            if deltas_limit < revlog.length(rev):
-                tested.add(rev)
-                continue
-            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
-                tested.add(rev)
-                continue
-            # no delta for rawtext-changing revs (see "candelta" for why)
-            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
-                tested.add(rev)
-                continue
-
-            # If we reach here, we are about to build and test a delta.
-            # The delta building process will compute the chaininfo in all
-            # case, since that computation is cached, it is fine to access it
-            # here too.
-            chainlen, chainsize = revlog._chaininfo(rev)
-            # if chain will be too long, skip base
-            if (
-                revlog.delta_config.max_chain_len
-                and chainlen >= revlog.delta_config.max_chain_len
-            ):
-                tested.add(rev)
-                continue
-            # if chain already have too much data, skip base
-            if deltas_limit < chainsize:
-                tested.add(rev)
-                continue
-            if sparse and revlog.delta_config.upper_bound_comp is not None:
-                maxcomp = revlog.delta_config.upper_bound_comp
-                basenotsnap = (p1, p2, nullrev)
-                if rev not in basenotsnap and revlog.issnapshot(rev):
-                    snapshotdepth = revlog.snapshotdepth(rev)
-                    # If text is significantly larger than the base, we can
-                    # expect the resulting delta to be proportional to the size
-                    # difference
-                    revsize = revlog.rawsize(rev)
-                    rawsizedistance = max(textlen - revsize, 0)
-                    # use an estimate of the compression upper bound.
-                    lowestrealisticdeltalen = rawsizedistance // maxcomp
-
-                    # check the absolute constraint on the delta size
-                    snapshotlimit = textlen >> snapshotdepth
-                    if snapshotlimit < lowestrealisticdeltalen:
-                        # delta lower bound is larger than accepted upper bound
-                        tested.add(rev)
-                        continue
-
-                    # check the relative constraint on the delta size
-                    revlength = revlog.length(rev)
-                    if revlength < lowestrealisticdeltalen:
-                        # delta probable lower bound is larger than target base
-                        tested.add(rev)
-                        continue
-
-            group.append(rev)
-        if group:
-            # When the size of the candidate group is big, it can result in a
-            # quite significant performance impact. To reduce this, we can send
-            # them in smaller batches until the new batch does not provide any
-            # improvements.
-            #
-            # This might reduce the overall efficiency of the compression in
-            # some corner cases, but that should also prevent very pathological
-            # cases from being an issue. (eg. 20 000 candidates).
-            #
-            # XXX note that the ordering of the group becomes important as it
-            # now impacts the final result. The current order is unprocessed
-            # and can be improved.
-            if group_chunk_size == 0:
-                tested.update(group)
-                good = yield tuple(group)
-            else:
-                prev_good = good
-                for start in range(0, len(group), group_chunk_size):
-                    sub_group = group[start : start + group_chunk_size]
-                    tested.update(sub_group)
-                    good = yield tuple(sub_group)
-                    if prev_good == good:
-                        break
-
-    yield None
+class _DeltaSearch:
+    """perform the search of a good delta for a single revlog revision
+
+    note: some of the deltacomputer.finddeltainfo logic should probably move
+    here.
+    """
+
+    def __init__(
+        self,
+        revlog,
+        textlen,
+        p1,
+        p2,
+        cachedelta,
+        excluded_bases=None,
+        target_rev=None,
+        snapshot_cache=None,
+    ):
+        self.revlog = revlog
+        self.textlen = textlen
+        self.p1 = p1
+        self.p2 = p2
+        self.cachedelta = cachedelta
+        self.excluded_bases = excluded_bases
+        self.target_rev = target_rev
+        self.snapshot_cache = snapshot_cache
+
+    def candidate_groups(self):
+        """Provides group of revision to be tested as delta base
+
+        This top level function focus on emitting groups with unique and
+        worthwhile content. See _raw_candidate_groups for details about the
+        group order.
+        """
+        # should we try to build a delta?
+        if not (len(self.revlog) and self.revlog._storedeltachains):
+            yield None
+            return
+
+        if self.target_rev is None:
+            self.target_rev = len(self.revlog)
+
+        if not self.revlog.delta_config.general_delta:
+            # before general delta, there is only one possible delta base
+            yield (self.target_rev - 1,)
+            yield None
+            return
+
+        # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
+        # so we should never end up asking such question. Adding the assert as
+        # a safe-guard to detect anything that would be fishy in this regard.
+        assert (
+            self.cachedelta is None
+            or self.cachedelta[2] != DELTA_BASE_REUSE_FORCE
+            or not self.revlog.delta_config.general_delta
+        )
+
+        deltalength = self.revlog.length
+        deltaparent = self.revlog.deltaparent
+        sparse = self.revlog.delta_config.sparse_revlog
+        good = None
+
+        deltas_limit = self.textlen * LIMIT_DELTA2TEXT
+        group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
+
+        tested = {nullrev}
+        candidates = _refinedgroups(
+            self.revlog,
+            self.p1,
+            self.p2,
+            self.cachedelta,
+            snapshot_cache=self.snapshot_cache,
+        )
+        while True:
+            temptative = candidates.send(good)
+            if temptative is None:
+                break
+            group = []
+            for rev in temptative:
+                # skip over empty delta (no need to include them in a chain)
+                while not (rev == nullrev or rev in tested or deltalength(rev)):
+                    tested.add(rev)
+                    rev = deltaparent(rev)
+                # no need to try a delta against nullrev, this will be done as
+                # a last resort.
+                if rev == nullrev:
+                    continue
+                # filter out revision we tested already
+                if rev in tested:
+                    continue
+
+                # an higher authority deamed the base unworthy (e.g. censored)
+                if (
+                    self.excluded_bases is not None
+                    and rev in self.excluded_bases
+                ):
+                    tested.add(rev)
+                    continue
+                # We are in some recomputation cases and that rev is too high
+                # in the revlog
+                if self.target_rev is not None and rev >= self.target_rev:
+                    tested.add(rev)
+                    continue
+                # filter out delta base that will never produce good delta
+                if deltas_limit < self.revlog.length(rev):
+                    tested.add(rev)
+                    continue
+                if sparse and self.revlog.rawsize(rev) < (
+                    self.textlen // LIMIT_BASE2TEXT
+                ):
+                    tested.add(rev)
+                    continue
+                # no delta for rawtext-changing revs (see "candelta" for why)
+                if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
+                    tested.add(rev)
+                    continue
+
+                # If we reach here, we are about to build and test a delta.
+                # The delta building process will compute the chaininfo in all
+                # case, since that computation is cached, it is fine to access
+                # it here too.
+                chainlen, chainsize = self.revlog._chaininfo(rev)
+                # if chain will be too long, skip base
+                if (
+                    self.revlog.delta_config.max_chain_len
+                    and chainlen >= self.revlog.delta_config.max_chain_len
+                ):
+                    tested.add(rev)
+                    continue
+                # if chain already have too much data, skip base
+                if deltas_limit < chainsize:
+                    tested.add(rev)
+                    continue
+                if (
+                    sparse
+                    and self.revlog.delta_config.upper_bound_comp is not None
+                ):
+                    maxcomp = self.revlog.delta_config.upper_bound_comp
+                    basenotsnap = (self.p1, self.p2, nullrev)
+                    if rev not in basenotsnap and self.revlog.issnapshot(rev):
+                        snapshotdepth = self.revlog.snapshotdepth(rev)
+                        # If text is significantly larger than the base, we can
+                        # expect the resulting delta to be proportional to the size
+                        # difference
+                        revsize = self.revlog.rawsize(rev)
+                        rawsizedistance = max(self.textlen - revsize, 0)
+                        # use an estimate of the compression upper bound.
+                        lowestrealisticdeltalen = rawsizedistance // maxcomp
+
+                        # check the absolute constraint on the delta size
+                        snapshotlimit = self.textlen >> snapshotdepth
+                        if snapshotlimit < lowestrealisticdeltalen:
+                            # delta lower bound is larger than accepted upper
+                            # bound
+                            tested.add(rev)
+                            continue
+
+                        # check the relative constraint on the delta size
+                        revlength = self.revlog.length(rev)
+                        if revlength < lowestrealisticdeltalen:
+                            # delta probable lower bound is larger than target
+                            # base
+                            tested.add(rev)
+                            continue
+
+                group.append(rev)
+            if group:
+                # When the size of the candidate group is big, it can result in
+                # a quite significant performance impact. To reduce this, we
+                # can send them in smaller batches until the new batch does not
+                # provide any improvements.
+                #
+                # This might reduce the overall efficiency of the compression
+                # in some corner cases, but that should also prevent very
+                # pathological cases from being an issue. (eg. 20 000
+                # candidates).
+                #
+                # XXX note that the ordering of the group becomes important as
+                # it now impacts the final result. The current order is
+                # unprocessed and can be improved.
+                if group_chunk_size == 0:
+                    tested.update(group)
+                    good = yield tuple(group)
+                else:
+                    prev_good = good
+                    for start in range(0, len(group), group_chunk_size):
+                        sub_group = group[start : start + group_chunk_size]
+                        tested.update(sub_group)
+                        good = yield tuple(sub_group)
+                        if prev_good == good:
+                            break
+
+        yield None
 
 
 def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
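The core of candidate_groups(), on both sides of the hunk above, is a yield/send handshake: each yielded tuple is a batch of candidate bases, and the value the caller sends back (the best delta found so far) decides whether another batch or a further refinement is worth emitting. The toy generator below only illustrates that pattern; it is self-contained, uses invented names, and does not touch any Mercurial API.

# Toy illustration of the yield/send protocol used by candidate_groups().
# Nothing here is Mercurial code; "candidates" and "score" are invented.
def batched_candidates(candidates, chunk_size):
    best = None
    for start in range(0, len(candidates), chunk_size):
        chunk = tuple(candidates[start : start + chunk_size])
        previous_best = best
        # hand a batch to the caller, receive its best result so far
        best = yield chunk
        if best == previous_best:
            # the last batch brought no improvement, stop early
            break
    yield None  # sentinel: no more candidates to offer


def pick_best(candidates, score, chunk_size=2):
    gen = batched_candidates(candidates, chunk_size)
    best = None
    chunk = next(gen)  # prime the generator to get the first batch
    while chunk is not None:
        for item in chunk:
            if best is None or score(item) > score(best):
                best = item
        chunk = gen.send(best)  # feed the result back into the search
    return best

As in the real code, the early stop means later batches are never examined once a batch stops improving the result, which is the trade-off the in-code comment about candidate_group_chunk_size describes.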
@@ -1410,7 +1440,7 @@ class deltacomputer:
             msg %= target_rev
             self._write_debug(msg)
 
-        groups = _candidategroups(
+        search = _DeltaSearch(
             self.revlog,
             revinfo.textlen,
             p1r,
@@ -1420,6 +1450,8 @@ class deltacomputer:
             target_rev,
             snapshot_cache=self._snapshot_cache,
         )
+
+        groups = search.candidate_groups()
         candidaterevs = next(groups)
         while candidaterevs is not None:
             dbg_try_rounds += 1
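The two smaller hunks show the caller side of the change: deltacomputer now builds a _DeltaSearch and drives its candidate_groups() generator instead of calling _candidategroups() directly. A minimal sketch of that call pattern follows; evaluate_delta is a hypothetical placeholder for the delta building and comparison that finddeltainfo actually performs, not a real API.

# Sketch only: how a caller is expected to drive the new search object,
# assuming an already constructed revlog and revision context.
# `evaluate_delta` is hypothetical; the real code builds and tests deltas.
def find_best_base(revlog, textlen, p1r, p2r, cachedelta, target_rev,
                   snapshot_cache, evaluate_delta):
    search = _DeltaSearch(
        revlog,
        textlen,
        p1r,
        p2r,
        cachedelta,
        target_rev=target_rev,
        snapshot_cache=snapshot_cache,
    )
    groups = search.candidate_groups()
    good = None
    candidaterevs = next(groups)
    while candidaterevs is not None:
        for candidaterev in candidaterevs:
            better = evaluate_delta(candidaterev, good)
            if better is not None:
                good = better
        # report the best delta found so far; the generator uses it to
        # decide whether to refine further or to finish by yielding None
        candidaterevs = groups.send(good)
    return good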