@@ -675,169 +675,199 @@ def is_good_delta_info(revlog, deltainfo
 LIMIT_BASE2TEXT = 500
 
 
-def _candidategroups(
-    revlog,
-    textlen,
-    p1,
-    p2,
-    cachedelta,
-    excluded_bases=None,
-    target_rev=None,
-    snapshot_cache=None,
-):
-    """Provides group of revision to be tested as delta base
-
-    This top level function focus on emitting groups with unique and worthwhile
-    content. See _raw_candidate_groups for details about the group order.
-    """
-    # should we try to build a delta?
-    if not (len(revlog) and revlog._storedeltachains):
-        yield None
-        return
-
-    if target_rev is None:
-        target_rev = len(revlog)
-
-    if not revlog.delta_config.general_delta:
-        # before general delta, there is only one possible delta base
-        yield (target_rev - 1,)
-        yield None
-        return
-
-    # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
-    # we should never end up asking such question. Adding the assert as a
-    # safe-guard to detect anything that would be fishy in this regard.
-    assert (
-        cachedelta is None
-        or cachedelta[2] != DELTA_BASE_REUSE_FORCE
-        or not revlog.delta_config.general_delta
-    )
-
-    deltalength = revlog.length
-    deltaparent = revlog.deltaparent
-    sparse = revlog.delta_config.sparse_revlog
-    good = None
-
-    deltas_limit = textlen * LIMIT_DELTA2TEXT
-    group_chunk_size = revlog.delta_config.candidate_group_chunk_size
-
-    tested = {nullrev}
-    candidates = _refinedgroups(
-        revlog,
-        p1,
-        p2,
-        cachedelta,
-        snapshot_cache=snapshot_cache,
-    )
-    while True:
-        temptative = candidates.send(good)
-        if temptative is None:
-            break
-        group = []
-        for rev in temptative:
-            # skip over empty delta (no need to include them in a chain)
-            while not (rev == nullrev or rev in tested or deltalength(rev)):
-                tested.add(rev)
-                rev = deltaparent(rev)
-            # no need to try a delta against nullrev, this will be done as a
-            # last resort.
-            if rev == nullrev:
-                continue
-            # filter out revision we tested already
-            if rev in tested:
-                continue
-
-            # an higher authority deamed the base unworthy (e.g. censored)
-            if excluded_bases is not None and rev in excluded_bases:
-                tested.add(rev)
-                continue
-            # We are in some recomputation cases and that rev is too high in
-            # the revlog
-            if target_rev is not None and rev >= target_rev:
-                tested.add(rev)
-                continue
-            # filter out delta base that will never produce good delta
-            if deltas_limit < revlog.length(rev):
-                tested.add(rev)
-                continue
-            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
-                tested.add(rev)
-                continue
-            # no delta for rawtext-changing revs (see "candelta" for why)
-            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
-                tested.add(rev)
-                continue
-
-            # If we reach here, we are about to build and test a delta.
-            # The delta building process will compute the chaininfo in all
-            # case, since that computation is cached, it is fine to access it
-            # here too.
-            chainlen, chainsize = revlog._chaininfo(rev)
-            # if chain will be too long, skip base
-            if (
-                revlog.delta_config.max_chain_len
-                and chainlen >= revlog.delta_config.max_chain_len
-            ):
-                tested.add(rev)
-                continue
-            # if chain already have too much data, skip base
-            if deltas_limit < chainsize:
-                tested.add(rev)
-                continue
-            if sparse and revlog.delta_config.upper_bound_comp is not None:
-                maxcomp = revlog.delta_config.upper_bound_comp
-                basenotsnap = (p1, p2, nullrev)
-                if rev not in basenotsnap and revlog.issnapshot(rev):
-                    snapshotdepth = revlog.snapshotdepth(rev)
-                    # If text is significantly larger than the base, we can
-                    # expect the resulting delta to be proportional to the size
-                    # difference
-                    revsize = revlog.rawsize(rev)
-                    rawsizedistance = max(textlen - revsize, 0)
-                    # use an estimate of the compression upper bound.
-                    lowestrealisticdeltalen = rawsizedistance // maxcomp
-
-                    # check the absolute constraint on the delta size
-                    snapshotlimit = textlen >> snapshotdepth
-                    if snapshotlimit < lowestrealisticdeltalen:
-                        # delta lower bound is larger than accepted upper bound
-                        tested.add(rev)
-                        continue
-
-                    # check the relative constraint on the delta size
-                    revlength = revlog.length(rev)
-                    if revlength < lowestrealisticdeltalen:
-                        # delta probable lower bound is larger than target base
-                        tested.add(rev)
-                        continue
-
-            group.append(rev)
-        if group:
-            # When the size of the candidate group is big, it can result in a
-            # quite significant performance impact. To reduce this, we can send
-            # them in smaller batches until the new batch does not provide any
-            # improvements.
-            #
-            # This might reduce the overall efficiency of the compression in
-            # some corner cases, but that should also prevent very pathological
-            # cases from being an issue. (eg. 20 000 candidates).
-            #
-            # XXX note that the ordering of the group becomes important as it
-            # now impacts the final result. The current order is unprocessed
-            # and can be improved.
-            if group_chunk_size == 0:
-                tested.update(group)
-                good = yield tuple(group)
-            else:
-                prev_good = good
-                for start in range(0, len(group), group_chunk_size):
-                    sub_group = group[start : start + group_chunk_size]
-                    tested.update(sub_group)
-                    good = yield tuple(sub_group)
-                    if prev_good == good:
-                        break
-
-    yield None
+class _DeltaSearch:
+    """perform the search of a good delta for a single revlog revision
+
+    note: some of the deltacomputer.finddeltainfo logic should probably move
+    here.
+    """
+
+    def __init__(
+        self,
+        revlog,
+        textlen,
+        p1,
+        p2,
+        cachedelta,
+        excluded_bases=None,
+        target_rev=None,
+        snapshot_cache=None,
+    ):
+        self.revlog = revlog
+        self.textlen = textlen
+        self.p1 = p1
+        self.p2 = p2
+        self.cachedelta = cachedelta
+        self.excluded_bases = excluded_bases
+        self.target_rev = target_rev
+        self.snapshot_cache = snapshot_cache
+
+    def candidate_groups(self):
+        """Provides group of revision to be tested as delta base
+
+        This top level function focus on emitting groups with unique and
+        worthwhile content. See _raw_candidate_groups for details about the
+        group order.
+        """
+        # should we try to build a delta?
+        if not (len(self.revlog) and self.revlog._storedeltachains):
+            yield None
+            return
+
+        if self.target_rev is None:
+            self.target_rev = len(self.revlog)
+
+        if not self.revlog.delta_config.general_delta:
+            # before general delta, there is only one possible delta base
+            yield (self.target_rev - 1,)
+            yield None
+            return
+
+        # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
+        # so we should never end up asking such question. Adding the assert as
+        # a safe-guard to detect anything that would be fishy in this regard.
+        assert (
+            self.cachedelta is None
+            or self.cachedelta[2] != DELTA_BASE_REUSE_FORCE
+            or not self.revlog.delta_config.general_delta
+        )
+
+        deltalength = self.revlog.length
+        deltaparent = self.revlog.deltaparent
+        sparse = self.revlog.delta_config.sparse_revlog
+        good = None
+
+        deltas_limit = self.textlen * LIMIT_DELTA2TEXT
+        group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
+
+        tested = {nullrev}
+        candidates = _refinedgroups(
+            self.revlog,
+            self.p1,
+            self.p2,
+            self.cachedelta,
+            snapshot_cache=self.snapshot_cache,
+        )
+        while True:
+            temptative = candidates.send(good)
+            if temptative is None:
+                break
+            group = []
+            for rev in temptative:
+                # skip over empty delta (no need to include them in a chain)
+                while not (rev == nullrev or rev in tested or deltalength(rev)):
+                    tested.add(rev)
+                    rev = deltaparent(rev)
+                # no need to try a delta against nullrev, this will be done as
+                # a last resort.
+                if rev == nullrev:
+                    continue
+                # filter out revision we tested already
+                if rev in tested:
+                    continue
+
+                # an higher authority deamed the base unworthy (e.g. censored)
+                if (
+                    self.excluded_bases is not None
+                    and rev in self.excluded_bases
+                ):
+                    tested.add(rev)
+                    continue
+                # We are in some recomputation cases and that rev is too high
+                # in the revlog
+                if self.target_rev is not None and rev >= self.target_rev:
+                    tested.add(rev)
+                    continue
+                # filter out delta base that will never produce good delta
+                if deltas_limit < self.revlog.length(rev):
+                    tested.add(rev)
+                    continue
+                if sparse and self.revlog.rawsize(rev) < (
+                    self.textlen // LIMIT_BASE2TEXT
+                ):
+                    tested.add(rev)
+                    continue
+                # no delta for rawtext-changing revs (see "candelta" for why)
+                if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
+                    tested.add(rev)
+                    continue
+
+                # If we reach here, we are about to build and test a delta.
+                # The delta building process will compute the chaininfo in all
+                # case, since that computation is cached, it is fine to access
+                # it here too.
+                chainlen, chainsize = self.revlog._chaininfo(rev)
+                # if chain will be too long, skip base
+                if (
+                    self.revlog.delta_config.max_chain_len
+                    and chainlen >= self.revlog.delta_config.max_chain_len
+                ):
+                    tested.add(rev)
+                    continue
+                # if chain already have too much data, skip base
+                if deltas_limit < chainsize:
+                    tested.add(rev)
+                    continue
+                if (
+                    sparse
+                    and self.revlog.delta_config.upper_bound_comp is not None
+                ):
+                    maxcomp = self.revlog.delta_config.upper_bound_comp
+                    basenotsnap = (self.p1, self.p2, nullrev)
+                    if rev not in basenotsnap and self.revlog.issnapshot(rev):
+                        snapshotdepth = self.revlog.snapshotdepth(rev)
+                        # If text is significantly larger than the base, we can
+                        # expect the resulting delta to be proportional to the size
+                        # difference
+                        revsize = self.revlog.rawsize(rev)
+                        rawsizedistance = max(self.textlen - revsize, 0)
+                        # use an estimate of the compression upper bound.
+                        lowestrealisticdeltalen = rawsizedistance // maxcomp
+
+                        # check the absolute constraint on the delta size
+                        snapshotlimit = self.textlen >> snapshotdepth
+                        if snapshotlimit < lowestrealisticdeltalen:
+                            # delta lower bound is larger than accepted upper
+                            # bound
+                            tested.add(rev)
+                            continue
+
+                        # check the relative constraint on the delta size
+                        revlength = self.revlog.length(rev)
+                        if revlength < lowestrealisticdeltalen:
+                            # delta probable lower bound is larger than target
+                            # base
+                            tested.add(rev)
+                            continue
+
+                group.append(rev)
+            if group:
+                # When the size of the candidate group is big, it can result in
+                # a quite significant performance impact. To reduce this, we
+                # can send them in smaller batches until the new batch does not
+                # provide any improvements.
+                #
+                # This might reduce the overall efficiency of the compression
+                # in some corner cases, but that should also prevent very
+                # pathological cases from being an issue. (eg. 20 000
+                # candidates).
+                #
+                # XXX note that the ordering of the group becomes important as
+                # it now impacts the final result. The current order is
+                # unprocessed and can be improved.
+                if group_chunk_size == 0:
+                    tested.update(group)
+                    good = yield tuple(group)
+                else:
+                    prev_good = good
+                    for start in range(0, len(group), group_chunk_size):
+                        sub_group = group[start : start + group_chunk_size]
+                        tested.update(sub_group)
+                        good = yield tuple(sub_group)
+                        if prev_good == good:
+                            break
+
+        yield None
 
 
 def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
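
The new candidate_groups() method keeps the coroutine-style protocol of the old _candidategroups() generator: the caller primes it with next(), reports the best base found so far back through send(), and the generator uses that feedback to stop emitting further chunks of a group once they stop helping. The following standalone sketch only illustrates that yield/send feedback loop; none of the names in it (chunked_candidates, pick_best, chunk_size) belong to the revlog API, and the real search logic above is far richer.

def chunked_candidates(groups, chunk_size=2):
    # `groups` is any iterable of candidate tuples; no revlog involved.
    good = None
    for group in groups:
        prev_good = good
        for start in range(0, len(group), chunk_size):
            # the caller's send() value becomes the result of `yield`
            good = yield tuple(group[start : start + chunk_size])
            if prev_good == good:
                # this chunk brought no improvement, skip the rest
                break
    yield None  # same end-of-search marker as the revlog code


def pick_best(search):
    # shaped like the driving loop in deltacomputer.finddeltainfo
    best = None
    candidaterevs = next(search)
    while candidaterevs is not None:
        if candidaterevs:
            best = min(candidaterevs)  # stand-in for "best delta base"
        candidaterevs = search.send(best)
    return best


assert pick_best(chunked_candidates([(10, 11, 12, 13), (7, 8)])) == 7
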
@@ -1410,7 +1440,7 @@ class deltacomputer:
                 msg %= target_rev
                 self._write_debug(msg)
 
-        groups = _candidategroups(
+        search = _DeltaSearch(
             self.revlog,
             revinfo.textlen,
             p1r,
@@ -1420,6 +1450,8 @@ class deltacomputer:
             target_rev,
             snapshot_cache=self._snapshot_cache,
         )
+
+        groups = search.candidate_groups()
         candidaterevs = next(groups)
         while candidaterevs is not None:
             dbg_try_rounds += 1
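
Taken together, the last two hunks show that the call site in deltacomputer.finddeltainfo only changes how the generator is obtained; the loop that consumes it is untouched. A hedged sketch of the before/after shape, using a stub class (_StubDeltaSearch is made up for illustration and takes none of the real constructor arguments):

class _StubDeltaSearch:
    # stand-in for _DeltaSearch, just to show the two-step call site
    def __init__(self, *args, **kwargs):
        self.args = args      # real class: revlog, textlen, p1, p2, cachedelta, ...
        self.kwargs = kwargs  # real class: excluded_bases, target_rev, snapshot_cache

    def candidate_groups(self):
        # the real method yields candidate tuples, then None
        yield (0,)
        yield None


# before: groups = _candidategroups(<same arguments as above>)
# after:  build the search object first, then ask it for the generator
search = _StubDeltaSearch()
groups = search.candidate_groups()
candidaterevs = next(groups)
while candidaterevs is not None:
    candidaterevs = groups.send(None)  # the real loop sends the best delta so far
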