##// END OF EJS Templates
delta-find: move good delta code earlier in the class...
marmoute -
r52237:f3f35b37 default
parent child Browse files
Show More
@@ -637,6 +637,130 b' class _DeltaSearch:'
637 self._last_good = None
637 self._last_good = None
638 self.current_group = self._candidates_iterator.send(self._last_good)
638 self.current_group = self._candidates_iterator.send(self._last_good)
639
639
def is_good_delta_info(self, deltainfo):
    """Returns True if the given delta is good.

    A delta is good when it passes, in this order, the universal checks,
    the delta chain quality checks and the snapshot constraint checks.
    Evaluation stops at the first check that rejects the delta, so later
    checks are not run for a delta that was already rejected.
    """
    return bool(
        self._is_good_delta_info_universal(deltainfo)
        and self._is_good_delta_info_chain_quality(deltainfo)
        and self._is_good_delta_info_snapshot_constraints(deltainfo)
    )
653
654 def _is_good_delta_info_universal(self, deltainfo):
655 """Returns True if the given delta is good.
656
657 This performs generic checks needed by all format variants.
658
659 This is used by is_good_delta_info.
660 """
661
662 if deltainfo is None:
663 return False
664
665 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
666 # so we should never end up asking such question. Adding the assert as
667 # a safe-guard to detect anything that would be fishy in this regard.
668 assert (
669 self.revinfo.cachedelta is None
670 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
671 or not self.revlog.delta_config.general_delta
672 )
673
674 # Bad delta from new delta size:
675 #
676 # If the delta size is larger than the target text, storing the delta
677 # will be inefficient.
678 if self.revinfo.textlen < deltainfo.deltalen:
679 return False
680
681 return True
682
683 def _is_good_delta_info_chain_quality(self, deltainfo):
684 """Returns True if the chain associated with the delta is good.
685
686 This performs checks for format that use delta chains.
687
688 This is used by is_good_delta_info.
689 """
690 # - 'deltainfo.distance' is the distance from the base revision --
691 # bounding it limits the amount of I/O we need to do.
692
693 defaultmax = self.revinfo.textlen * 4
694 maxdist = self.revlog.delta_config.max_deltachain_span
695 if not maxdist:
696 maxdist = deltainfo.distance # ensure the conditional pass
697 maxdist = max(maxdist, defaultmax)
698
699 # Bad delta from read span:
700 #
701 # If the span of data read is larger than the maximum allowed.
702 #
703 # In the sparse-revlog case, we rely on the associated "sparse
704 # reading" to avoid issue related to the span of data. In theory, it
705 # would be possible to build pathological revlog where delta pattern
706 # would lead to too many reads. However, they do not happen in
707 # practice at all. So we skip the span check entirely.
708 if (
709 not self.revlog.delta_config.sparse_revlog
710 and maxdist < deltainfo.distance
711 ):
712 return False
713
714 # Bad delta from cumulated payload size:
715 #
716 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
717 # deltas we need to apply -- bounding it limits the amount of CPU
718 # we consume.
719 max_chain_data = self.revinfo.textlen * LIMIT_DELTA2TEXT
720 # If the sum of delta get larger than K * target text length.
721 if max_chain_data < deltainfo.compresseddeltalen:
722 return False
723
724 # Bad delta from chain length:
725 #
726 # If the number of delta in the chain gets too high.
727 if (
728 self.revlog.delta_config.max_chain_len
729 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
730 ):
731 return False
732 return True
733
734 def _is_good_delta_info_snapshot_constraints(self, deltainfo):
735 """Returns True if the chain associated with snapshots
736
737 This performs checks for format that use sparse-revlog and intermediate
738 snapshots.
739
740 This is used by is_good_delta_info.
741 """
742 # bad delta from intermediate snapshot size limit
743 #
744 # If an intermediate snapshot size is higher than the limit. The
745 # limit exist to prevent endless chain of intermediate delta to be
746 # created.
747 if (
748 deltainfo.snapshotdepth is not None
749 and (self.revinfo.textlen >> deltainfo.snapshotdepth)
750 < deltainfo.deltalen
751 ):
752 return False
753
754 # bad delta if new intermediate snapshot is larger than the previous
755 # snapshot
756 if (
757 deltainfo.snapshotdepth
758 and self.revlog.length(deltainfo.base) < deltainfo.deltalen
759 ):
760 return False
761
762 return True
763
640 @property
764 @property
641 def done(self):
765 def done(self):
642 """True when all possible candidate have been tested"""
766 """True when all possible candidate have been tested"""
@@ -1041,130 +1165,6 b' class _DeltaSearch:'
1041 # fulltext.
1165 # fulltext.
1042 yield (prev,)
1166 yield (prev,)
1043
1167
def is_good_delta_info(self, deltainfo):
    """Returns True if the given delta is good.

    A delta is good when it passes, in this order, the universal checks,
    the delta chain quality checks and the snapshot constraint checks.
    Evaluation stops at the first check that rejects the delta, so later
    checks are not run for a delta that was already rejected.
    """
    return bool(
        self._is_good_delta_info_universal(deltainfo)
        and self._is_good_delta_info_chain_quality(deltainfo)
        and self._is_good_delta_info_snapshot_constraints(deltainfo)
    )
1057
1058 def _is_good_delta_info_universal(self, deltainfo):
1059 """Returns True if the given delta is good.
1060
1061 This performs generic checks needed by all format variants.
1062
1063 This is used by is_good_delta_info.
1064 """
1065
1066 if deltainfo is None:
1067 return False
1068
1069 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
1070 # so we should never end up asking such question. Adding the assert as
1071 # a safe-guard to detect anything that would be fishy in this regard.
1072 assert (
1073 self.revinfo.cachedelta is None
1074 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
1075 or not self.revlog.delta_config.general_delta
1076 )
1077
1078 # Bad delta from new delta size:
1079 #
1080 # If the delta size is larger than the target text, storing the delta
1081 # will be inefficient.
1082 if self.revinfo.textlen < deltainfo.deltalen:
1083 return False
1084
1085 return True
1086
1087 def _is_good_delta_info_chain_quality(self, deltainfo):
1088 """Returns True if the chain associated with the delta is good.
1089
1090 This performs checks for format that use delta chains.
1091
1092 This is used by is_good_delta_info.
1093 """
1094 # - 'deltainfo.distance' is the distance from the base revision --
1095 # bounding it limits the amount of I/O we need to do.
1096
1097 defaultmax = self.revinfo.textlen * 4
1098 maxdist = self.revlog.delta_config.max_deltachain_span
1099 if not maxdist:
1100 maxdist = deltainfo.distance # ensure the conditional pass
1101 maxdist = max(maxdist, defaultmax)
1102
1103 # Bad delta from read span:
1104 #
1105 # If the span of data read is larger than the maximum allowed.
1106 #
1107 # In the sparse-revlog case, we rely on the associated "sparse
1108 # reading" to avoid issue related to the span of data. In theory, it
1109 # would be possible to build pathological revlog where delta pattern
1110 # would lead to too many reads. However, they do not happen in
1111 # practice at all. So we skip the span check entirely.
1112 if (
1113 not self.revlog.delta_config.sparse_revlog
1114 and maxdist < deltainfo.distance
1115 ):
1116 return False
1117
1118 # Bad delta from cumulated payload size:
1119 #
1120 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
1121 # deltas we need to apply -- bounding it limits the amount of CPU
1122 # we consume.
1123 max_chain_data = self.revinfo.textlen * LIMIT_DELTA2TEXT
1124 # If the sum of delta get larger than K * target text length.
1125 if max_chain_data < deltainfo.compresseddeltalen:
1126 return False
1127
1128 # Bad delta from chain length:
1129 #
1130 # If the number of delta in the chain gets too high.
1131 if (
1132 self.revlog.delta_config.max_chain_len
1133 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
1134 ):
1135 return False
1136 return True
1137
1138 def _is_good_delta_info_snapshot_constraints(self, deltainfo):
1139 """Returns True if the chain associated with snapshots
1140
1141 This performs checks for format that use sparse-revlog and intermediate
1142 snapshots.
1143
1144 This is used by is_good_delta_info.
1145 """
1146 # bad delta from intermediate snapshot size limit
1147 #
1148 # If an intermediate snapshot size is higher than the limit. The
1149 # limit exist to prevent endless chain of intermediate delta to be
1150 # created.
1151 if (
1152 deltainfo.snapshotdepth is not None
1153 and (self.revinfo.textlen >> deltainfo.snapshotdepth)
1154 < deltainfo.deltalen
1155 ):
1156 return False
1157
1158 # bad delta if new intermediate snapshot is larger than the previous
1159 # snapshot
1160 if (
1161 deltainfo.snapshotdepth
1162 and self.revlog.length(deltainfo.base) < deltainfo.deltalen
1163 ):
1164 return False
1165
1166 return True
1167
1168
1168
1169 class SnapshotCache:
1169 class SnapshotCache:
1170 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1170 __slots__ = ('snapshots', '_start_rev', '_end_rev')
General Comments 0
You need to be logged in to leave comments. Login now