Show More
@@ -637,6 +637,130 b' class _DeltaSearch:' | |||||
637 | self._last_good = None |
|
637 | self._last_good = None | |
638 | self.current_group = self._candidates_iterator.send(self._last_good) |
|
638 | self.current_group = self._candidates_iterator.send(self._last_good) | |
639 |
|
639 | |||
|
640 | def is_good_delta_info(self, deltainfo): | |||
|
641 | """Returns True if the given delta is good. | |||
|
642 | ||||
|
643 | Good means that it is within the disk span, disk size, and chain length | |||
|
644 | bounds that we know to be performant. | |||
|
645 | """ | |||
|
646 | if not self._is_good_delta_info_universal(deltainfo): | |||
|
647 | return False | |||
|
648 | if not self._is_good_delta_info_chain_quality(deltainfo): | |||
|
649 | return False | |||
|
650 | if not self._is_good_delta_info_snapshot_constraints(deltainfo): | |||
|
651 | return False | |||
|
652 | return True | |||
|
653 | ||||
|
654 | def _is_good_delta_info_universal(self, deltainfo): | |||
|
655 | """Returns True if the given delta is good. | |||
|
656 | ||||
|
657 | This performs generic checks needed by all format variants. | |||
|
658 | ||||
|
659 | This is used by is_good_delta_info. | |||
|
660 | """ | |||
|
661 | ||||
|
662 | if deltainfo is None: | |||
|
663 | return False | |||
|
664 | ||||
|
665 | # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner | |||
|
666 | # so we should never end up asking such question. Adding the assert as | |||
|
667 | # a safe-guard to detect anything that would be fishy in this regard. | |||
|
668 | assert ( | |||
|
669 | self.revinfo.cachedelta is None | |||
|
670 | or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE | |||
|
671 | or not self.revlog.delta_config.general_delta | |||
|
672 | ) | |||
|
673 | ||||
|
674 | # Bad delta from new delta size: | |||
|
675 | # | |||
|
676 | # If the delta size is larger than the target text, storing the delta | |||
|
677 | # will be inefficient. | |||
|
678 | if self.revinfo.textlen < deltainfo.deltalen: | |||
|
679 | return False | |||
|
680 | ||||
|
681 | return True | |||
|
682 | ||||
|
683 | def _is_good_delta_info_chain_quality(self, deltainfo): | |||
|
684 | """Returns True if the chain associated with the delta is good. | |||
|
685 | ||||
|
686 | This performs checks for format that use delta chains. | |||
|
687 | ||||
|
688 | This is used by is_good_delta_info. | |||
|
689 | """ | |||
|
690 | # - 'deltainfo.distance' is the distance from the base revision -- | |||
|
691 | # bounding it limits the amount of I/O we need to do. | |||
|
692 | ||||
|
693 | defaultmax = self.revinfo.textlen * 4 | |||
|
694 | maxdist = self.revlog.delta_config.max_deltachain_span | |||
|
695 | if not maxdist: | |||
|
696 | maxdist = deltainfo.distance # ensure the conditional pass | |||
|
697 | maxdist = max(maxdist, defaultmax) | |||
|
698 | ||||
|
699 | # Bad delta from read span: | |||
|
700 | # | |||
|
701 | # If the span of data read is larger than the maximum allowed. | |||
|
702 | # | |||
|
703 | # In the sparse-revlog case, we rely on the associated "sparse | |||
|
704 | # reading" to avoid issue related to the span of data. In theory, it | |||
|
705 | # would be possible to build pathological revlog where delta pattern | |||
|
706 | # would lead to too many reads. However, they do not happen in | |||
|
707 | # practice at all. So we skip the span check entirely. | |||
|
708 | if ( | |||
|
709 | not self.revlog.delta_config.sparse_revlog | |||
|
710 | and maxdist < deltainfo.distance | |||
|
711 | ): | |||
|
712 | return False | |||
|
713 | ||||
|
714 | # Bad delta from cumulated payload size: | |||
|
715 | # | |||
|
716 | # - 'deltainfo.compresseddeltalen' is the sum of the total size of | |||
|
717 | # deltas we need to apply -- bounding it limits the amount of CPU | |||
|
718 | # we consume. | |||
|
719 | max_chain_data = self.revinfo.textlen * LIMIT_DELTA2TEXT | |||
|
720 | # If the sum of delta get larger than K * target text length. | |||
|
721 | if max_chain_data < deltainfo.compresseddeltalen: | |||
|
722 | return False | |||
|
723 | ||||
|
724 | # Bad delta from chain length: | |||
|
725 | # | |||
|
726 | # If the number of delta in the chain gets too high. | |||
|
727 | if ( | |||
|
728 | self.revlog.delta_config.max_chain_len | |||
|
729 | and self.revlog.delta_config.max_chain_len < deltainfo.chainlen | |||
|
730 | ): | |||
|
731 | return False | |||
|
732 | return True | |||
|
733 | ||||
|
734 | def _is_good_delta_info_snapshot_constraints(self, deltainfo): | |||
|
735 | """Returns True if the chain associated with snapshots | |||
|
736 | ||||
|
737 | This performs checks for format that use sparse-revlog and intermediate | |||
|
738 | snapshots. | |||
|
739 | ||||
|
740 | This is used by is_good_delta_info. | |||
|
741 | """ | |||
|
742 | # bad delta from intermediate snapshot size limit | |||
|
743 | # | |||
|
744 | # If an intermediate snapshot size is higher than the limit. The | |||
|
745 | # limit exist to prevent endless chain of intermediate delta to be | |||
|
746 | # created. | |||
|
747 | if ( | |||
|
748 | deltainfo.snapshotdepth is not None | |||
|
749 | and (self.revinfo.textlen >> deltainfo.snapshotdepth) | |||
|
750 | < deltainfo.deltalen | |||
|
751 | ): | |||
|
752 | return False | |||
|
753 | ||||
|
754 | # bad delta if new intermediate snapshot is larger than the previous | |||
|
755 | # snapshot | |||
|
756 | if ( | |||
|
757 | deltainfo.snapshotdepth | |||
|
758 | and self.revlog.length(deltainfo.base) < deltainfo.deltalen | |||
|
759 | ): | |||
|
760 | return False | |||
|
761 | ||||
|
762 | return True | |||
|
763 | ||||
640 | @property |
|
764 | @property | |
641 | def done(self): |
|
765 | def done(self): | |
642 | """True when all possible candidate have been tested""" |
|
766 | """True when all possible candidate have been tested""" | |
@@ -1041,130 +1165,6 b' class _DeltaSearch:' | |||||
1041 | # fulltext. |
|
1165 | # fulltext. | |
1042 | yield (prev,) |
|
1166 | yield (prev,) | |
1043 |
|
1167 | |||
1044 | def is_good_delta_info(self, deltainfo): |
|
|||
1045 | """Returns True if the given delta is good. |
|
|||
1046 |
|
||||
1047 | Good means that it is within the disk span, disk size, and chain length |
|
|||
1048 | bounds that we know to be performant. |
|
|||
1049 | """ |
|
|||
1050 | if not self._is_good_delta_info_universal(deltainfo): |
|
|||
1051 | return False |
|
|||
1052 | if not self._is_good_delta_info_chain_quality(deltainfo): |
|
|||
1053 | return False |
|
|||
1054 | if not self._is_good_delta_info_snapshot_constraints(deltainfo): |
|
|||
1055 | return False |
|
|||
1056 | return True |
|
|||
1057 |
|
||||
1058 | def _is_good_delta_info_universal(self, deltainfo): |
|
|||
1059 | """Returns True if the given delta is good. |
|
|||
1060 |
|
||||
1061 | This performs generic checks needed by all format variants. |
|
|||
1062 |
|
||||
1063 | This is used by is_good_delta_info. |
|
|||
1064 | """ |
|
|||
1065 |
|
||||
1066 | if deltainfo is None: |
|
|||
1067 | return False |
|
|||
1068 |
|
||||
1069 | # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner |
|
|||
1070 | # so we should never end up asking such question. Adding the assert as |
|
|||
1071 | # a safe-guard to detect anything that would be fishy in this regard. |
|
|||
1072 | assert ( |
|
|||
1073 | self.revinfo.cachedelta is None |
|
|||
1074 | or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE |
|
|||
1075 | or not self.revlog.delta_config.general_delta |
|
|||
1076 | ) |
|
|||
1077 |
|
||||
1078 | # Bad delta from new delta size: |
|
|||
1079 | # |
|
|||
1080 | # If the delta size is larger than the target text, storing the delta |
|
|||
1081 | # will be inefficient. |
|
|||
1082 | if self.revinfo.textlen < deltainfo.deltalen: |
|
|||
1083 | return False |
|
|||
1084 |
|
||||
1085 | return True |
|
|||
1086 |
|
||||
1087 | def _is_good_delta_info_chain_quality(self, deltainfo): |
|
|||
1088 | """Returns True if the chain associated with the delta is good. |
|
|||
1089 |
|
||||
1090 | This performs checks for format that use delta chains. |
|
|||
1091 |
|
||||
1092 | This is used by is_good_delta_info. |
|
|||
1093 | """ |
|
|||
1094 | # - 'deltainfo.distance' is the distance from the base revision -- |
|
|||
1095 | # bounding it limits the amount of I/O we need to do. |
|
|||
1096 |
|
||||
1097 | defaultmax = self.revinfo.textlen * 4 |
|
|||
1098 | maxdist = self.revlog.delta_config.max_deltachain_span |
|
|||
1099 | if not maxdist: |
|
|||
1100 | maxdist = deltainfo.distance # ensure the conditional pass |
|
|||
1101 | maxdist = max(maxdist, defaultmax) |
|
|||
1102 |
|
||||
1103 | # Bad delta from read span: |
|
|||
1104 | # |
|
|||
1105 | # If the span of data read is larger than the maximum allowed. |
|
|||
1106 | # |
|
|||
1107 | # In the sparse-revlog case, we rely on the associated "sparse |
|
|||
1108 | # reading" to avoid issue related to the span of data. In theory, it |
|
|||
1109 | # would be possible to build pathological revlog where delta pattern |
|
|||
1110 | # would lead to too many reads. However, they do not happen in |
|
|||
1111 | # practice at all. So we skip the span check entirely. |
|
|||
1112 | if ( |
|
|||
1113 | not self.revlog.delta_config.sparse_revlog |
|
|||
1114 | and maxdist < deltainfo.distance |
|
|||
1115 | ): |
|
|||
1116 | return False |
|
|||
1117 |
|
||||
1118 | # Bad delta from cumulated payload size: |
|
|||
1119 | # |
|
|||
1120 | # - 'deltainfo.compresseddeltalen' is the sum of the total size of |
|
|||
1121 | # deltas we need to apply -- bounding it limits the amount of CPU |
|
|||
1122 | # we consume. |
|
|||
1123 | max_chain_data = self.revinfo.textlen * LIMIT_DELTA2TEXT |
|
|||
1124 | # If the sum of delta get larger than K * target text length. |
|
|||
1125 | if max_chain_data < deltainfo.compresseddeltalen: |
|
|||
1126 | return False |
|
|||
1127 |
|
||||
1128 | # Bad delta from chain length: |
|
|||
1129 | # |
|
|||
1130 | # If the number of delta in the chain gets too high. |
|
|||
1131 | if ( |
|
|||
1132 | self.revlog.delta_config.max_chain_len |
|
|||
1133 | and self.revlog.delta_config.max_chain_len < deltainfo.chainlen |
|
|||
1134 | ): |
|
|||
1135 | return False |
|
|||
1136 | return True |
|
|||
1137 |
|
||||
1138 | def _is_good_delta_info_snapshot_constraints(self, deltainfo): |
|
|||
1139 | """Returns True if the chain associated with snapshots |
|
|||
1140 |
|
||||
1141 | This performs checks for format that use sparse-revlog and intermediate |
|
|||
1142 | snapshots. |
|
|||
1143 |
|
||||
1144 | This is used by is_good_delta_info. |
|
|||
1145 | """ |
|
|||
1146 | # bad delta from intermediate snapshot size limit |
|
|||
1147 | # |
|
|||
1148 | # If an intermediate snapshot size is higher than the limit. The |
|
|||
1149 | # limit exist to prevent endless chain of intermediate delta to be |
|
|||
1150 | # created. |
|
|||
1151 | if ( |
|
|||
1152 | deltainfo.snapshotdepth is not None |
|
|||
1153 | and (self.revinfo.textlen >> deltainfo.snapshotdepth) |
|
|||
1154 | < deltainfo.deltalen |
|
|||
1155 | ): |
|
|||
1156 | return False |
|
|||
1157 |
|
||||
1158 | # bad delta if new intermediate snapshot is larger than the previous |
|
|||
1159 | # snapshot |
|
|||
1160 | if ( |
|
|||
1161 | deltainfo.snapshotdepth |
|
|||
1162 | and self.revlog.length(deltainfo.base) < deltainfo.deltalen |
|
|||
1163 | ): |
|
|||
1164 | return False |
|
|||
1165 |
|
||||
1166 | return True |
|
|||
1167 |
|
||||
1168 |
|
1168 | |||
1169 | class SnapshotCache: |
|
1169 | class SnapshotCache: | |
1170 | __slots__ = ('snapshots', '_start_rev', '_end_rev') |
|
1170 | __slots__ = ('snapshots', '_start_rev', '_end_rev') |
General Comments 0
You need to be logged in to leave comments.
Login now