##// END OF EJS Templates
delta-find: move is_good_delta_info on the _DeltaSearch class...
marmoute -
r52224:7455cae6 default
parent child Browse files
Show More
@@ -584,91 +584,6 b' def drop_u_compression(delta):'
584 )
584 )
585
585
586
586
587 def is_good_delta_info(revlog, deltainfo, revinfo):
588 """Returns True if the given delta is good. Good means that it is within
589 the disk span, disk size, and chain length bounds that we know to be
590 performant."""
591 if deltainfo is None:
592 return False
593
594 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
595 # we should never end up asking such a question. Adding the assert as a
596 # safeguard to detect anything that would be fishy in this regard.
597 assert (
598 revinfo.cachedelta is None
599 or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
600 or not revlog.delta_config.general_delta
601 )
602
603 # - 'deltainfo.distance' is the distance from the base revision --
604 # bounding it limits the amount of I/O we need to do.
605 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
606 # deltas we need to apply -- bounding it limits the amount of CPU
607 # we consume.
608
609 textlen = revinfo.textlen
610 defaultmax = textlen * 4
611 maxdist = revlog.delta_config.max_deltachain_span
612 if not maxdist:
613 maxdist = deltainfo.distance # ensure the conditional pass
614 maxdist = max(maxdist, defaultmax)
615
616 # Bad delta from read span:
617 #
618 # If the span of data read is larger than the maximum allowed.
619 #
620 # In the sparse-revlog case, we rely on the associated "sparse reading"
621 # to avoid issues related to the span of data. In theory, it would be
622 # possible to build a pathological revlog where the delta pattern would
623 # lead to too many reads. However, this does not happen in practice at
624 # all, so we skip the span check entirely.
625 if not revlog.delta_config.sparse_revlog and maxdist < deltainfo.distance:
626 return False
627
628 # Bad delta from new delta size:
629 #
630 # If the delta size is larger than the target text, storing the
631 # delta will be inefficient.
632 if textlen < deltainfo.deltalen:
633 return False
634
635 # Bad delta from cumulated payload size:
636 #
637 # If the sum of the deltas gets larger than LIMIT_DELTA2TEXT * target text length.
638 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
639 return False
640
641 # Bad delta from chain length:
642 #
643 # If the number of deltas in the chain gets too high.
644 if (
645 revlog.delta_config.max_chain_len
646 and revlog.delta_config.max_chain_len < deltainfo.chainlen
647 ):
648 return False
649
650 # bad delta from intermediate snapshot size limit
651 #
652 # If an intermediate snapshot size is higher than the limit. The
653 # limit exists to prevent an endless chain of intermediate deltas
654 # from being created.
655 if (
656 deltainfo.snapshotdepth is not None
657 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
658 ):
659 return False
660
661 # bad delta if new intermediate snapshot is larger than the previous
662 # snapshot
663 if (
664 deltainfo.snapshotdepth
665 and revlog.length(deltainfo.base) < deltainfo.deltalen
666 ):
667 return False
668
669 return True
670
671
672 # If a revision's full text is that much bigger than a base candidate full
587 # If a revision's full text is that much bigger than a base candidate full
673 # text's, it is very unlikely that it will produce a valid delta. We no longer
588 # text's, it is very unlikely that it will produce a valid delta. We no longer
674 # consider these candidates.
589 # consider these candidates.
@@ -1061,6 +976,93 b' class _DeltaSearch:'
1061 # fulltext.
976 # fulltext.
1062 yield (prev,)
977 yield (prev,)
1063
978
979 def is_good_delta_info(self, deltainfo):
980 """Returns True if the given delta is good. Good means that it is
981 within the disk span, disk size, and chain length bounds that we know
982 to be performant."""
983 if deltainfo is None:
984 return False
985
986 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
987 # so we should never end up asking such a question. Adding the assert as
988 # a safeguard to detect anything that would be fishy in this regard.
989 assert (
990 self.revinfo.cachedelta is None
991 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
992 or not self.revlog.delta_config.general_delta
993 )
994
995 # - 'deltainfo.distance' is the distance from the base revision --
996 # bounding it limits the amount of I/O we need to do.
997 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
998 # deltas we need to apply -- bounding it limits the amount of CPU
999 # we consume.
1000
1001 textlen = self.revinfo.textlen
1002 defaultmax = textlen * 4
1003 maxdist = self.revlog.delta_config.max_deltachain_span
1004 if not maxdist:
1005 maxdist = deltainfo.distance # ensure the conditional pass
1006 maxdist = max(maxdist, defaultmax)
1007
1008 # Bad delta from read span:
1009 #
1010 # If the span of data read is larger than the maximum allowed.
1011 #
1012 # In the sparse-revlog case, we rely on the associated "sparse
1013 # reading" to avoid issues related to the span of data. In theory, it
1014 # would be possible to build a pathological revlog where the delta
1015 # pattern would lead to too many reads. However, this does not happen
1016 # in practice at all, so we skip the span check entirely.
1017 if (
1018 not self.revlog.delta_config.sparse_revlog
1019 and maxdist < deltainfo.distance
1020 ):
1021 return False
1022
1023 # Bad delta from new delta size:
1024 #
1025 # If the delta size is larger than the target text, storing the delta
1026 # will be inefficient.
1027 if textlen < deltainfo.deltalen:
1028 return False
1029
1030 # Bad delta from cumulated payload size:
1031 #
1032 # If the sum of the deltas gets larger than LIMIT_DELTA2TEXT * target text length.
1033 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
1034 return False
1035
1036 # Bad delta from chain length:
1037 #
1038 # If the number of deltas in the chain gets too high.
1039 if (
1040 self.revlog.delta_config.max_chain_len
1041 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
1042 ):
1043 return False
1044
1045 # bad delta from intermediate snapshot size limit
1046 #
1047 # If an intermediate snapshot size is higher than the limit. The
1048 # limit exists to prevent an endless chain of intermediate deltas
1049 # from being created.
1050 if (
1051 deltainfo.snapshotdepth is not None
1052 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
1053 ):
1054 return False
1055
1056 # bad delta if new intermediate snapshot is larger than the previous
1057 # snapshot
1058 if (
1059 deltainfo.snapshotdepth
1060 and self.revlog.length(deltainfo.base) < deltainfo.deltalen
1061 ):
1062 return False
1063
1064 return True
1065
1064
1066
1065 class SnapshotCache:
1067 class SnapshotCache:
1066 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1068 __slots__ = ('snapshots', '_start_rev', '_end_rev')
@@ -1521,7 +1523,7 b' class deltacomputer:'
1521 msg %= delta_end - delta_start
1523 msg %= delta_end - delta_start
1522 self._write_debug(msg)
1524 self._write_debug(msg)
1523 if candidatedelta is not None:
1525 if candidatedelta is not None:
1524 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1526 if search.is_good_delta_info(candidatedelta):
1525 if self._debug_search:
1527 if self._debug_search:
1526 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1528 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1527 msg %= candidatedelta.deltalen
1529 msg %= candidatedelta.deltalen
General Comments 0
You need to be logged in to leave comments. Login now