delta-find: move `_rawgroups` on the `_DeltaSearch` object...
marmoute
r52215:a227e061 default
@@ -888,13 +888,7 @@ class _DeltaSearch:
                 return
         if self.snapshot_cache is None:
             self.snapshot_cache = SnapshotCache()
-        groups = _rawgroups(
-            self.revlog,
-            self.p1,
-            self.p2,
-            self.cachedelta,
-            self.snapshot_cache,
-        )
+        groups = self._raw_groups()
         for candidates in groups:
             good = yield candidates
             if good is not None:
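The code above is itself a generator: each `good = yield candidates` hands one group of candidate delta bases to the search driver, which answers through `send()` with the revision it accepted (or `None` if the group produced nothing usable), while `_raw_groups()` itself is simply iterated. The snippet below is a minimal, self-contained sketch of that yield/send protocol, using an invented `toy_groups()` generator and made-up revision numbers; it is an illustration, not Mercurial code.

def toy_groups():
    # Stand-in for a candidate-group generator: propose candidate bases,
    # cheapest first, and stop as soon as the caller reports a usable one.
    good = yield (10, 12)      # e.g. the two parents
    if good is None:
        good = yield (7,)      # e.g. an intermediate snapshot
    yield None                 # no more candidates to offer

best = None
gen = toy_groups()
candidates = next(gen)
while candidates is not None:
    for rev in candidates:
        if rev == 7:           # pretend only rev 7 gives a small enough delta
            best = rev
    candidates = gen.send(best)
print(best)                    # -> 7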
@@ -937,127 +931,133 @@ class _DeltaSearch:
 
         yield None
 
-
-def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
-    """Provides group of revision to be tested as delta base
-
-    This lower level function focus on emitting delta theorically interresting
-    without looking it any practical details.
-
-    The group order aims at providing fast or small candidates first.
-    """
-    # Why search for delta base if we cannot use a delta base ?
-    assert revlog.delta_config.general_delta
-    # also see issue6056
-    sparse = revlog.delta_config.sparse_revlog
-    curr = len(revlog)
-    prev = curr - 1
-    deltachain = lambda rev: revlog._deltachain(rev)[0]
-
-    # exclude already lazy tested base if any
-    parents = [p for p in (p1, p2) if p != nullrev]
-
-    if not revlog.delta_config.delta_both_parents and len(parents) == 2:
-        parents.sort()
-        # To minimize the chance of having to build a fulltext,
-        # pick first whichever parent is closest to us (max rev)
-        yield (parents[1],)
-        # then the other one (min rev) if the first did not fit
-        yield (parents[0],)
-    elif len(parents) > 0:
-        # Test all parents (1 or 2), and keep the best candidate
-        yield parents
-
-    if sparse and parents:
-        if snapshot_cache is None:
-            # map: base-rev: [snapshot-revs]
-            snapshot_cache = SnapshotCache()
-        # See if we can use an existing snapshot in the parent chains to use as
-        # a base for a new intermediate-snapshot
-        #
-        # search for snapshot in parents delta chain
-        # map: snapshot-level: snapshot-rev
-        parents_snaps = collections.defaultdict(set)
-        candidate_chains = [deltachain(p) for p in parents]
-        for chain in candidate_chains:
-            for idx, s in enumerate(chain):
-                if not revlog.issnapshot(s):
-                    break
-                parents_snaps[idx].add(s)
-        snapfloor = min(parents_snaps[0]) + 1
-        snapshot_cache.update(revlog, snapfloor)
-        # search for the highest "unrelated" revision
-        #
-        # Adding snapshots used by "unrelated" revision increase the odd we
-        # reuse an independant, yet better snapshot chain.
-        #
-        # XXX instead of building a set of revisions, we could lazily enumerate
-        # over the chains. That would be more efficient, however we stick to
-        # simple code for now.
-        all_revs = set()
-        for chain in candidate_chains:
-            all_revs.update(chain)
-        other = None
-        for r in revlog.revs(prev, snapfloor):
-            if r not in all_revs:
-                other = r
-                break
-        if other is not None:
-            # To avoid unfair competition, we won't use unrelated intermediate
-            # snapshot that are deeper than the ones from the parent delta
-            # chain.
-            max_depth = max(parents_snaps.keys())
-            chain = deltachain(other)
-            for depth, s in enumerate(chain):
-                if s < snapfloor:
-                    continue
-                if max_depth < depth:
-                    break
-                if not revlog.issnapshot(s):
-                    break
-                parents_snaps[depth].add(s)
-        # Test them as possible intermediate snapshot base
-        # We test them from highest to lowest level. High level one are more
-        # likely to result in small delta
-        floor = None
-        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
-            siblings = set()
-            for s in snaps:
-                siblings.update(snapshot_cache.snapshots[s])
-            # Before considering making a new intermediate snapshot, we check
-            # if an existing snapshot, children of base we consider, would be
-            # suitable.
-            #
-            # It give a change to reuse a delta chain "unrelated" to the
-            # current revision instead of starting our own. Without such
-            # re-use, topological branches would keep reopening new chains.
-            # Creating more and more snapshot as the repository grow.
-
-            if floor is not None:
-                # We only do this for siblings created after the one in our
-                # parent's delta chain. Those created before has less chances
-                # to be valid base since our ancestors had to create a new
-                # snapshot.
-                siblings = [r for r in siblings if floor < r]
-            yield tuple(sorted(siblings))
-            # then test the base from our parent's delta chain.
-            yield tuple(sorted(snaps))
-            floor = min(snaps)
-        # No suitable base found in the parent chain, search if any full
-        # snapshots emitted since parent's base would be a suitable base for an
-        # intermediate snapshot.
-        #
-        # It give a chance to reuse a delta chain unrelated to the current
-        # revisions instead of starting our own. Without such re-use,
-        # topological branches would keep reopening new full chains. Creating
-        # more and more snapshot as the repository grow.
-        full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
-        yield tuple(sorted(full))
-
-    if not sparse:
-        # other approach failed try against prev to hopefully save us a
-        # fulltext.
-        yield (prev,)
+    def _raw_groups(self):
+        """Provides group of revision to be tested as delta base
+
+        This lower level function focus on emitting delta theorically
+        interresting without looking it any practical details.
+
+        The group order aims at providing fast or small candidates first.
+        """
+        # Why search for delta base if we cannot use a delta base ?
+        assert self.revlog.delta_config.general_delta
+        # also see issue6056
+        sparse = self.revlog.delta_config.sparse_revlog
+        curr = len(self.revlog)
+        prev = curr - 1
+        deltachain = lambda rev: self.revlog._deltachain(rev)[0]
+
+        # exclude already lazy tested base if any
+        parents = [p for p in (self.p1, self.p2) if p != nullrev]
+
+        if (
+            not self.revlog.delta_config.delta_both_parents
+            and len(parents) == 2
+        ):
+            parents.sort()
+            # To minimize the chance of having to build a fulltext,
+            # pick first whichever parent is closest to us (max rev)
+            yield (parents[1],)
+            # then the other one (min rev) if the first did not fit
+            yield (parents[0],)
+        elif len(parents) > 0:
+            # Test all parents (1 or 2), and keep the best candidate
+            yield parents
+
+        if sparse and parents:
+            if self.snapshot_cache is None:
+                # map: base-rev: [snapshot-revs]
+                self.snapshot_cache = SnapshotCache()
+            # See if we can use an existing snapshot in the parent chains to
+            # use as a base for a new intermediate-snapshot
+            #
+            # search for snapshot in parents delta chain map: snapshot-level:
+            # snapshot-rev
+            parents_snaps = collections.defaultdict(set)
+            candidate_chains = [deltachain(p) for p in parents]
+            for chain in candidate_chains:
+                for idx, s in enumerate(chain):
+                    if not self.revlog.issnapshot(s):
+                        break
+                    parents_snaps[idx].add(s)
+            snapfloor = min(parents_snaps[0]) + 1
+            self.snapshot_cache.update(self.revlog, snapfloor)
+            # search for the highest "unrelated" revision
+            #
+            # Adding snapshots used by "unrelated" revision increase the odd we
+            # reuse an independant, yet better snapshot chain.
+            #
+            # XXX instead of building a set of revisions, we could lazily
+            # enumerate over the chains. That would be more efficient, however
+            # we stick to simple code for now.
+            all_revs = set()
+            for chain in candidate_chains:
+                all_revs.update(chain)
+            other = None
+            for r in self.revlog.revs(prev, snapfloor):
+                if r not in all_revs:
+                    other = r
+                    break
+            if other is not None:
+                # To avoid unfair competition, we won't use unrelated
+                # intermediate snapshot that are deeper than the ones from the
+                # parent delta chain.
+                max_depth = max(parents_snaps.keys())
+                chain = deltachain(other)
+                for depth, s in enumerate(chain):
+                    if s < snapfloor:
+                        continue
+                    if max_depth < depth:
+                        break
+                    if not self.revlog.issnapshot(s):
+                        break
+                    parents_snaps[depth].add(s)
+            # Test them as possible intermediate snapshot base We test them
+            # from highest to lowest level. High level one are more likely to
+            # result in small delta
+            floor = None
+            for idx, snaps in sorted(parents_snaps.items(), reverse=True):
+                siblings = set()
+                for s in snaps:
+                    siblings.update(self.snapshot_cache.snapshots[s])
+                # Before considering making a new intermediate snapshot, we
+                # check if an existing snapshot, children of base we consider,
+                # would be suitable.
+                #
+                # It give a change to reuse a delta chain "unrelated" to the
+                # current revision instead of starting our own. Without such
+                # re-use, topological branches would keep reopening new chains.
+                # Creating more and more snapshot as the repository grow.
+
+                if floor is not None:
+                    # We only do this for siblings created after the one in our
+                    # parent's delta chain. Those created before has less
+                    # chances to be valid base since our ancestors had to
+                    # create a new snapshot.
+                    siblings = [r for r in siblings if floor < r]
+                yield tuple(sorted(siblings))
+                # then test the base from our parent's delta chain.
+                yield tuple(sorted(snaps))
+                floor = min(snaps)
+            # No suitable base found in the parent chain, search if any full
+            # snapshots emitted since parent's base would be a suitable base
+            # for an intermediate snapshot.
+            #
+            # It give a chance to reuse a delta chain unrelated to the current
+            # revisions instead of starting our own. Without such re-use,
+            # topological branches would keep reopening new full chains.
+            # Creating more and more snapshot as the repository grow.
+            full = [
+                r
+                for r in self.snapshot_cache.snapshots[nullrev]
+                if snapfloor <= r
+            ]
+            yield tuple(sorted(full))
+
+        if not sparse:
+            # other approach failed try against prev to hopefully save us a
+            # fulltext.
+            yield (prev,)
 
 
 class SnapshotCache:
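Read top to bottom, the moved method keeps the emission order of the old function: closest parent first, then the other parent, then (for sparse revlogs) snapshot candidates from the highest level down, then recent full snapshots, and finally `prev` for non-sparse revlogs. The sketch below is a simplified, hypothetical summary of that ordering, with invented parameter names; it omits the `delta_both_parents` case and the `floor`-based sibling filtering, and it is not the actual implementation.

def candidate_group_order(
    parents,                 # non-null parent revisions
    parent_chain_snapshots,  # {snapshot-level: set(revs)} found in the parents' delta chains
    sibling_snapshots,       # {snapshot-level: set(revs)} of other snapshots sharing those bases
    recent_full_snapshots,   # full snapshots newer than the parents' snapshot floor
    prev,                    # the previous revision in the revlog
    sparse,                  # is sparse-revlog mode enabled?
):
    if parents:
        # closest parent first; the other one only if the first does not fit
        yield (max(parents),)
        if len(parents) == 2:
            yield (min(parents),)
    if sparse and parents:
        # walk snapshot levels from the highest (deepest) down: higher levels
        # are more likely to produce a small delta
        for level in sorted(parent_chain_snapshots, reverse=True):
            # existing sibling snapshots first, then the parent-chain bases
            yield tuple(sorted(sibling_snapshots.get(level, ())))
            yield tuple(sorted(parent_chain_snapshots[level]))
        # then any recent full snapshot, as a base for a new intermediate one
        yield tuple(sorted(recent_full_snapshots))
    if not sparse:
        # last resort before a fulltext: delta against the previous revision
        yield (prev,)

Yielding groups lazily in this order lets the search stop as soon as an early, cheap candidate produces a good enough delta, without ever evaluating the more expensive groups.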