# HG changeset patch
# User Boris Feld <boris.feld@octobus.net>
# Date 2018-08-29 16:55:11
# Node ID 1c6ff52fe9cf4cc4fe784d7c3dbe7fba07c55556
# Parent  1441eb38849fd2f9ddb34ba1250ee6bef0bfbf4b

revlogdeltas: split candidate groups selection from the filtering logic

The group iteration has two main components:

* walking candidates, the logic that we are about to extend to build
  intermediate snapshots,

* Making sure we test diffs against interesting bases. No duplicated tests,
  skipping empty revisions, etc.

We split `_candidategroups` to separate the two components. This achieves two
goals:

* It will be simpler to update the walking logic for intermediate snapshots,

* We can gather the filtering logic from `finddeltainfo` into
  `_candidategroups` to centralize it.

diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py
+++ b/mercurial/revlogutils/deltas.py
@@ -569,9 +569,29 @@ def isgooddeltainfo(revlog, deltainfo, r
     return True
 
 def _candidategroups(revlog, p1, p2, cachedelta):
+    """Provides group of revision to be tested as delta base
+
+    This top level function focus on emitting groups with unique and worthwhile
+    content. See _raw_candidate_groups for details about the group order.
     """
-    Provides revisions that present an interest to be diffed against,
-    grouped by level of easiness.
+    # should we try to build a delta?
+    if not (len(revlog) and revlog._storedeltachains):
+        return
+
+    tested = set([nullrev])
+    for group in _rawgroups(revlog, p1, p2, cachedelta):
+        group = tuple(r for r in group if r not in tested)
+        tested.update(group)
+        if group:
+            yield group
+
+def _rawgroups(revlog, p1, p2, cachedelta):
+    """Provides group of revision to be tested as delta base
+
+    This lower level function focus on emitting delta theorically interresting
+    without looking it any practical details.
+
+    The group order aims at providing fast or small candidates first.
     """
     gdelta = revlog._generaldelta
     curr = len(revlog)
@@ -590,29 +610,33 @@ def _candidategroups(revlog, p1, p2, cac
             yield (cachedelta[0],)
             tested.add(cachedelta[0])
 
-        if gdelta:
-            # exclude already lazy tested base if any
-            parents = [p for p in (p1, p2)
-                       if p != nullrev and p not in tested]
+    # This condition is true most of the time when processing
+    # changegroup data into a generaldelta repo. The only time it
+    # isn't true is if this is the first revision in a delta chain
+    # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
+    if cachedelta and gdelta and revlog._lazydeltabase:
+        # Assume what we received from the server is a good choice
+        # build delta will reuse the cache
+        yield (cachedelta[0],)
+
+    if gdelta:
+        # exclude already lazy tested base if any
+        parents = [p for p in (p1, p2) if p != nullrev]
 
-            if not revlog._deltabothparents and len(parents) == 2:
-                parents.sort()
-                # To minimize the chance of having to build a fulltext,
-                # pick first whichever parent is closest to us (max rev)
-                yield (parents[1],)
-                # then the other one (min rev) if the first did not fit
-                yield (parents[0],)
-                tested.update(parents)
-            elif len(parents) > 0:
-                # Test all parents (1 or 2), and keep the best candidate
-                yield parents
-                tested.update(parents)
+        if not revlog._deltabothparents and len(parents) == 2:
+            parents.sort()
+            # To minimize the chance of having to build a fulltext,
+            # pick first whichever parent is closest to us (max rev)
+            yield (parents[1],)
+            # then the other one (min rev) if the first did not fit
+            yield (parents[0],)
+        elif len(parents) > 0:
+            # Test all parents (1 or 2), and keep the best candidate
+            yield parents
 
-        if prev not in tested:
-            # other approach failed try against prev to hopefully save us a
-            # fulltext.
-            yield (prev,)
-            tested.add(prev)
+    # other approach failed try against prev to hopefully save us a
+    # fulltext.
+    yield (prev,)
 
 class deltacomputer(object):
     def __init__(self, revlog):