diff --git a/mercurial/setdiscovery.py b/mercurial/setdiscovery.py --- a/mercurial/setdiscovery.py +++ b/mercurial/setdiscovery.py @@ -5,6 +5,40 @@ # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. +""" +Algorithm works in the following way. You have two repository: local and +remote. They both contains a DAG of changelists. + +The goal of the discovery protocol is to find one set of node *common*, +the set of nodes shared by local and remote. + +One of the issue with the original protocol was latency, it could +potentially require lots of roundtrips to discover that the local repo was a +subset of remote (which is a very common case, you usually have few changes +compared to upstream, while upstream probably had lots of development). + +The new protocol only requires one interface for the remote repo: `known()`, +which given a set of changelists tells you if they are present in the DAG. + +The algorithm then works as follow: + + - We will be using three sets, `common`, `missing`, `unknown`. Originally + all nodes are in `unknown`. + - Take a sample from `unknown`, call `remote.known(sample)` + - For each node that remote knows, move it and all its ancestors to `common` + - For each node that remote doesn't know, move it and all its descendants + to `missing` + - Iterate until `unknown` is empty + +There are a couple optimizations, first is instead of starting with a random +sample of missing, start by sending all heads, in the case where the local +repo is a subset, you computed the answer in one round trip. + +Then you can do something similar to the bisecting strategy used when +finding faulty changesets. Instead of random samples, you can try picking +nodes that will maximize the number of nodes that will be +classified with it (since all ancestors or descendants will be marked as well). +""" from node import nullid from i18n import _