# HG changeset patch # User Pierre-Yves David # Date 2015-01-07 18:32:17 # Node ID 6a5877a73141448e6d91bc74e53808b428357e41 # Parent 932f814bf01635c327570cf61c0ac3cae4718db4 setdiscovery: drop the 'always' argument to '_updatesample' This argument exists because of the complex code flow in '_takequicksample'. It first gets the list of heads and then calls '_updatesample' on an empty initial sample, with a size limit matching the difference between the number of heads and the target sample size. Finally, the heads and the sample from '_updatesample' were merged. To ensure the merged result had exactly the target length, the code had to ensure no elements from the heads were added to the '_updatesample' content, and therefore was passing this "always included set of heads". Instead, we can just update the initial heads sample directly and use the final target size as the target size for the update. This removes the need for the 'always' parameter to the '_updatesample' function. The tests are affected because a different set-building order results in different random sampling. 
diff --git a/mercurial/setdiscovery.py b/mercurial/setdiscovery.py --- a/mercurial/setdiscovery.py +++ b/mercurial/setdiscovery.py @@ -45,7 +45,7 @@ from i18n import _ import random import util, dagutil -def _updatesample(dag, nodes, sample, always, quicksamplesize=0): +def _updatesample(dag, nodes, sample, quicksamplesize=0): """update an existing sample to match the expected size The sample is updated with nodes exponentially distant from each head of the @@ -58,7 +58,6 @@ def _updatesample(dag, nodes, sample, al :dag: a dag object from dagutil :nodes: set of nodes we want to discover (if None, assume the whole dag) :sample: a sample to update - :always: set of notable nodes that will be part of the sample anyway :quicksamplesize: optional target size of the sample""" # if nodes is empty we scan the entire graph if nodes: @@ -77,10 +76,9 @@ def _updatesample(dag, nodes, sample, al if d > factor: factor *= 2 if d == factor: - if curr not in always: # need this check for the early exit below - sample.add(curr) - if quicksamplesize and (len(sample) >= quicksamplesize): - return + sample.add(curr) + if quicksamplesize and (len(sample) >= quicksamplesize): + return seen.add(curr) for p in dag.parents(curr): if not nodes or p in nodes: @@ -100,18 +98,17 @@ def _takequicksample(dag, nodes, size): always, sample, desiredlen = _setupsample(dag, nodes, size) if sample is None: return always - _updatesample(dag, None, sample, always, quicksamplesize=desiredlen) - sample.update(always) + sample = always + _updatesample(dag, None, sample, quicksamplesize=size) return sample def _takefullsample(dag, nodes, size): - sample = always = dag.headsetofconnecteds(nodes) + sample = dag.headsetofconnecteds(nodes) # update from heads - _updatesample(dag, nodes, sample, always) + _updatesample(dag, nodes, sample) # update from roots - _updatesample(dag.inverse(), nodes, sample, always) + _updatesample(dag.inverse(), nodes, sample) assert sample - sample.update(always) sample = 
_limitsample(sample, size) if len(sample) < size: more = size - len(sample)