##// END OF EJS Templates
setdiscovery: remove '_setupsample' function...
Pierre-Yves David -
r23817:813aaaf2 default
parent child Browse files
Show More
@@ -1,259 +1,250
1 1 # setdiscovery.py - improved discovery of common nodeset for mercurial
2 2 #
3 3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
4 4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """
9 9 Algorithm works in the following way. You have two repository: local and
10 10 remote. They both contains a DAG of changelists.
11 11
12 12 The goal of the discovery protocol is to find one set of node *common*,
13 13 the set of nodes shared by local and remote.
14 14
15 15 One of the issue with the original protocol was latency, it could
16 16 potentially require lots of roundtrips to discover that the local repo was a
17 17 subset of remote (which is a very common case, you usually have few changes
18 18 compared to upstream, while upstream probably had lots of development).
19 19
20 20 The new protocol only requires one interface for the remote repo: `known()`,
21 21 which given a set of changelists tells you if they are present in the DAG.
22 22
23 23 The algorithm then works as follow:
24 24
25 25 - We will be using three sets, `common`, `missing`, `unknown`. Originally
26 26 all nodes are in `unknown`.
27 27 - Take a sample from `unknown`, call `remote.known(sample)`
28 28 - For each node that remote knows, move it and all its ancestors to `common`
29 29 - For each node that remote doesn't know, move it and all its descendants
30 30 to `missing`
31 31 - Iterate until `unknown` is empty
32 32
33 33 There are a couple optimizations, first is instead of starting with a random
34 34 sample of missing, start by sending all heads, in the case where the local
35 35 repo is a subset, you computed the answer in one round trip.
36 36
37 37 Then you can do something similar to the bisecting strategy used when
38 38 finding faulty changesets. Instead of random samples, you can try picking
39 39 nodes that will maximize the number of nodes that will be
40 40 classified with it (since all ancestors or descendants will be marked as well).
41 41 """
42 42
43 43 from node import nullid, nullrev
44 44 from i18n import _
45 45 import random
46 46 import util, dagutil
47 47
48 48 def _updatesample(dag, nodes, sample, quicksamplesize=0):
49 49 """update an existing sample to match the expected size
50 50
51 51 The sample is updated with nodes exponentially distant from each head of the
52 52 <nodes> set. (H~1, H~2, H~4, H~8, etc).
53 53
54 54 If a target size is specified, the sampling will stop once this size is
55 55 reached. Otherwise sampling will happen until roots of the <nodes> set are
56 56 reached.
57 57
58 58 :dag: a dag object from dagutil
59 59 :nodes: set of nodes we want to discover (if None, assume the whole dag)
60 60 :sample: a sample to update
61 61 :quicksamplesize: optional target size of the sample"""
62 62 # if nodes is empty we scan the entire graph
63 63 if nodes:
64 64 heads = dag.headsetofconnecteds(nodes)
65 65 else:
66 66 heads = dag.heads()
67 67 dist = {}
68 68 visit = util.deque(heads)
69 69 seen = set()
70 70 factor = 1
71 71 while visit:
72 72 curr = visit.popleft()
73 73 if curr in seen:
74 74 continue
75 75 d = dist.setdefault(curr, 1)
76 76 if d > factor:
77 77 factor *= 2
78 78 if d == factor:
79 79 sample.add(curr)
80 80 if quicksamplesize and (len(sample) >= quicksamplesize):
81 81 return
82 82 seen.add(curr)
83 83 for p in dag.parents(curr):
84 84 if not nodes or p in nodes:
85 85 dist.setdefault(p, d + 1)
86 86 visit.append(p)
87 87
88 def _setupsample(dag, nodes, size):
89 always = dag.headsetofconnecteds(nodes)
90 desiredlen = size - len(always)
91 if desiredlen <= 0:
92 # This could be bad if there are very many heads, all unknown to the
93 # server. We're counting on long request support here.
94 return always, None, desiredlen
95 return always, set(), desiredlen
96
97 88 def _takequicksample(dag, nodes, size):
98 89 """takes a quick sample of size <size>
99 90
100 91 It is meant for initial sampling and focuses on querying heads and close
101 92 ancestors of heads.
102 93
103 94 :dag: a dag object
104 95 :nodes: set of nodes to discover
105 96 :size: the maximum size of the sample"""
106 97 sample = dag.headsetofconnecteds(nodes)
107 98 if size <= len(sample):
108 99 return _limitsample(sample, size)
109 100 _updatesample(dag, None, sample, quicksamplesize=size)
110 101 return sample
111 102
112 103 def _takefullsample(dag, nodes, size):
113 104 sample = dag.headsetofconnecteds(nodes)
114 105 # update from heads
115 106 _updatesample(dag, nodes, sample)
116 107 # update from roots
117 108 _updatesample(dag.inverse(), nodes, sample)
118 109 assert sample
119 110 sample = _limitsample(sample, size)
120 111 if len(sample) < size:
121 112 more = size - len(sample)
122 113 sample.update(random.sample(list(nodes - sample), more))
123 114 return sample
124 115
125 116 def _limitsample(sample, desiredlen):
126 117 """return a random subset of sample of at most desiredlen item"""
127 118 if len(sample) > desiredlen:
128 119 sample = set(random.sample(sample, desiredlen))
129 120 return sample
130 121
131 122 def findcommonheads(ui, local, remote,
132 123 initialsamplesize=100,
133 124 fullsamplesize=200,
134 125 abortwhenunrelated=True):
135 126 '''Return a tuple (common, anyincoming, remoteheads) used to identify
136 127 missing nodes from or in remote.
137 128 '''
138 129 roundtrips = 0
139 130 cl = local.changelog
140 131 dag = dagutil.revlogdag(cl)
141 132
142 133 # early exit if we know all the specified remote heads already
143 134 ui.debug("query 1; heads\n")
144 135 roundtrips += 1
145 136 ownheads = dag.heads()
146 137 sample = _limitsample(ownheads, initialsamplesize)
147 138 # indices between sample and externalized version must match
148 139 sample = list(sample)
149 140 if remote.local():
150 141 # stopgap until we have a proper localpeer that supports batch()
151 142 srvheadhashes = remote.heads()
152 143 yesno = remote.known(dag.externalizeall(sample))
153 144 elif remote.capable('batch'):
154 145 batch = remote.batch()
155 146 srvheadhashesref = batch.heads()
156 147 yesnoref = batch.known(dag.externalizeall(sample))
157 148 batch.submit()
158 149 srvheadhashes = srvheadhashesref.value
159 150 yesno = yesnoref.value
160 151 else:
161 152 # compatibility with pre-batch, but post-known remotes during 1.9
162 153 # development
163 154 srvheadhashes = remote.heads()
164 155 sample = []
165 156
166 157 if cl.tip() == nullid:
167 158 if srvheadhashes != [nullid]:
168 159 return [nullid], True, srvheadhashes
169 160 return [nullid], False, []
170 161
171 162 # start actual discovery (we note this before the next "if" for
172 163 # compatibility reasons)
173 164 ui.status(_("searching for changes\n"))
174 165
175 166 srvheads = dag.internalizeall(srvheadhashes, filterunknown=True)
176 167 if len(srvheads) == len(srvheadhashes):
177 168 ui.debug("all remote heads known locally\n")
178 169 return (srvheadhashes, False, srvheadhashes,)
179 170
180 171 if sample and len(ownheads) <= initialsamplesize and util.all(yesno):
181 172 ui.note(_("all local heads known remotely\n"))
182 173 ownheadhashes = dag.externalizeall(ownheads)
183 174 return (ownheadhashes, True, srvheadhashes,)
184 175
185 176 # full blown discovery
186 177
187 178 # own nodes I know we both know
188 179 # treat remote heads (and maybe own heads) as a first implicit sample
189 180 # response
190 181 common = cl.incrementalmissingrevs(srvheads)
191 182 commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
192 183 common.addbases(commoninsample)
193 184 # own nodes where I don't know if remote knows them
194 185 undecided = set(common.missingancestors(ownheads))
195 186 # own nodes I know remote lacks
196 187 missing = set()
197 188
198 189 full = False
199 190 while undecided:
200 191
201 192 if sample:
202 193 missinginsample = [n for i, n in enumerate(sample) if not yesno[i]]
203 194 missing.update(dag.descendantset(missinginsample, missing))
204 195
205 196 undecided.difference_update(missing)
206 197
207 198 if not undecided:
208 199 break
209 200
210 201 if full or common.hasbases():
211 202 if full:
212 203 ui.note(_("sampling from both directions\n"))
213 204 else:
214 205 ui.debug("taking initial sample\n")
215 206 samplefunc = _takefullsample
216 207 targetsize = fullsamplesize
217 208 else:
218 209 # use even cheaper initial sample
219 210 ui.debug("taking quick initial sample\n")
220 211 samplefunc = _takequicksample
221 212 targetsize = initialsamplesize
222 213 if len(undecided) < targetsize:
223 214 sample = list(undecided)
224 215 else:
225 216 sample = samplefunc(dag, undecided, targetsize)
226 217 sample = _limitsample(sample, targetsize)
227 218
228 219 roundtrips += 1
229 220 ui.progress(_('searching'), roundtrips, unit=_('queries'))
230 221 ui.debug("query %i; still undecided: %i, sample size is: %i\n"
231 222 % (roundtrips, len(undecided), len(sample)))
232 223 # indices between sample and externalized version must match
233 224 sample = list(sample)
234 225 yesno = remote.known(dag.externalizeall(sample))
235 226 full = True
236 227
237 228 if sample:
238 229 commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
239 230 common.addbases(commoninsample)
240 231 common.removeancestorsfrom(undecided)
241 232
242 233 # heads(common) == heads(common.bases) since common represents common.bases
243 234 # and all its ancestors
244 235 result = dag.headsetofconnecteds(common.bases)
245 236 # common.bases can include nullrev, but our contract requires us to not
246 237 # return any heads in that case, so discard that
247 238 result.discard(nullrev)
248 239 ui.progress(_('searching'), None)
249 240 ui.debug("%d total queries\n" % roundtrips)
250 241
251 242 if not result and srvheadhashes != [nullid]:
252 243 if abortwhenunrelated:
253 244 raise util.Abort(_("repository is unrelated"))
254 245 else:
255 246 ui.warn(_("warning: repository is unrelated\n"))
256 247 return (set([nullid]), True, srvheadhashes,)
257 248
258 249 anyincoming = (srvheadhashes != [nullid])
259 250 return dag.externalizeall(result), anyincoming, srvheadhashes
General Comments 0
You need to be logged in to leave comments. Login now