##// END OF EJS Templates
setdiscovery: reflect use of revs instead of nodes...
Gregory Szorc -
r39204:2d218db7 default
parent child Browse files
Show More
@@ -1,290 +1,290
1 1 # setdiscovery.py - improved discovery of common nodeset for mercurial
2 2 #
3 3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
4 4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """
9 9 The algorithm works in the following way. You have two repositories: local
10 10 and remote. They both contain a DAG of changelists.
11 11
12 12 The goal of the discovery protocol is to find one set of nodes, *common*,
13 13 the set of nodes shared by local and remote.
14 14
15 15 One of the issues with the original protocol was latency; it could
16 16 potentially require lots of roundtrips to discover that the local repo was a
17 17 subset of remote (which is a very common case: you usually have few changes
18 18 compared to upstream, while upstream probably had lots of development).
19 19
20 20 The new protocol only requires one interface for the remote repo: `known()`,
21 21 which given a set of changelists tells you if they are present in the DAG.
22 22
23 23 The algorithm then works as follows:
24 24
25 25 - We will be using three sets, `common`, `missing`, `unknown`. Originally
26 26 all nodes are in `unknown`.
27 27 - Take a sample from `unknown`, call `remote.known(sample)`
28 28 - For each node that remote knows, move it and all its ancestors to `common`
29 29 - For each node that remote doesn't know, move it and all its descendants
30 30 to `missing`
31 31 - Iterate until `unknown` is empty
32 32
33 33 There are a couple optimizations, first is instead of starting with a random
34 34 sample of missing, start by sending all heads, in the case where the local
35 35 repo is a subset, you computed the answer in one round trip.
36 36
37 37 Then you can do something similar to the bisecting strategy used when
38 38 finding faulty changesets. Instead of random samples, you can try picking
39 39 nodes that will maximize the number of nodes that will be
40 40 classified with it (since all ancestors or descendants will be marked as well).
41 41 """
42 42
43 43 from __future__ import absolute_import
44 44
45 45 import collections
46 46 import random
47 47
48 48 from .i18n import _
49 49 from .node import (
50 50 nullid,
51 51 nullrev,
52 52 )
53 53 from . import (
54 54 dagutil,
55 55 error,
56 56 util,
57 57 )
58 58
59 def _updatesample(dag, nodes, sample, quicksamplesize=0):
59 def _updatesample(dag, revs, sample, quicksamplesize=0):
60 60 """update an existing sample to match the expected size
61 61
62 The sample is updated with nodes exponentially distant from each head of the
63 <nodes> set. (H~1, H~2, H~4, H~8, etc).
62 The sample is updated with revs exponentially distant from each head of the
63 <revs> set. (H~1, H~2, H~4, H~8, etc).
64 64
65 65 If a target size is specified, the sampling will stop once this size is
66 reached. Otherwise sampling will happen until roots of the <nodes> set are
66 reached. Otherwise sampling will happen until roots of the <revs> set are
67 67 reached.
68 68
69 69 :dag: a dag object from dagutil
70 :nodes: set of nodes we want to discover (if None, assume the whole dag)
70 :revs: set of revs we want to discover (if None, assume the whole dag)
71 71 :sample: a sample to update
72 72 :quicksamplesize: optional target size of the sample"""
73 # if nodes is empty we scan the entire graph
74 if nodes:
75 heads = dag.headsetofconnecteds(nodes)
73 # if revs is empty we scan the entire graph
74 if revs:
75 heads = dag.headsetofconnecteds(revs)
76 76 else:
77 77 heads = dag.heads()
78 78 dist = {}
79 79 visit = collections.deque(heads)
80 80 seen = set()
81 81 factor = 1
82 82 while visit:
83 83 curr = visit.popleft()
84 84 if curr in seen:
85 85 continue
86 86 d = dist.setdefault(curr, 1)
87 87 if d > factor:
88 88 factor *= 2
89 89 if d == factor:
90 90 sample.add(curr)
91 91 if quicksamplesize and (len(sample) >= quicksamplesize):
92 92 return
93 93 seen.add(curr)
94 94 for p in dag.parents(curr):
95 if not nodes or p in nodes:
95 if not revs or p in revs:
96 96 dist.setdefault(p, d + 1)
97 97 visit.append(p)
98 98
def _takequicksample(dag, revs, size):
    """takes a quick sample of size <size>

    It is meant for initial sampling and focuses on querying heads and close
    ancestors of heads.

    :dag: a dag object
    :revs: set of revs to discover
    :size: the maximum size of the sample"""
    # start from the heads of the subset; if that is already enough,
    # trim it down and return
    sample = dag.headsetofconnecteds(revs)
    if len(sample) >= size:
        return _limitsample(sample, size)
    # otherwise pad the sample with revs close to the heads of the whole
    # dag (revs=None) until we hit the target size
    _updatesample(dag, None, sample, quicksamplesize=size)
    return sample
113 113
def _takefullsample(dag, revs, size):
    """return a sample of at most <size> revs from <revs>

    The sample mixes heads, exponentially-spaced ancestors of heads, and
    exponentially-spaced descendants of roots (via the inverted dag), then
    fills any remaining room with random revs.

    :dag: a dag object from dagutil
    :revs: set of revs to discover
    :size: the maximum size of the sample"""
    sample = dag.headsetofconnecteds(revs)
    # update from heads
    _updatesample(dag, revs, sample)
    # update from roots
    _updatesample(dag.inverse(), revs, sample)
    assert sample
    sample = _limitsample(sample, size)
    if len(sample) < size:
        # top up with random revs not yet in the sample
        more = size - len(sample)
        sample.update(random.sample(list(revs - sample), more))
    return sample
126 126
127 127 def _limitsample(sample, desiredlen):
128 128 """return a random subset of sample of at most desiredlen item"""
129 129 if len(sample) > desiredlen:
130 130 sample = set(random.sample(sample, desiredlen))
131 131 return sample
132 132
def findcommonheads(ui, local, remote,
                    initialsamplesize=100,
                    fullsamplesize=200,
                    abortwhenunrelated=True,
                    ancestorsof=None):
    '''Return a tuple (common, anyincoming, remoteheads) used to identify
    missing nodes from or in remote.

    :ui: a ui object, used for progress/debug output
    :local: the local repository
    :remote: a remote peer supporting the `heads` and `known` commands
    :initialsamplesize: size of the first (cheap) sample
    :fullsamplesize: size of subsequent (bidirectional) samples
    :abortwhenunrelated: raise error.Abort if no common head is found
    :ancestorsof: restrict discovery to ancestors of these nodes
    '''
    start = util.timer()

    roundtrips = 0
    cl = local.changelog
    clnode = cl.node
    clrev = cl.rev

    if ancestorsof is not None:
        ownheads = [clrev(n) for n in ancestorsof]
    else:
        ownheads = [rev for rev in cl.headrevs() if rev != nullrev]

    dag = dagutil.revlogdag(cl, localsubset=ownheads)

    # early exit if we know all the specified remote heads already
    ui.debug("query 1; heads\n")
    roundtrips += 1
    sample = _limitsample(ownheads, initialsamplesize)
    # indices between sample and externalized version must match
    sample = list(sample)

    # batch the remote's heads with a `known` probe of our own heads in a
    # single round trip
    with remote.commandexecutor() as e:
        fheads = e.callcommand('heads', {})
        fknown = e.callcommand('known', {
            'nodes': [clnode(r) for r in sample],
        })

    srvheadhashes, yesno = fheads.result(), fknown.result()

    if cl.tip() == nullid:
        # local repo is empty; everything the server has is incoming
        if srvheadhashes != [nullid]:
            return [nullid], True, srvheadhashes
        return [nullid], False, []

    # start actual discovery (we note this before the next "if" for
    # compatibility reasons)
    ui.status(_("searching for changes\n"))

    srvheads = []
    for node in srvheadhashes:
        if node == nullid:
            continue

        try:
            srvheads.append(clrev(node))
        # Catches unknown and filtered nodes.
        except error.LookupError:
            continue

    if len(srvheads) == len(srvheadhashes):
        ui.debug("all remote heads known locally\n")
        return srvheadhashes, False, srvheadhashes

    if len(sample) == len(ownheads) and all(yesno):
        ui.note(_("all local heads known remotely\n"))
        ownheadhashes = [clnode(r) for r in ownheads]
        return ownheadhashes, True, srvheadhashes

    # full blown discovery

    # own nodes I know we both know
    # treat remote heads (and maybe own heads) as a first implicit sample
    # response
    common = cl.incrementalmissingrevs(srvheads)
    commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
    common.addbases(commoninsample)
    # own nodes where I don't know if remote knows them
    undecided = set(common.missingancestors(ownheads))
    # own nodes I know remote lacks
    missing = set()

    full = False
    progress = ui.makeprogress(_('searching'), unit=_('queries'))
    while undecided:

        if sample:
            missinginsample = [n for i, n in enumerate(sample) if not yesno[i]]

            if missing:
                missing.update(local.revs('descendants(%ld) - descendants(%ld)',
                                          missinginsample, missing))
            else:
                missing.update(local.revs('descendants(%ld)', missinginsample))

            undecided.difference_update(missing)

        if not undecided:
            break

        if full or common.hasbases():
            if full:
                ui.note(_("sampling from both directions\n"))
            else:
                ui.debug("taking initial sample\n")
            samplefunc = _takefullsample
            targetsize = fullsamplesize
        else:
            # use even cheaper initial sample
            ui.debug("taking quick initial sample\n")
            samplefunc = _takequicksample
            targetsize = initialsamplesize
        if len(undecided) < targetsize:
            sample = list(undecided)
        else:
            sample = samplefunc(dag, undecided, targetsize)

        roundtrips += 1
        progress.update(roundtrips)
        ui.debug("query %i; still undecided: %i, sample size is: %i\n"
                 % (roundtrips, len(undecided), len(sample)))
        # indices between sample and externalized version must match
        sample = list(sample)

        with remote.commandexecutor() as e:
            yesno = e.callcommand('known', {
                'nodes': [clnode(r) for r in sample],
            }).result()

        full = True

        if sample:
            commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
            common.addbases(commoninsample)
            common.removeancestorsfrom(undecided)

    # heads(common) == heads(common.bases) since common represents common.bases
    # and all its ancestors
    result = dag.headsetofconnecteds(common.bases)
    # common.bases can include nullrev, but our contract requires us to not
    # return any heads in that case, so discard that
    result.discard(nullrev)
    elapsed = util.timer() - start
    progress.complete()
    ui.debug("%d total queries in %.4fs\n" % (roundtrips, elapsed))
    msg = ('found %d common and %d unknown server heads,'
           ' %d roundtrips in %.4fs\n')
    missing = set(result) - set(srvheads)
    ui.log('discovery', msg, len(result), len(missing), roundtrips,
           elapsed)

    if not result and srvheadhashes != [nullid]:
        if abortwhenunrelated:
            raise error.Abort(_("repository is unrelated"))
        else:
            ui.warn(_("warning: repository is unrelated\n"))
        return ({nullid}, True, srvheadhashes,)

    anyincoming = (srvheadhashes != [nullid])
    # translate revs back to node hashes for the caller
    result = {clnode(r) for r in result}
    return result, anyincoming, srvheadhashes
General Comments 0
You need to be logged in to leave comments. Login now