setdiscovery: don't use dagutil to compute heads...
Gregory Szorc
r39201:860e83cd default
@@ -1,284 +1,285 @@
1 1 # setdiscovery.py - improved discovery of common nodeset for mercurial
2 2 #
3 3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
4 4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """
9 9 The algorithm works in the following way. You have two repositories: local and
10 10 remote. They both contain a DAG of changelists.
11 11
12 12 The goal of the discovery protocol is to find one set of nodes, *common*,
13 13 the set of nodes shared by local and remote.
14 14
15 15 One of the issues with the original protocol was latency: it could
16 16 potentially require lots of roundtrips to discover that the local repo was a
17 17 subset of remote (which is a very common case; you usually have few changes
18 18 compared to upstream, while upstream probably has lots of development).
19 19
20 20 The new protocol only requires one interface for the remote repo: `known()`,
21 21 which, given a set of changelists, tells you whether they are present in the DAG.
22 22
23 23 The algorithm then works as follows:
24 24
25 25 - We will be using three sets, `common`, `missing`, `unknown`. Originally
26 26 all nodes are in `unknown`.
27 27 - Take a sample from `unknown`, call `remote.known(sample)`
28 28 - For each node that remote knows, move it and all its ancestors to `common`
29 29 - For each node that remote doesn't know, move it and all its descendants
30 30 to `missing`
31 31 - Iterate until `unknown` is empty
32 32
33 33 There are a couple of optimizations. First, instead of starting with a random
34 34 sample of missing, start by sending all heads; in the case where the local
35 35 repo is a subset, you compute the answer in one round trip.
36 36
37 37 Then you can do something similar to the bisecting strategy used when
38 38 finding faulty changesets. Instead of random samples, you can try picking
39 39 nodes that will maximize the number of nodes that will be
40 40 classified with them (since all ancestors or descendants will be marked as well).
41 41 """
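To make the loop described in the docstring above concrete, here is a minimal, self-contained sketch of the classification step. It runs against a toy graph rather than Mercurial's internals: remote_known, parents, and children are hypothetical stand-ins for the peer's `known` command and the changelog's edge accessors.

    import random

    def _closure(node, edges):
        # Transitive closure of `node` along `edges` (node -> parents gives
        # ancestors; node -> children gives descendants). Includes `node`.
        seen, stack = set(), [node]
        while stack:
            n = stack.pop()
            if n not in seen:
                seen.add(n)
                stack.extend(edges.get(n, ()))
        return seen

    def discover(unknown, parents, children, remote_known, samplesize=2):
        common, missing, unknown = set(), set(), set(unknown)
        while unknown:
            sample = random.sample(sorted(unknown),
                                   min(samplesize, len(unknown)))
            answers = remote_known(sample)  # the one required remote interface
            for node, known in zip(sample, answers):
                if known:
                    moved = _closure(node, parents) & unknown   # + ancestors
                    common |= moved
                else:
                    moved = _closure(node, children) & unknown  # + descendants
                    missing |= moved
                unknown -= moved
        return common, missing

Each iteration costs one round trip, and every answer classifies the node's whole ancestor or descendant closure, which is why sample choice matters so much below.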
42 42
43 43 from __future__ import absolute_import
44 44
45 45 import collections
46 46 import random
47 47
48 48 from .i18n import _
49 49 from .node import (
50 50 nullid,
51 51 nullrev,
52 52 )
53 53 from . import (
54 54 dagutil,
55 55 error,
56 56 util,
57 57 )
58 58
59 59 def _updatesample(dag, nodes, sample, quicksamplesize=0):
60 60 """update an existing sample to match the expected size
61 61
62 62 The sample is updated with nodes exponentially distant from each head of the
63 63 <nodes> set. (H~1, H~2, H~4, H~8, etc).
64 64
65 65 If a target size is specified, the sampling will stop once this size is
66 66 reached. Otherwise sampling will happen until roots of the <nodes> set are
67 67 reached.
68 68
69 69 :dag: a dag object from dagutil
70 70 :nodes: set of nodes we want to discover (if None, assume the whole dag)
71 71 :sample: a sample to update
72 72 :quicksamplesize: optional target size of the sample"""
73 73 # if nodes is empty we scan the entire graph
74 74 if nodes:
75 75 heads = dag.headsetofconnecteds(nodes)
76 76 else:
77 77 heads = dag.heads()
78 78 dist = {}
79 79 visit = collections.deque(heads)
80 80 seen = set()
81 81 factor = 1
82 82 while visit:
83 83 curr = visit.popleft()
84 84 if curr in seen:
85 85 continue
86 86 d = dist.setdefault(curr, 1)
87 87 if d > factor:
88 88 factor *= 2
89 89 if d == factor:
90 90 sample.add(curr)
91 91 if quicksamplesize and (len(sample) >= quicksamplesize):
92 92 return
93 93 seen.add(curr)
94 94 for p in dag.parents(curr):
95 95 if not nodes or p in nodes:
96 96 dist.setdefault(p, d + 1)
97 97 visit.append(p)
98 98
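For intuition on the spacing rule above: walking back from a head, only nodes at distances 1, 2, 4, 8, ... from that head land in the sample. A standalone re-implementation of just the distance bookkeeping (a hypothetical helper for illustration, not part of this module):

    def sampled_distances(chainlength):
        # Mirrors the d/factor dance in _updatesample for one linear chain.
        picked, factor = [], 1
        for d in range(1, chainlength + 1):
            if d > factor:
                factor *= 2
            if d == factor:
                picked.append(d)
        return picked

    # sampled_distances(20) == [1, 2, 4, 8, 16]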
99 99 def _takequicksample(dag, nodes, size):
100 100 """takes a quick sample of size <size>
101 101
102 102 It is meant for initial sampling and focuses on querying heads and close
103 103 ancestors of heads.
104 104
105 105 :dag: a dag object
106 106 :nodes: set of nodes to discover
107 107 :size: the maximum size of the sample"""
108 108 sample = dag.headsetofconnecteds(nodes)
109 109 if len(sample) >= size:
110 110 return _limitsample(sample, size)
111 111 _updatesample(dag, None, sample, quicksamplesize=size)
112 112 return sample
113 113
114 114 def _takefullsample(dag, nodes, size):
115 115 sample = dag.headsetofconnecteds(nodes)
116 116 # update from heads
117 117 _updatesample(dag, nodes, sample)
118 118 # update from roots
119 119 _updatesample(dag.inverse(), nodes, sample)
120 120 assert sample
121 121 sample = _limitsample(sample, size)
122 122 if len(sample) < size:
123 123 more = size - len(sample)
124 124 sample.update(random.sample(list(nodes - sample), more))
125 125 return sample
126 126
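_takefullsample runs the exponential sampler twice: once from the heads, and once on dag.inverse(), which flips every edge so that roots become heads. A toy version of that inversion, assuming plain node -> parents dicts rather than dagutil objects:

    def invert(parents):
        # Build node -> children from node -> parents; the heads of the
        # inverted graph are exactly the roots of the original one.
        children = {node: set() for node in parents}
        for node, ps in parents.items():
            for p in ps:
                children.setdefault(p, set()).add(node)
        return children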
127 127 def _limitsample(sample, desiredlen):
128 128 """return a random subset of sample of at most desiredlen item"""
129 129 if len(sample) > desiredlen:
130 130 sample = set(random.sample(sample, desiredlen))
131 131 return sample
132 132
133 133 def findcommonheads(ui, local, remote,
134 134 initialsamplesize=100,
135 135 fullsamplesize=200,
136 136 abortwhenunrelated=True,
137 137 ancestorsof=None):
138 138 '''Return a tuple (common, anyincoming, remoteheads) used to identify
139 139 missing nodes from or in remote.
140 140 '''
141 141 start = util.timer()
142 142
143 143 roundtrips = 0
144 144 cl = local.changelog
145 145 clnode = cl.node
146 146 clrev = cl.rev
147 localsubset = None
148 147
149 148 if ancestorsof is not None:
150 localsubset = [clrev(n) for n in ancestorsof]
151 dag = dagutil.revlogdag(cl, localsubset=localsubset)
149 ownheads = [clrev(n) for n in ancestorsof]
150 else:
151 ownheads = [rev for rev in cl.headrevs() if rev != nullrev]
152
153 dag = dagutil.revlogdag(cl, localsubset=ownheads)
152 154
153 155 # early exit if we know all the specified remote heads already
154 156 ui.debug("query 1; heads\n")
155 157 roundtrips += 1
156 ownheads = dag.heads()
157 158 sample = _limitsample(ownheads, initialsamplesize)
158 159 # indices between sample and externalized version must match
159 160 sample = list(sample)
160 161
161 162 with remote.commandexecutor() as e:
162 163 fheads = e.callcommand('heads', {})
163 164 fknown = e.callcommand('known', {
164 165 'nodes': [clnode(r) for r in sample],
165 166 })
166 167
167 168 srvheadhashes, yesno = fheads.result(), fknown.result()
168 169
169 170 if cl.tip() == nullid:
170 171 if srvheadhashes != [nullid]:
171 172 return [nullid], True, srvheadhashes
172 173 return [nullid], False, []
173 174
174 175 # start actual discovery (we note this before the next "if" for
175 176 # compatibility reasons)
176 177 ui.status(_("searching for changes\n"))
177 178
178 179 srvheads = []
179 180 for node in srvheadhashes:
180 181 if node == nullid:
181 182 continue
182 183
183 184 try:
184 185 srvheads.append(clrev(node))
185 186 # Catches unknown and filtered nodes.
186 187 except error.LookupError:
187 188 continue
188 189
189 190 if len(srvheads) == len(srvheadhashes):
190 191 ui.debug("all remote heads known locally\n")
191 192 return srvheadhashes, False, srvheadhashes
192 193
193 194 if len(sample) == len(ownheads) and all(yesno):
194 195 ui.note(_("all local heads known remotely\n"))
195 196 ownheadhashes = [clnode(r) for r in ownheads]
196 197 return ownheadhashes, True, srvheadhashes
197 198
198 199 # full blown discovery
199 200
200 201 # own nodes I know we both know
201 202 # treat remote heads (and maybe own heads) as a first implicit sample
202 203 # response
203 204 common = cl.incrementalmissingrevs(srvheads)
204 205 commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
205 206 common.addbases(commoninsample)
206 207 # own nodes where I don't know if remote knows them
207 208 undecided = set(common.missingancestors(ownheads))
208 209 # own nodes I know remote lacks
209 210 missing = set()
210 211
211 212 full = False
212 213 progress = ui.makeprogress(_('searching'), unit=_('queries'))
213 214 while undecided:
214 215
215 216 if sample:
216 217 missinginsample = [n for i, n in enumerate(sample) if not yesno[i]]
217 218 missing.update(dag.descendantset(missinginsample, missing))
218 219
219 220 undecided.difference_update(missing)
220 221
221 222 if not undecided:
222 223 break
223 224
224 225 if full or common.hasbases():
225 226 if full:
226 227 ui.note(_("sampling from both directions\n"))
227 228 else:
228 229 ui.debug("taking initial sample\n")
229 230 samplefunc = _takefullsample
230 231 targetsize = fullsamplesize
231 232 else:
232 233 # use even cheaper initial sample
233 234 ui.debug("taking quick initial sample\n")
234 235 samplefunc = _takequicksample
235 236 targetsize = initialsamplesize
236 237 if len(undecided) < targetsize:
237 238 sample = list(undecided)
238 239 else:
239 240 sample = samplefunc(dag, undecided, targetsize)
240 241
241 242 roundtrips += 1
242 243 progress.update(roundtrips)
243 244 ui.debug("query %i; still undecided: %i, sample size is: %i\n"
244 245 % (roundtrips, len(undecided), len(sample)))
245 246 # indices between sample and externalized version must match
246 247 sample = list(sample)
247 248
248 249 with remote.commandexecutor() as e:
249 250 yesno = e.callcommand('known', {
250 251 'nodes': [clnode(r) for r in sample],
251 252 }).result()
252 253
253 254 full = True
254 255
255 256 if sample:
256 257 commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
257 258 common.addbases(commoninsample)
258 259 common.removeancestorsfrom(undecided)
259 260
260 261 # heads(common) == heads(common.bases) since common represents common.bases
261 262 # and all its ancestors
262 263 result = dag.headsetofconnecteds(common.bases)
263 264 # common.bases can include nullrev, but our contract requires us to not
264 265 # return any heads in that case, so discard that
265 266 result.discard(nullrev)
266 267 elapsed = util.timer() - start
267 268 progress.complete()
268 269 ui.debug("%d total queries in %.4fs\n" % (roundtrips, elapsed))
269 270 msg = ('found %d common and %d unknown server heads,'
270 271 ' %d roundtrips in %.4fs\n')
271 272 missing = set(result) - set(srvheads)
272 273 ui.log('discovery', msg, len(result), len(missing), roundtrips,
273 274 elapsed)
274 275
275 276 if not result and srvheadhashes != [nullid]:
276 277 if abortwhenunrelated:
277 278 raise error.Abort(_("repository is unrelated"))
278 279 else:
279 280 ui.warn(_("warning: repository is unrelated\n"))
280 281 return ({nullid}, True, srvheadhashes,)
281 282
282 283 anyincoming = (srvheadhashes != [nullid])
283 284 result = {clnode(r) for r in result}
284 285 return result, anyincoming, srvheadhashes
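For reference, a hypothetical call site unpacking the three-tuple (callers such as mercurial.discovery.findcommonincoming consume it the same way; peer and repo setup elided, names chosen for illustration):

    from mercurial import setdiscovery

    def report(ui, repo, peer):
        # Sketch only: assumes live ui, localrepo, and peer objects.
        common, anyincoming, srvheads = setdiscovery.findcommonheads(
            ui, repo, peer, abortwhenunrelated=False)
        ui.write(b'common heads: %d, remote heads: %d, incoming: %s\n'
                 % (len(common), len(srvheads),
                    b'yes' if anyincoming else b'no'))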