##// END OF EJS Templates
discovery: minor fix to some conditionals...
Boris Feld -
r41528:71b0db4f default
parent child Browse files
Show More
@@ -1,313 +1,313
1 1 # setdiscovery.py - improved discovery of common nodeset for mercurial
2 2 #
3 3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
4 4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """
9 9 Algorithm works in the following way. You have two repository: local and
10 10 remote. They both contains a DAG of changelists.
11 11
12 12 The goal of the discovery protocol is to find one set of node *common*,
13 13 the set of nodes shared by local and remote.
14 14
15 15 One of the issue with the original protocol was latency, it could
16 16 potentially require lots of roundtrips to discover that the local repo was a
17 17 subset of remote (which is a very common case, you usually have few changes
18 18 compared to upstream, while upstream probably had lots of development).
19 19
20 20 The new protocol only requires one interface for the remote repo: `known()`,
21 21 which given a set of changelists tells you if they are present in the DAG.
22 22
23 23 The algorithm then works as follow:
24 24
25 25 - We will be using three sets, `common`, `missing`, `unknown`. Originally
26 26 all nodes are in `unknown`.
27 27 - Take a sample from `unknown`, call `remote.known(sample)`
28 28 - For each node that remote knows, move it and all its ancestors to `common`
29 29 - For each node that remote doesn't know, move it and all its descendants
30 30 to `missing`
31 31 - Iterate until `unknown` is empty
32 32
33 33 There are a couple optimizations, first is instead of starting with a random
34 34 sample of missing, start by sending all heads, in the case where the local
35 35 repo is a subset, you computed the answer in one round trip.
36 36
37 37 Then you can do something similar to the bisecting strategy used when
38 38 finding faulty changesets. Instead of random samples, you can try picking
39 39 nodes that will maximize the number of nodes that will be
40 40 classified with it (since all ancestors or descendants will be marked as well).
41 41 """
42 42
43 43 from __future__ import absolute_import
44 44
45 45 import collections
46 46 import random
47 47
48 48 from .i18n import _
49 49 from .node import (
50 50 nullid,
51 51 nullrev,
52 52 )
53 53 from . import (
54 54 error,
55 55 util,
56 56 )
57 57
58 58 def _updatesample(revs, heads, sample, parentfn, quicksamplesize=0):
59 59 """update an existing sample to match the expected size
60 60
61 61 The sample is updated with revs exponentially distant from each head of the
62 62 <revs> set. (H~1, H~2, H~4, H~8, etc).
63 63
64 64 If a target size is specified, the sampling will stop once this size is
65 65 reached. Otherwise sampling will happen until roots of the <revs> set are
66 66 reached.
67 67
68 68 :revs: set of revs we want to discover (if None, assume the whole dag)
69 69 :heads: set of DAG head revs
70 70 :sample: a sample to update
71 71 :parentfn: a callable to resolve parents for a revision
72 72 :quicksamplesize: optional target size of the sample"""
73 73 dist = {}
74 74 visit = collections.deque(heads)
75 75 seen = set()
76 76 factor = 1
77 77 while visit:
78 78 curr = visit.popleft()
79 79 if curr in seen:
80 80 continue
81 81 d = dist.setdefault(curr, 1)
82 82 if d > factor:
83 83 factor *= 2
84 84 if d == factor:
85 85 sample.add(curr)
86 86 if quicksamplesize and (len(sample) >= quicksamplesize):
87 87 return
88 88 seen.add(curr)
89 89
90 90 for p in parentfn(curr):
91 91 if p != nullrev and (not revs or p in revs):
92 92 dist.setdefault(p, d + 1)
93 93 visit.append(p)
94 94
95 95 def _takequicksample(repo, headrevs, revs, size):
96 96 """takes a quick sample of size <size>
97 97
98 98 It is meant for initial sampling and focuses on querying heads and close
99 99 ancestors of heads.
100 100
101 101 :dag: a dag object
102 102 :headrevs: set of head revisions in local DAG to consider
103 103 :revs: set of revs to discover
104 104 :size: the maximum size of the sample"""
105 105 sample = set(repo.revs('heads(%ld)', revs))
106 106
107 107 if len(sample) >= size:
108 108 return _limitsample(sample, size)
109 109
110 110 _updatesample(None, headrevs, sample, repo.changelog.parentrevs,
111 111 quicksamplesize=size)
112 112 return sample
113 113
114 114 def _takefullsample(repo, headrevs, revs, size):
115 115 sample = set(repo.revs('heads(%ld)', revs))
116 116
117 117 # update from heads
118 118 revsheads = set(repo.revs('heads(%ld)', revs))
119 119 _updatesample(revs, revsheads, sample, repo.changelog.parentrevs)
120 120
121 121 # update from roots
122 122 revsroots = set(repo.revs('roots(%ld)', revs))
123 123
124 124 # _updatesample() essentially does interaction over revisions to look up
125 125 # their children. This lookup is expensive and doing it in a loop is
126 126 # quadratic. We precompute the children for all relevant revisions and
127 127 # make the lookup in _updatesample() a simple dict lookup.
128 128 #
129 129 # Because this function can be called multiple times during discovery, we
130 130 # may still perform redundant work and there is room to optimize this by
131 131 # keeping a persistent cache of children across invocations.
132 132 children = {}
133 133
134 134 parentrevs = repo.changelog.parentrevs
135 135 for rev in repo.changelog.revs(start=min(revsroots)):
136 136 # Always ensure revision has an entry so we don't need to worry about
137 137 # missing keys.
138 138 children.setdefault(rev, [])
139 139
140 140 for prev in parentrevs(rev):
141 141 if prev == nullrev:
142 142 continue
143 143
144 144 children.setdefault(prev, []).append(rev)
145 145
146 146 _updatesample(revs, revsroots, sample, children.__getitem__)
147 147 assert sample
148 148 sample = _limitsample(sample, size)
149 if len(sample) < size:
149 if len(sample) <= size:
150 150 more = size - len(sample)
151 151 sample.update(random.sample(list(revs - sample), more))
152 152 return sample
153 153
154 154 def _limitsample(sample, desiredlen):
155 155 """return a random subset of sample of at most desiredlen item"""
156 156 if len(sample) > desiredlen:
157 157 sample = set(random.sample(sample, desiredlen))
158 158 return sample
159 159
160 160 def findcommonheads(ui, local, remote,
161 161 initialsamplesize=100,
162 162 fullsamplesize=200,
163 163 abortwhenunrelated=True,
164 164 ancestorsof=None):
165 165 '''Return a tuple (common, anyincoming, remoteheads) used to identify
166 166 missing nodes from or in remote.
167 167 '''
168 168 start = util.timer()
169 169
170 170 roundtrips = 0
171 171 cl = local.changelog
172 172 clnode = cl.node
173 173 clrev = cl.rev
174 174
175 175 if ancestorsof is not None:
176 176 ownheads = [clrev(n) for n in ancestorsof]
177 177 else:
178 178 ownheads = [rev for rev in cl.headrevs() if rev != nullrev]
179 179
180 180 # early exit if we know all the specified remote heads already
181 181 ui.debug("query 1; heads\n")
182 182 roundtrips += 1
183 183 sample = _limitsample(ownheads, initialsamplesize)
184 184 # indices between sample and externalized version must match
185 185 sample = list(sample)
186 186
187 187 with remote.commandexecutor() as e:
188 188 fheads = e.callcommand('heads', {})
189 189 fknown = e.callcommand('known', {
190 190 'nodes': [clnode(r) for r in sample],
191 191 })
192 192
193 193 srvheadhashes, yesno = fheads.result(), fknown.result()
194 194
195 195 if cl.tip() == nullid:
196 196 if srvheadhashes != [nullid]:
197 197 return [nullid], True, srvheadhashes
198 198 return [nullid], False, []
199 199
200 200 # start actual discovery (we note this before the next "if" for
201 201 # compatibility reasons)
202 202 ui.status(_("searching for changes\n"))
203 203
204 204 srvheads = []
205 205 for node in srvheadhashes:
206 206 if node == nullid:
207 207 continue
208 208
209 209 try:
210 210 srvheads.append(clrev(node))
211 211 # Catches unknown and filtered nodes.
212 212 except error.LookupError:
213 213 continue
214 214
215 215 if len(srvheads) == len(srvheadhashes):
216 216 ui.debug("all remote heads known locally\n")
217 217 return srvheadhashes, False, srvheadhashes
218 218
219 219 if len(sample) == len(ownheads) and all(yesno):
220 220 ui.note(_("all local heads known remotely\n"))
221 221 ownheadhashes = [clnode(r) for r in ownheads]
222 222 return ownheadhashes, True, srvheadhashes
223 223
224 224 # full blown discovery
225 225
226 226 # own nodes I know we both know
227 227 # treat remote heads (and maybe own heads) as a first implicit sample
228 228 # response
229 229 common = cl.incrementalmissingrevs(srvheads)
230 230 commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
231 231 common.addbases(commoninsample)
232 232 # own nodes where I don't know if remote knows them
233 233 undecided = set(common.missingancestors(ownheads))
234 234 # own nodes I know remote lacks
235 235 missing = set()
236 236
237 237 full = False
238 238 progress = ui.makeprogress(_('searching'), unit=_('queries'))
239 239 while undecided:
240 240
241 241 if sample:
242 242 missinginsample = [n for i, n in enumerate(sample) if not yesno[i]]
243 243
244 244 if missing:
245 245 missing.update(local.revs('descendants(%ld) - descendants(%ld)',
246 246 missinginsample, missing))
247 247 else:
248 248 missing.update(local.revs('descendants(%ld)', missinginsample))
249 249
250 250 undecided.difference_update(missing)
251 251
252 252 if not undecided:
253 253 break
254 254
255 255 if full or common.hasbases():
256 256 if full:
257 257 ui.note(_("sampling from both directions\n"))
258 258 else:
259 259 ui.debug("taking initial sample\n")
260 260 samplefunc = _takefullsample
261 261 targetsize = fullsamplesize
262 262 else:
263 263 # use even cheaper initial sample
264 264 ui.debug("taking quick initial sample\n")
265 265 samplefunc = _takequicksample
266 266 targetsize = initialsamplesize
267 if len(undecided) < targetsize:
267 if len(undecided) <= targetsize:
268 268 sample = list(undecided)
269 269 else:
270 270 sample = samplefunc(local, ownheads, undecided, targetsize)
271 271
272 272 roundtrips += 1
273 273 progress.update(roundtrips)
274 274 ui.debug("query %i; still undecided: %i, sample size is: %i\n"
275 275 % (roundtrips, len(undecided), len(sample)))
276 276 # indices between sample and externalized version must match
277 277 sample = list(sample)
278 278
279 279 with remote.commandexecutor() as e:
280 280 yesno = e.callcommand('known', {
281 281 'nodes': [clnode(r) for r in sample],
282 282 }).result()
283 283
284 284 full = True
285 285
286 286 if sample:
287 287 commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
288 288 common.addbases(commoninsample)
289 289 common.removeancestorsfrom(undecided)
290 290
291 291 # heads(common) == heads(common.bases) since common represents common.bases
292 292 # and all its ancestors
293 293 # The presence of nullrev will confuse heads(). So filter it out.
294 294 result = set(local.revs('heads(%ld)', common.bases - {nullrev}))
295 295 elapsed = util.timer() - start
296 296 progress.complete()
297 297 ui.debug("%d total queries in %.4fs\n" % (roundtrips, elapsed))
298 298 msg = ('found %d common and %d unknown server heads,'
299 299 ' %d roundtrips in %.4fs\n')
300 300 missing = set(result) - set(srvheads)
301 301 ui.log('discovery', msg, len(result), len(missing), roundtrips,
302 302 elapsed)
303 303
304 304 if not result and srvheadhashes != [nullid]:
305 305 if abortwhenunrelated:
306 306 raise error.Abort(_("repository is unrelated"))
307 307 else:
308 308 ui.warn(_("warning: repository is unrelated\n"))
309 309 return ({nullid}, True, srvheadhashes,)
310 310
311 311 anyincoming = (srvheadhashes != [nullid])
312 312 result = {clnode(r) for r in result}
313 313 return result, anyincoming, srvheadhashes
General Comments 0
You need to be logged in to leave comments. Login now