##// END OF EJS Templates
setdiscovery: use progress helper...
Martin von Zweigbergk -
r38369:9e70690a default
parent child Browse files
Show More
@@ -1,270 +1,271 b''
1 1 # setdiscovery.py - improved discovery of common nodeset for mercurial
2 2 #
3 3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
4 4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """
9 9 The algorithm works in the following way. You have two repositories: local and
10 10 remote. They both contain a DAG of changelists.
11 11
12 12 The goal of the discovery protocol is to find one set of node *common*,
13 13 the set of nodes shared by local and remote.
14 14
15 15 One of the issues with the original protocol was latency: it could
16 16 potentially require lots of roundtrips to discover that the local repo was a
17 17 subset of remote (which is a very common case, you usually have few changes
18 18 compared to upstream, while upstream probably had lots of development).
19 19
20 20 The new protocol only requires one interface for the remote repo: `known()`,
21 21 which given a set of changelists tells you if they are present in the DAG.
22 22
23 23 The algorithm then works as follows:
24 24
25 25 - We will be using three sets, `common`, `missing`, `unknown`. Originally
26 26 all nodes are in `unknown`.
27 27 - Take a sample from `unknown`, call `remote.known(sample)`
28 28 - For each node that remote knows, move it and all its ancestors to `common`
29 29 - For each node that remote doesn't know, move it and all its descendants
30 30 to `missing`
31 31 - Iterate until `unknown` is empty
32 32
33 33 There are a couple optimizations, first is instead of starting with a random
34 34 sample of missing, start by sending all heads, in the case where the local
35 35 repo is a subset, you computed the answer in one round trip.
36 36
37 37 Then you can do something similar to the bisecting strategy used when
38 38 finding faulty changesets. Instead of random samples, you can try picking
39 39 nodes that will maximize the number of nodes that will be
40 40 classified with it (since all ancestors or descendants will be marked as well).
41 41 """
42 42
43 43 from __future__ import absolute_import
44 44
45 45 import collections
46 46 import random
47 47
48 48 from .i18n import _
49 49 from .node import (
50 50 nullid,
51 51 nullrev,
52 52 )
53 53 from . import (
54 54 dagutil,
55 55 error,
56 56 util,
57 57 )
58 58
59 59 def _updatesample(dag, nodes, sample, quicksamplesize=0):
60 60 """update an existing sample to match the expected size
61 61
62 62 The sample is updated with nodes exponentially distant from each head of the
63 63 <nodes> set. (H~1, H~2, H~4, H~8, etc).
64 64
65 65 If a target size is specified, the sampling will stop once this size is
66 66 reached. Otherwise sampling will happen until roots of the <nodes> set are
67 67 reached.
68 68
69 69 :dag: a dag object from dagutil
70 70 :nodes: set of nodes we want to discover (if None, assume the whole dag)
71 71 :sample: a sample to update
72 72 :quicksamplesize: optional target size of the sample"""
73 73 # if nodes is empty we scan the entire graph
74 74 if nodes:
75 75 heads = dag.headsetofconnecteds(nodes)
76 76 else:
77 77 heads = dag.heads()
78 78 dist = {}
79 79 visit = collections.deque(heads)
80 80 seen = set()
81 81 factor = 1
82 82 while visit:
83 83 curr = visit.popleft()
84 84 if curr in seen:
85 85 continue
86 86 d = dist.setdefault(curr, 1)
87 87 if d > factor:
88 88 factor *= 2
89 89 if d == factor:
90 90 sample.add(curr)
91 91 if quicksamplesize and (len(sample) >= quicksamplesize):
92 92 return
93 93 seen.add(curr)
94 94 for p in dag.parents(curr):
95 95 if not nodes or p in nodes:
96 96 dist.setdefault(p, d + 1)
97 97 visit.append(p)
98 98
def _takequicksample(dag, nodes, size):
    """takes a quick sample of size <size>

    It is meant for initial sampling and focuses on querying heads and close
    ancestors of heads.

    :dag: a dag object
    :nodes: set of nodes to discover
    :size: the maximum size of the sample"""
    # seed the sample with the heads of the connected subset
    sample = dag.headsetofconnecteds(nodes)
    if len(sample) < size:
        # not enough heads: grow with close ancestors, stopping at <size>
        _updatesample(dag, None, sample, quicksamplesize=size)
        return sample
    # too many heads already: keep a random subset of them
    return _limitsample(sample, size)
113 113
def _takefullsample(dag, nodes, size):
    """take a sample spread across the whole <nodes> subset

    Samples exponentially-spaced nodes walking both from the heads and from
    the roots, then tops up with random undecided nodes if still short of
    <size>."""
    sample = dag.headsetofconnecteds(nodes)
    # spread the sample walking down from the heads...
    _updatesample(dag, nodes, sample)
    # ...and up from the roots, using the inverted graph
    _updatesample(dag.inverse(), nodes, sample)
    assert sample
    sample = _limitsample(sample, size)
    shortfall = size - len(sample)
    if shortfall > 0:
        # still short: pad with random nodes not yet in the sample
        sample.update(random.sample(list(nodes - sample), shortfall))
    return sample
126 126
127 127 def _limitsample(sample, desiredlen):
128 128 """return a random subset of sample of at most desiredlen item"""
129 129 if len(sample) > desiredlen:
130 130 sample = set(random.sample(sample, desiredlen))
131 131 return sample
132 132
def findcommonheads(ui, local, remote,
                    initialsamplesize=100,
                    fullsamplesize=200,
                    abortwhenunrelated=True,
                    ancestorsof=None):
    '''Return a tuple (common, anyincoming, remoteheads) used to identify
    missing nodes from or in remote.
    '''
    start = util.timer()

    roundtrips = 0
    cl = local.changelog
    localsubset = None
    if ancestorsof is not None:
        # restrict discovery to the ancestors of the given nodes
        rev = local.changelog.rev
        localsubset = [rev(n) for n in ancestorsof]
    dag = dagutil.revlogdag(cl, localsubset=localsubset)

    # early exit if we know all the specified remote heads already
    ui.debug("query 1; heads\n")
    roundtrips += 1
    ownheads = dag.heads()
    sample = _limitsample(ownheads, initialsamplesize)
    # indices between sample and externalized version must match
    sample = list(sample)

    # first roundtrip: ask for the remote heads and, in the same batch,
    # whether the remote knows a sample of our own heads
    with remote.commandexecutor() as e:
        fheads = e.callcommand('heads', {})
        fknown = e.callcommand('known', {
            'nodes': dag.externalizeall(sample),
        })

    srvheadhashes, yesno = fheads.result(), fknown.result()

    if cl.tip() == nullid:
        # local repo is empty: everything on the remote is incoming
        if srvheadhashes != [nullid]:
            return [nullid], True, srvheadhashes
        return [nullid], False, []

    # start actual discovery (we note this before the next "if" for
    # compatibility reasons)
    ui.status(_("searching for changes\n"))

    srvheads = dag.internalizeall(srvheadhashes, filterunknown=True)
    if len(srvheads) == len(srvheadhashes):
        # every remote head resolved locally: nothing is incoming
        ui.debug("all remote heads known locally\n")
        return (srvheadhashes, False, srvheadhashes,)

    if len(sample) == len(ownheads) and all(yesno):
        # the sample covered all our heads and remote knew them all:
        # the local repo is a subset of the remote one
        ui.note(_("all local heads known remotely\n"))
        ownheadhashes = dag.externalizeall(ownheads)
        return (ownheadhashes, True, srvheadhashes,)

    # full blown discovery

    # own nodes I know we both know
    # treat remote heads (and maybe own heads) as a first implicit sample
    # response
    common = cl.incrementalmissingrevs(srvheads)
    commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
    common.addbases(commoninsample)
    # own nodes where I don't know if remote knows them
    undecided = set(common.missingancestors(ownheads))
    # own nodes I know remote lacks
    missing = set()

    full = False
    progress = ui.makeprogress(_('searching'), unit=_('queries'))
    # keep sampling undecided nodes and querying the remote (one roundtrip
    # per iteration) until every node is classified as common or missing
    while undecided:

        if sample:
            # nodes the remote denied knowing: they and all their
            # descendants are missing on the remote
            missinginsample = [n for i, n in enumerate(sample) if not yesno[i]]
            missing.update(dag.descendantset(missinginsample, missing))

        undecided.difference_update(missing)

        if not undecided:
            break

        if full or common.hasbases():
            if full:
                ui.note(_("sampling from both directions\n"))
            else:
                ui.debug("taking initial sample\n")
            samplefunc = _takefullsample
            targetsize = fullsamplesize
        else:
            # use even cheaper initial sample
            ui.debug("taking quick initial sample\n")
            samplefunc = _takequicksample
            targetsize = initialsamplesize
        if len(undecided) < targetsize:
            # few enough undecided nodes left: ask about all of them at once
            sample = list(undecided)
        else:
            sample = samplefunc(dag, undecided, targetsize)

        roundtrips += 1
        progress.update(roundtrips)
        ui.debug("query %i; still undecided: %i, sample size is: %i\n"
                 % (roundtrips, len(undecided), len(sample)))
        # indices between sample and externalized version must match
        sample = list(sample)

        with remote.commandexecutor() as e:
            yesno = e.callcommand('known', {
                'nodes': dag.externalizeall(sample),
            }).result()

        full = True

        if sample:
            # nodes the remote confirmed knowing become new common bases,
            # and their ancestors no longer need deciding
            commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
            common.addbases(commoninsample)
            common.removeancestorsfrom(undecided)

    # heads(common) == heads(common.bases) since common represents common.bases
    # and all its ancestors
    result = dag.headsetofconnecteds(common.bases)
    # common.bases can include nullrev, but our contract requires us to not
    # return any heads in that case, so discard that
    result.discard(nullrev)
    elapsed = util.timer() - start
    progress.update(None)
    ui.debug("%d total queries in %.4fs\n" % (roundtrips, elapsed))
    msg = ('found %d common and %d unknown server heads,'
           ' %d roundtrips in %.4fs\n')
    missing = set(result) - set(srvheads)
    ui.log('discovery', msg, len(result), len(missing), roundtrips,
           elapsed)

    if not result and srvheadhashes != [nullid]:
        # no common heads at all: the repositories share no history
        if abortwhenunrelated:
            raise error.Abort(_("repository is unrelated"))
        else:
            ui.warn(_("warning: repository is unrelated\n"))
            return ({nullid}, True, srvheadhashes,)

    anyincoming = (srvheadhashes != [nullid])
    return dag.externalizeall(result), anyincoming, srvheadhashes
General Comments 0
You need to be logged in to leave comments. Login now