setdiscovery: drop '_setupsample' usage in '_takequicksample'...
Pierre-Yves David
r23815:31e75a36 default
@@ -1,252 +1,251 @@
# setdiscovery.py - improved discovery of common nodeset for mercurial
#
# Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
# and Peter Arrenbrecht <peter@arrenbrecht.ch>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""
The algorithm works in the following way. You have two repositories: local
and remote. They both contain a DAG of changelists.

The goal of the discovery protocol is to find one set of nodes, *common*,
the set of nodes shared by local and remote.

One of the issues with the original protocol was latency: it could
potentially require lots of roundtrips to discover that the local repo was
a subset of the remote (which is a very common case; you usually have few
changes compared to upstream, while upstream probably had lots of
development).

The new protocol only requires one interface for the remote repo: `known()`,
which, given a set of changelists, tells you if they are present in the DAG.

The algorithm then works as follows:

- We will be using three sets, `common`, `missing`, `unknown`. Originally
  all nodes are in `unknown`.
- Take a sample from `unknown`, call `remote.known(sample)`
- For each node that remote knows, move it and all its ancestors to `common`
- For each node that remote doesn't know, move it and all its descendants
  to `missing`
- Iterate until `unknown` is empty

There are a couple of optimizations. First, instead of starting with a
random sample of missing, start by sending all heads; in the case where the
local repo is a subset, you compute the answer in one round trip.

Second, you can do something similar to the bisecting strategy used when
finding faulty changesets. Instead of random samples, you can try picking
nodes that will maximize the number of nodes that will be classified with
them (since all ancestors or descendants will be marked as well).
"""
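
To make the classification loop above concrete, here is a minimal, self-contained sketch of the same idea against a `known()`-style oracle. The tiny `parents` DAG and the `remote_known` stub are invented for this illustration; the real code operates on revlog revisions and a wire peer, not dicts.

# Illustration only: the docstring's loop on a four-node linear DAG.
parents = {'a': [], 'b': ['a'], 'c': ['b'], 'd': ['c']}

def ancestors(node):
    seen, stack = set(), [node]
    while stack:
        n = stack.pop()
        if n not in seen:
            seen.add(n)
            stack.extend(parents[n])
    return seen

def descendants(node):
    return set(n for n in parents if node in ancestors(n))

def remote_known(sample):
    # hypothetical oracle: pretend the remote only has 'a' and 'b'
    return [n in ('a', 'b') for n in sample]

unknown, common, missing = set(parents), set(), set()
while unknown:
    sample = [min(unknown)]                # a real sampler is smarter
    for node, known in zip(sample, remote_known(sample)):
        if known:
            common |= ancestors(node)      # known => all ancestors known
        else:
            missing |= descendants(node)   # unknown => all descendants unknown
    unknown -= common | missing

print(sorted(common), sorted(missing))     # ['a', 'b'] ['c', 'd']

The real sampler is far smarter about which nodes it sends, but the common/missing bookkeeping is the same.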

from node import nullid, nullrev
from i18n import _
import random
import util, dagutil

def _updatesample(dag, nodes, sample, quicksamplesize=0):
    """update an existing sample to match the expected size

    The sample is updated with nodes exponentially distant from each head of the
    <nodes> set. (H~1, H~2, H~4, H~8, etc).

    If a target size is specified, the sampling will stop once this size is
    reached. Otherwise sampling will happen until roots of the <nodes> set are
    reached.

    :dag: a dag object from dagutil
    :nodes: set of nodes we want to discover (if None, assume the whole dag)
    :sample: a sample to update
    :quicksamplesize: optional target size of the sample"""
    # if nodes is empty we scan the entire graph
    if nodes:
        heads = dag.headsetofconnecteds(nodes)
    else:
        heads = dag.heads()
    dist = {}
    visit = util.deque(heads)
    seen = set()
    factor = 1
    while visit:
        curr = visit.popleft()
        if curr in seen:
            continue
        d = dist.setdefault(curr, 1)
        if d > factor:
            factor *= 2
        if d == factor:
            sample.add(curr)
            if quicksamplesize and (len(sample) >= quicksamplesize):
                return
        seen.add(curr)
        for p in dag.parents(curr):
            if not nodes or p in nodes:
                dist.setdefault(p, d + 1)
                visit.append(p)

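To see the exponential spacing in action, here is a hypothetical standalone mimic of the traversal above on a ten-node linear chain (node 9 is the only head, node 0 the root). It reuses the same distance/factor logic but hardcodes the chain's parent relation, so it runs without the real dagutil objects.

from collections import deque

# Standalone mimic of _updatesample's distance logic on a linear chain
# 0 <- 1 <- ... <- 9, where 9 is the only head and parents(n) = [n - 1].
def chainsample(head, quicksamplesize=0):
    sample = set()
    dist, visit, seen, factor = {}, deque([head]), set(), 1
    while visit:
        curr = visit.popleft()
        if curr in seen:
            continue
        d = dist.setdefault(curr, 1)
        if d > factor:
            factor *= 2
        if d == factor:
            sample.add(curr)
            if quicksamplesize and len(sample) >= quicksamplesize:
                return sample
        seen.add(curr)
        for p in ([curr - 1] if curr > 0 else []):  # linear-chain parents
            dist.setdefault(p, d + 1)
            visit.append(p)
    return sample

print(sorted(chainsample(9)))  # [2, 6, 8, 9]: distances 1, 2, 4, 8 from the head

The doubling spacing is what lets a small sample still say something about deep history.
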
def _setupsample(dag, nodes, size):
    always = dag.headsetofconnecteds(nodes)
    desiredlen = size - len(always)
    if desiredlen <= 0:
        # This could be bad if there are very many heads, all unknown to the
        # server. We're counting on long request support here.
        return always, None, desiredlen
    return always, set(), desiredlen

def _takequicksample(dag, nodes, size):
-    always, sample, desiredlen = _setupsample(dag, nodes, size)
-    if sample is None:
-        return always
-    sample = always
+    sample = dag.headsetofconnecteds(nodes)
+    if size <= len(sample):
+        return _limitsample(sample, size)
    _updatesample(dag, None, sample, quicksamplesize=size)
    return sample

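The user-visible consequence of this change is easiest to see when a repository has more undecided heads than the requested sample size: the old path returned every head unconditionally (`_setupsample` signalled this by returning `sample = None`), while the new path caps the head set with `_limitsample`. A hedged sketch with made-up numbers:

import random

# Hypothetical: 500 undecided heads but a target sample size of 100.
heads = set(range(500))
size = 100

# old behaviour: _setupsample returned sample=None, so all 500 heads
# were sent to the server in a single request
old_sample = set(heads)

# new behaviour: the head set is capped to a random subset of 'size'
new_sample = set(random.sample(list(heads), size))

print(len(old_sample), len(new_sample))    # 500 100
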
def _takefullsample(dag, nodes, size):
    sample = dag.headsetofconnecteds(nodes)
    # update from heads
    _updatesample(dag, nodes, sample)
    # update from roots
    _updatesample(dag.inverse(), nodes, sample)
    assert sample
    sample = _limitsample(sample, size)
    if len(sample) < size:
        more = size - len(sample)
        sample.update(random.sample(list(nodes - sample), more))
    return sample

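A full sample runs the exponential walk twice: once from the heads, and once from the roots by inverting the DAG. Assuming the hypothetical chain mimic from the earlier sketch, the two passes over a ten-node chain would cover both extremities; the sets below are hardcoded from that example, not computed by the real code:

# Illustration only: heads-then-roots passes on a 10-node chain.
from_heads = set([9, 8, 6, 2])   # exponential walk from the head (node 9)
from_roots = set([0, 1, 3, 7])   # same walk on the inverted chain (node 0)
sample = from_heads | from_roots
print(sorted(sample))            # [0, 1, 2, 3, 6, 7, 8, 9]

Sampling from both directions means that whichever way the remote answers for a given node, a large region of the graph gets classified.
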
def _limitsample(sample, desiredlen):
    """return a random subset of sample of at most desiredlen items"""
    if len(sample) > desiredlen:
        sample = set(random.sample(sample, desiredlen))
    return sample

def findcommonheads(ui, local, remote,
                    initialsamplesize=100,
                    fullsamplesize=200,
                    abortwhenunrelated=True):
    '''Return a tuple (common, anyincoming, remoteheads) used to identify
    missing nodes from or in remote.
    '''
    roundtrips = 0
    cl = local.changelog
    dag = dagutil.revlogdag(cl)

    # early exit if we know all the specified remote heads already
    ui.debug("query 1; heads\n")
    roundtrips += 1
    ownheads = dag.heads()
    sample = _limitsample(ownheads, initialsamplesize)
    # indices between sample and externalized version must match
    sample = list(sample)
    if remote.local():
        # stopgap until we have a proper localpeer that supports batch()
        srvheadhashes = remote.heads()
        yesno = remote.known(dag.externalizeall(sample))
    elif remote.capable('batch'):
        batch = remote.batch()
        srvheadhashesref = batch.heads()
        yesnoref = batch.known(dag.externalizeall(sample))
        batch.submit()
        srvheadhashes = srvheadhashesref.value
        yesno = yesnoref.value
    else:
        # compatibility with pre-batch, but post-known remotes during 1.9
        # development
        srvheadhashes = remote.heads()
        sample = []

    if cl.tip() == nullid:
        if srvheadhashes != [nullid]:
            return [nullid], True, srvheadhashes
        return [nullid], False, []

    # start actual discovery (we note this before the next "if" for
    # compatibility reasons)
    ui.status(_("searching for changes\n"))

    srvheads = dag.internalizeall(srvheadhashes, filterunknown=True)
    if len(srvheads) == len(srvheadhashes):
        ui.debug("all remote heads known locally\n")
        return (srvheadhashes, False, srvheadhashes,)

    if sample and len(ownheads) <= initialsamplesize and util.all(yesno):
        ui.note(_("all local heads known remotely\n"))
        ownheadhashes = dag.externalizeall(ownheads)
        return (ownheadhashes, True, srvheadhashes,)

    # full blown discovery

    # own nodes I know we both know
    # treat remote heads (and maybe own heads) as a first implicit sample
    # response
    common = cl.incrementalmissingrevs(srvheads)
    commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
    common.addbases(commoninsample)
    # own nodes where I don't know if remote knows them
    undecided = set(common.missingancestors(ownheads))
    # own nodes I know remote lacks
    missing = set()

    full = False
    while undecided:

        if sample:
            missinginsample = [n for i, n in enumerate(sample) if not yesno[i]]
            missing.update(dag.descendantset(missinginsample, missing))

        undecided.difference_update(missing)

        if not undecided:
            break

        if full or common.hasbases():
            if full:
                ui.note(_("sampling from both directions\n"))
            else:
                ui.debug("taking initial sample\n")
            samplefunc = _takefullsample
            targetsize = fullsamplesize
        else:
            # use even cheaper initial sample
            ui.debug("taking quick initial sample\n")
            samplefunc = _takequicksample
            targetsize = initialsamplesize
        if len(undecided) < targetsize:
            sample = list(undecided)
        else:
            sample = samplefunc(dag, undecided, targetsize)
            sample = _limitsample(sample, targetsize)

        roundtrips += 1
        ui.progress(_('searching'), roundtrips, unit=_('queries'))
        ui.debug("query %i; still undecided: %i, sample size is: %i\n"
                 % (roundtrips, len(undecided), len(sample)))
        # indices between sample and externalized version must match
        sample = list(sample)
        yesno = remote.known(dag.externalizeall(sample))
        full = True

        if sample:
            commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
            common.addbases(commoninsample)
            common.removeancestorsfrom(undecided)

    # heads(common) == heads(common.bases) since common represents common.bases
    # and all its ancestors
    result = dag.headsetofconnecteds(common.bases)
    # common.bases can include nullrev, but our contract requires us to not
    # return any heads in that case, so discard that
    result.discard(nullrev)
    ui.progress(_('searching'), None)
    ui.debug("%d total queries\n" % roundtrips)

    if not result and srvheadhashes != [nullid]:
        if abortwhenunrelated:
            raise util.Abort(_("repository is unrelated"))
        else:
            ui.warn(_("warning: repository is unrelated\n"))
        return (set([nullid]), True, srvheadhashes,)

    anyincoming = (srvheadhashes != [nullid])
    return dag.externalizeall(result), anyincoming, srvheadhashes
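
Finally, to tie the pieces together, here is a hedged end-to-end sketch of the exchange from the protocol's point of view. The `ToyPeer`, the linear ten-revision history, and the single-round driver are all invented for illustration; the real function batches `heads()` and `known()`, samples incrementally, and works with real changelog and dagutil objects.

# Illustration only: a toy peer and a findcommonheads-style driver.
# Local history: r0..r9 linear; the "remote" only has r0..r5, so the
# common heads should come out as {5}.

LOCAL = list(range(10))                # local revs, parent of r is r - 1
REMOTE_HAS = set(range(6))             # remote has r0..r5

class ToyPeer(object):
    def known(self, sample):
        return [r in REMOTE_HAS for r in sample]

def toydiscovery(peer):
    roundtrips = 0
    undecided = set(LOCAL)
    commonbases = set()
    while undecided:
        # a real implementation samples cleverly; we just send everything
        sample = sorted(undecided)
        roundtrips += 1
        yesno = peer.known(sample)
        for r, known in zip(sample, yesno):
            if known:
                commonbases.add(r)
                undecided -= set(range(r + 1))          # ancestors are common
            else:
                undecided -= set(range(r, len(LOCAL)))  # descendants missing
    # heads(common): the largest common rev on this linear chain
    return max(commonbases), roundtrips

print(toydiscovery(ToyPeer()))         # (5, 1): common head r5, 1 round trip

Because the toy driver sends every undecided node at once it always finishes in one round trip; the real implementation trades that bandwidth for the sampling logic above, keeping both the number of queries and the size of each query small.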