##// END OF EJS Templates
rust-discovery: optionally don't randomize at all, for tests...
Georges Racinet -
r42968:4e7bd618 default
parent child Browse files
Show More
@@ -1,442 +1,456 b''
1 # setdiscovery.py - improved discovery of common nodeset for mercurial
1 # setdiscovery.py - improved discovery of common nodeset for mercurial
2 #
2 #
3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """
8 """
9 Algorithm works in the following way. You have two repository: local and
9 Algorithm works in the following way. You have two repository: local and
10 remote. They both contains a DAG of changelists.
10 remote. They both contains a DAG of changelists.
11
11
12 The goal of the discovery protocol is to find one set of node *common*,
12 The goal of the discovery protocol is to find one set of node *common*,
13 the set of nodes shared by local and remote.
13 the set of nodes shared by local and remote.
14
14
15 One of the issue with the original protocol was latency, it could
15 One of the issue with the original protocol was latency, it could
16 potentially require lots of roundtrips to discover that the local repo was a
16 potentially require lots of roundtrips to discover that the local repo was a
17 subset of remote (which is a very common case, you usually have few changes
17 subset of remote (which is a very common case, you usually have few changes
18 compared to upstream, while upstream probably had lots of development).
18 compared to upstream, while upstream probably had lots of development).
19
19
20 The new protocol only requires one interface for the remote repo: `known()`,
20 The new protocol only requires one interface for the remote repo: `known()`,
21 which given a set of changelists tells you if they are present in the DAG.
21 which given a set of changelists tells you if they are present in the DAG.
22
22
23 The algorithm then works as follow:
23 The algorithm then works as follow:
24
24
25 - We will be using three sets, `common`, `missing`, `unknown`. Originally
25 - We will be using three sets, `common`, `missing`, `unknown`. Originally
26 all nodes are in `unknown`.
26 all nodes are in `unknown`.
27 - Take a sample from `unknown`, call `remote.known(sample)`
27 - Take a sample from `unknown`, call `remote.known(sample)`
28 - For each node that remote knows, move it and all its ancestors to `common`
28 - For each node that remote knows, move it and all its ancestors to `common`
29 - For each node that remote doesn't know, move it and all its descendants
29 - For each node that remote doesn't know, move it and all its descendants
30 to `missing`
30 to `missing`
31 - Iterate until `unknown` is empty
31 - Iterate until `unknown` is empty
32
32
33 There are a couple optimizations, first is instead of starting with a random
33 There are a couple optimizations, first is instead of starting with a random
34 sample of missing, start by sending all heads, in the case where the local
34 sample of missing, start by sending all heads, in the case where the local
35 repo is a subset, you computed the answer in one round trip.
35 repo is a subset, you computed the answer in one round trip.
36
36
37 Then you can do something similar to the bisecting strategy used when
37 Then you can do something similar to the bisecting strategy used when
38 finding faulty changesets. Instead of random samples, you can try picking
38 finding faulty changesets. Instead of random samples, you can try picking
39 nodes that will maximize the number of nodes that will be
39 nodes that will maximize the number of nodes that will be
40 classified with it (since all ancestors or descendants will be marked as well).
40 classified with it (since all ancestors or descendants will be marked as well).
41 """
41 """
42
42
43 from __future__ import absolute_import
43 from __future__ import absolute_import
44
44
45 import collections
45 import collections
46 import random
46 import random
47
47
48 from .i18n import _
48 from .i18n import _
49 from .node import (
49 from .node import (
50 nullid,
50 nullid,
51 nullrev,
51 nullrev,
52 )
52 )
53 from . import (
53 from . import (
54 error,
54 error,
55 util,
55 util,
56 )
56 )
57
57
58 def _updatesample(revs, heads, sample, parentfn, quicksamplesize=0):
58 def _updatesample(revs, heads, sample, parentfn, quicksamplesize=0):
59 """update an existing sample to match the expected size
59 """update an existing sample to match the expected size
60
60
61 The sample is updated with revs exponentially distant from each head of the
61 The sample is updated with revs exponentially distant from each head of the
62 <revs> set. (H~1, H~2, H~4, H~8, etc).
62 <revs> set. (H~1, H~2, H~4, H~8, etc).
63
63
64 If a target size is specified, the sampling will stop once this size is
64 If a target size is specified, the sampling will stop once this size is
65 reached. Otherwise sampling will happen until roots of the <revs> set are
65 reached. Otherwise sampling will happen until roots of the <revs> set are
66 reached.
66 reached.
67
67
68 :revs: set of revs we want to discover (if None, assume the whole dag)
68 :revs: set of revs we want to discover (if None, assume the whole dag)
69 :heads: set of DAG head revs
69 :heads: set of DAG head revs
70 :sample: a sample to update
70 :sample: a sample to update
71 :parentfn: a callable to resolve parents for a revision
71 :parentfn: a callable to resolve parents for a revision
72 :quicksamplesize: optional target size of the sample"""
72 :quicksamplesize: optional target size of the sample"""
73 dist = {}
73 dist = {}
74 visit = collections.deque(heads)
74 visit = collections.deque(heads)
75 seen = set()
75 seen = set()
76 factor = 1
76 factor = 1
77 while visit:
77 while visit:
78 curr = visit.popleft()
78 curr = visit.popleft()
79 if curr in seen:
79 if curr in seen:
80 continue
80 continue
81 d = dist.setdefault(curr, 1)
81 d = dist.setdefault(curr, 1)
82 if d > factor:
82 if d > factor:
83 factor *= 2
83 factor *= 2
84 if d == factor:
84 if d == factor:
85 sample.add(curr)
85 sample.add(curr)
86 if quicksamplesize and (len(sample) >= quicksamplesize):
86 if quicksamplesize and (len(sample) >= quicksamplesize):
87 return
87 return
88 seen.add(curr)
88 seen.add(curr)
89
89
90 for p in parentfn(curr):
90 for p in parentfn(curr):
91 if p != nullrev and (not revs or p in revs):
91 if p != nullrev and (not revs or p in revs):
92 dist.setdefault(p, d + 1)
92 dist.setdefault(p, d + 1)
93 visit.append(p)
93 visit.append(p)
94
94
95 def _limitsample(sample, desiredlen):
95 def _limitsample(sample, desiredlen, randomize=True):
96 """return a random subset of sample of at most desiredlen item"""
96 """return a random subset of sample of at most desiredlen item.
97 if len(sample) > desiredlen:
97
98 sample = set(random.sample(sample, desiredlen))
98 If randomize is False, though, a deterministic subset is returned.
99 return sample
99 This is meant for integration tests.
100 """
101 if len(sample) <= desiredlen:
102 return sample
103 if randomize:
104 return set(random.sample(sample, desiredlen))
105 sample = list(sample)
106 sample.sort()
107 return set(sample[:desiredlen])
100
108
101 class partialdiscovery(object):
109 class partialdiscovery(object):
102 """an object representing ongoing discovery
110 """an object representing ongoing discovery
103
111
104 Feed with data from the remote repository, this object keep track of the
112 Feed with data from the remote repository, this object keep track of the
105 current set of changeset in various states:
113 current set of changeset in various states:
106
114
107 - common: revs also known remotely
115 - common: revs also known remotely
108 - undecided: revs we don't have information on yet
116 - undecided: revs we don't have information on yet
109 - missing: revs missing remotely
117 - missing: revs missing remotely
110 (all tracked revisions are known locally)
118 (all tracked revisions are known locally)
111 """
119 """
112
120
113 def __init__(self, repo, targetheads, respectsize):
121 def __init__(self, repo, targetheads, respectsize, randomize=True):
114 self._repo = repo
122 self._repo = repo
115 self._targetheads = targetheads
123 self._targetheads = targetheads
116 self._common = repo.changelog.incrementalmissingrevs()
124 self._common = repo.changelog.incrementalmissingrevs()
117 self._undecided = None
125 self._undecided = None
118 self.missing = set()
126 self.missing = set()
119 self._childrenmap = None
127 self._childrenmap = None
120 self._respectsize = respectsize
128 self._respectsize = respectsize
129 self.randomize = randomize
121
130
122 def addcommons(self, commons):
131 def addcommons(self, commons):
123 """register nodes known as common"""
132 """register nodes known as common"""
124 self._common.addbases(commons)
133 self._common.addbases(commons)
125 if self._undecided is not None:
134 if self._undecided is not None:
126 self._common.removeancestorsfrom(self._undecided)
135 self._common.removeancestorsfrom(self._undecided)
127
136
128 def addmissings(self, missings):
137 def addmissings(self, missings):
129 """register some nodes as missing"""
138 """register some nodes as missing"""
130 newmissing = self._repo.revs('%ld::%ld', missings, self.undecided)
139 newmissing = self._repo.revs('%ld::%ld', missings, self.undecided)
131 if newmissing:
140 if newmissing:
132 self.missing.update(newmissing)
141 self.missing.update(newmissing)
133 self.undecided.difference_update(newmissing)
142 self.undecided.difference_update(newmissing)
134
143
135 def addinfo(self, sample):
144 def addinfo(self, sample):
136 """consume an iterable of (rev, known) tuples"""
145 """consume an iterable of (rev, known) tuples"""
137 common = set()
146 common = set()
138 missing = set()
147 missing = set()
139 for rev, known in sample:
148 for rev, known in sample:
140 if known:
149 if known:
141 common.add(rev)
150 common.add(rev)
142 else:
151 else:
143 missing.add(rev)
152 missing.add(rev)
144 if common:
153 if common:
145 self.addcommons(common)
154 self.addcommons(common)
146 if missing:
155 if missing:
147 self.addmissings(missing)
156 self.addmissings(missing)
148
157
149 def hasinfo(self):
158 def hasinfo(self):
150 """return True is we have any clue about the remote state"""
159 """return True is we have any clue about the remote state"""
151 return self._common.hasbases()
160 return self._common.hasbases()
152
161
153 def iscomplete(self):
162 def iscomplete(self):
154 """True if all the necessary data have been gathered"""
163 """True if all the necessary data have been gathered"""
155 return self._undecided is not None and not self._undecided
164 return self._undecided is not None and not self._undecided
156
165
157 @property
166 @property
158 def undecided(self):
167 def undecided(self):
159 if self._undecided is not None:
168 if self._undecided is not None:
160 return self._undecided
169 return self._undecided
161 self._undecided = set(self._common.missingancestors(self._targetheads))
170 self._undecided = set(self._common.missingancestors(self._targetheads))
162 return self._undecided
171 return self._undecided
163
172
164 def stats(self):
173 def stats(self):
165 return {
174 return {
166 'undecided': len(self.undecided),
175 'undecided': len(self.undecided),
167 }
176 }
168
177
169 def commonheads(self):
178 def commonheads(self):
170 """the heads of the known common set"""
179 """the heads of the known common set"""
171 # heads(common) == heads(common.bases) since common represents
180 # heads(common) == heads(common.bases) since common represents
172 # common.bases and all its ancestors
181 # common.bases and all its ancestors
173 return self._common.basesheads()
182 return self._common.basesheads()
174
183
175 def _parentsgetter(self):
184 def _parentsgetter(self):
176 getrev = self._repo.changelog.index.__getitem__
185 getrev = self._repo.changelog.index.__getitem__
177 def getparents(r):
186 def getparents(r):
178 return getrev(r)[5:7]
187 return getrev(r)[5:7]
179 return getparents
188 return getparents
180
189
181 def _childrengetter(self):
190 def _childrengetter(self):
182
191
183 if self._childrenmap is not None:
192 if self._childrenmap is not None:
184 # During discovery, the `undecided` set keep shrinking.
193 # During discovery, the `undecided` set keep shrinking.
185 # Therefore, the map computed for an iteration N will be
194 # Therefore, the map computed for an iteration N will be
186 # valid for iteration N+1. Instead of computing the same
195 # valid for iteration N+1. Instead of computing the same
187 # data over and over we cached it the first time.
196 # data over and over we cached it the first time.
188 return self._childrenmap.__getitem__
197 return self._childrenmap.__getitem__
189
198
190 # _updatesample() essentially does interaction over revisions to look
199 # _updatesample() essentially does interaction over revisions to look
191 # up their children. This lookup is expensive and doing it in a loop is
200 # up their children. This lookup is expensive and doing it in a loop is
192 # quadratic. We precompute the children for all relevant revisions and
201 # quadratic. We precompute the children for all relevant revisions and
193 # make the lookup in _updatesample() a simple dict lookup.
202 # make the lookup in _updatesample() a simple dict lookup.
194 self._childrenmap = children = {}
203 self._childrenmap = children = {}
195
204
196 parentrevs = self._parentsgetter()
205 parentrevs = self._parentsgetter()
197 revs = self.undecided
206 revs = self.undecided
198
207
199 for rev in sorted(revs):
208 for rev in sorted(revs):
200 # Always ensure revision has an entry so we don't need to worry
209 # Always ensure revision has an entry so we don't need to worry
201 # about missing keys.
210 # about missing keys.
202 children[rev] = []
211 children[rev] = []
203 for prev in parentrevs(rev):
212 for prev in parentrevs(rev):
204 if prev == nullrev:
213 if prev == nullrev:
205 continue
214 continue
206 c = children.get(prev)
215 c = children.get(prev)
207 if c is not None:
216 if c is not None:
208 c.append(rev)
217 c.append(rev)
209 return children.__getitem__
218 return children.__getitem__
210
219
211 def takequicksample(self, headrevs, size):
220 def takequicksample(self, headrevs, size):
212 """takes a quick sample of size <size>
221 """takes a quick sample of size <size>
213
222
214 It is meant for initial sampling and focuses on querying heads and close
223 It is meant for initial sampling and focuses on querying heads and close
215 ancestors of heads.
224 ancestors of heads.
216
225
217 :headrevs: set of head revisions in local DAG to consider
226 :headrevs: set of head revisions in local DAG to consider
218 :size: the maximum size of the sample"""
227 :size: the maximum size of the sample"""
219 revs = self.undecided
228 revs = self.undecided
220 if len(revs) <= size:
229 if len(revs) <= size:
221 return list(revs)
230 return list(revs)
222 sample = set(self._repo.revs('heads(%ld)', revs))
231 sample = set(self._repo.revs('heads(%ld)', revs))
223
232
224 if len(sample) >= size:
233 if len(sample) >= size:
225 return _limitsample(sample, size)
234 return _limitsample(sample, size, randomize=self.randomize)
226
235
227 _updatesample(None, headrevs, sample, self._parentsgetter(),
236 _updatesample(None, headrevs, sample, self._parentsgetter(),
228 quicksamplesize=size)
237 quicksamplesize=size)
229 return sample
238 return sample
230
239
231 def takefullsample(self, headrevs, size):
240 def takefullsample(self, headrevs, size):
232 revs = self.undecided
241 revs = self.undecided
233 if len(revs) <= size:
242 if len(revs) <= size:
234 return list(revs)
243 return list(revs)
235 repo = self._repo
244 repo = self._repo
236 sample = set(repo.revs('heads(%ld)', revs))
245 sample = set(repo.revs('heads(%ld)', revs))
237 parentrevs = self._parentsgetter()
246 parentrevs = self._parentsgetter()
238
247
239 # update from heads
248 # update from heads
240 revsheads = sample.copy()
249 revsheads = sample.copy()
241 _updatesample(revs, revsheads, sample, parentrevs)
250 _updatesample(revs, revsheads, sample, parentrevs)
242
251
243 # update from roots
252 # update from roots
244 revsroots = set(repo.revs('roots(%ld)', revs))
253 revsroots = set(repo.revs('roots(%ld)', revs))
245 childrenrevs = self._childrengetter()
254 childrenrevs = self._childrengetter()
246 _updatesample(revs, revsroots, sample, childrenrevs)
255 _updatesample(revs, revsroots, sample, childrenrevs)
247 assert sample
256 assert sample
248
257
249 if not self._respectsize:
258 if not self._respectsize:
250 size = max(size, min(len(revsroots), len(revsheads)))
259 size = max(size, min(len(revsroots), len(revsheads)))
251
260
252 sample = _limitsample(sample, size)
261 sample = _limitsample(sample, size, randomize=self.randomize)
253 if len(sample) < size:
262 if len(sample) < size:
254 more = size - len(sample)
263 more = size - len(sample)
255 sample.update(random.sample(list(revs - sample), more))
264 takefrom = list(revs - sample)
265 if self.randomize:
266 sample.update(random.sample(takefrom, more))
267 else:
268 takefrom.sort()
269 sample.update(takefrom[:more])
256 return sample
270 return sample
257
271
258 def findcommonheads(ui, local, remote,
272 def findcommonheads(ui, local, remote,
259 initialsamplesize=100,
273 initialsamplesize=100,
260 fullsamplesize=200,
274 fullsamplesize=200,
261 abortwhenunrelated=True,
275 abortwhenunrelated=True,
262 ancestorsof=None,
276 ancestorsof=None,
263 samplegrowth=1.05):
277 samplegrowth=1.05):
264 '''Return a tuple (common, anyincoming, remoteheads) used to identify
278 '''Return a tuple (common, anyincoming, remoteheads) used to identify
265 missing nodes from or in remote.
279 missing nodes from or in remote.
266 '''
280 '''
267 start = util.timer()
281 start = util.timer()
268
282
269 roundtrips = 0
283 roundtrips = 0
270 cl = local.changelog
284 cl = local.changelog
271 clnode = cl.node
285 clnode = cl.node
272 clrev = cl.rev
286 clrev = cl.rev
273
287
274 if ancestorsof is not None:
288 if ancestorsof is not None:
275 ownheads = [clrev(n) for n in ancestorsof]
289 ownheads = [clrev(n) for n in ancestorsof]
276 else:
290 else:
277 ownheads = [rev for rev in cl.headrevs() if rev != nullrev]
291 ownheads = [rev for rev in cl.headrevs() if rev != nullrev]
278
292
279 # early exit if we know all the specified remote heads already
293 # early exit if we know all the specified remote heads already
280 ui.debug("query 1; heads\n")
294 ui.debug("query 1; heads\n")
281 roundtrips += 1
295 roundtrips += 1
282 # We also ask remote about all the local heads. That set can be arbitrarily
296 # We also ask remote about all the local heads. That set can be arbitrarily
283 # large, so we used to limit it size to `initialsamplesize`. We no longer
297 # large, so we used to limit it size to `initialsamplesize`. We no longer
284 # do as it proved counter productive. The skipped heads could lead to a
298 # do as it proved counter productive. The skipped heads could lead to a
285 # large "undecided" set, slower to be clarified than if we asked the
299 # large "undecided" set, slower to be clarified than if we asked the
286 # question for all heads right away.
300 # question for all heads right away.
287 #
301 #
288 # We are already fetching all server heads using the `heads` commands,
302 # We are already fetching all server heads using the `heads` commands,
289 # sending a equivalent number of heads the other way should not have a
303 # sending a equivalent number of heads the other way should not have a
290 # significant impact. In addition, it is very likely that we are going to
304 # significant impact. In addition, it is very likely that we are going to
291 # have to issue "known" request for an equivalent amount of revisions in
305 # have to issue "known" request for an equivalent amount of revisions in
292 # order to decide if theses heads are common or missing.
306 # order to decide if theses heads are common or missing.
293 #
307 #
294 # find a detailled analysis below.
308 # find a detailled analysis below.
295 #
309 #
296 # Case A: local and server both has few heads
310 # Case A: local and server both has few heads
297 #
311 #
298 # Ownheads is below initialsamplesize, limit would not have any effect.
312 # Ownheads is below initialsamplesize, limit would not have any effect.
299 #
313 #
300 # Case B: local has few heads and server has many
314 # Case B: local has few heads and server has many
301 #
315 #
302 # Ownheads is below initialsamplesize, limit would not have any effect.
316 # Ownheads is below initialsamplesize, limit would not have any effect.
303 #
317 #
304 # Case C: local and server both has many heads
318 # Case C: local and server both has many heads
305 #
319 #
306 # We now transfert some more data, but not significantly more than is
320 # We now transfert some more data, but not significantly more than is
307 # already transfered to carry the server heads.
321 # already transfered to carry the server heads.
308 #
322 #
309 # Case D: local has many heads, server has few
323 # Case D: local has many heads, server has few
310 #
324 #
311 # D.1 local heads are mostly known remotely
325 # D.1 local heads are mostly known remotely
312 #
326 #
313 # All the known head will have be part of a `known` request at some
327 # All the known head will have be part of a `known` request at some
314 # point for the discovery to finish. Sending them all earlier is
328 # point for the discovery to finish. Sending them all earlier is
315 # actually helping.
329 # actually helping.
316 #
330 #
317 # (This case is fairly unlikely, it requires the numerous heads to all
331 # (This case is fairly unlikely, it requires the numerous heads to all
318 # be merged server side in only a few heads)
332 # be merged server side in only a few heads)
319 #
333 #
320 # D.2 local heads are mostly missing remotely
334 # D.2 local heads are mostly missing remotely
321 #
335 #
322 # To determine that the heads are missing, we'll have to issue `known`
336 # To determine that the heads are missing, we'll have to issue `known`
323 # request for them or one of their ancestors. This amount of `known`
337 # request for them or one of their ancestors. This amount of `known`
324 # request will likely be in the same order of magnitude than the amount
338 # request will likely be in the same order of magnitude than the amount
325 # of local heads.
339 # of local heads.
326 #
340 #
327 # The only case where we can be more efficient using `known` request on
341 # The only case where we can be more efficient using `known` request on
328 # ancestors are case were all the "missing" local heads are based on a
342 # ancestors are case were all the "missing" local heads are based on a
329 # few changeset, also "missing". This means we would have a "complex"
343 # few changeset, also "missing". This means we would have a "complex"
330 # graph (with many heads) attached to, but very independant to a the
344 # graph (with many heads) attached to, but very independant to a the
331 # "simple" graph on the server. This is a fairly usual case and have
345 # "simple" graph on the server. This is a fairly usual case and have
332 # not been met in the wild so far.
346 # not been met in the wild so far.
333 if remote.limitedarguments:
347 if remote.limitedarguments:
334 sample = _limitsample(ownheads, initialsamplesize)
348 sample = _limitsample(ownheads, initialsamplesize)
335 # indices between sample and externalized version must match
349 # indices between sample and externalized version must match
336 sample = list(sample)
350 sample = list(sample)
337 else:
351 else:
338 sample = ownheads
352 sample = ownheads
339
353
340 with remote.commandexecutor() as e:
354 with remote.commandexecutor() as e:
341 fheads = e.callcommand('heads', {})
355 fheads = e.callcommand('heads', {})
342 fknown = e.callcommand('known', {
356 fknown = e.callcommand('known', {
343 'nodes': [clnode(r) for r in sample],
357 'nodes': [clnode(r) for r in sample],
344 })
358 })
345
359
346 srvheadhashes, yesno = fheads.result(), fknown.result()
360 srvheadhashes, yesno = fheads.result(), fknown.result()
347
361
348 if cl.tip() == nullid:
362 if cl.tip() == nullid:
349 if srvheadhashes != [nullid]:
363 if srvheadhashes != [nullid]:
350 return [nullid], True, srvheadhashes
364 return [nullid], True, srvheadhashes
351 return [nullid], False, []
365 return [nullid], False, []
352
366
353 # start actual discovery (we note this before the next "if" for
367 # start actual discovery (we note this before the next "if" for
354 # compatibility reasons)
368 # compatibility reasons)
355 ui.status(_("searching for changes\n"))
369 ui.status(_("searching for changes\n"))
356
370
357 knownsrvheads = [] # revnos of remote heads that are known locally
371 knownsrvheads = [] # revnos of remote heads that are known locally
358 for node in srvheadhashes:
372 for node in srvheadhashes:
359 if node == nullid:
373 if node == nullid:
360 continue
374 continue
361
375
362 try:
376 try:
363 knownsrvheads.append(clrev(node))
377 knownsrvheads.append(clrev(node))
364 # Catches unknown and filtered nodes.
378 # Catches unknown and filtered nodes.
365 except error.LookupError:
379 except error.LookupError:
366 continue
380 continue
367
381
368 if len(knownsrvheads) == len(srvheadhashes):
382 if len(knownsrvheads) == len(srvheadhashes):
369 ui.debug("all remote heads known locally\n")
383 ui.debug("all remote heads known locally\n")
370 return srvheadhashes, False, srvheadhashes
384 return srvheadhashes, False, srvheadhashes
371
385
372 if len(sample) == len(ownheads) and all(yesno):
386 if len(sample) == len(ownheads) and all(yesno):
373 ui.note(_("all local heads known remotely\n"))
387 ui.note(_("all local heads known remotely\n"))
374 ownheadhashes = [clnode(r) for r in ownheads]
388 ownheadhashes = [clnode(r) for r in ownheads]
375 return ownheadhashes, True, srvheadhashes
389 return ownheadhashes, True, srvheadhashes
376
390
377 # full blown discovery
391 # full blown discovery
378
392
379 disco = partialdiscovery(local, ownheads, remote.limitedarguments)
393 disco = partialdiscovery(local, ownheads, remote.limitedarguments)
380 # treat remote heads (and maybe own heads) as a first implicit sample
394 # treat remote heads (and maybe own heads) as a first implicit sample
381 # response
395 # response
382 disco.addcommons(knownsrvheads)
396 disco.addcommons(knownsrvheads)
383 disco.addinfo(zip(sample, yesno))
397 disco.addinfo(zip(sample, yesno))
384
398
385 full = False
399 full = False
386 progress = ui.makeprogress(_('searching'), unit=_('queries'))
400 progress = ui.makeprogress(_('searching'), unit=_('queries'))
387 while not disco.iscomplete():
401 while not disco.iscomplete():
388
402
389 if full or disco.hasinfo():
403 if full or disco.hasinfo():
390 if full:
404 if full:
391 ui.note(_("sampling from both directions\n"))
405 ui.note(_("sampling from both directions\n"))
392 else:
406 else:
393 ui.debug("taking initial sample\n")
407 ui.debug("taking initial sample\n")
394 samplefunc = disco.takefullsample
408 samplefunc = disco.takefullsample
395 targetsize = fullsamplesize
409 targetsize = fullsamplesize
396 if not remote.limitedarguments:
410 if not remote.limitedarguments:
397 fullsamplesize = int(fullsamplesize * samplegrowth)
411 fullsamplesize = int(fullsamplesize * samplegrowth)
398 else:
412 else:
399 # use even cheaper initial sample
413 # use even cheaper initial sample
400 ui.debug("taking quick initial sample\n")
414 ui.debug("taking quick initial sample\n")
401 samplefunc = disco.takequicksample
415 samplefunc = disco.takequicksample
402 targetsize = initialsamplesize
416 targetsize = initialsamplesize
403 sample = samplefunc(ownheads, targetsize)
417 sample = samplefunc(ownheads, targetsize)
404
418
405 roundtrips += 1
419 roundtrips += 1
406 progress.update(roundtrips)
420 progress.update(roundtrips)
407 stats = disco.stats()
421 stats = disco.stats()
408 ui.debug("query %i; still undecided: %i, sample size is: %i\n"
422 ui.debug("query %i; still undecided: %i, sample size is: %i\n"
409 % (roundtrips, stats['undecided'], len(sample)))
423 % (roundtrips, stats['undecided'], len(sample)))
410
424
411 # indices between sample and externalized version must match
425 # indices between sample and externalized version must match
412 sample = list(sample)
426 sample = list(sample)
413
427
414 with remote.commandexecutor() as e:
428 with remote.commandexecutor() as e:
415 yesno = e.callcommand('known', {
429 yesno = e.callcommand('known', {
416 'nodes': [clnode(r) for r in sample],
430 'nodes': [clnode(r) for r in sample],
417 }).result()
431 }).result()
418
432
419 full = True
433 full = True
420
434
421 disco.addinfo(zip(sample, yesno))
435 disco.addinfo(zip(sample, yesno))
422
436
423 result = disco.commonheads()
437 result = disco.commonheads()
424 elapsed = util.timer() - start
438 elapsed = util.timer() - start
425 progress.complete()
439 progress.complete()
426 ui.debug("%d total queries in %.4fs\n" % (roundtrips, elapsed))
440 ui.debug("%d total queries in %.4fs\n" % (roundtrips, elapsed))
427 msg = ('found %d common and %d unknown server heads,'
441 msg = ('found %d common and %d unknown server heads,'
428 ' %d roundtrips in %.4fs\n')
442 ' %d roundtrips in %.4fs\n')
429 missing = set(result) - set(knownsrvheads)
443 missing = set(result) - set(knownsrvheads)
430 ui.log('discovery', msg, len(result), len(missing), roundtrips,
444 ui.log('discovery', msg, len(result), len(missing), roundtrips,
431 elapsed)
445 elapsed)
432
446
433 if not result and srvheadhashes != [nullid]:
447 if not result and srvheadhashes != [nullid]:
434 if abortwhenunrelated:
448 if abortwhenunrelated:
435 raise error.Abort(_("repository is unrelated"))
449 raise error.Abort(_("repository is unrelated"))
436 else:
450 else:
437 ui.warn(_("warning: repository is unrelated\n"))
451 ui.warn(_("warning: repository is unrelated\n"))
438 return ({nullid}, True, srvheadhashes,)
452 return ({nullid}, True, srvheadhashes,)
439
453
440 anyincoming = (srvheadhashes != [nullid])
454 anyincoming = (srvheadhashes != [nullid])
441 result = {clnode(r) for r in result}
455 result = {clnode(r) for r in result}
442 return result, anyincoming, srvheadhashes
456 return result, anyincoming, srvheadhashes
@@ -1,615 +1,653 b''
1 // discovery.rs
1 // discovery.rs
2 //
2 //
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Discovery operations
8 //! Discovery operations
9 //!
9 //!
10 //! This is a Rust counterpart to the `partialdiscovery` class of
10 //! This is a Rust counterpart to the `partialdiscovery` class of
11 //! `mercurial.setdiscovery`
11 //! `mercurial.setdiscovery`
12
12
13 extern crate rand;
13 extern crate rand;
14 extern crate rand_pcg;
14 extern crate rand_pcg;
15 use self::rand::seq::SliceRandom;
15 use self::rand::seq::SliceRandom;
16 use self::rand::{thread_rng, RngCore, SeedableRng};
16 use self::rand::{thread_rng, RngCore, SeedableRng};
17 use super::{Graph, GraphError, Revision, NULL_REVISION};
17 use super::{Graph, GraphError, Revision, NULL_REVISION};
18 use crate::ancestors::MissingAncestors;
18 use crate::ancestors::MissingAncestors;
19 use crate::dagops;
19 use crate::dagops;
20 use std::cmp::{max, min};
20 use std::cmp::{max, min};
21 use std::collections::{HashMap, HashSet, VecDeque};
21 use std::collections::{HashMap, HashSet, VecDeque};
22
22
23 type Rng = self::rand_pcg::Pcg32;
23 type Rng = self::rand_pcg::Pcg32;
24
24
25 pub struct PartialDiscovery<G: Graph + Clone> {
25 pub struct PartialDiscovery<G: Graph + Clone> {
26 target_heads: Option<Vec<Revision>>,
26 target_heads: Option<Vec<Revision>>,
27 graph: G, // plays the role of self._repo
27 graph: G, // plays the role of self._repo
28 common: MissingAncestors<G>,
28 common: MissingAncestors<G>,
29 undecided: Option<HashSet<Revision>>,
29 undecided: Option<HashSet<Revision>>,
30 children_cache: Option<HashMap<Revision, Vec<Revision>>>,
30 children_cache: Option<HashMap<Revision, Vec<Revision>>>,
31 missing: HashSet<Revision>,
31 missing: HashSet<Revision>,
32 rng: Rng,
32 rng: Rng,
33 respect_size: bool,
33 respect_size: bool,
34 randomize: bool,
34 }
35 }
35
36
36 pub struct DiscoveryStats {
37 pub struct DiscoveryStats {
37 pub undecided: Option<usize>,
38 pub undecided: Option<usize>,
38 }
39 }
39
40
40 /// Update an existing sample to match the expected size
41 /// Update an existing sample to match the expected size
41 ///
42 ///
42 /// The sample is updated with revisions exponentially distant from each
43 /// The sample is updated with revisions exponentially distant from each
43 /// element of `heads`.
44 /// element of `heads`.
44 ///
45 ///
45 /// If a target size is specified, the sampling will stop once this size is
46 /// If a target size is specified, the sampling will stop once this size is
46 /// reached. Otherwise sampling will happen until roots of the <revs> set are
47 /// reached. Otherwise sampling will happen until roots of the <revs> set are
47 /// reached.
48 /// reached.
48 ///
49 ///
49 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
50 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
50 /// represented by `parentfn`
51 /// represented by `parentfn`
51 /// - `heads`: set of DAG head revs
52 /// - `heads`: set of DAG head revs
52 /// - `sample`: a sample to update
53 /// - `sample`: a sample to update
53 /// - `parentfn`: a callable to resolve parents for a revision
54 /// - `parentfn`: a callable to resolve parents for a revision
54 /// - `quicksamplesize`: optional target size of the sample
55 /// - `quicksamplesize`: optional target size of the sample
55 fn update_sample<I>(
56 fn update_sample<I>(
56 revs: Option<&HashSet<Revision>>,
57 revs: Option<&HashSet<Revision>>,
57 heads: impl IntoIterator<Item = Revision>,
58 heads: impl IntoIterator<Item = Revision>,
58 sample: &mut HashSet<Revision>,
59 sample: &mut HashSet<Revision>,
59 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
60 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
60 quicksamplesize: Option<usize>,
61 quicksamplesize: Option<usize>,
61 ) -> Result<(), GraphError>
62 ) -> Result<(), GraphError>
62 where
63 where
63 I: Iterator<Item = Revision>,
64 I: Iterator<Item = Revision>,
64 {
65 {
65 let mut distances: HashMap<Revision, u32> = HashMap::new();
66 let mut distances: HashMap<Revision, u32> = HashMap::new();
66 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
67 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
67 let mut factor: u32 = 1;
68 let mut factor: u32 = 1;
68 let mut seen: HashSet<Revision> = HashSet::new();
69 let mut seen: HashSet<Revision> = HashSet::new();
69 loop {
70 loop {
70 let current = match visit.pop_front() {
71 let current = match visit.pop_front() {
71 None => {
72 None => {
72 break;
73 break;
73 }
74 }
74 Some(r) => r,
75 Some(r) => r,
75 };
76 };
76 if !seen.insert(current) {
77 if !seen.insert(current) {
77 continue;
78 continue;
78 }
79 }
79
80
80 let d = *distances.entry(current).or_insert(1);
81 let d = *distances.entry(current).or_insert(1);
81 if d > factor {
82 if d > factor {
82 factor *= 2;
83 factor *= 2;
83 }
84 }
84 if d == factor {
85 if d == factor {
85 sample.insert(current);
86 sample.insert(current);
86 if let Some(sz) = quicksamplesize {
87 if let Some(sz) = quicksamplesize {
87 if sample.len() >= sz {
88 if sample.len() >= sz {
88 return Ok(());
89 return Ok(());
89 }
90 }
90 }
91 }
91 }
92 }
92 for p in parentsfn(current)? {
93 for p in parentsfn(current)? {
93 if let Some(revs) = revs {
94 if let Some(revs) = revs {
94 if !revs.contains(&p) {
95 if !revs.contains(&p) {
95 continue;
96 continue;
96 }
97 }
97 }
98 }
98 distances.entry(p).or_insert(d + 1);
99 distances.entry(p).or_insert(d + 1);
99 visit.push_back(p);
100 visit.push_back(p);
100 }
101 }
101 }
102 }
102 Ok(())
103 Ok(())
103 }
104 }
104
105
105 struct ParentsIterator {
106 struct ParentsIterator {
106 parents: [Revision; 2],
107 parents: [Revision; 2],
107 cur: usize,
108 cur: usize,
108 }
109 }
109
110
110 impl ParentsIterator {
111 impl ParentsIterator {
111 fn graph_parents(
112 fn graph_parents(
112 graph: &impl Graph,
113 graph: &impl Graph,
113 r: Revision,
114 r: Revision,
114 ) -> Result<ParentsIterator, GraphError> {
115 ) -> Result<ParentsIterator, GraphError> {
115 Ok(ParentsIterator {
116 Ok(ParentsIterator {
116 parents: graph.parents(r)?,
117 parents: graph.parents(r)?,
117 cur: 0,
118 cur: 0,
118 })
119 })
119 }
120 }
120 }
121 }
121
122
122 impl Iterator for ParentsIterator {
123 impl Iterator for ParentsIterator {
123 type Item = Revision;
124 type Item = Revision;
124
125
125 fn next(&mut self) -> Option<Revision> {
126 fn next(&mut self) -> Option<Revision> {
126 if self.cur > 1 {
127 if self.cur > 1 {
127 return None;
128 return None;
128 }
129 }
129 let rev = self.parents[self.cur];
130 let rev = self.parents[self.cur];
130 self.cur += 1;
131 self.cur += 1;
131 if rev == NULL_REVISION {
132 if rev == NULL_REVISION {
132 return self.next();
133 return self.next();
133 }
134 }
134 Some(rev)
135 Some(rev)
135 }
136 }
136 }
137 }
137
138
138 impl<G: Graph + Clone> PartialDiscovery<G> {
139 impl<G: Graph + Clone> PartialDiscovery<G> {
139 /// Create a PartialDiscovery object, with the intent
140 /// Create a PartialDiscovery object, with the intent
140 /// of comparing our `::<target_heads>` revset to the contents of another
141 /// of comparing our `::<target_heads>` revset to the contents of another
141 /// repo.
142 /// repo.
142 ///
143 ///
143 /// For now `target_heads` is passed as a vector, and will be used
144 /// For now `target_heads` is passed as a vector, and will be used
144 /// at the first call to `ensure_undecided()`.
145 /// at the first call to `ensure_undecided()`.
145 ///
146 ///
146 /// If we want to make the signature more flexible,
147 /// If we want to make the signature more flexible,
147 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
148 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
148 /// object since we'll keep it in the meanwhile
149 /// object since we'll keep it in the meanwhile
149 ///
150 ///
150 /// The `respect_size` boolean controls how the sampling methods
151 /// The `respect_size` boolean controls how the sampling methods
151 /// will interpret the size argument requested by the caller. If it's
152 /// will interpret the size argument requested by the caller. If it's
152 /// `false`, they are allowed to produce a sample whose size is more
153 /// `false`, they are allowed to produce a sample whose size is more
153 /// appropriate to the situation (typically bigger).
154 /// appropriate to the situation (typically bigger).
155 ///
156 /// The `randomize` boolean affects sampling, and specifically how
157 /// limiting or last-minute expanding is been done:
158 ///
159 /// If `true`, both will perform random picking from `self.undecided`.
160 /// This is currently the best for actual discoveries.
161 ///
162 /// If `false`, a reproductible picking strategy is performed. This is
163 /// useful for integration tests.
154 pub fn new(
164 pub fn new(
155 graph: G,
165 graph: G,
156 target_heads: Vec<Revision>,
166 target_heads: Vec<Revision>,
157 respect_size: bool,
167 respect_size: bool,
168 randomize: bool,
158 ) -> Self {
169 ) -> Self {
159 let mut seed: [u8; 16] = [0; 16];
170 let mut seed: [u8; 16] = [0; 16];
160 thread_rng().fill_bytes(&mut seed);
171 if randomize {
161 Self::new_with_seed(graph, target_heads, seed, respect_size)
172 thread_rng().fill_bytes(&mut seed);
173 }
174 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
162 }
175 }
163
176
164 pub fn new_with_seed(
177 pub fn new_with_seed(
165 graph: G,
178 graph: G,
166 target_heads: Vec<Revision>,
179 target_heads: Vec<Revision>,
167 seed: [u8; 16],
180 seed: [u8; 16],
168 respect_size: bool,
181 respect_size: bool,
182 randomize: bool,
169 ) -> Self {
183 ) -> Self {
170 PartialDiscovery {
184 PartialDiscovery {
171 undecided: None,
185 undecided: None,
172 children_cache: None,
186 children_cache: None,
173 target_heads: Some(target_heads),
187 target_heads: Some(target_heads),
174 graph: graph.clone(),
188 graph: graph.clone(),
175 common: MissingAncestors::new(graph, vec![]),
189 common: MissingAncestors::new(graph, vec![]),
176 missing: HashSet::new(),
190 missing: HashSet::new(),
177 rng: Rng::from_seed(seed),
191 rng: Rng::from_seed(seed),
178 respect_size: respect_size,
192 respect_size: respect_size,
193 randomize: randomize,
179 }
194 }
180 }
195 }
181
196
182 /// Extract at most `size` random elements from sample and return them
197 /// Extract at most `size` random elements from sample and return them
183 /// as a vector
198 /// as a vector
184 fn limit_sample(
199 fn limit_sample(
185 &mut self,
200 &mut self,
186 mut sample: Vec<Revision>,
201 mut sample: Vec<Revision>,
187 size: usize,
202 size: usize,
188 ) -> Vec<Revision> {
203 ) -> Vec<Revision> {
204 if !self.randomize {
205 sample.sort();
206 sample.truncate(size);
207 return sample;
208 }
189 let sample_len = sample.len();
209 let sample_len = sample.len();
190 if sample_len <= size {
210 if sample_len <= size {
191 return sample;
211 return sample;
192 }
212 }
193 let rng = &mut self.rng;
213 let rng = &mut self.rng;
194 let dropped_size = sample_len - size;
214 let dropped_size = sample_len - size;
195 let limited_slice = if size < dropped_size {
215 let limited_slice = if size < dropped_size {
196 sample.partial_shuffle(rng, size).0
216 sample.partial_shuffle(rng, size).0
197 } else {
217 } else {
198 sample.partial_shuffle(rng, dropped_size).1
218 sample.partial_shuffle(rng, dropped_size).1
199 };
219 };
200 limited_slice.to_owned()
220 limited_slice.to_owned()
201 }
221 }
202
222
203 /// Register revisions known as being common
223 /// Register revisions known as being common
204 pub fn add_common_revisions(
224 pub fn add_common_revisions(
205 &mut self,
225 &mut self,
206 common: impl IntoIterator<Item = Revision>,
226 common: impl IntoIterator<Item = Revision>,
207 ) -> Result<(), GraphError> {
227 ) -> Result<(), GraphError> {
208 self.common.add_bases(common);
228 self.common.add_bases(common);
209 if let Some(ref mut undecided) = self.undecided {
229 if let Some(ref mut undecided) = self.undecided {
210 self.common.remove_ancestors_from(undecided)?;
230 self.common.remove_ancestors_from(undecided)?;
211 }
231 }
212 Ok(())
232 Ok(())
213 }
233 }
214
234
215 /// Register revisions known as being missing
235 /// Register revisions known as being missing
216 pub fn add_missing_revisions(
236 pub fn add_missing_revisions(
217 &mut self,
237 &mut self,
218 missing: impl IntoIterator<Item = Revision>,
238 missing: impl IntoIterator<Item = Revision>,
219 ) -> Result<(), GraphError> {
239 ) -> Result<(), GraphError> {
220 self.ensure_undecided()?;
240 self.ensure_undecided()?;
221 let range = dagops::range(
241 let range = dagops::range(
222 &self.graph,
242 &self.graph,
223 missing,
243 missing,
224 self.undecided.as_ref().unwrap().iter().cloned(),
244 self.undecided.as_ref().unwrap().iter().cloned(),
225 )?;
245 )?;
226 let undecided_mut = self.undecided.as_mut().unwrap();
246 let undecided_mut = self.undecided.as_mut().unwrap();
227 for missrev in range {
247 for missrev in range {
228 self.missing.insert(missrev);
248 self.missing.insert(missrev);
229 undecided_mut.remove(&missrev);
249 undecided_mut.remove(&missrev);
230 }
250 }
231 Ok(())
251 Ok(())
232 }
252 }
233
253
234 /// Do we have any information about the peer?
254 /// Do we have any information about the peer?
235 pub fn has_info(&self) -> bool {
255 pub fn has_info(&self) -> bool {
236 self.common.has_bases()
256 self.common.has_bases()
237 }
257 }
238
258
239 /// Did we acquire full knowledge of our Revisions that the peer has?
259 /// Did we acquire full knowledge of our Revisions that the peer has?
240 pub fn is_complete(&self) -> bool {
260 pub fn is_complete(&self) -> bool {
241 self.undecided.as_ref().map_or(false, |s| s.is_empty())
261 self.undecided.as_ref().map_or(false, |s| s.is_empty())
242 }
262 }
243
263
244 /// Return the heads of the currently known common set of revisions.
264 /// Return the heads of the currently known common set of revisions.
245 ///
265 ///
246 /// If the discovery process is not complete (see `is_complete()`), the
266 /// If the discovery process is not complete (see `is_complete()`), the
247 /// caller must be aware that this is an intermediate state.
267 /// caller must be aware that this is an intermediate state.
248 ///
268 ///
249 /// On the other hand, if it is complete, then this is currently
269 /// On the other hand, if it is complete, then this is currently
250 /// the only way to retrieve the end results of the discovery process.
270 /// the only way to retrieve the end results of the discovery process.
251 ///
271 ///
252 /// We may introduce in the future an `into_common_heads` call that
272 /// We may introduce in the future an `into_common_heads` call that
253 /// would be more appropriate for normal Rust callers, dropping `self`
273 /// would be more appropriate for normal Rust callers, dropping `self`
254 /// if it is complete.
274 /// if it is complete.
255 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
275 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
256 self.common.bases_heads()
276 self.common.bases_heads()
257 }
277 }
258
278
259 /// Force first computation of `self.undecided`
279 /// Force first computation of `self.undecided`
260 ///
280 ///
261 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
281 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
262 /// unwrapped to get workable immutable or mutable references without
282 /// unwrapped to get workable immutable or mutable references without
263 /// any panic.
283 /// any panic.
264 ///
284 ///
265 /// This is an imperative call instead of an access with added lazyness
285 /// This is an imperative call instead of an access with added lazyness
266 /// to reduce easily the scope of mutable borrow for the caller,
286 /// to reduce easily the scope of mutable borrow for the caller,
267 /// compared to undecided(&'a mut self) -> &'a… that would keep it
287 /// compared to undecided(&'a mut self) -> &'a… that would keep it
268 /// as long as the resulting immutable one.
288 /// as long as the resulting immutable one.
269 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
289 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
270 if self.undecided.is_some() {
290 if self.undecided.is_some() {
271 return Ok(());
291 return Ok(());
272 }
292 }
273 let tgt = self.target_heads.take().unwrap();
293 let tgt = self.target_heads.take().unwrap();
274 self.undecided =
294 self.undecided =
275 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
295 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
276 Ok(())
296 Ok(())
277 }
297 }
278
298
279 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
299 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
280 if self.children_cache.is_some() {
300 if self.children_cache.is_some() {
281 return Ok(());
301 return Ok(());
282 }
302 }
283 self.ensure_undecided()?;
303 self.ensure_undecided()?;
284
304
285 let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();
305 let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();
286 for &rev in self.undecided.as_ref().unwrap() {
306 for &rev in self.undecided.as_ref().unwrap() {
287 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
307 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
288 children.entry(p).or_insert_with(|| Vec::new()).push(rev);
308 children.entry(p).or_insert_with(|| Vec::new()).push(rev);
289 }
309 }
290 }
310 }
291 self.children_cache = Some(children);
311 self.children_cache = Some(children);
292 Ok(())
312 Ok(())
293 }
313 }
294
314
295 /// Provide statistics about the current state of the discovery process
315 /// Provide statistics about the current state of the discovery process
296 pub fn stats(&self) -> DiscoveryStats {
316 pub fn stats(&self) -> DiscoveryStats {
297 DiscoveryStats {
317 DiscoveryStats {
298 undecided: self.undecided.as_ref().map(|s| s.len()),
318 undecided: self.undecided.as_ref().map(|s| s.len()),
299 }
319 }
300 }
320 }
301
321
302 pub fn take_quick_sample(
322 pub fn take_quick_sample(
303 &mut self,
323 &mut self,
304 headrevs: impl IntoIterator<Item = Revision>,
324 headrevs: impl IntoIterator<Item = Revision>,
305 size: usize,
325 size: usize,
306 ) -> Result<Vec<Revision>, GraphError> {
326 ) -> Result<Vec<Revision>, GraphError> {
307 self.ensure_undecided()?;
327 self.ensure_undecided()?;
308 let mut sample = {
328 let mut sample = {
309 let undecided = self.undecided.as_ref().unwrap();
329 let undecided = self.undecided.as_ref().unwrap();
310 if undecided.len() <= size {
330 if undecided.len() <= size {
311 return Ok(undecided.iter().cloned().collect());
331 return Ok(undecided.iter().cloned().collect());
312 }
332 }
313 dagops::heads(&self.graph, undecided.iter())?
333 dagops::heads(&self.graph, undecided.iter())?
314 };
334 };
315 if sample.len() >= size {
335 if sample.len() >= size {
316 return Ok(self.limit_sample(sample.into_iter().collect(), size));
336 return Ok(self.limit_sample(sample.into_iter().collect(), size));
317 }
337 }
318 update_sample(
338 update_sample(
319 None,
339 None,
320 headrevs,
340 headrevs,
321 &mut sample,
341 &mut sample,
322 |r| ParentsIterator::graph_parents(&self.graph, r),
342 |r| ParentsIterator::graph_parents(&self.graph, r),
323 Some(size),
343 Some(size),
324 )?;
344 )?;
325 Ok(sample.into_iter().collect())
345 Ok(sample.into_iter().collect())
326 }
346 }
327
347
328 /// Extract a sample from `self.undecided`, going from its heads and roots.
348 /// Extract a sample from `self.undecided`, going from its heads and roots.
329 ///
349 ///
330 /// The `size` parameter is used to avoid useless computations if
350 /// The `size` parameter is used to avoid useless computations if
331 /// it turns out to be bigger than the whole set of undecided Revisions.
351 /// it turns out to be bigger than the whole set of undecided Revisions.
332 ///
352 ///
333 /// The sample is taken by using `update_sample` from the heads, then
353 /// The sample is taken by using `update_sample` from the heads, then
334 /// from the roots, working on the reverse DAG,
354 /// from the roots, working on the reverse DAG,
335 /// expressed by `self.children_cache`.
355 /// expressed by `self.children_cache`.
336 ///
356 ///
337 /// No effort is being made to complete or limit the sample to `size`
357 /// No effort is being made to complete or limit the sample to `size`
338 /// but this method returns another interesting size that it derives
358 /// but this method returns another interesting size that it derives
339 /// from its knowledge of the structure of the various sets, leaving
359 /// from its knowledge of the structure of the various sets, leaving
340 /// to the caller the decision to use it or not.
360 /// to the caller the decision to use it or not.
341 fn bidirectional_sample(
361 fn bidirectional_sample(
342 &mut self,
362 &mut self,
343 size: usize,
363 size: usize,
344 ) -> Result<(HashSet<Revision>, usize), GraphError> {
364 ) -> Result<(HashSet<Revision>, usize), GraphError> {
345 self.ensure_undecided()?;
365 self.ensure_undecided()?;
346 {
366 {
347 // we don't want to compute children_cache before this
367 // we don't want to compute children_cache before this
348 // but doing it after extracting self.undecided takes a mutable
368 // but doing it after extracting self.undecided takes a mutable
349 // ref to self while a shareable one is still active.
369 // ref to self while a shareable one is still active.
350 let undecided = self.undecided.as_ref().unwrap();
370 let undecided = self.undecided.as_ref().unwrap();
351 if undecided.len() <= size {
371 if undecided.len() <= size {
352 return Ok((undecided.clone(), size));
372 return Ok((undecided.clone(), size));
353 }
373 }
354 }
374 }
355
375
356 self.ensure_children_cache()?;
376 self.ensure_children_cache()?;
357 let revs = self.undecided.as_ref().unwrap();
377 let revs = self.undecided.as_ref().unwrap();
358 let mut sample: HashSet<Revision> = revs.clone();
378 let mut sample: HashSet<Revision> = revs.clone();
359
379
360 // it's possible that leveraging the children cache would be more
380 // it's possible that leveraging the children cache would be more
361 // efficient here
381 // efficient here
362 dagops::retain_heads(&self.graph, &mut sample)?;
382 dagops::retain_heads(&self.graph, &mut sample)?;
363 let revsheads = sample.clone(); // was again heads(revs) in python
383 let revsheads = sample.clone(); // was again heads(revs) in python
364
384
365 // update from heads
385 // update from heads
366 update_sample(
386 update_sample(
367 Some(revs),
387 Some(revs),
368 revsheads.iter().cloned(),
388 revsheads.iter().cloned(),
369 &mut sample,
389 &mut sample,
370 |r| ParentsIterator::graph_parents(&self.graph, r),
390 |r| ParentsIterator::graph_parents(&self.graph, r),
371 None,
391 None,
372 )?;
392 )?;
373
393
374 // update from roots
394 // update from roots
375 let revroots: HashSet<Revision> =
395 let revroots: HashSet<Revision> =
376 dagops::roots(&self.graph, revs)?.into_iter().collect();
396 dagops::roots(&self.graph, revs)?.into_iter().collect();
377 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
397 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
378
398
379 let children = self.children_cache.as_ref().unwrap();
399 let children = self.children_cache.as_ref().unwrap();
380 let empty_vec: Vec<Revision> = Vec::new();
400 let empty_vec: Vec<Revision> = Vec::new();
381 update_sample(
401 update_sample(
382 Some(revs),
402 Some(revs),
383 revroots,
403 revroots,
384 &mut sample,
404 &mut sample,
385 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
405 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
386 None,
406 None,
387 )?;
407 )?;
388 Ok((sample, prescribed_size))
408 Ok((sample, prescribed_size))
389 }
409 }
390
410
391 /// Fill up sample up to the wished size with random undecided Revisions.
411 /// Fill up sample up to the wished size with random undecided Revisions.
392 ///
412 ///
393 /// This is intended to be used as a last resort completion if the
413 /// This is intended to be used as a last resort completion if the
394 /// regular sampling algorithm returns too few elements.
414 /// regular sampling algorithm returns too few elements.
395 fn random_complete_sample(
415 fn random_complete_sample(
396 &mut self,
416 &mut self,
397 sample: &mut Vec<Revision>,
417 sample: &mut Vec<Revision>,
398 size: usize,
418 size: usize,
399 ) {
419 ) {
400 let sample_len = sample.len();
420 let sample_len = sample.len();
401 if size <= sample_len {
421 if size <= sample_len {
402 return;
422 return;
403 }
423 }
404 let take_from: Vec<Revision> = self
424 let take_from: Vec<Revision> = self
405 .undecided
425 .undecided
406 .as_ref()
426 .as_ref()
407 .unwrap()
427 .unwrap()
408 .iter()
428 .iter()
409 .filter(|&r| !sample.contains(r))
429 .filter(|&r| !sample.contains(r))
410 .cloned()
430 .cloned()
411 .collect();
431 .collect();
412 sample.extend(self.limit_sample(take_from, size - sample_len));
432 sample.extend(self.limit_sample(take_from, size - sample_len));
413 }
433 }
414
434
415 pub fn take_full_sample(
435 pub fn take_full_sample(
416 &mut self,
436 &mut self,
417 size: usize,
437 size: usize,
418 ) -> Result<Vec<Revision>, GraphError> {
438 ) -> Result<Vec<Revision>, GraphError> {
419 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
439 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
420 let size = if self.respect_size {
440 let size = if self.respect_size {
421 size
441 size
422 } else {
442 } else {
423 prescribed_size
443 prescribed_size
424 };
444 };
425 let mut sample =
445 let mut sample =
426 self.limit_sample(sample_set.into_iter().collect(), size);
446 self.limit_sample(sample_set.into_iter().collect(), size);
427 self.random_complete_sample(&mut sample, size);
447 self.random_complete_sample(&mut sample, size);
428 Ok(sample)
448 Ok(sample)
429 }
449 }
430 }
450 }
431
451
432 #[cfg(test)]
452 #[cfg(test)]
433 mod tests {
453 mod tests {
434 use super::*;
454 use super::*;
435 use crate::testing::SampleGraph;
455 use crate::testing::SampleGraph;
436
456
437 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
457 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
438 ///
458 ///
439 /// To avoid actual randomness in tests, we give it a fixed random seed.
459 /// To avoid actual randomness in these tests, we give it a fixed
460 /// random seed, but by default we'll test the random version.
440 fn full_disco() -> PartialDiscovery<SampleGraph> {
461 fn full_disco() -> PartialDiscovery<SampleGraph> {
441 PartialDiscovery::new_with_seed(
462 PartialDiscovery::new_with_seed(
442 SampleGraph,
463 SampleGraph,
443 vec![10, 11, 12, 13],
464 vec![10, 11, 12, 13],
444 [0; 16],
465 [0; 16],
445 true,
466 true,
467 true,
446 )
468 )
447 }
469 }
448
470
449 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
471 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
450 ///
472 ///
451 /// To avoid actual randomness in tests, we give it a fixed random seed.
473 /// To avoid actual randomness in tests, we give it a fixed random seed.
452 fn disco12() -> PartialDiscovery<SampleGraph> {
474 fn disco12() -> PartialDiscovery<SampleGraph> {
453 PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16], true)
475 PartialDiscovery::new_with_seed(
476 SampleGraph,
477 vec![12],
478 [0; 16],
479 true,
480 true,
481 )
454 }
482 }
455
483
456 fn sorted_undecided(
484 fn sorted_undecided(
457 disco: &PartialDiscovery<SampleGraph>,
485 disco: &PartialDiscovery<SampleGraph>,
458 ) -> Vec<Revision> {
486 ) -> Vec<Revision> {
459 let mut as_vec: Vec<Revision> =
487 let mut as_vec: Vec<Revision> =
460 disco.undecided.as_ref().unwrap().iter().cloned().collect();
488 disco.undecided.as_ref().unwrap().iter().cloned().collect();
461 as_vec.sort();
489 as_vec.sort();
462 as_vec
490 as_vec
463 }
491 }
464
492
465 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
493 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
466 let mut as_vec: Vec<Revision> =
494 let mut as_vec: Vec<Revision> =
467 disco.missing.iter().cloned().collect();
495 disco.missing.iter().cloned().collect();
468 as_vec.sort();
496 as_vec.sort();
469 as_vec
497 as_vec
470 }
498 }
471
499
472 fn sorted_common_heads(
500 fn sorted_common_heads(
473 disco: &PartialDiscovery<SampleGraph>,
501 disco: &PartialDiscovery<SampleGraph>,
474 ) -> Result<Vec<Revision>, GraphError> {
502 ) -> Result<Vec<Revision>, GraphError> {
475 let mut as_vec: Vec<Revision> =
503 let mut as_vec: Vec<Revision> =
476 disco.common_heads()?.iter().cloned().collect();
504 disco.common_heads()?.iter().cloned().collect();
477 as_vec.sort();
505 as_vec.sort();
478 Ok(as_vec)
506 Ok(as_vec)
479 }
507 }
480
508
481 #[test]
509 #[test]
482 fn test_add_common_get_undecided() -> Result<(), GraphError> {
510 fn test_add_common_get_undecided() -> Result<(), GraphError> {
483 let mut disco = full_disco();
511 let mut disco = full_disco();
484 assert_eq!(disco.undecided, None);
512 assert_eq!(disco.undecided, None);
485 assert!(!disco.has_info());
513 assert!(!disco.has_info());
486 assert_eq!(disco.stats().undecided, None);
514 assert_eq!(disco.stats().undecided, None);
487
515
488 disco.add_common_revisions(vec![11, 12])?;
516 disco.add_common_revisions(vec![11, 12])?;
489 assert!(disco.has_info());
517 assert!(disco.has_info());
490 assert!(!disco.is_complete());
518 assert!(!disco.is_complete());
491 assert!(disco.missing.is_empty());
519 assert!(disco.missing.is_empty());
492
520
493 // add_common_revisions did not trigger a premature computation
521 // add_common_revisions did not trigger a premature computation
494 // of `undecided`, let's check that and ask for them
522 // of `undecided`, let's check that and ask for them
495 assert_eq!(disco.undecided, None);
523 assert_eq!(disco.undecided, None);
496 disco.ensure_undecided()?;
524 disco.ensure_undecided()?;
497 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
525 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
498 assert_eq!(disco.stats().undecided, Some(4));
526 assert_eq!(disco.stats().undecided, Some(4));
499 Ok(())
527 Ok(())
500 }
528 }
501
529
502 /// in this test, we pretend that our peer misses exactly (8+10)::
530 /// in this test, we pretend that our peer misses exactly (8+10)::
503 /// and we're comparing all our repo to it (as in a bare push)
531 /// and we're comparing all our repo to it (as in a bare push)
504 #[test]
532 #[test]
505 fn test_discovery() -> Result<(), GraphError> {
533 fn test_discovery() -> Result<(), GraphError> {
506 let mut disco = full_disco();
534 let mut disco = full_disco();
507 disco.add_common_revisions(vec![11, 12])?;
535 disco.add_common_revisions(vec![11, 12])?;
508 disco.add_missing_revisions(vec![8, 10])?;
536 disco.add_missing_revisions(vec![8, 10])?;
509 assert_eq!(sorted_undecided(&disco), vec![5]);
537 assert_eq!(sorted_undecided(&disco), vec![5]);
510 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
538 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
511 assert!(!disco.is_complete());
539 assert!(!disco.is_complete());
512
540
513 disco.add_common_revisions(vec![5])?;
541 disco.add_common_revisions(vec![5])?;
514 assert_eq!(sorted_undecided(&disco), vec![]);
542 assert_eq!(sorted_undecided(&disco), vec![]);
515 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
543 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
516 assert!(disco.is_complete());
544 assert!(disco.is_complete());
517 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
545 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
518 Ok(())
546 Ok(())
519 }
547 }
520
548
521 #[test]
549 #[test]
522 fn test_limit_sample_no_need_to() {
550 fn test_limit_sample_no_need_to() {
523 let sample = vec![1, 2, 3, 4];
551 let sample = vec![1, 2, 3, 4];
524 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
552 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
525 }
553 }
526
554
527 #[test]
555 #[test]
528 fn test_limit_sample_less_than_half() {
556 fn test_limit_sample_less_than_half() {
529 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
557 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
530 }
558 }
531
559
532 #[test]
560 #[test]
533 fn test_limit_sample_more_than_half() {
561 fn test_limit_sample_more_than_half() {
534 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
562 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
535 }
563 }
536
564
537 #[test]
565 #[test]
566 fn test_limit_sample_no_random() {
567 let mut disco = full_disco();
568 disco.randomize = false;
569 assert_eq!(
570 disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
571 vec![1, 3, 5, 7]
572 );
573 }
574
575 #[test]
538 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
576 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
539 let mut disco = full_disco();
577 let mut disco = full_disco();
540 disco.undecided = Some((1..=13).collect());
578 disco.undecided = Some((1..=13).collect());
541
579
542 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
580 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
543 sample_vec.sort();
581 sample_vec.sort();
544 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
582 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
545 Ok(())
583 Ok(())
546 }
584 }
547
585
548 #[test]
586 #[test]
549 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
587 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
550 let mut disco = disco12();
588 let mut disco = disco12();
551 disco.ensure_undecided()?;
589 disco.ensure_undecided()?;
552
590
553 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
591 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
554 sample_vec.sort();
592 sample_vec.sort();
555 // r12's only parent is r9, whose unique grand-parent through the
593 // r12's only parent is r9, whose unique grand-parent through the
556 // diamond shape is r4. This ends there because the distance from r4
594 // diamond shape is r4. This ends there because the distance from r4
557 // to the root is only 3.
595 // to the root is only 3.
558 assert_eq!(sample_vec, vec![4, 9, 12]);
596 assert_eq!(sample_vec, vec![4, 9, 12]);
559 Ok(())
597 Ok(())
560 }
598 }
561
599
562 #[test]
600 #[test]
563 fn test_children_cache() -> Result<(), GraphError> {
601 fn test_children_cache() -> Result<(), GraphError> {
564 let mut disco = full_disco();
602 let mut disco = full_disco();
565 disco.ensure_children_cache()?;
603 disco.ensure_children_cache()?;
566
604
567 let cache = disco.children_cache.unwrap();
605 let cache = disco.children_cache.unwrap();
568 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
606 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
569 assert_eq!(cache.get(&10).cloned(), None);
607 assert_eq!(cache.get(&10).cloned(), None);
570
608
571 let mut children_4 = cache.get(&4).cloned().unwrap();
609 let mut children_4 = cache.get(&4).cloned().unwrap();
572 children_4.sort();
610 children_4.sort();
573 assert_eq!(children_4, vec![5, 6, 7]);
611 assert_eq!(children_4, vec![5, 6, 7]);
574
612
575 let mut children_7 = cache.get(&7).cloned().unwrap();
613 let mut children_7 = cache.get(&7).cloned().unwrap();
576 children_7.sort();
614 children_7.sort();
577 assert_eq!(children_7, vec![9, 11]);
615 assert_eq!(children_7, vec![9, 11]);
578
616
579 Ok(())
617 Ok(())
580 }
618 }
581
619
582 #[test]
620 #[test]
583 fn test_complete_sample() {
621 fn test_complete_sample() {
584 let mut disco = full_disco();
622 let mut disco = full_disco();
585 let undecided: HashSet<Revision> =
623 let undecided: HashSet<Revision> =
586 [4, 7, 9, 2, 3].iter().cloned().collect();
624 [4, 7, 9, 2, 3].iter().cloned().collect();
587 disco.undecided = Some(undecided);
625 disco.undecided = Some(undecided);
588
626
589 let mut sample = vec![0];
627 let mut sample = vec![0];
590 disco.random_complete_sample(&mut sample, 3);
628 disco.random_complete_sample(&mut sample, 3);
591 assert_eq!(sample.len(), 3);
629 assert_eq!(sample.len(), 3);
592
630
593 let mut sample = vec![2, 4, 7];
631 let mut sample = vec![2, 4, 7];
594 disco.random_complete_sample(&mut sample, 1);
632 disco.random_complete_sample(&mut sample, 1);
595 assert_eq!(sample.len(), 3);
633 assert_eq!(sample.len(), 3);
596 }
634 }
597
635
598 #[test]
636 #[test]
599 fn test_bidirectional_sample() -> Result<(), GraphError> {
637 fn test_bidirectional_sample() -> Result<(), GraphError> {
600 let mut disco = full_disco();
638 let mut disco = full_disco();
601 disco.undecided = Some((0..=13).into_iter().collect());
639 disco.undecided = Some((0..=13).into_iter().collect());
602
640
603 let (sample_set, size) = disco.bidirectional_sample(7)?;
641 let (sample_set, size) = disco.bidirectional_sample(7)?;
604 assert_eq!(size, 7);
642 assert_eq!(size, 7);
605 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
643 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
606 sample.sort();
644 sample.sort();
607 // our DAG is a bit too small for the results to be really interesting
645 // our DAG is a bit too small for the results to be really interesting
608 // at least it shows that
646 // at least it shows that
609 // - we went both ways
647 // - we went both ways
610 // - we didn't take all Revisions (6 is not in the sample)
648 // - we didn't take all Revisions (6 is not in the sample)
611 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
649 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
612 Ok(())
650 Ok(())
613 }
651 }
614
652
615 }
653 }
@@ -1,161 +1,163 b''
1 // discovery.rs
1 // discovery.rs
2 //
2 //
3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Bindings for the `hg::discovery` module provided by the
8 //! Bindings for the `hg::discovery` module provided by the
9 //! `hg-core` crate. From Python, this will be seen as `rustext.discovery`
9 //! `hg-core` crate. From Python, this will be seen as `rustext.discovery`
10 //!
10 //!
11 //! # Classes visible from Python:
11 //! # Classes visible from Python:
12 //! - [`PartialDiscover`] is the Rust implementation of
12 //! - [`PartialDiscover`] is the Rust implementation of
13 //! `mercurial.setdiscovery.partialdiscovery`.
13 //! `mercurial.setdiscovery.partialdiscovery`.
14
14
15 use crate::{
15 use crate::{
16 cindex::Index,
16 cindex::Index,
17 conversion::{py_set, rev_pyiter_collect},
17 conversion::{py_set, rev_pyiter_collect},
18 exceptions::GraphError,
18 exceptions::GraphError,
19 };
19 };
20 use cpython::{
20 use cpython::{
21 ObjectProtocol, PyDict, PyModule, PyObject, PyResult, PyTuple, Python,
21 ObjectProtocol, PyDict, PyModule, PyObject, PyResult, PyTuple, Python,
22 PythonObject, ToPyObject,
22 PythonObject, ToPyObject,
23 };
23 };
24 use hg::discovery::PartialDiscovery as CorePartialDiscovery;
24 use hg::discovery::PartialDiscovery as CorePartialDiscovery;
25 use hg::Revision;
25 use hg::Revision;
26
26
27 use std::cell::RefCell;
27 use std::cell::RefCell;
28
28
29 py_class!(pub class PartialDiscovery |py| {
29 py_class!(pub class PartialDiscovery |py| {
30 data inner: RefCell<Box<CorePartialDiscovery<Index>>>;
30 data inner: RefCell<Box<CorePartialDiscovery<Index>>>;
31
31
32 // `_respectsize` is currently only here to replicate the Python API and
32 // `_respectsize` is currently only here to replicate the Python API and
33 // will be used in future patches inside methods that are yet to be
33 // will be used in future patches inside methods that are yet to be
34 // implemented.
34 // implemented.
35 def __new__(
35 def __new__(
36 _cls,
36 _cls,
37 repo: PyObject,
37 repo: PyObject,
38 targetheads: PyObject,
38 targetheads: PyObject,
39 respectsize: bool
39 respectsize: bool,
40 randomize: bool = true
40 ) -> PyResult<PartialDiscovery> {
41 ) -> PyResult<PartialDiscovery> {
41 let index = repo.getattr(py, "changelog")?.getattr(py, "index")?;
42 let index = repo.getattr(py, "changelog")?.getattr(py, "index")?;
42 Self::create_instance(
43 Self::create_instance(
43 py,
44 py,
44 RefCell::new(Box::new(CorePartialDiscovery::new(
45 RefCell::new(Box::new(CorePartialDiscovery::new(
45 Index::new(py, index)?,
46 Index::new(py, index)?,
46 rev_pyiter_collect(py, &targetheads)?,
47 rev_pyiter_collect(py, &targetheads)?,
47 respectsize
48 respectsize,
49 randomize,
48 )))
50 )))
49 )
51 )
50 }
52 }
51
53
52 def addcommons(&self, commons: PyObject) -> PyResult<PyObject> {
54 def addcommons(&self, commons: PyObject) -> PyResult<PyObject> {
53 let mut inner = self.inner(py).borrow_mut();
55 let mut inner = self.inner(py).borrow_mut();
54 let commons_vec: Vec<Revision> = rev_pyiter_collect(py, &commons)?;
56 let commons_vec: Vec<Revision> = rev_pyiter_collect(py, &commons)?;
55 inner.add_common_revisions(commons_vec)
57 inner.add_common_revisions(commons_vec)
56 .map_err(|e| GraphError::pynew(py, e))?;
58 .map_err(|e| GraphError::pynew(py, e))?;
57 Ok(py.None())
59 Ok(py.None())
58 }
60 }
59
61
60 def addmissings(&self, missings: PyObject) -> PyResult<PyObject> {
62 def addmissings(&self, missings: PyObject) -> PyResult<PyObject> {
61 let mut inner = self.inner(py).borrow_mut();
63 let mut inner = self.inner(py).borrow_mut();
62 let missings_vec: Vec<Revision> = rev_pyiter_collect(py, &missings)?;
64 let missings_vec: Vec<Revision> = rev_pyiter_collect(py, &missings)?;
63 inner.add_missing_revisions(missings_vec)
65 inner.add_missing_revisions(missings_vec)
64 .map_err(|e| GraphError::pynew(py, e))?;
66 .map_err(|e| GraphError::pynew(py, e))?;
65 Ok(py.None())
67 Ok(py.None())
66 }
68 }
67
69
68 def addinfo(&self, sample: PyObject) -> PyResult<PyObject> {
70 def addinfo(&self, sample: PyObject) -> PyResult<PyObject> {
69 let mut missing: Vec<Revision> = Vec::new();
71 let mut missing: Vec<Revision> = Vec::new();
70 let mut common: Vec<Revision> = Vec::new();
72 let mut common: Vec<Revision> = Vec::new();
71 for info in sample.iter(py)? { // info is a pair (Revision, bool)
73 for info in sample.iter(py)? { // info is a pair (Revision, bool)
72 let mut revknown = info?.iter(py)?;
74 let mut revknown = info?.iter(py)?;
73 let rev: Revision = revknown.next().unwrap()?.extract(py)?;
75 let rev: Revision = revknown.next().unwrap()?.extract(py)?;
74 let known: bool = revknown.next().unwrap()?.extract(py)?;
76 let known: bool = revknown.next().unwrap()?.extract(py)?;
75 if known {
77 if known {
76 common.push(rev);
78 common.push(rev);
77 } else {
79 } else {
78 missing.push(rev);
80 missing.push(rev);
79 }
81 }
80 }
82 }
81 let mut inner = self.inner(py).borrow_mut();
83 let mut inner = self.inner(py).borrow_mut();
82 inner.add_common_revisions(common)
84 inner.add_common_revisions(common)
83 .map_err(|e| GraphError::pynew(py, e))?;
85 .map_err(|e| GraphError::pynew(py, e))?;
84 inner.add_missing_revisions(missing)
86 inner.add_missing_revisions(missing)
85 .map_err(|e| GraphError::pynew(py, e))?;
87 .map_err(|e| GraphError::pynew(py, e))?;
86 Ok(py.None())
88 Ok(py.None())
87 }
89 }
88
90
89 def hasinfo(&self) -> PyResult<bool> {
91 def hasinfo(&self) -> PyResult<bool> {
90 Ok(self.inner(py).borrow().has_info())
92 Ok(self.inner(py).borrow().has_info())
91 }
93 }
92
94
93 def iscomplete(&self) -> PyResult<bool> {
95 def iscomplete(&self) -> PyResult<bool> {
94 Ok(self.inner(py).borrow().is_complete())
96 Ok(self.inner(py).borrow().is_complete())
95 }
97 }
96
98
97 def stats(&self) -> PyResult<PyDict> {
99 def stats(&self) -> PyResult<PyDict> {
98 let stats = self.inner(py).borrow().stats();
100 let stats = self.inner(py).borrow().stats();
99 let as_dict: PyDict = PyDict::new(py);
101 let as_dict: PyDict = PyDict::new(py);
100 as_dict.set_item(py, "undecided",
102 as_dict.set_item(py, "undecided",
101 stats.undecided.map(
103 stats.undecided.map(
102 |l| l.to_py_object(py).into_object())
104 |l| l.to_py_object(py).into_object())
103 .unwrap_or_else(|| py.None()))?;
105 .unwrap_or_else(|| py.None()))?;
104 Ok(as_dict)
106 Ok(as_dict)
105 }
107 }
106
108
107 def commonheads(&self) -> PyResult<PyObject> {
109 def commonheads(&self) -> PyResult<PyObject> {
108 py_set(
110 py_set(
109 py,
111 py,
110 &self.inner(py).borrow().common_heads()
112 &self.inner(py).borrow().common_heads()
111 .map_err(|e| GraphError::pynew(py, e))?
113 .map_err(|e| GraphError::pynew(py, e))?
112 )
114 )
113 }
115 }
114
116
115 def takefullsample(&self, _headrevs: PyObject,
117 def takefullsample(&self, _headrevs: PyObject,
116 size: usize) -> PyResult<PyObject> {
118 size: usize) -> PyResult<PyObject> {
117 let mut inner = self.inner(py).borrow_mut();
119 let mut inner = self.inner(py).borrow_mut();
118 let sample = inner.take_full_sample(size)
120 let sample = inner.take_full_sample(size)
119 .map_err(|e| GraphError::pynew(py, e))?;
121 .map_err(|e| GraphError::pynew(py, e))?;
120 let as_vec: Vec<PyObject> = sample
122 let as_vec: Vec<PyObject> = sample
121 .iter()
123 .iter()
122 .map(|rev| rev.to_py_object(py).into_object())
124 .map(|rev| rev.to_py_object(py).into_object())
123 .collect();
125 .collect();
124 Ok(PyTuple::new(py, as_vec.as_slice()).into_object())
126 Ok(PyTuple::new(py, as_vec.as_slice()).into_object())
125 }
127 }
126
128
127 def takequicksample(&self, headrevs: PyObject,
129 def takequicksample(&self, headrevs: PyObject,
128 size: usize) -> PyResult<PyObject> {
130 size: usize) -> PyResult<PyObject> {
129 let mut inner = self.inner(py).borrow_mut();
131 let mut inner = self.inner(py).borrow_mut();
130 let revsvec: Vec<Revision> = rev_pyiter_collect(py, &headrevs)?;
132 let revsvec: Vec<Revision> = rev_pyiter_collect(py, &headrevs)?;
131 let sample = inner.take_quick_sample(revsvec, size)
133 let sample = inner.take_quick_sample(revsvec, size)
132 .map_err(|e| GraphError::pynew(py, e))?;
134 .map_err(|e| GraphError::pynew(py, e))?;
133 let as_vec: Vec<PyObject> = sample
135 let as_vec: Vec<PyObject> = sample
134 .iter()
136 .iter()
135 .map(|rev| rev.to_py_object(py).into_object())
137 .map(|rev| rev.to_py_object(py).into_object())
136 .collect();
138 .collect();
137 Ok(PyTuple::new(py, as_vec.as_slice()).into_object())
139 Ok(PyTuple::new(py, as_vec.as_slice()).into_object())
138 }
140 }
139
141
140 });
142 });
141
143
142 /// Create the module, with __package__ given from parent
144 /// Create the module, with __package__ given from parent
143 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
145 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
144 let dotted_name = &format!("{}.discovery", package);
146 let dotted_name = &format!("{}.discovery", package);
145 let m = PyModule::new(py, dotted_name)?;
147 let m = PyModule::new(py, dotted_name)?;
146 m.add(py, "__package__", package)?;
148 m.add(py, "__package__", package)?;
147 m.add(
149 m.add(
148 py,
150 py,
149 "__doc__",
151 "__doc__",
150 "Discovery of common node sets - Rust implementation",
152 "Discovery of common node sets - Rust implementation",
151 )?;
153 )?;
152 m.add_class::<PartialDiscovery>(py)?;
154 m.add_class::<PartialDiscovery>(py)?;
153
155
154 let sys = PyModule::import(py, "sys")?;
156 let sys = PyModule::import(py, "sys")?;
155 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
157 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
156 sys_modules.set_item(py, dotted_name, &m)?;
158 sys_modules.set_item(py, dotted_name, &m)?;
157 // Example C code (see pyexpat.c and import.c) will "give away the
159 // Example C code (see pyexpat.c and import.c) will "give away the
158 // reference", but we won't because it will be consumed once the
160 // reference", but we won't because it will be consumed once the
159 // Rust PyObject is dropped.
161 // Rust PyObject is dropped.
160 Ok(m)
162 Ok(m)
161 }
163 }
@@ -1,108 +1,111 b''
1 from __future__ import absolute_import
1 from __future__ import absolute_import
2 import unittest
2 import unittest
3
3
4 from mercurial import policy
4 from mercurial import policy
5
5
6 PartialDiscovery = policy.importrust('discovery', member='PartialDiscovery')
6 PartialDiscovery = policy.importrust('discovery', member='PartialDiscovery')
7
7
8 try:
8 try:
9 from mercurial.cext import parsers as cparsers
9 from mercurial.cext import parsers as cparsers
10 except ImportError:
10 except ImportError:
11 cparsers = None
11 cparsers = None
12
12
13 # picked from test-parse-index2, copied rather than imported
13 # picked from test-parse-index2, copied rather than imported
14 # so that it stays stable even if test-parse-index2 changes or disappears.
14 # so that it stays stable even if test-parse-index2 changes or disappears.
15 data_non_inlined = (
15 data_non_inlined = (
16 b'\x00\x00\x00\x01\x00\x00\x00\x00\x00\x01D\x19'
16 b'\x00\x00\x00\x01\x00\x00\x00\x00\x00\x01D\x19'
17 b'\x00\x07e\x12\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff'
17 b'\x00\x07e\x12\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff'
18 b'\xff\xff\xff\xff\xd1\xf4\xbb\xb0\xbe\xfc\x13\xbd\x8c\xd3\x9d'
18 b'\xff\xff\xff\xff\xd1\xf4\xbb\xb0\xbe\xfc\x13\xbd\x8c\xd3\x9d'
19 b'\x0f\xcd\xd9;\x8c\x07\x8cJ/\x00\x00\x00\x00\x00\x00\x00\x00\x00'
19 b'\x0f\xcd\xd9;\x8c\x07\x8cJ/\x00\x00\x00\x00\x00\x00\x00\x00\x00'
20 b'\x00\x00\x00\x00\x00\x00\x01D\x19\x00\x00\x00\x00\x00\xdf\x00'
20 b'\x00\x00\x00\x00\x00\x00\x01D\x19\x00\x00\x00\x00\x00\xdf\x00'
21 b'\x00\x01q\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\xff'
21 b'\x00\x01q\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\xff'
22 b'\xff\xff\xff\xc1\x12\xb9\x04\x96\xa4Z1t\x91\xdfsJ\x90\xf0\x9bh'
22 b'\xff\xff\xff\xc1\x12\xb9\x04\x96\xa4Z1t\x91\xdfsJ\x90\xf0\x9bh'
23 b'\x07l&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
23 b'\x07l&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
24 b'\x00\x01D\xf8\x00\x00\x00\x00\x01\x1b\x00\x00\x01\xb8\x00\x00'
24 b'\x00\x01D\xf8\x00\x00\x00\x00\x01\x1b\x00\x00\x01\xb8\x00\x00'
25 b'\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\xff\xff\xff\xff\x02\n'
25 b'\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\xff\xff\xff\xff\x02\n'
26 b'\x0e\xc6&\xa1\x92\xae6\x0b\x02i\xfe-\xe5\xbao\x05\xd1\xe7\x00'
26 b'\x0e\xc6&\xa1\x92\xae6\x0b\x02i\xfe-\xe5\xbao\x05\xd1\xe7\x00'
27 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01F'
27 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01F'
28 b'\x13\x00\x00\x00\x00\x01\xec\x00\x00\x03\x06\x00\x00\x00\x01'
28 b'\x13\x00\x00\x00\x00\x01\xec\x00\x00\x03\x06\x00\x00\x00\x01'
29 b'\x00\x00\x00\x03\x00\x00\x00\x02\xff\xff\xff\xff\x12\xcb\xeby1'
29 b'\x00\x00\x00\x03\x00\x00\x00\x02\xff\xff\xff\xff\x12\xcb\xeby1'
30 b'\xb6\r\x98B\xcb\x07\xbd`\x8f\x92\xd9\xc4\x84\xbdK\x00\x00\x00'
30 b'\xb6\r\x98B\xcb\x07\xbd`\x8f\x92\xd9\xc4\x84\xbdK\x00\x00\x00'
31 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00'
31 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00'
32 )
32 )
33
33
34 class fakerepo(object):
34 class fakerepo(object):
35 def __init__(self, idx):
35 def __init__(self, idx):
36 """Just make so that self.changelog.index is the given idx."""
36 """Just make so that self.changelog.index is the given idx."""
37 self.index = idx
37 self.index = idx
38 self.changelog = self
38 self.changelog = self
39
39
40 @unittest.skipIf(PartialDiscovery is None or cparsers is None,
40 @unittest.skipIf(PartialDiscovery is None or cparsers is None,
41 "rustext or the C Extension parsers module "
41 "rustext or the C Extension parsers module "
42 "discovery relies on is not available")
42 "discovery relies on is not available")
43 class rustdiscoverytest(unittest.TestCase):
43 class rustdiscoverytest(unittest.TestCase):
44 """Test the correctness of binding to Rust code.
44 """Test the correctness of binding to Rust code.
45
45
46 This test is merely for the binding to Rust itself: extraction of
46 This test is merely for the binding to Rust itself: extraction of
47 Python variable, giving back the results etc.
47 Python variable, giving back the results etc.
48
48
49 It is not meant to test the algorithmic correctness of the provided
49 It is not meant to test the algorithmic correctness of the provided
50 methods. Hence the very simple embedded index data is good enough.
50 methods. Hence the very simple embedded index data is good enough.
51
51
52 Algorithmic correctness is asserted by the Rust unit tests.
52 Algorithmic correctness is asserted by the Rust unit tests.
53 """
53 """
54
54
55 def parseindex(self):
55 def parseindex(self):
56 return cparsers.parse_index2(data_non_inlined, False)[0]
56 return cparsers.parse_index2(data_non_inlined, False)[0]
57
57
58 def repo(self):
58 def repo(self):
59 return fakerepo(self.parseindex())
59 return fakerepo(self.parseindex())
60
60
61 def testindex(self):
61 def testindex(self):
62 idx = self.parseindex()
62 idx = self.parseindex()
63 # checking our assumptions about the index binary data:
63 # checking our assumptions about the index binary data:
64 self.assertEqual({i: (r[5], r[6]) for i, r in enumerate(idx)},
64 self.assertEqual({i: (r[5], r[6]) for i, r in enumerate(idx)},
65 {0: (-1, -1),
65 {0: (-1, -1),
66 1: (0, -1),
66 1: (0, -1),
67 2: (1, -1),
67 2: (1, -1),
68 3: (2, -1)})
68 3: (2, -1)})
69
69
70 def testaddcommonsmissings(self):
70 def testaddcommonsmissings(self):
71 disco = PartialDiscovery(self.repo(), [3], True)
71 disco = PartialDiscovery(self.repo(), [3], True)
72 self.assertFalse(disco.hasinfo())
72 self.assertFalse(disco.hasinfo())
73 self.assertFalse(disco.iscomplete())
73 self.assertFalse(disco.iscomplete())
74
74
75 disco.addcommons([1])
75 disco.addcommons([1])
76 self.assertTrue(disco.hasinfo())
76 self.assertTrue(disco.hasinfo())
77 self.assertFalse(disco.iscomplete())
77 self.assertFalse(disco.iscomplete())
78
78
79 disco.addmissings([2])
79 disco.addmissings([2])
80 self.assertTrue(disco.hasinfo())
80 self.assertTrue(disco.hasinfo())
81 self.assertTrue(disco.iscomplete())
81 self.assertTrue(disco.iscomplete())
82
82
83 self.assertEqual(disco.commonheads(), {1})
83 self.assertEqual(disco.commonheads(), {1})
84
84
85 def testaddmissingsstats(self):
85 def testaddmissingsstats(self):
86 disco = PartialDiscovery(self.repo(), [3], True)
86 disco = PartialDiscovery(self.repo(), [3], True)
87 self.assertIsNone(disco.stats()['undecided'], None)
87 self.assertIsNone(disco.stats()['undecided'], None)
88
88
89 disco.addmissings([2])
89 disco.addmissings([2])
90 self.assertEqual(disco.stats()['undecided'], 2)
90 self.assertEqual(disco.stats()['undecided'], 2)
91
91
92 def testaddinfocommonfirst(self):
92 def testaddinfocommonfirst(self):
93 disco = PartialDiscovery(self.repo(), [3], True)
93 disco = PartialDiscovery(self.repo(), [3], True)
94 disco.addinfo([(1, True), (2, False)])
94 disco.addinfo([(1, True), (2, False)])
95 self.assertTrue(disco.hasinfo())
95 self.assertTrue(disco.hasinfo())
96 self.assertTrue(disco.iscomplete())
96 self.assertTrue(disco.iscomplete())
97 self.assertEqual(disco.commonheads(), {1})
97 self.assertEqual(disco.commonheads(), {1})
98
98
99 def testaddinfomissingfirst(self):
99 def testaddinfomissingfirst(self):
100 disco = PartialDiscovery(self.repo(), [3], True)
100 disco = PartialDiscovery(self.repo(), [3], True)
101 disco.addinfo([(2, False), (1, True)])
101 disco.addinfo([(2, False), (1, True)])
102 self.assertTrue(disco.hasinfo())
102 self.assertTrue(disco.hasinfo())
103 self.assertTrue(disco.iscomplete())
103 self.assertTrue(disco.iscomplete())
104 self.assertEqual(disco.commonheads(), {1})
104 self.assertEqual(disco.commonheads(), {1})
105
105
106 def testinitnorandom(self):
107 PartialDiscovery(self.repo(), [3], True, randomize=False)
108
106 if __name__ == '__main__':
109 if __name__ == '__main__':
107 import silenttestrunner
110 import silenttestrunner
108 silenttestrunner.main(__name__)
111 silenttestrunner.main(__name__)
General Comments 0
You need to be logged in to leave comments. Login now