##// END OF EJS Templates
discovery: moved sampling functions inside discovery object...
Georges Racinet -
r42045:e5ece0f4 default
parent child Browse files
Show More
@@ -92,69 +92,6 b' def _updatesample(revs, heads, sample, p'
92 92 dist.setdefault(p, d + 1)
93 93 visit.append(p)
94 94
95 def _takequicksample(repo, headrevs, revs, size):
96 """takes a quick sample of size <size>
97
98 It is meant for initial sampling and focuses on querying heads and close
99 ancestors of heads.
100
101 :dag: a dag object
102 :headrevs: set of head revisions in local DAG to consider
103 :revs: set of revs to discover
104 :size: the maximum size of the sample"""
105 if len(revs) <= size:
106 return list(revs)
107 sample = set(repo.revs('heads(%ld)', revs))
108
109 if len(sample) >= size:
110 return _limitsample(sample, size)
111
112 _updatesample(None, headrevs, sample, repo.changelog.parentrevs,
113 quicksamplesize=size)
114 return sample
115
116 def _takefullsample(repo, headrevs, revs, size):
117 if len(revs) <= size:
118 return list(revs)
119 sample = set(repo.revs('heads(%ld)', revs))
120
121 # update from heads
122 revsheads = set(repo.revs('heads(%ld)', revs))
123 _updatesample(revs, revsheads, sample, repo.changelog.parentrevs)
124
125 # update from roots
126 revsroots = set(repo.revs('roots(%ld)', revs))
127
128 # _updatesample() essentially does interaction over revisions to look up
129 # their children. This lookup is expensive and doing it in a loop is
130 # quadratic. We precompute the children for all relevant revisions and
131 # make the lookup in _updatesample() a simple dict lookup.
132 #
133 # Because this function can be called multiple times during discovery, we
134 # may still perform redundant work and there is room to optimize this by
135 # keeping a persistent cache of children across invocations.
136 children = {}
137
138 parentrevs = repo.changelog.parentrevs
139 for rev in repo.changelog.revs(start=min(revsroots)):
140 # Always ensure revision has an entry so we don't need to worry about
141 # missing keys.
142 children.setdefault(rev, [])
143
144 for prev in parentrevs(rev):
145 if prev == nullrev:
146 continue
147
148 children.setdefault(prev, []).append(rev)
149
150 _updatesample(revs, revsroots, sample, children.__getitem__)
151 assert sample
152 sample = _limitsample(sample, size)
153 if len(sample) < size:
154 more = size - len(sample)
155 sample.update(random.sample(list(revs - sample), more))
156 return sample
157
158 95 def _limitsample(sample, desiredlen):
159 96 """return a random subset of sample of at most desiredlen item"""
160 97 if len(sample) > desiredlen:
@@ -228,6 +165,70 b' class partialdiscovery(object):'
228 165 # common.bases and all its ancestors
229 166 return self._common.basesheads()
230 167
168 def takequicksample(self, headrevs, size):
169 """takes a quick sample of size <size>
170
171 It is meant for initial sampling and focuses on querying heads and close
172 ancestors of heads.
173
174 :headrevs: set of head revisions in local DAG to consider
175 :size: the maximum size of the sample"""
176 revs = self.undecided
177 if len(revs) <= size:
178 return list(revs)
179 sample = set(self._repo.revs('heads(%ld)', revs))
180
181 if len(sample) >= size:
182 return _limitsample(sample, size)
183
184 _updatesample(None, headrevs, sample, self._repo.changelog.parentrevs,
185 quicksamplesize=size)
186 return sample
187
188 def takefullsample(self, headrevs, size):
189 revs = self.undecided
190 if len(revs) <= size:
191 return list(revs)
192 repo = self._repo
193 sample = set(repo.revs('heads(%ld)', revs))
194
195 # update from heads
196 revsheads = set(repo.revs('heads(%ld)', revs))
197 _updatesample(revs, revsheads, sample, repo.changelog.parentrevs)
198
199 # update from roots
200 revsroots = set(repo.revs('roots(%ld)', revs))
201
202 # _updatesample() essentially does interaction over revisions to look
203 # up their children. This lookup is expensive and doing it in a loop is
204 # quadratic. We precompute the children for all relevant revisions and
205 # make the lookup in _updatesample() a simple dict lookup.
206 #
207 # Because this function can be called multiple times during discovery,
208 # we may still perform redundant work and there is room to optimize
209 # this by keeping a persistent cache of children across invocations.
210 children = {}
211
212 parentrevs = repo.changelog.parentrevs
213 for rev in repo.changelog.revs(start=min(revsroots)):
214 # Always ensure revision has an entry so we don't need to worry
215 # about missing keys.
216 children.setdefault(rev, [])
217
218 for prev in parentrevs(rev):
219 if prev == nullrev:
220 continue
221
222 children.setdefault(prev, []).append(rev)
223
224 _updatesample(revs, revsroots, sample, children.__getitem__)
225 assert sample
226 sample = _limitsample(sample, size)
227 if len(sample) < size:
228 more = size - len(sample)
229 sample.update(random.sample(list(revs - sample), more))
230 return sample
231
231 232 def findcommonheads(ui, local, remote,
232 233 initialsamplesize=100,
233 234 fullsamplesize=200,
@@ -309,14 +310,14 b' def findcommonheads(ui, local, remote,'
309 310 ui.note(_("sampling from both directions\n"))
310 311 else:
311 312 ui.debug("taking initial sample\n")
312 samplefunc = _takefullsample
313 samplefunc = disco.takefullsample
313 314 targetsize = fullsamplesize
314 315 else:
315 316 # use even cheaper initial sample
316 317 ui.debug("taking quick initial sample\n")
317 samplefunc = _takequicksample
318 samplefunc = disco.takequicksample
318 319 targetsize = initialsamplesize
319 sample = samplefunc(local, ownheads, disco.undecided, targetsize)
320 sample = samplefunc(ownheads, targetsize)
320 321
321 322 roundtrips += 1
322 323 progress.update(roundtrips)
General Comments 0
You need to be logged in to leave comments. Login now