##// END OF EJS Templates
dagop: comment why revancestors() doesn't heapify input revs at once...
Yuya Nishihara -
r32998:c7da57bb default
parent child Browse files
Show More
@@ -1,424 +1,427 b''
1 1 # dagop.py - graph ancestry and topology algorithm for revset
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11
12 12 from . import (
13 13 error,
14 14 mdiff,
15 15 node,
16 16 patch,
17 17 smartset,
18 18 )
19 19
20 20 baseset = smartset.baseset
21 21 generatorset = smartset.generatorset
22 22
def _genrevancestors(repo, revs, followfirst):
    """Yield ancestors of 'revs' (the input revs included), highest
    revision number first.

    If followfirst is true, only the first parent of each changeset is
    considered when walking up the graph.
    """
    if followfirst:
        # slice parent tuples to their first element only
        cut = 1
    else:
        cut = None
    cl = repo.changelog

    # load input revs lazily to heap so earlier revisions can be yielded
    # without fully computing the input revs
    revs.sort(reverse=True)
    irevs = iter(revs)
    h = []  # max-heap of pending revisions (values stored negated)

    inputrev = next(irevs, None)
    if inputrev is not None:
        heapq.heappush(h, -inputrev)

    seen = set()
    while h:
        current = -heapq.heappop(h)
        if current == inputrev:
            # we've reached the latest loaded input rev; pull the next one
            # onto the heap (input is sorted descending, so any larger rev
            # has already been processed)
            inputrev = next(irevs, None)
            if inputrev is not None:
                heapq.heappush(h, -inputrev)
        if current not in seen:
            seen.add(current)
            yield current
            try:
                for parent in cl.parentrevs(current)[:cut]:
                    if parent != node.nullrev:
                        heapq.heappush(h, -parent)
            except error.WdirUnsupported:
                # working-directory revision has no changelog entry; get
                # parents from the context objects instead
                for parent in repo[current].parents()[:cut]:
                    if parent.rev() != node.nullrev:
                        heapq.heappush(h, -parent.rev())
55 58
def revancestors(repo, revs, followfirst):
    """Like revlog.ancestors(), but supports followfirst."""
    return generatorset(_genrevancestors(repo, revs, followfirst),
                        iterasc=False)
60 63
def revdescendants(repo, revs, followfirst):
    """Like revlog.descendants() but supports followfirst."""
    cut = 1 if followfirst else None

    def gen():
        cl = repo.changelog
        # XXX this should be 'parentset.min()' assuming 'parentset' is a
        # smartset (and if it is not, it should.)
        lowest = min(revs)
        nullrev = node.nullrev
        if lowest == nullrev:
            # Are there nodes with a null first parent and a non-null
            # second one? Maybe. Do we care? Probably not.
            for rev in cl:
                yield rev
        else:
            seen = set(revs)
            for rev in cl.revs(lowest + 1):
                for prev in cl.parentrevs(rev)[:cut]:
                    if prev != nullrev and prev in seen:
                        seen.add(rev)
                        yield rev
                        break

    return generatorset(gen(), iterasc=True)
89 92
90 93 def _reachablerootspure(repo, minroot, roots, heads, includepath):
91 94 """return (heads(::<roots> and ::<heads>))
92 95
93 96 If includepath is True, return (<roots>::<heads>)."""
94 97 if not roots:
95 98 return []
96 99 parentrevs = repo.changelog.parentrevs
97 100 roots = set(roots)
98 101 visit = list(heads)
99 102 reachable = set()
100 103 seen = {}
101 104 # prefetch all the things! (because python is slow)
102 105 reached = reachable.add
103 106 dovisit = visit.append
104 107 nextvisit = visit.pop
105 108 # open-code the post-order traversal due to the tiny size of
106 109 # sys.getrecursionlimit()
107 110 while visit:
108 111 rev = nextvisit()
109 112 if rev in roots:
110 113 reached(rev)
111 114 if not includepath:
112 115 continue
113 116 parents = parentrevs(rev)
114 117 seen[rev] = parents
115 118 for parent in parents:
116 119 if parent >= minroot and parent not in seen:
117 120 dovisit(parent)
118 121 if not reachable:
119 122 return baseset()
120 123 if not includepath:
121 124 return reachable
122 125 for rev in sorted(seen):
123 126 for parent in seen[rev]:
124 127 if parent in reachable:
125 128 reached(rev)
126 129 return reachable
127 130
def reachableroots(repo, roots, heads, includepath=False):
    """return (heads(::<roots> and ::<heads>))

    If includepath is True, return (<roots>::<heads>)."""
    if not roots:
        return baseset()
    minroot = roots.min()
    rootlist = list(roots)
    headlist = list(heads)
    try:
        # prefer the native (C) implementation when the changelog has one
        found = repo.changelog.reachableroots(minroot, headlist, rootlist,
                                              includepath)
    except AttributeError:
        found = _reachablerootspure(repo, minroot, rootlist, headlist,
                                    includepath)
    result = baseset(found)
    result.sort()
    return result
144 147
def _changesrange(fctx1, fctx2, linerange2, diffopts):
    """Return `(diffinrange, linerange1)` where `diffinrange` is True
    if diff from fctx2 to fctx1 has changes in linerange2 and
    `linerange1` is the new line range for fctx1.
    """
    allblocks = mdiff.allblocks(fctx1.data(), fctx2.data(), diffopts)
    inrangeblocks, linerange1 = mdiff.blocksinrange(allblocks, linerange2)
    changed = any(btype == '!' for _, btype in inrangeblocks)
    return changed, linerange1
154 157
def blockancestors(fctx, fromline, toline, followfirst=False):
    """Yield ancestors of `fctx` with respect to the block of lines within
    `fromline`-`toline` range.

    Each value yielded is a `(fctx, (fromline, toline))` pair giving the
    file context and the block's line range in that revision.
    """
    diffopts = patch.diffopts(fctx._repo.ui)
    introrev = fctx.introrev()
    if fctx.rev() != introrev:
        # make sure we start the walk from the revision that actually
        # introduced this file node
        fctx = fctx.filectx(fctx.filenode(), changeid=introrev)
    # visit is keyed by (linkrev, filenode); max(visit) thus selects the
    # candidate with the highest linkrev to process next
    visit = {(fctx.linkrev(), fctx.filenode()): (fctx, (fromline, toline))}
    while visit:
        c, linerange2 = visit.pop(max(visit))
        pl = c.parents()
        if followfirst:
            pl = pl[:1]
        if not pl:
            # The block originates from the initial revision.
            yield c, linerange2
            continue
        inrange = False
        for p in pl:
            inrangep, linerange1 = _changesrange(p, c, linerange2, diffopts)
            inrange = inrange or inrangep
            if linerange1[0] == linerange1[1]:
                # Parent's linerange is empty, meaning that the block got
                # introduced in this revision; no need to go further in this
                # branch.
                continue
            # Set _descendantrev with 'c' (a known descendant) so that, when
            # _adjustlinkrev is called for 'p', it receives this descendant
            # (as srcrev) instead possibly topmost introrev.
            p._descendantrev = c.rev()
            visit[p.linkrev(), p.filenode()] = p, linerange1
        if inrange:
            yield c, linerange2
189 192
def blockdescendants(fctx, fromline, toline):
    """Yield descendants of `fctx` with respect to the block of lines within
    `fromline`-`toline` range.

    Each value yielded is a `(fctx, (fromline, toline))` pair, like
    blockancestors() but walking towards descendants.
    """
    # First possibly yield 'fctx' if it has changes in range with respect to
    # its parents.
    try:
        c, linerange1 = next(blockancestors(fctx, fromline, toline))
    except StopIteration:
        pass
    else:
        if c == fctx:
            yield c, linerange1

    diffopts = patch.diffopts(fctx._repo.ui)
    fl = fctx.filelog()
    # map of filelog rev -> (filectx, line range) for revisions already
    # processed; seeded with the starting context
    seen = {fctx.filerev(): (fctx, (fromline, toline))}
    # NOTE(review): assumes fl.descendants() yields revisions in an order
    # where parents appear before children, so 'seen' lookups succeed --
    # confirm against the filelog API
    for i in fl.descendants([fctx.filerev()]):
        c = fctx.filectx(i)
        inrange = False
        for x in fl.parentrevs(i):
            try:
                p, linerange2 = seen[x]
            except KeyError:
                # nullrev or other branch
                continue
            inrangep, linerange1 = _changesrange(c, p, linerange2, diffopts)
            inrange = inrange or inrangep
            # If revision 'i' has been seen (it's a merge), we assume that its
            # line range is the same independently of which parents was used
            # to compute it.
            assert i not in seen or seen[i][1] == linerange1, (
                'computed line range for %s is not consistent between '
                'ancestor branches' % c)
            seen[i] = c, linerange1
        if inrange:
            yield c, linerange1
227 230
def toposort(revs, parentsfunc, firstbranch=()):
    """Yield revisions from heads to roots one (topo) branch at a time.

    This function aims to be used by a graph generator that wishes to minimize
    the number of parallel branches and their interleaving.

    'revs' must support sort(); it is sorted in place, highest first.
    'parentsfunc(rev)' must return the parent revisions of 'rev'
    (node.nullrev entries are ignored).  'firstbranch' optionally pre-seeds
    the set of unblocked revisions to prioritize a branch.

    Example iteration order (numbers show the "true" order in a changelog):

      o  4
      |
      o  1
      |
      | o  3
      | |
      | o  2
      |/
      o  0

    Note that the ancestors of merges are understood by the current
    algorithm to be on the same branch. This means no reordering will
    occur behind a merge.
    """

    ### Quick summary of the algorithm
    #
    # This function is based around a "retention" principle. We keep revisions
    # in memory until we are ready to emit a whole branch that immediately
    # "merges" into an existing one. This reduces the number of parallel
    # branches with interleaved revisions.
    #
    # During iteration revs are split into two groups:
    # A) revision already emitted
    # B) revision in "retention". They are stored as different subgroups.
    #
    # for each REV, we do the following logic:
    #
    #   1) if REV is a parent of (A), we will emit it. If there is a
    #   retention group ((B) above) that is blocked on REV being
    #   available, we emit all the revisions out of that retention
    #   group first.
    #
    #   2) else, we'll search for a subgroup in (B) awaiting for REV to be
    #   available, if such subgroup exist, we add REV to it and the subgroup is
    #   now awaiting for REV.parents() to be available.
    #
    #   3) finally if no such group existed in (B), we create a new subgroup.
    #
    #
    # To bootstrap the algorithm, we emit the tipmost revision (which
    # puts it in group (A) from above).

    revs.sort(reverse=True)

    # Set of parents of revision that have been emitted. They can be considered
    # unblocked as the graph generator is already aware of them so there is no
    # need to delay the revisions that reference them.
    #
    # If someone wants to prioritize a branch over the others, pre-filling this
    # set will force all other branches to wait until this branch is ready to be
    # emitted.
    unblocked = set(firstbranch)

    # list of groups waiting to be displayed, each group is defined by:
    #
    #   (revs:    lists of revs waiting to be displayed,
    #    blocked: set of that cannot be displayed before those in 'revs')
    #
    # The second value ('blocked') correspond to parents of any revision in the
    # group ('revs') that is not itself contained in the group. The main idea
    # of this algorithm is to delay as much as possible the emission of any
    # revision.  This means waiting for the moment we are about to display
    # these parents to display the revs in a group.
    #
    # This first implementation is smart until it encounters a merge: it will
    # emit revs as soon as any parent is about to be emitted and can grow an
    # arbitrary number of revs in 'blocked'. In practice this mean we properly
    # retains new branches but gives up on any special ordering for ancestors
    # of merges. The implementation can be improved to handle this better.
    #
    # The first subgroup is special. It corresponds to all the revision that
    # were already emitted. The 'revs' lists is expected to be empty and the
    # 'blocked' set contains the parents revisions of already emitted revision.
    #
    # You could pre-seed the <parents> set of groups[0] to a specific
    # changesets to select what the first emitted branch should be.
    groups = [([], unblocked)]
    pendingheap = []
    pendingset = set()

    heapq.heapify(pendingheap)
    heappop = heapq.heappop
    heappush = heapq.heappush
    for currentrev in revs:
        # Heap works with smallest element, we want highest so we invert
        if currentrev not in pendingset:
            heappush(pendingheap, -currentrev)
            pendingset.add(currentrev)
        # iterates on pending rev until after the current rev have been
        # processed.
        rev = None
        while rev != currentrev:
            rev = -heappop(pendingheap)
            pendingset.remove(rev)

            # Seek for a subgroup blocked, waiting for the current revision.
            matching = [i for i, g in enumerate(groups) if rev in g[1]]

            if matching:
                # The main idea is to gather together all sets that are blocked
                # on the same revision.
                #
                # Groups are merged when a common blocking ancestor is
                # observed. For example, given two groups:
                #
                # revs [5, 4] waiting for 1
                # revs [3, 2] waiting for 1
                #
                # These two groups will be merged when we process
                # 1. In theory, we could have merged the groups when
                # we added 2 to the group it is now in (we could have
                # noticed the groups were both blocked on 1 then), but
                # the way it works now makes the algorithm simpler.
                #
                # We also always keep the oldest subgroup first. We can
                # probably improve the behavior by having the longest set
                # first. That way, graph algorithms could minimise the length
                # of parallel lines their drawing. This is currently not done.
                targetidx = matching.pop(0)
                trevs, tparents = groups[targetidx]
                for i in matching:
                    gr = groups[i]
                    trevs.extend(gr[0])
                    tparents |= gr[1]
                # delete all merged subgroups (except the one we kept)
                # (starting from the last subgroup for performance and
                # sanity reasons)
                for i in reversed(matching):
                    del groups[i]
            else:
                # This is a new head. We create a new subgroup for it.
                targetidx = len(groups)
                groups.append(([], {rev}))

            gr = groups[targetidx]

            # We now add the current nodes to this subgroups. This is done
            # after the subgroup merging because all elements from a subgroup
            # that relied on this rev must precede it.
            #
            # we also update the <parents> set to include the parents of the
            # new nodes.
            if rev == currentrev: # only display stuff in rev
                gr[0].append(rev)
            gr[1].remove(rev)
            parents = [p for p in parentsfunc(rev) if p > node.nullrev]
            gr[1].update(parents)
            for p in parents:
                if p not in pendingset:
                    pendingset.add(p)
                    heappush(pendingheap, -p)

            # Look for a subgroup to display
            #
            # When unblocked is empty (if clause), we were not waiting for any
            # revisions during the first iteration (if no priority was given) or
            # if we emitted a whole disconnected set of the graph (reached a
            # root).  In that case we arbitrarily take the oldest known
            # subgroup. The heuristic could probably be better.
            #
            # Otherwise (elif clause) if the subgroup is blocked on
            # a revision we just emitted, we can safely emit it as
            # well.
            if not unblocked:
                if len(groups) > 1:  # display other subset
                    targetidx = 1
                    gr = groups[1]
            elif not gr[1] & unblocked:
                gr = None

            if gr is not None:
                # update the set of awaited revisions with the one from the
                # subgroup
                unblocked |= gr[1]
                # output all revisions in the subgroup
                for r in gr[0]:
                    yield r
                # delete the subgroup that you just output
                # unless it is groups[0] in which case you just empty it.
                if targetidx:
                    del groups[targetidx]
                else:
                    gr[0][:] = []
    # Check if we have some subgroup waiting for revisions we are not going to
    # iterate over
    for g in groups:
        for r in g[0]:
            yield r
General Comments 0
You need to be logged in to leave comments. Login now