##// END OF EJS Templates
dagop: factor out generator of ancestor nodes...
Yuya Nishihara -
r33078:fb663bd0 default
parent child Browse files
Show More
@@ -1,454 +1,463 b''
1 1 # dagop.py - graph ancestry and topology algorithm for revset
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11
12 12 from . import (
13 13 error,
14 14 mdiff,
15 15 node,
16 16 patch,
17 17 smartset,
18 18 )
19 19
# convenience aliases for the smartset classes used throughout this module
baseset = smartset.baseset
generatorset = smartset.generatorset

# possible maximum depth between null and wdir()
_maxlogdepth = 0x80000000
25 25
def _walkrevtree(pfunc, revs, startdepth, stopdepth):
    """Walk DAG using 'pfunc' from the given 'revs' nodes

    'pfunc(rev)' should return the parent revisions of the given 'rev'.

    Scan ends at the stopdepth (exclusive) if specified. Revisions found
    earlier than the startdepth are omitted.

    Yields revisions in descending order; each revision is yielded at most
    once, at the lowest depth it is reachable from 'revs'.
    """
    if startdepth is None:
        startdepth = 0
    if stopdepth is None:
        stopdepth = _maxlogdepth
    if stopdepth == 0:
        # empty depth window: nothing can ever be yielded
        return
    if stopdepth < 0:
        raise error.ProgrammingError('negative stopdepth')

    # load input revs lazily to heap so earlier revisions can be yielded
    # without fully computing the input revs
    revs.sort(reverse=True)
    irevs = iter(revs)
    pendingheap = []  # [(-rev, depth), ...] (i.e. lower depth first)

    inputrev = next(irevs, None)
    if inputrev is not None:
        heapq.heappush(pendingheap, (-inputrev, 0))

    lastrev = None
    while pendingheap:
        currev, curdepth = heapq.heappop(pendingheap)
        currev = -currev
        if currev == inputrev:
            # pull the next input rev onto the heap only once the previous
            # one has surfaced, keeping the input set lazy
            inputrev = next(irevs, None)
            if inputrev is not None:
                heapq.heappush(pendingheap, (-inputrev, 0))
        # rescan parents until curdepth >= startdepth because queued entries
        # of the same revision are iterated from the lowest depth
        foundnew = (currev != lastrev)
        if foundnew and curdepth >= startdepth:
            lastrev = currev
            yield currev
        pdepth = curdepth + 1
        if foundnew and pdepth < stopdepth:
            for prev in pfunc(currev):
                if prev != node.nullrev:
                    heapq.heappush(pendingheap, (-prev, pdepth))
def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth):
    """Generate ancestor revisions of 'revs' via _walkrevtree()."""
    # when following first parents only, truncate each parent list to one
    cut = 1 if followfirst else None
    cl = repo.changelog
    def pfunc(rev):
        try:
            return cl.parentrevs(rev)[:cut]
        except error.WdirUnsupported:
            # the working directory has no entry in the changelog; ask the
            # context objects for its parents instead
            return (pctx.rev() for pctx in repo[rev].parents()[:cut])
    return _walkrevtree(pfunc, revs, startdepth, stopdepth)
85
def revancestors(repo, revs, followfirst, startdepth=None, stopdepth=None):
    """Like revlog.ancestors(), but supports additional options, includes
    the given revs themselves, and returns a smartset

    Scan ends at the stopdepth (exclusive) if specified. Revisions found
    earlier than the startdepth are omitted.
    """
    gen = _genrevancestors(repo, revs, followfirst, startdepth, stopdepth)
    # ancestors are generated in descending order
    return generatorset(gen, iterasc=False)
86 95
def _genrevdescendants(repo, revs, followfirst):
    """Generate descendant revisions of 'revs' (inclusive), ascending."""
    cut = 1 if followfirst else None
    cl = repo.changelog
    first = revs.min()
    nullrev = node.nullrev
    if first == nullrev:
        # Everything descends from null. (Are there nodes with a null first
        # parent and a non-null second one? Maybe. Do we care? Probably not.)
        yield first
        for rev in cl:
            yield rev
    else:
        seen = set(revs)
        for rev in cl.revs(first):
            if rev in seen:
                yield rev
                continue
            for prev in cl.parentrevs(rev)[:cut]:
                if prev != nullrev and prev in seen:
                    # a parent is a known descendant, so this rev is one too
                    seen.add(rev)
                    yield rev
                    break
113 122
def revdescendants(repo, revs, followfirst):
    """Like revlog.descendants() but supports additional options, includes
    the given revs themselves, and returns a smartset"""
    # descendants are generated in ascending order
    return generatorset(_genrevdescendants(repo, revs, followfirst),
                        iterasc=True)
119 128
def _reachablerootspure(repo, minroot, roots, heads, includepath):
    """return (heads(::<roots> and ::<heads>))

    If includepath is True, return (<roots>::<heads>).

    Pure-python fallback used by reachableroots() when the changelog has no
    native reachableroots() implementation. Walks from 'heads' towards
    'minroot', collecting the revisions in 'roots' that are reachable.
    """
    if not roots:
        return []
    parentrevs = repo.changelog.parentrevs
    roots = set(roots)
    visit = list(heads)
    reachable = set()
    seen = {}  # rev -> its parentrevs tuple, doubles as a "visited" marker
    # prefetch all the things! (because python is slow)
    reached = reachable.add
    dovisit = visit.append
    nextvisit = visit.pop
    # open-code the post-order traversal due to the tiny size of
    # sys.getrecursionlimit()
    while visit:
        rev = nextvisit()
        if rev in roots:
            reached(rev)
            if not includepath:
                # heads-only mode: no need to scan past a reached root
                continue
        parents = parentrevs(rev)
        seen[rev] = parents
        for parent in parents:
            # 'minroot' bounds the walk: nothing below it can be a root
            if parent >= minroot and parent not in seen:
                dovisit(parent)
    if not reachable:
        return baseset()
    if not includepath:
        return reachable
    # second pass over the visited revs (in ascending order) propagating
    # reachability downwards, so the full <roots>::<heads> paths are included
    for rev in sorted(seen):
        for parent in seen[rev]:
            if parent in reachable:
                reached(rev)
    return reachable
157 166
def reachableroots(repo, roots, heads, includepath=False):
    """return (heads(::<roots> and ::<heads>))

    If includepath is True, return (<roots>::<heads>)."""
    if not roots:
        return baseset()
    minroot = roots.min()
    rootlist = list(roots)
    headlist = list(heads)
    try:
        # prefer the changelog's native implementation when available
        found = repo.changelog.reachableroots(minroot, headlist, rootlist,
                                              includepath)
    except AttributeError:
        found = _reachablerootspure(repo, minroot, rootlist, headlist,
                                    includepath)
    result = baseset(found)
    result.sort()
    return result
174 183
def _changesrange(fctx1, fctx2, linerange2, diffopts):
    """Return `(diffinrange, linerange1)` where `diffinrange` is True
    if diff from fctx2 to fctx1 has changes in linerange2 and
    `linerange1` is the new line range for fctx1.
    """
    blocks = mdiff.allblocks(fctx1.data(), fctx2.data(), diffopts)
    filtered, linerange1 = mdiff.blocksinrange(blocks, linerange2)
    # '!' marks a changed block; any such block means the range was touched
    inrange = any(btype == '!' for _, btype in filtered)
    return inrange, linerange1
184 193
def blockancestors(fctx, fromline, toline, followfirst=False):
    """Yield ancestors of `fctx` with respect to the block of lines within
    `fromline`-`toline` range.

    Each item is a `(fctx, linerange)` pair giving the file context and the
    range the block occupies there. If `followfirst` is True, only first
    parents are followed.
    """
    diffopts = patch.diffopts(fctx._repo.ui)
    introrev = fctx.introrev()
    if fctx.rev() != introrev:
        # rebase fctx onto the revision that actually introduced it
        fctx = fctx.filectx(fctx.filenode(), changeid=introrev)
    visit = {(fctx.linkrev(), fctx.filenode()): (fctx, (fromline, toline))}
    while visit:
        # pop the highest (linkrev, filenode) key so contexts are processed
        # from newest to oldest
        c, linerange2 = visit.pop(max(visit))
        pl = c.parents()
        if followfirst:
            pl = pl[:1]
        if not pl:
            # The block originates from the initial revision.
            yield c, linerange2
            continue
        inrange = False
        for p in pl:
            inrangep, linerange1 = _changesrange(p, c, linerange2, diffopts)
            inrange = inrange or inrangep
            if linerange1[0] == linerange1[1]:
                # Parent's linerange is empty, meaning that the block got
                # introduced in this revision; no need to go further in this
                # branch.
                continue
            # Set _descendantrev with 'c' (a known descendant) so that, when
            # _adjustlinkrev is called for 'p', it receives this descendant
            # (as srcrev) instead of possibly the topmost introrev.
            p._descendantrev = c.rev()
            visit[p.linkrev(), p.filenode()] = p, linerange1
        if inrange:
            yield c, linerange2
219 228
def blockdescendants(fctx, fromline, toline):
    """Yield descendants of `fctx` with respect to the block of lines within
    `fromline`-`toline` range.

    Each item is a `(fctx, linerange)` pair giving the file context and the
    range the block occupies there.
    """
    # First possibly yield 'fctx' if it has changes in range with respect to
    # its parents.
    try:
        c, linerange1 = next(blockancestors(fctx, fromline, toline))
    except StopIteration:
        pass
    else:
        if c == fctx:
            yield c, linerange1

    diffopts = patch.diffopts(fctx._repo.ui)
    fl = fctx.filelog()
    # seen: filelog rev -> (fctx, linerange) for revs whose range is known
    seen = {fctx.filerev(): (fctx, (fromline, toline))}
    for i in fl.descendants([fctx.filerev()]):
        c = fctx.filectx(i)
        inrange = False
        for x in fl.parentrevs(i):
            try:
                p, linerange2 = seen[x]
            except KeyError:
                # nullrev or other branch
                continue
            inrangep, linerange1 = _changesrange(c, p, linerange2, diffopts)
            inrange = inrange or inrangep
            # If revision 'i' has been seen (it's a merge), we assume that its
            # line range is the same independently of which parents was used
            # to compute it.
            assert i not in seen or seen[i][1] == linerange1, (
                'computed line range for %s is not consistent between '
                'ancestor branches' % c)
            seen[i] = c, linerange1
        if inrange:
            yield c, linerange1
257 266
def toposort(revs, parentsfunc, firstbranch=()):
    """Yield revisions from heads to roots one (topo) branch at a time.

    This function aims to be used by a graph generator that wishes to minimize
    the number of parallel branches and their interleaving.

    'parentsfunc(rev)' should return the parent revisions of 'rev'.

    'firstbranch' may contain revisions to prioritize; all other branches
    will then wait until the prioritized branch is ready to be emitted.

    Example iteration order (numbers show the "true" order in a changelog):

      o  4
      |
      o  1
      |
      | o  3
      | |
      | o  2
      |/
      o  0

    Note that the ancestors of merges are understood by the current
    algorithm to be on the same branch. This means no reordering will
    occur behind a merge.
    """

    ### Quick summary of the algorithm
    #
    # This function is based around a "retention" principle. We keep revisions
    # in memory until we are ready to emit a whole branch that immediately
    # "merges" into an existing one. This reduces the number of parallel
    # branches with interleaved revisions.
    #
    # During iteration revs are split into two groups:
    # A) revision already emitted
    # B) revision in "retention". They are stored as different subgroups.
    #
    # for each REV, we do the following logic:
    #
    #   1) if REV is a parent of (A), we will emit it. If there is a
    #   retention group ((B) above) that is blocked on REV being
    #   available, we emit all the revisions out of that retention
    #   group first.
    #
    #   2) else, we'll search for a subgroup in (B) awaiting for REV to be
    #   available, if such subgroup exist, we add REV to it and the subgroup is
    #   now awaiting for REV.parents() to be available.
    #
    #   3) finally if no such group existed in (B), we create a new subgroup.
    #
    #
    # To bootstrap the algorithm, we emit the tipmost revision (which
    # puts it in group (A) from above).

    revs.sort(reverse=True)

    # Set of parents of revision that have been emitted. They can be considered
    # unblocked as the graph generator is already aware of them so there is no
    # need to delay the revisions that reference them.
    #
    # If someone wants to prioritize a branch over the others, pre-filling this
    # set will force all other branches to wait until this branch is ready to be
    # emitted.
    unblocked = set(firstbranch)

    # list of groups waiting to be displayed, each group is defined by:
    #
    #   (revs:    lists of revs waiting to be displayed,
    #    blocked: set of that cannot be displayed before those in 'revs')
    #
    # The second value ('blocked') correspond to parents of any revision in the
    # group ('revs') that is not itself contained in the group. The main idea
    # of this algorithm is to delay as much as possible the emission of any
    # revision. This means waiting for the moment we are about to display
    # these parents to display the revs in a group.
    #
    # This first implementation is smart until it encounters a merge: it will
    # emit revs as soon as any parent is about to be emitted and can grow an
    # arbitrary number of revs in 'blocked'. In practice this mean we properly
    # retains new branches but gives up on any special ordering for ancestors
    # of merges. The implementation can be improved to handle this better.
    #
    # The first subgroup is special. It corresponds to all the revision that
    # were already emitted. The 'revs' lists is expected to be empty and the
    # 'blocked' set contains the parents revisions of already emitted revision.
    #
    # You could pre-seed the <parents> set of groups[0] to a specific
    # changesets to select what the first emitted branch should be.
    groups = [([], unblocked)]
    pendingheap = []  # max-heap of pending revs (stored negated)
    pendingset = set()

    heapq.heapify(pendingheap)
    heappop = heapq.heappop
    heappush = heapq.heappush
    for currentrev in revs:
        # Heap works with smallest element, we want highest so we invert
        if currentrev not in pendingset:
            heappush(pendingheap, -currentrev)
            pendingset.add(currentrev)
        # iterates on pending rev until after the current rev have been
        # processed.
        rev = None
        while rev != currentrev:
            rev = -heappop(pendingheap)
            pendingset.remove(rev)

            # Seek for a subgroup blocked, waiting for the current revision.
            matching = [i for i, g in enumerate(groups) if rev in g[1]]

            if matching:
                # The main idea is to gather together all sets that are blocked
                # on the same revision.
                #
                # Groups are merged when a common blocking ancestor is
                # observed. For example, given two groups:
                #
                # revs [5, 4] waiting for 1
                # revs [3, 2] waiting for 1
                #
                # These two groups will be merged when we process
                # 1. In theory, we could have merged the groups when
                # we added 2 to the group it is now in (we could have
                # noticed the groups were both blocked on 1 then), but
                # the way it works now makes the algorithm simpler.
                #
                # We also always keep the oldest subgroup first. We can
                # probably improve the behavior by having the longest set
                # first. That way, graph algorithms could minimise the length
                # of parallel lines their drawing. This is currently not done.
                targetidx = matching.pop(0)
                trevs, tparents = groups[targetidx]
                for i in matching:
                    gr = groups[i]
                    trevs.extend(gr[0])
                    tparents |= gr[1]
                # delete all merged subgroups (except the one we kept)
                # (starting from the last subgroup for performance and
                # sanity reasons)
                for i in reversed(matching):
                    del groups[i]
            else:
                # This is a new head. We create a new subgroup for it.
                targetidx = len(groups)
                groups.append(([], {rev}))

            gr = groups[targetidx]

            # We now add the current nodes to this subgroups. This is done
            # after the subgroup merging because all elements from a subgroup
            # that relied on this rev must precede it.
            #
            # we also update the <parents> set to include the parents of the
            # new nodes.
            if rev == currentrev: # only display stuff in rev
                gr[0].append(rev)
            gr[1].remove(rev)
            parents = [p for p in parentsfunc(rev) if p > node.nullrev]
            gr[1].update(parents)
            for p in parents:
                if p not in pendingset:
                    pendingset.add(p)
                    heappush(pendingheap, -p)

            # Look for a subgroup to display
            #
            # When unblocked is empty (if clause), we were not waiting for any
            # revisions during the first iteration (if no priority was given) or
            # if we emitted a whole disconnected set of the graph (reached a
            # root). In that case we arbitrarily take the oldest known
            # subgroup. The heuristic could probably be better.
            #
            # Otherwise (elif clause) if the subgroup is blocked on
            # a revision we just emitted, we can safely emit it as
            # well.
            if not unblocked:
                if len(groups) > 1: # display other subset
                    targetidx = 1
                    gr = groups[1]
            elif not gr[1] & unblocked:
                gr = None

            if gr is not None:
                # update the set of awaited revisions with the one from the
                # subgroup
                unblocked |= gr[1]
                # output all revisions in the subgroup
                for r in gr[0]:
                    yield r
                # delete the subgroup that you just output
                # unless it is groups[0] in which case you just empty it.
                if targetidx:
                    del groups[targetidx]
                else:
                    gr[0][:] = []
    # Check if we have some subgroup waiting for revisions we are not going to
    # iterate over
    for g in groups:
        for r in g[0]:
            yield r
General Comments 0
You need to be logged in to leave comments. Login now