# dagop.py - graph ancestry and topology algorithm for revset # # Copyright 2010 Matt Mackall <mpm@selenic.com> # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. from __future__ import absolute_import import heapq from . import ( error, mdiff, node, patch, smartset, ) baseset = smartset.baseset generatorset = smartset.generatorset # possible maximum depth between null and wdir() _maxlogdepth = 0x80000000 def _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse): """Walk DAG using 'pfunc' from the given 'revs' nodes 'pfunc(rev)' should return the parent/child revisions of the given 'rev' if 'reverse' is True/False respectively. Scan ends at the stopdepth (exlusive) if specified. Revisions found earlier than the startdepth are omitted. """ if startdepth is None: startdepth = 0 if stopdepth is None: stopdepth = _maxlogdepth if stopdepth == 0: return if stopdepth < 0: raise error.ProgrammingError('negative stopdepth') if reverse: heapsign = -1 # max heap else: heapsign = +1 # min heap # load input revs lazily to heap so earlier revisions can be yielded # without fully computing the input revs revs.sort(reverse) irevs = iter(revs) pendingheap = [] # [(heapsign * rev, depth), ...] (i.e. lower depth first) inputrev = next(irevs, None) if inputrev is not None: heapq.heappush(pendingheap, (heapsign * inputrev, 0)) lastrev = None while pendingheap: currev, curdepth = heapq.heappop(pendingheap) currev = heapsign * currev if currev == inputrev: inputrev = next(irevs, None) if inputrev is not None: heapq.heappush(pendingheap, (heapsign * inputrev, 0)) # rescan parents until curdepth >= startdepth because queued entries # of the same revision are iterated from the lowest depth foundnew = (currev != lastrev) if foundnew and curdepth >= startdepth: lastrev = currev yield currev pdepth = curdepth + 1 if foundnew and pdepth < stopdepth: for prev in pfunc(currev): if prev != node.nullrev: heapq.heappush(pendingheap, (heapsign * prev, pdepth)) def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth): if followfirst: cut = 1 else: cut = None cl = repo.changelog def pfunc(rev): try: return cl.parentrevs(rev)[:cut] except error.WdirUnsupported: return (pctx.rev() for pctx in repo[rev].parents()[:cut]) return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=True) def revancestors(repo, revs, followfirst, startdepth=None, stopdepth=None): """Like revlog.ancestors(), but supports additional options, includes the given revs themselves, and returns a smartset Scan ends at the stopdepth (exlusive) if specified. Revisions found earlier than the startdepth are omitted. """ gen = _genrevancestors(repo, revs, followfirst, startdepth, stopdepth) return generatorset(gen, iterasc=False) def _genrevdescendants(repo, revs, followfirst): if followfirst: cut = 1 else: cut = None cl = repo.changelog first = revs.min() nullrev = node.nullrev if first == nullrev: # Are there nodes with a null first parent and a non-null # second one? Maybe. Do we care? Probably not. yield first for i in cl: yield i else: seen = set(revs) for i in cl.revs(first): if i in seen: yield i continue for x in cl.parentrevs(i)[:cut]: if x != nullrev and x in seen: seen.add(i) yield i break def _builddescendantsmap(repo, startrev, followfirst): """Build map of 'rev -> child revs', offset from startrev""" cl = repo.changelog nullrev = node.nullrev descmap = [[] for _rev in xrange(startrev, len(cl))] for currev in cl.revs(startrev + 1): p1rev, p2rev = cl.parentrevs(currev) if p1rev >= startrev: descmap[p1rev - startrev].append(currev) if not followfirst and p2rev != nullrev and p2rev >= startrev: descmap[p2rev - startrev].append(currev) return descmap def _genrevdescendantsofdepth(repo, revs, followfirst, startdepth, stopdepth): startrev = revs.min() descmap = _builddescendantsmap(repo, startrev, followfirst) def pfunc(rev): return descmap[rev - startrev] return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=False) def revdescendants(repo, revs, followfirst, startdepth=None, stopdepth=None): """Like revlog.descendants() but supports additional options, includes the given revs themselves, and returns a smartset Scan ends at the stopdepth (exlusive) if specified. Revisions found earlier than the startdepth are omitted. """ if startdepth is None and stopdepth is None: gen = _genrevdescendants(repo, revs, followfirst) else: gen = _genrevdescendantsofdepth(repo, revs, followfirst, startdepth, stopdepth) return generatorset(gen, iterasc=True) def _reachablerootspure(repo, minroot, roots, heads, includepath): """return (heads(::<roots> and ::<heads>)) If includepath is True, return (<roots>::<heads>).""" if not roots: return [] parentrevs = repo.changelog.parentrevs roots = set(roots) visit = list(heads) reachable = set() seen = {} # prefetch all the things! (because python is slow) reached = reachable.add dovisit = visit.append nextvisit = visit.pop # open-code the post-order traversal due to the tiny size of # sys.getrecursionlimit() while visit: rev = nextvisit() if rev in roots: reached(rev) if not includepath: continue parents = parentrevs(rev) seen[rev] = parents for parent in parents: if parent >= minroot and parent not in seen: dovisit(parent) if not reachable: return baseset() if not includepath: return reachable for rev in sorted(seen): for parent in seen[rev]: if parent in reachable: reached(rev) return reachable def reachableroots(repo, roots, heads, includepath=False): """return (heads(::<roots> and ::<heads>)) If includepath is True, return (<roots>::<heads>).""" if not roots: return baseset() minroot = roots.min() roots = list(roots) heads = list(heads) try: revs = repo.changelog.reachableroots(minroot, heads, roots, includepath) except AttributeError: revs = _reachablerootspure(repo, minroot, roots, heads, includepath) revs = baseset(revs) revs.sort() return revs def _changesrange(fctx1, fctx2, linerange2, diffopts): """Return `(diffinrange, linerange1)` where `diffinrange` is True if diff from fctx2 to fctx1 has changes in linerange2 and `linerange1` is the new line range for fctx1. """ blocks = mdiff.allblocks(fctx1.data(), fctx2.data(), diffopts) filteredblocks, linerange1 = mdiff.blocksinrange(blocks, linerange2) diffinrange = any(stype == '!' for _, stype in filteredblocks) return diffinrange, linerange1 def blockancestors(fctx, fromline, toline, followfirst=False): """Yield ancestors of `fctx` with respect to the block of lines within `fromline`-`toline` range. """ diffopts = patch.diffopts(fctx._repo.ui) introrev = fctx.introrev() if fctx.rev() != introrev: fctx = fctx.filectx(fctx.filenode(), changeid=introrev) visit = {(fctx.linkrev(), fctx.filenode()): (fctx, (fromline, toline))} while visit: c, linerange2 = visit.pop(max(visit)) pl = c.parents() if followfirst: pl = pl[:1] if not pl: # The block originates from the initial revision. yield c, linerange2 continue inrange = False for p in pl: inrangep, linerange1 = _changesrange(p, c, linerange2, diffopts) inrange = inrange or inrangep if linerange1[0] == linerange1[1]: # Parent's linerange is empty, meaning that the block got # introduced in this revision; no need to go futher in this # branch. continue # Set _descendantrev with 'c' (a known descendant) so that, when # _adjustlinkrev is called for 'p', it receives this descendant # (as srcrev) instead possibly topmost introrev. p._descendantrev = c.rev() visit[p.linkrev(), p.filenode()] = p, linerange1 if inrange: yield c, linerange2 def blockdescendants(fctx, fromline, toline): """Yield descendants of `fctx` with respect to the block of lines within `fromline`-`toline` range. """ # First possibly yield 'fctx' if it has changes in range with respect to # its parents. try: c, linerange1 = next(blockancestors(fctx, fromline, toline)) except StopIteration: pass else: if c == fctx: yield c, linerange1 diffopts = patch.diffopts(fctx._repo.ui) fl = fctx.filelog() seen = {fctx.filerev(): (fctx, (fromline, toline))} for i in fl.descendants([fctx.filerev()]): c = fctx.filectx(i) inrange = False for x in fl.parentrevs(i): try: p, linerange2 = seen[x] except KeyError: # nullrev or other branch continue inrangep, linerange1 = _changesrange(c, p, linerange2, diffopts) inrange = inrange or inrangep # If revision 'i' has been seen (it's a merge), we assume that its # line range is the same independently of which parents was used # to compute it. assert i not in seen or seen[i][1] == linerange1, ( 'computed line range for %s is not consistent between ' 'ancestor branches' % c) seen[i] = c, linerange1 if inrange: yield c, linerange1 def toposort(revs, parentsfunc, firstbranch=()): """Yield revisions from heads to roots one (topo) branch at a time. This function aims to be used by a graph generator that wishes to minimize the number of parallel branches and their interleaving. Example iteration order (numbers show the "true" order in a changelog): o 4 | o 1 | | o 3 | | | o 2 |/ o 0 Note that the ancestors of merges are understood by the current algorithm to be on the same branch. This means no reordering will occur behind a merge. """ ### Quick summary of the algorithm # # This function is based around a "retention" principle. We keep revisions # in memory until we are ready to emit a whole branch that immediately # "merges" into an existing one. This reduces the number of parallel # branches with interleaved revisions. # # During iteration revs are split into two groups: # A) revision already emitted # B) revision in "retention". They are stored as different subgroups. # # for each REV, we do the following logic: # # 1) if REV is a parent of (A), we will emit it. If there is a # retention group ((B) above) that is blocked on REV being # available, we emit all the revisions out of that retention # group first. # # 2) else, we'll search for a subgroup in (B) awaiting for REV to be # available, if such subgroup exist, we add REV to it and the subgroup is # now awaiting for REV.parents() to be available. # # 3) finally if no such group existed in (B), we create a new subgroup. # # # To bootstrap the algorithm, we emit the tipmost revision (which # puts it in group (A) from above). revs.sort(reverse=True) # Set of parents of revision that have been emitted. They can be considered # unblocked as the graph generator is already aware of them so there is no # need to delay the revisions that reference them. # # If someone wants to prioritize a branch over the others, pre-filling this # set will force all other branches to wait until this branch is ready to be # emitted. unblocked = set(firstbranch) # list of groups waiting to be displayed, each group is defined by: # # (revs: lists of revs waiting to be displayed, # blocked: set of that cannot be displayed before those in 'revs') # # The second value ('blocked') correspond to parents of any revision in the # group ('revs') that is not itself contained in the group. The main idea # of this algorithm is to delay as much as possible the emission of any # revision. This means waiting for the moment we are about to display # these parents to display the revs in a group. # # This first implementation is smart until it encounters a merge: it will # emit revs as soon as any parent is about to be emitted and can grow an # arbitrary number of revs in 'blocked'. In practice this mean we properly # retains new branches but gives up on any special ordering for ancestors # of merges. The implementation can be improved to handle this better. # # The first subgroup is special. It corresponds to all the revision that # were already emitted. The 'revs' lists is expected to be empty and the # 'blocked' set contains the parents revisions of already emitted revision. # # You could pre-seed the <parents> set of groups[0] to a specific # changesets to select what the first emitted branch should be. groups = [([], unblocked)] pendingheap = [] pendingset = set() heapq.heapify(pendingheap) heappop = heapq.heappop heappush = heapq.heappush for currentrev in revs: # Heap works with smallest element, we want highest so we invert if currentrev not in pendingset: heappush(pendingheap, -currentrev) pendingset.add(currentrev) # iterates on pending rev until after the current rev have been # processed. rev = None while rev != currentrev: rev = -heappop(pendingheap) pendingset.remove(rev) # Seek for a subgroup blocked, waiting for the current revision. matching = [i for i, g in enumerate(groups) if rev in g[1]] if matching: # The main idea is to gather together all sets that are blocked # on the same revision. # # Groups are merged when a common blocking ancestor is # observed. For example, given two groups: # # revs [5, 4] waiting for 1 # revs [3, 2] waiting for 1 # # These two groups will be merged when we process # 1. In theory, we could have merged the groups when # we added 2 to the group it is now in (we could have # noticed the groups were both blocked on 1 then), but # the way it works now makes the algorithm simpler. # # We also always keep the oldest subgroup first. We can # probably improve the behavior by having the longest set # first. That way, graph algorithms could minimise the length # of parallel lines their drawing. This is currently not done. targetidx = matching.pop(0) trevs, tparents = groups[targetidx] for i in matching: gr = groups[i] trevs.extend(gr[0]) tparents |= gr[1] # delete all merged subgroups (except the one we kept) # (starting from the last subgroup for performance and # sanity reasons) for i in reversed(matching): del groups[i] else: # This is a new head. We create a new subgroup for it. targetidx = len(groups) groups.append(([], {rev})) gr = groups[targetidx] # We now add the current nodes to this subgroups. This is done # after the subgroup merging because all elements from a subgroup # that relied on this rev must precede it. # # we also update the <parents> set to include the parents of the # new nodes. if rev == currentrev: # only display stuff in rev gr[0].append(rev) gr[1].remove(rev) parents = [p for p in parentsfunc(rev) if p > node.nullrev] gr[1].update(parents) for p in parents: if p not in pendingset: pendingset.add(p) heappush(pendingheap, -p) # Look for a subgroup to display # # When unblocked is empty (if clause), we were not waiting for any # revisions during the first iteration (if no priority was given) or # if we emitted a whole disconnected set of the graph (reached a # root). In that case we arbitrarily take the oldest known # subgroup. The heuristic could probably be better. # # Otherwise (elif clause) if the subgroup is blocked on # a revision we just emitted, we can safely emit it as # well. if not unblocked: if len(groups) > 1: # display other subset targetidx = 1 gr = groups[1] elif not gr[1] & unblocked: gr = None if gr is not None: # update the set of awaited revisions with the one from the # subgroup unblocked |= gr[1] # output all revisions in the subgroup for r in gr[0]: yield r # delete the subgroup that you just output # unless it is groups[0] in which case you just empty it. if targetidx: del groups[targetidx] else: gr[0][:] = [] # Check if we have some subgroup waiting for revisions we are not going to # iterate over for g in groups: for r in g[0]: yield r