|
|
# dagop.py - graph ancestry and topology algorithm for revset
|
|
|
#
|
|
|
# Copyright 2010 Matt Mackall <mpm@selenic.com>
|
|
|
#
|
|
|
# This software may be used and distributed according to the terms of the
|
|
|
# GNU General Public License version 2 or any later version.
|
|
|
|
|
|
from __future__ import absolute_import
|
|
|
|
|
|
import heapq
|
|
|
|
|
|
from . import (
|
|
|
error,
|
|
|
node,
|
|
|
smartset,
|
|
|
)
|
|
|
|
|
|
baseset = smartset.baseset
|
|
|
generatorset = smartset.generatorset
|
|
|
|
|
|
def revancestors(repo, revs, followfirst):
|
|
|
"""Like revlog.ancestors(), but supports followfirst."""
|
|
|
if followfirst:
|
|
|
cut = 1
|
|
|
else:
|
|
|
cut = None
|
|
|
cl = repo.changelog
|
|
|
|
|
|
def iterate():
|
|
|
revs.sort(reverse=True)
|
|
|
irevs = iter(revs)
|
|
|
h = []
|
|
|
|
|
|
inputrev = next(irevs, None)
|
|
|
if inputrev is not None:
|
|
|
heapq.heappush(h, -inputrev)
|
|
|
|
|
|
seen = set()
|
|
|
while h:
|
|
|
current = -heapq.heappop(h)
|
|
|
if current == inputrev:
|
|
|
inputrev = next(irevs, None)
|
|
|
if inputrev is not None:
|
|
|
heapq.heappush(h, -inputrev)
|
|
|
if current not in seen:
|
|
|
seen.add(current)
|
|
|
yield current
|
|
|
try:
|
|
|
for parent in cl.parentrevs(current)[:cut]:
|
|
|
if parent != node.nullrev:
|
|
|
heapq.heappush(h, -parent)
|
|
|
except error.WdirUnsupported:
|
|
|
for parent in repo[current].parents()[:cut]:
|
|
|
if parent.rev() != node.nullrev:
|
|
|
heapq.heappush(h, -parent.rev())
|
|
|
|
|
|
return generatorset(iterate(), iterasc=False)
|
|
|
|
|
|
def revdescendants(repo, revs, followfirst):
|
|
|
"""Like revlog.descendants() but supports followfirst."""
|
|
|
if followfirst:
|
|
|
cut = 1
|
|
|
else:
|
|
|
cut = None
|
|
|
|
|
|
def iterate():
|
|
|
cl = repo.changelog
|
|
|
# XXX this should be 'parentset.min()' assuming 'parentset' is a
|
|
|
# smartset (and if it is not, it should.)
|
|
|
first = min(revs)
|
|
|
nullrev = node.nullrev
|
|
|
if first == nullrev:
|
|
|
# Are there nodes with a null first parent and a non-null
|
|
|
# second one? Maybe. Do we care? Probably not.
|
|
|
for i in cl:
|
|
|
yield i
|
|
|
else:
|
|
|
seen = set(revs)
|
|
|
for i in cl.revs(first + 1):
|
|
|
for x in cl.parentrevs(i)[:cut]:
|
|
|
if x != nullrev and x in seen:
|
|
|
seen.add(i)
|
|
|
yield i
|
|
|
break
|
|
|
|
|
|
return generatorset(iterate(), iterasc=True)
|
|
|
|
|
|
def _reachablerootspure(repo, minroot, roots, heads, includepath):
|
|
|
"""return (heads(::<roots> and ::<heads>))
|
|
|
|
|
|
If includepath is True, return (<roots>::<heads>)."""
|
|
|
if not roots:
|
|
|
return []
|
|
|
parentrevs = repo.changelog.parentrevs
|
|
|
roots = set(roots)
|
|
|
visit = list(heads)
|
|
|
reachable = set()
|
|
|
seen = {}
|
|
|
# prefetch all the things! (because python is slow)
|
|
|
reached = reachable.add
|
|
|
dovisit = visit.append
|
|
|
nextvisit = visit.pop
|
|
|
# open-code the post-order traversal due to the tiny size of
|
|
|
# sys.getrecursionlimit()
|
|
|
while visit:
|
|
|
rev = nextvisit()
|
|
|
if rev in roots:
|
|
|
reached(rev)
|
|
|
if not includepath:
|
|
|
continue
|
|
|
parents = parentrevs(rev)
|
|
|
seen[rev] = parents
|
|
|
for parent in parents:
|
|
|
if parent >= minroot and parent not in seen:
|
|
|
dovisit(parent)
|
|
|
if not reachable:
|
|
|
return baseset()
|
|
|
if not includepath:
|
|
|
return reachable
|
|
|
for rev in sorted(seen):
|
|
|
for parent in seen[rev]:
|
|
|
if parent in reachable:
|
|
|
reached(rev)
|
|
|
return reachable
|
|
|
|
|
|
def reachableroots(repo, roots, heads, includepath=False):
|
|
|
"""return (heads(::<roots> and ::<heads>))
|
|
|
|
|
|
If includepath is True, return (<roots>::<heads>)."""
|
|
|
if not roots:
|
|
|
return baseset()
|
|
|
minroot = roots.min()
|
|
|
roots = list(roots)
|
|
|
heads = list(heads)
|
|
|
try:
|
|
|
revs = repo.changelog.reachableroots(minroot, heads, roots, includepath)
|
|
|
except AttributeError:
|
|
|
revs = _reachablerootspure(repo, minroot, roots, heads, includepath)
|
|
|
revs = baseset(revs)
|
|
|
revs.sort()
|
|
|
return revs
|
|
|
|
|
|
def toposort(revs, parentsfunc, firstbranch=()):
|
|
|
"""Yield revisions from heads to roots one (topo) branch at a time.
|
|
|
|
|
|
This function aims to be used by a graph generator that wishes to minimize
|
|
|
the number of parallel branches and their interleaving.
|
|
|
|
|
|
Example iteration order (numbers show the "true" order in a changelog):
|
|
|
|
|
|
o 4
|
|
|
|
|
|
|
o 1
|
|
|
|
|
|
|
| o 3
|
|
|
| |
|
|
|
| o 2
|
|
|
|/
|
|
|
o 0
|
|
|
|
|
|
Note that the ancestors of merges are understood by the current
|
|
|
algorithm to be on the same branch. This means no reordering will
|
|
|
occur behind a merge.
|
|
|
"""
|
|
|
|
|
|
### Quick summary of the algorithm
|
|
|
#
|
|
|
# This function is based around a "retention" principle. We keep revisions
|
|
|
# in memory until we are ready to emit a whole branch that immediately
|
|
|
# "merges" into an existing one. This reduces the number of parallel
|
|
|
# branches with interleaved revisions.
|
|
|
#
|
|
|
# During iteration revs are split into two groups:
|
|
|
# A) revision already emitted
|
|
|
# B) revision in "retention". They are stored as different subgroups.
|
|
|
#
|
|
|
# for each REV, we do the following logic:
|
|
|
#
|
|
|
# 1) if REV is a parent of (A), we will emit it. If there is a
|
|
|
# retention group ((B) above) that is blocked on REV being
|
|
|
# available, we emit all the revisions out of that retention
|
|
|
# group first.
|
|
|
#
|
|
|
# 2) else, we'll search for a subgroup in (B) awaiting for REV to be
|
|
|
# available, if such subgroup exist, we add REV to it and the subgroup is
|
|
|
# now awaiting for REV.parents() to be available.
|
|
|
#
|
|
|
# 3) finally if no such group existed in (B), we create a new subgroup.
|
|
|
#
|
|
|
#
|
|
|
# To bootstrap the algorithm, we emit the tipmost revision (which
|
|
|
# puts it in group (A) from above).
|
|
|
|
|
|
revs.sort(reverse=True)
|
|
|
|
|
|
# Set of parents of revision that have been emitted. They can be considered
|
|
|
# unblocked as the graph generator is already aware of them so there is no
|
|
|
# need to delay the revisions that reference them.
|
|
|
#
|
|
|
# If someone wants to prioritize a branch over the others, pre-filling this
|
|
|
# set will force all other branches to wait until this branch is ready to be
|
|
|
# emitted.
|
|
|
unblocked = set(firstbranch)
|
|
|
|
|
|
# list of groups waiting to be displayed, each group is defined by:
|
|
|
#
|
|
|
# (revs: lists of revs waiting to be displayed,
|
|
|
# blocked: set of that cannot be displayed before those in 'revs')
|
|
|
#
|
|
|
# The second value ('blocked') correspond to parents of any revision in the
|
|
|
# group ('revs') that is not itself contained in the group. The main idea
|
|
|
# of this algorithm is to delay as much as possible the emission of any
|
|
|
# revision. This means waiting for the moment we are about to display
|
|
|
# these parents to display the revs in a group.
|
|
|
#
|
|
|
# This first implementation is smart until it encounters a merge: it will
|
|
|
# emit revs as soon as any parent is about to be emitted and can grow an
|
|
|
# arbitrary number of revs in 'blocked'. In practice this mean we properly
|
|
|
# retains new branches but gives up on any special ordering for ancestors
|
|
|
# of merges. The implementation can be improved to handle this better.
|
|
|
#
|
|
|
# The first subgroup is special. It corresponds to all the revision that
|
|
|
# were already emitted. The 'revs' lists is expected to be empty and the
|
|
|
# 'blocked' set contains the parents revisions of already emitted revision.
|
|
|
#
|
|
|
# You could pre-seed the <parents> set of groups[0] to a specific
|
|
|
# changesets to select what the first emitted branch should be.
|
|
|
groups = [([], unblocked)]
|
|
|
pendingheap = []
|
|
|
pendingset = set()
|
|
|
|
|
|
heapq.heapify(pendingheap)
|
|
|
heappop = heapq.heappop
|
|
|
heappush = heapq.heappush
|
|
|
for currentrev in revs:
|
|
|
# Heap works with smallest element, we want highest so we invert
|
|
|
if currentrev not in pendingset:
|
|
|
heappush(pendingheap, -currentrev)
|
|
|
pendingset.add(currentrev)
|
|
|
# iterates on pending rev until after the current rev have been
|
|
|
# processed.
|
|
|
rev = None
|
|
|
while rev != currentrev:
|
|
|
rev = -heappop(pendingheap)
|
|
|
pendingset.remove(rev)
|
|
|
|
|
|
# Seek for a subgroup blocked, waiting for the current revision.
|
|
|
matching = [i for i, g in enumerate(groups) if rev in g[1]]
|
|
|
|
|
|
if matching:
|
|
|
# The main idea is to gather together all sets that are blocked
|
|
|
# on the same revision.
|
|
|
#
|
|
|
# Groups are merged when a common blocking ancestor is
|
|
|
# observed. For example, given two groups:
|
|
|
#
|
|
|
# revs [5, 4] waiting for 1
|
|
|
# revs [3, 2] waiting for 1
|
|
|
#
|
|
|
# These two groups will be merged when we process
|
|
|
# 1. In theory, we could have merged the groups when
|
|
|
# we added 2 to the group it is now in (we could have
|
|
|
# noticed the groups were both blocked on 1 then), but
|
|
|
# the way it works now makes the algorithm simpler.
|
|
|
#
|
|
|
# We also always keep the oldest subgroup first. We can
|
|
|
# probably improve the behavior by having the longest set
|
|
|
# first. That way, graph algorithms could minimise the length
|
|
|
# of parallel lines their drawing. This is currently not done.
|
|
|
targetidx = matching.pop(0)
|
|
|
trevs, tparents = groups[targetidx]
|
|
|
for i in matching:
|
|
|
gr = groups[i]
|
|
|
trevs.extend(gr[0])
|
|
|
tparents |= gr[1]
|
|
|
# delete all merged subgroups (except the one we kept)
|
|
|
# (starting from the last subgroup for performance and
|
|
|
# sanity reasons)
|
|
|
for i in reversed(matching):
|
|
|
del groups[i]
|
|
|
else:
|
|
|
# This is a new head. We create a new subgroup for it.
|
|
|
targetidx = len(groups)
|
|
|
groups.append(([], {rev}))
|
|
|
|
|
|
gr = groups[targetidx]
|
|
|
|
|
|
# We now add the current nodes to this subgroups. This is done
|
|
|
# after the subgroup merging because all elements from a subgroup
|
|
|
# that relied on this rev must precede it.
|
|
|
#
|
|
|
# we also update the <parents> set to include the parents of the
|
|
|
# new nodes.
|
|
|
if rev == currentrev: # only display stuff in rev
|
|
|
gr[0].append(rev)
|
|
|
gr[1].remove(rev)
|
|
|
parents = [p for p in parentsfunc(rev) if p > node.nullrev]
|
|
|
gr[1].update(parents)
|
|
|
for p in parents:
|
|
|
if p not in pendingset:
|
|
|
pendingset.add(p)
|
|
|
heappush(pendingheap, -p)
|
|
|
|
|
|
# Look for a subgroup to display
|
|
|
#
|
|
|
# When unblocked is empty (if clause), we were not waiting for any
|
|
|
# revisions during the first iteration (if no priority was given) or
|
|
|
# if we emitted a whole disconnected set of the graph (reached a
|
|
|
# root). In that case we arbitrarily take the oldest known
|
|
|
# subgroup. The heuristic could probably be better.
|
|
|
#
|
|
|
# Otherwise (elif clause) if the subgroup is blocked on
|
|
|
# a revision we just emitted, we can safely emit it as
|
|
|
# well.
|
|
|
if not unblocked:
|
|
|
if len(groups) > 1: # display other subset
|
|
|
targetidx = 1
|
|
|
gr = groups[1]
|
|
|
elif not gr[1] & unblocked:
|
|
|
gr = None
|
|
|
|
|
|
if gr is not None:
|
|
|
# update the set of awaited revisions with the one from the
|
|
|
# subgroup
|
|
|
unblocked |= gr[1]
|
|
|
# output all revisions in the subgroup
|
|
|
for r in gr[0]:
|
|
|
yield r
|
|
|
# delete the subgroup that you just output
|
|
|
# unless it is groups[0] in which case you just empty it.
|
|
|
if targetidx:
|
|
|
del groups[targetidx]
|
|
|
else:
|
|
|
gr[0][:] = []
|
|
|
# Check if we have some subgroup waiting for revisions we are not going to
|
|
|
# iterate over
|
|
|
for g in groups:
|
|
|
for r in g[0]:
|
|
|
yield r
|
|
|
|