##// END OF EJS Templates
dagop: comment why revancestors() doesn't heapify input revs at once...
Yuya Nishihara -
r32998:c7da57bb default
parent child Browse files
Show More
@@ -1,424 +1,427 b''
1 1 # dagop.py - graph ancestry and topology algorithm for revset
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11
12 12 from . import (
13 13 error,
14 14 mdiff,
15 15 node,
16 16 patch,
17 17 smartset,
18 18 )
19 19
20 20 baseset = smartset.baseset
21 21 generatorset = smartset.generatorset
22 22
def _genrevancestors(repo, revs, followfirst):
    """Yield ancestors of 'revs' (the input revs included), highest
    revision number first.

    If followfirst is true, only the first parent of each changeset is
    considered when walking up the graph.
    """
    if followfirst:
        # slice parent tuples to their first element only
        cut = 1
    else:
        cut = None
    cl = repo.changelog

    # load input revs lazily to heap so earlier revisions can be yielded
    # without fully computing the input revs
    revs.sort(reverse=True)
    irevs = iter(revs)
    h = []  # max-heap of pending revisions (values stored negated)

    inputrev = next(irevs, None)
    if inputrev is not None:
        heapq.heappush(h, -inputrev)

    seen = set()
    while h:
        current = -heapq.heappop(h)
        if current == inputrev:
            # we've reached the latest loaded input rev; pull the next one
            # onto the heap (input is sorted descending, so any larger rev
            # has already been processed)
            inputrev = next(irevs, None)
            if inputrev is not None:
                heapq.heappush(h, -inputrev)
        if current not in seen:
            seen.add(current)
            yield current
            try:
                for parent in cl.parentrevs(current)[:cut]:
                    if parent != node.nullrev:
                        heapq.heappush(h, -parent)
            except error.WdirUnsupported:
                # working-directory revision has no changelog entry; get
                # parents from the context objects instead
                for parent in repo[current].parents()[:cut]:
                    if parent.rev() != node.nullrev:
                        heapq.heappush(h, -parent.rev())
55 58
def revancestors(repo, revs, followfirst):
    """Like revlog.ancestors(), but supports followfirst."""
    return generatorset(_genrevancestors(repo, revs, followfirst),
                        iterasc=False)
60 63
def revdescendants(repo, revs, followfirst):
    """Like revlog.descendants() but supports followfirst."""
    cut = 1 if followfirst else None

    def gen():
        cl = repo.changelog
        # XXX this should be 'parentset.min()' assuming 'parentset' is a
        # smartset (and if it is not, it should.)
        lowest = min(revs)
        nullrev = node.nullrev
        if lowest == nullrev:
            # Are there nodes with a null first parent and a non-null
            # second one? Maybe. Do we care? Probably not.
            for rev in cl:
                yield rev
        else:
            seen = set(revs)
            for rev in cl.revs(lowest + 1):
                for prev in cl.parentrevs(rev)[:cut]:
                    if prev != nullrev and prev in seen:
                        seen.add(rev)
                        yield rev
                        break

    return generatorset(gen(), iterasc=True)
89 92
90 93 def _reachablerootspure(repo, minroot, roots, heads, includepath):
91 94 """return (heads(::<roots> and ::<heads>))
92 95
93 96 If includepath is True, return (<roots>::<heads>)."""
94 97 if not roots:
95 98 return []
96 99 parentrevs = repo.changelog.parentrevs
97 100 roots = set(roots)
98 101 visit = list(heads)
99 102 reachable = set()
100 103 seen = {}
101 104 # prefetch all the things! (because python is slow)
102 105 reached = reachable.add
103 106 dovisit = visit.append
104 107 nextvisit = visit.pop
105 108 # open-code the post-order traversal due to the tiny size of
106 109 # sys.getrecursionlimit()
107 110 while visit:
108 111 rev = nextvisit()
109 112 if rev in roots:
110 113 reached(rev)
111 114 if not includepath:
112 115 continue
113 116 parents = parentrevs(rev)
114 117 seen[rev] = parents
115 118 for parent in parents:
116 119 if parent >= minroot and parent not in seen:
117 120 dovisit(parent)
118 121 if not reachable:
119 122 return baseset()
120 123 if not includepath:
121 124 return reachable
122 125 for rev in sorted(seen):
123 126 for parent in seen[rev]:
124 127 if parent in reachable:
125 128 reached(rev)
126 129 return reachable
127 130
def reachableroots(repo, roots, heads, includepath=False):
    """return (heads(::<roots> and ::<heads>))

    If includepath is True, return (<roots>::<heads>)."""
    if not roots:
        return baseset()
    minroot = roots.min()
    rootlist = list(roots)
    headlist = list(heads)
    try:
        # prefer the native (C) implementation when the changelog has one
        found = repo.changelog.reachableroots(minroot, headlist, rootlist,
                                              includepath)
    except AttributeError:
        found = _reachablerootspure(repo, minroot, rootlist, headlist,
                                    includepath)
    result = baseset(found)
    result.sort()
    return result
144 147
def _changesrange(fctx1, fctx2, linerange2, diffopts):
    """Return `(diffinrange, linerange1)` where `diffinrange` is True
    if diff from fctx2 to fctx1 has changes in linerange2 and
    `linerange1` is the new line range for fctx1.
    """
    allblocks = mdiff.allblocks(fctx1.data(), fctx2.data(), diffopts)
    inrangeblocks, linerange1 = mdiff.blocksinrange(allblocks, linerange2)
    changed = any(btype == '!' for _, btype in inrangeblocks)
    return changed, linerange1
154 157
def blockancestors(fctx, fromline, toline, followfirst=False):
    """Yield ancestors of `fctx` with respect to the block of lines within
    `fromline`-`toline` range.

    Each value yielded is a `(fctx, (fromline, toline))` pair giving the
    file context and the block's line range in that revision.
    """
    diffopts = patch.diffopts(fctx._repo.ui)
    introrev = fctx.introrev()
    if fctx.rev() != introrev:
        # make sure we start the walk from the revision that actually
        # introduced this file node
        fctx = fctx.filectx(fctx.filenode(), changeid=introrev)
    # visit is keyed by (linkrev, filenode); max(visit) thus selects the
    # candidate with the highest linkrev to process next
    visit = {(fctx.linkrev(), fctx.filenode()): (fctx, (fromline, toline))}
    while visit:
        c, linerange2 = visit.pop(max(visit))
        pl = c.parents()
        if followfirst:
            pl = pl[:1]
        if not pl:
            # The block originates from the initial revision.
            yield c, linerange2
            continue
        inrange = False
        for p in pl:
            inrangep, linerange1 = _changesrange(p, c, linerange2, diffopts)
            inrange = inrange or inrangep
            if linerange1[0] == linerange1[1]:
                # Parent's linerange is empty, meaning that the block got
                # introduced in this revision; no need to go further in this
                # branch.
                continue
            # Set _descendantrev with 'c' (a known descendant) so that, when
            # _adjustlinkrev is called for 'p', it receives this descendant
            # (as srcrev) instead possibly topmost introrev.
            p._descendantrev = c.rev()
            visit[p.linkrev(), p.filenode()] = p, linerange1
        if inrange:
            yield c, linerange2
189 192
def blockdescendants(fctx, fromline, toline):
    """Yield descendants of `fctx` with respect to the block of lines within
    `fromline`-`toline` range.

    Each value yielded is a `(fctx, (fromline, toline))` pair, like
    blockancestors() but walking towards descendants.
    """
    # First possibly yield 'fctx' if it has changes in range with respect to
    # its parents.
    try:
        c, linerange1 = next(blockancestors(fctx, fromline, toline))
    except StopIteration:
        pass
    else:
        if c == fctx:
            yield c, linerange1

    diffopts = patch.diffopts(fctx._repo.ui)
    fl = fctx.filelog()
    # map of filelog rev -> (filectx, line range) for revisions already
    # processed; seeded with the starting context
    seen = {fctx.filerev(): (fctx, (fromline, toline))}
    # NOTE(review): assumes fl.descendants() yields revisions in an order
    # where parents appear before children, so 'seen' lookups succeed --
    # confirm against the filelog API
    for i in fl.descendants([fctx.filerev()]):
        c = fctx.filectx(i)
        inrange = False
        for x in fl.parentrevs(i):
            try:
                p, linerange2 = seen[x]
            except KeyError:
                # nullrev or other branch
                continue
            inrangep, linerange1 = _changesrange(c, p, linerange2, diffopts)
            inrange = inrange or inrangep
            # If revision 'i' has been seen (it's a merge), we assume that its
            # line range is the same independently of which parents was used
            # to compute it.
            assert i not in seen or seen[i][1] == linerange1, (
                'computed line range for %s is not consistent between '
                'ancestor branches' % c)
            seen[i] = c, linerange1
        if inrange:
            yield c, linerange1
227 230
def toposort(revs, parentsfunc, firstbranch=()):
    """Yield revisions from heads to roots one (topo) branch at a time.

    This function aims to be used by a graph generator that wishes to minimize
    the number of parallel branches and their interleaving.

    'revs' must support sort(); it is sorted in place, highest first.
    'parentsfunc(rev)' must return the parent revisions of 'rev'
    (node.nullrev entries are ignored).  'firstbranch' optionally pre-seeds
    the set of unblocked revisions to prioritize a branch.

    Example iteration order (numbers show the "true" order in a changelog):

      o  4
      |
      o  1
      |
      | o  3
      | |
      | o  2
      |/
      o  0

    Note that the ancestors of merges are understood by the current
    algorithm to be on the same branch. This means no reordering will
    occur behind a merge.
    """

    ### Quick summary of the algorithm
    #
    # This function is based around a "retention" principle. We keep revisions
    # in memory until we are ready to emit a whole branch that immediately
    # "merges" into an existing one. This reduces the number of parallel
    # branches with interleaved revisions.
    #
    # During iteration revs are split into two groups:
    # A) revision already emitted
    # B) revision in "retention". They are stored as different subgroups.
    #
    # for each REV, we do the following logic:
    #
    #   1) if REV is a parent of (A), we will emit it. If there is a
    #   retention group ((B) above) that is blocked on REV being
    #   available, we emit all the revisions out of that retention
    #   group first.
    #
    #   2) else, we'll search for a subgroup in (B) awaiting for REV to be
    #   available, if such subgroup exist, we add REV to it and the subgroup is
    #   now awaiting for REV.parents() to be available.
    #
    #   3) finally if no such group existed in (B), we create a new subgroup.
    #
    #
    # To bootstrap the algorithm, we emit the tipmost revision (which
    # puts it in group (A) from above).

    revs.sort(reverse=True)

    # Set of parents of revision that have been emitted. They can be considered
    # unblocked as the graph generator is already aware of them so there is no
    # need to delay the revisions that reference them.
    #
    # If someone wants to prioritize a branch over the others, pre-filling this
    # set will force all other branches to wait until this branch is ready to be
    # emitted.
    unblocked = set(firstbranch)

    # list of groups waiting to be displayed, each group is defined by:
    #
    #   (revs:    lists of revs waiting to be displayed,
    #    blocked: set of that cannot be displayed before those in 'revs')
    #
    # The second value ('blocked') correspond to parents of any revision in the
    # group ('revs') that is not itself contained in the group. The main idea
    # of this algorithm is to delay as much as possible the emission of any
    # revision.  This means waiting for the moment we are about to display
    # these parents to display the revs in a group.
    #
    # This first implementation is smart until it encounters a merge: it will
    # emit revs as soon as any parent is about to be emitted and can grow an
    # arbitrary number of revs in 'blocked'. In practice this mean we properly
    # retains new branches but gives up on any special ordering for ancestors
    # of merges. The implementation can be improved to handle this better.
    #
    # The first subgroup is special. It corresponds to all the revision that
    # were already emitted. The 'revs' lists is expected to be empty and the
    # 'blocked' set contains the parents revisions of already emitted revision.
    #
    # You could pre-seed the <parents> set of groups[0] to a specific
    # changesets to select what the first emitted branch should be.
    groups = [([], unblocked)]
    pendingheap = []
    pendingset = set()

    heapq.heapify(pendingheap)
    heappop = heapq.heappop
    heappush = heapq.heappush
    for currentrev in revs:
        # Heap works with smallest element, we want highest so we invert
        if currentrev not in pendingset:
            heappush(pendingheap, -currentrev)
            pendingset.add(currentrev)
        # iterates on pending rev until after the current rev have been
        # processed.
        rev = None
        while rev != currentrev:
            rev = -heappop(pendingheap)
            pendingset.remove(rev)

            # Seek for a subgroup blocked, waiting for the current revision.
            matching = [i for i, g in enumerate(groups) if rev in g[1]]

            if matching:
                # The main idea is to gather together all sets that are blocked
                # on the same revision.
                #
                # Groups are merged when a common blocking ancestor is
                # observed. For example, given two groups:
                #
                # revs [5, 4] waiting for 1
                # revs [3, 2] waiting for 1
                #
                # These two groups will be merged when we process
                # 1. In theory, we could have merged the groups when
                # we added 2 to the group it is now in (we could have
                # noticed the groups were both blocked on 1 then), but
                # the way it works now makes the algorithm simpler.
                #
                # We also always keep the oldest subgroup first. We can
                # probably improve the behavior by having the longest set
                # first. That way, graph algorithms could minimise the length
                # of parallel lines their drawing. This is currently not done.
                targetidx = matching.pop(0)
                trevs, tparents = groups[targetidx]
                for i in matching:
                    gr = groups[i]
                    trevs.extend(gr[0])
                    tparents |= gr[1]
                # delete all merged subgroups (except the one we kept)
                # (starting from the last subgroup for performance and
                # sanity reasons)
                for i in reversed(matching):
                    del groups[i]
            else:
                # This is a new head. We create a new subgroup for it.
                targetidx = len(groups)
                groups.append(([], {rev}))

            gr = groups[targetidx]

            # We now add the current nodes to this subgroups. This is done
            # after the subgroup merging because all elements from a subgroup
            # that relied on this rev must precede it.
            #
            # we also update the <parents> set to include the parents of the
            # new nodes.
            if rev == currentrev: # only display stuff in rev
                gr[0].append(rev)
            gr[1].remove(rev)
            parents = [p for p in parentsfunc(rev) if p > node.nullrev]
            gr[1].update(parents)
            for p in parents:
                if p not in pendingset:
                    pendingset.add(p)
                    heappush(pendingheap, -p)

            # Look for a subgroup to display
            #
            # When unblocked is empty (if clause), we were not waiting for any
            # revisions during the first iteration (if no priority was given) or
            # if we emitted a whole disconnected set of the graph (reached a
            # root).  In that case we arbitrarily take the oldest known
            # subgroup. The heuristic could probably be better.
            #
            # Otherwise (elif clause) if the subgroup is blocked on
            # a revision we just emitted, we can safely emit it as
            # well.
            if not unblocked:
                if len(groups) > 1:  # display other subset
                    targetidx = 1
                    gr = groups[1]
            elif not gr[1] & unblocked:
                gr = None

            if gr is not None:
                # update the set of awaited revisions with the one from the
                # subgroup
                unblocked |= gr[1]
                # output all revisions in the subgroup
                for r in gr[0]:
                    yield r
                # delete the subgroup that you just output
                # unless it is groups[0] in which case you just empty it.
                if targetidx:
                    del groups[targetidx]
                else:
                    gr[0][:] = []
    # Check if we have some subgroup waiting for revisions we are not going to
    # iterate over
    for g in groups:
        for r in g[0]:
            yield r
General Comments 0
You need to be logged in to leave comments. Login now