upstream/mercurial-mirror Commit - r16177:b8c1a8a5

copies: fix mergecopies doc mapping direction

Matt Mackall -

r16177:b8c1a8a5 default

parent child

mercurial/copies.py

0 +1 -1

              # copies.py - copy detection for Mercurial
              #
              # Copyright 2008 Matt Mackall <mpm@selenic.com>
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              import util
              import heapq
              def _nonoverlap(d1, d2, d3):
                  "Return list of elements in d1 not in d2 or d3"
                  return sorted([d for d in d1 if d not in d3 and d not in d2])
              def _dirname(f):
                  s = f.rfind("/")
                  if s == -1:
                      return ""
                  return f[:s]
              def _dirs(files):
                  d = set()
                  for f in files:
                      f = _dirname(f)
                      while f not in d:
                          d.add(f)
                          f = _dirname(f)
                  return d
              def _findlimit(repo, a, b):
                  """Find the earliest revision that's an ancestor of a or b but not both,
                  None if no such revision exists.
                  """
                  # basic idea:
                  # - mark a and b with different sides
                  # - if a parent's children are all on the same side, the parent is
                  #   on that side, otherwise it is on no side
                  # - walk the graph in topological order with the help of a heap;
                  #   - add unseen parents to side map
                  #   - clear side of any parent that has children on different sides
                  #   - track number of interesting revs that might still be on a side
                  #   - track the lowest interesting rev seen
                  #   - quit when interesting revs is zero
                  cl = repo.changelog
                  working = len(cl) # pseudo rev for the working directory
                  if a is None:
                      a = working
                  if b is None:
                      b = working
                  side = {a: -1, b: 1}
                  visit = [-a, -b]
                  heapq.heapify(visit)
                  interesting = len(visit)
                  hascommonancestor = False
                  limit = working
                  while interesting:
                      r = -heapq.heappop(visit)
                      if r == working:
                          parents = [cl.rev(p) for p in repo.dirstate.parents()]
                      else:
                          parents = cl.parentrevs(r)
                      for p in parents:
                          if p < 0:
                              continue
                          if p not in side:
                              # first time we see p; add it to visit
                              side[p] = side[r]
                              if side[p]:
                                  interesting += 1
                              heapq.heappush(visit, -p)
                          elif side[p] and side[p] != side[r]:
                              # p was interesting but now we know better
                              side[p] = 0
                              interesting -= 1
                              hascommonancestor = True
                      if side[r]:
                          limit = r # lowest rev visited
                          interesting -= 1
                  if not hascommonancestor:
                      return None
                  return limit
              def _chain(src, dst, a, b):
                  '''chain two sets of copies a->b'''
                  t = a.copy()
                  for k, v in b.iteritems():
                      if v in t:
                          # found a chain
                          if t[v] != k:
                              # file wasn't renamed back to itself
                              t[k] = t[v]
                          if v not in dst:
                              # chain was a rename, not a copy
                              del t[v]
                      if v in src:
                          # file is a copy of an existing file
                          t[k] = v
                  # remove criss-crossed copies
                  for k, v in t.items():
                      if k in src and v in dst:
                          del t[k]
                  return t
              def _tracefile(fctx, actx):
                  '''return file context that is the ancestor of fctx present in actx'''
                  stop = actx.rev()
                  am = actx.manifest()
                  for f in fctx.ancestors():
                      if am.get(f.path(), None) == f.filenode():
                          return f
                      if f.rev() < stop:
                          return None
              def _dirstatecopies(d):
                  ds = d._repo.dirstate
                  c = ds.copies().copy()
                  for k in c.keys():
                      if ds[k] not in 'anm':
                          del c[k]
                  return c
              def _forwardcopies(a, b):
                  '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
                  # check for working copy
                  w = None
                  if b.rev() is None:
                      w = b
                      b = w.p1()
                      if a == b:
                          # short-circuit to avoid issues with merge states
                          return _dirstatecopies(w)
                  # find where new files came from
                  # we currently don't try to find where old files went, too expensive
                  # this means we can miss a case like 'hg rm b; hg cp a b'
                  cm = {}
                  for f in b:
                      if f not in a:
                          ofctx = _tracefile(b[f], a)
                          if ofctx:
                              cm[f] = ofctx.path()
                  # combine copies from dirstate if necessary
                  if w is not None:
                      cm = _chain(a, w, cm, _dirstatecopies(w))
                  return cm
              def _backwardcopies(a, b):
                  # because the forward mapping is 1:n, we can lose renames here
                  # in particular, we find renames better than copies
                  f = _forwardcopies(b, a)
                  r = {}
                  for k, v in f.iteritems():
                      r[v] = k
                  return r
              def pathcopies(x, y):
                  '''find {dst@y: src@x} copy mapping for directed compare'''
                  if x == y or not x or not y:
                      return {}
                  a = y.ancestor(x)
                  if a == x:
                      return _forwardcopies(x, y)
                  if a == y:
                      return _backwardcopies(x, y)
                  return _chain(x, y, _backwardcopies(x, a), _forwardcopies(a, y))
              def mergecopies(repo, c1, c2, ca):
                  """
                  Find moves and copies between context c1 and c2 that are relevant
                  for merging.
                  Returns two dicts, "copy" and "diverge".
-                 "copy" is a mapping from source name -> destination name,
+                 "copy" is a mapping from destination name -> source name,
                  where source is in c1 and destination is in c2 or vice-versa.
                  "diverge" is a mapping of source name -> list of destination names
                  for divergent renames.
                  """
                  # avoid silly behavior for update from empty dir
                  if not c1 or not c2 or c1 == c2:
                      return {}, {}
                  # avoid silly behavior for parent -> working dir
                  if c2.node() is None and c1.node() == repo.dirstate.p1():
                      return repo.dirstate.copies(), {}
                  limit = _findlimit(repo, c1.rev(), c2.rev())
                  if limit is None:
                      # no common ancestor, no copies
                      return {}, {}
                  m1 = c1.manifest()
                  m2 = c2.manifest()
                  ma = ca.manifest()
                  def makectx(f, n):
                      if len(n) != 20: # in a working context?
                          if c1.rev() is None:
                              return c1.filectx(f)
                          return c2.filectx(f)
                      return repo.filectx(f, fileid=n)
                  ctx = util.lrucachefunc(makectx)
                  copy = {}
                  fullcopy = {}
                  diverge = {}
                  def related(f1, f2, limit):
                      # Walk back to common ancestor to see if the two files originate
                      # from the same file. Since workingfilectx's rev() is None it messes
                      # up the integer comparison logic, hence the pre-step check for
                      # None (f1 and f2 can only be workingfilectx's initially).
                      if f1 == f2:
                          return f1 # a match
                      g1, g2 = f1.ancestors(), f2.ancestors()
                      try:
                          f1r, f2r = f1.rev(), f2.rev()
                          if f1r is None:
                              f1 = g1.next()
                          if f2r is None:
                              f2 = g2.next()
                          while True:
                              f1r, f2r = f1.rev(), f2.rev()
                              if f1r > f2r:
                                  f1 = g1.next()
                              elif f2r > f1r:
                                  f2 = g2.next()
                              elif f1 == f2:
                                  return f1 # a match
                              elif f1r == f2r or f1r < limit or f2r < limit:
                                  return False # copy no longer relevant
                      except StopIteration:
                          return False
                  def checkcopies(f, m1, m2):
                      '''check possible copies of f from m1 to m2'''
                      of = None
                      seen = set([f])
                      for oc in ctx(f, m1[f]).ancestors():
                          ocr = oc.rev()
                          of = oc.path()
                          if of in seen:
                              # check limit late - grab last rename before
                              if ocr < limit:
                                  break
                              continue
                          seen.add(of)
                          fullcopy[f] = of # remember for dir rename detection
                          if of not in m2:
                              continue # no match, keep looking
                          if m2[of] == ma.get(of):
                              break # no merge needed, quit early
                          c2 = ctx(of, m2[of])
                          cr = related(oc, c2, ca.rev())
                          if cr and (of == f or of == c2.path()): # non-divergent
                              copy[f] = of
                              of = None
                              break
                      if of in ma:
                          diverge.setdefault(of, []).append(f)
                  repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                  u1 = _nonoverlap(m1, m2, ma)
                  u2 = _nonoverlap(m2, m1, ma)
                  if u1:
                      repo.ui.debug("  unmatched files in local:\n   %s\n"
                                    % "\n   ".join(u1))
                  if u2:
                      repo.ui.debug("  unmatched files in other:\n   %s\n"
                                    % "\n   ".join(u2))
                  for f in u1:
                      checkcopies(f, m1, m2)
                  for f in u2:
                      checkcopies(f, m2, m1)
                  diverge2 = set()
                  for of, fl in diverge.items():
                      if len(fl) == 1 or of in c2:
                          del diverge[of] # not actually divergent, or not a rename
                      else:
                          diverge2.update(fl) # reverse map for below
                  if fullcopy:
                      repo.ui.debug("  all copies found (* = to merge, ! = divergent):\n")
                      for f in fullcopy:
                          note = ""
                          if f in copy:
                              note += "*"
                          if f in diverge2:
                              note += "!"
                          repo.ui.debug("   %s -> %s %s\n" % (f, fullcopy[f], note))
                  del diverge2
                  if not fullcopy:
                      return copy, diverge
                  repo.ui.debug("  checking for directory renames\n")
                  # generate a directory move map
                  d1, d2 = _dirs(m1), _dirs(m2)
                  invalid = set()
                  dirmove = {}
                  # examine each file copy for a potential directory move, which is
                  # when all the files in a directory are moved to a new directory
                  for dst, src in fullcopy.iteritems():
                      dsrc, ddst = _dirname(src), _dirname(dst)
                      if dsrc in invalid:
                          # already seen to be uninteresting
                          continue
                      elif dsrc in d1 and ddst in d1:
                          # directory wasn't entirely moved locally
                          invalid.add(dsrc)
                      elif dsrc in d2 and ddst in d2:
                          # directory wasn't entirely moved remotely
                          invalid.add(dsrc)
                      elif dsrc in dirmove and dirmove[dsrc] != ddst:
                          # files from the same directory moved to two different places
                          invalid.add(dsrc)
                      else:
                          # looks good so far
                          dirmove[dsrc + "/"] = ddst + "/"
                  for i in invalid:
                      if i in dirmove:
                          del dirmove[i]
                  del d1, d2, invalid
                  if not dirmove:
                      return copy, diverge
                  for d in dirmove:
                      repo.ui.debug("  dir %s -> %s\n" % (d, dirmove[d]))
                  # check unaccounted nonoverlapping files against directory moves
                  for f in u1 + u2:
                      if f not in fullcopy:
                          for d in dirmove:
                              if f.startswith(d):
                                  # new file added in a directory that was moved, move it
                                  df = dirmove[d] + f[len(d):]
                                  if df not in copy:
                                      copy[f] = df
                                      repo.ui.debug("  file %s -> %s\n" % (f, copy[f]))
                                  break
                  return copy, diverge

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages