##// END OF EJS Templates
sidedatacopies: directly fetch copies information from sidedata...
sidedatacopies: directly fetch copies information from sidedata When using the sidedata mode, we don't need a complicated and expensive `context` object. Instead we directly fetch copies information from the sidedata (through a changelogrevision object). More optimisations coming. revision: large amount; added files: large amount; rename small amount; c3b14617fbd7 9ba6ab77fd29 filelog: ! wall 3.679613 comb 3.680000 user 3.580000 sys 0.100000 (median of 3) base: ! wall 8.884369 comb 8.880000 user 8.850000 sys 0.030000 (median of 3) before: ! wall 4.681985 comb 4.680000 user 4.640000 sys 0.040000 (median of 3) after: ! wall 3.955894 comb 3.950000 user 3.940000 sys 0.010000 (median of 3) revision: large amount; added files: small amount; rename small amount; c3b14617fbd7 f650a9b140d2 filelog: ! wall 0.003357 comb 0.010000 user 0.010000 sys 0.000000 (median of 781) base: ! wall 12.398524 comb 12.400000 user 12.330000 sys 0.070000 (median of 3) before: ! wall 6.459592 comb 6.470000 user 6.390000 sys 0.080000 (median of 3) after: ! wall 5.505774 comb 5.500000 user 5.410000 sys 0.090000 (median of 3) revision: large amount; added files: large amount; rename large amount; 08ea3258278e d9fa043f30c0 filelog: ! wall 2.754687 comb 2.760000 user 2.650000 sys 0.110000 (median of 4) base: ! wall 1.423166 comb 1.420000 user 1.400000 sys 0.020000 (median of 8) before: ! wall 0.961048 comb 0.960000 user 0.940000 sys 0.020000 (median of 11) after: ! wall 0.882950 comb 0.880000 user 0.880000 sys 0.000000 (median of 11) revision: small amount; added files: large amount; rename large amount; df6f7a526b60 a83dc6a2d56f filelog: ! wall 1.552293 comb 1.550000 user 1.510000 sys 0.040000 (median of 6 base: ! wall 0.022662 comb 0.020000 user 0.020000 sys 0.000000 (median of 128) before: ! wall 0.021649 comb 0.020000 user 0.020000 sys 0.000000 (median of 135) after: ! 
wall 0.020951 comb 0.020000 user 0.020000 sys 0.000000 (median of 141) revision: small amount; added files: large amount; rename small amount; 4aa4e1f8e19a 169138063d63 filelog: ! wall 1.500983 comb 1.500000 user 1.420000 sys 0.080000 (median of 7) base: ! wall 0.006956 comb 0.010000 user 0.010000 sys 0.000000 (median of 392) before: ! wall 0.004022 comb 0.000000 user 0.000000 sys 0.000000 (median of 735) after: ! wall 0.003988 comb 0.000000 user 0.000000 sys 0.000000 (median of 736) revision: small amount; added files: small amount; rename small amount; 4bc173b045a6 964879152e2e filelog: ! wall 0.011745 comb 0.020000 user 0.020000 sys 0.000000 (median of 250) base: ! wall 0.000156 comb 0.000000 user 0.000000 sys 0.000000 (median of 17180) before: ! wall 0.000118 comb 0.000000 user 0.000000 sys 0.000000 (median of 19170) after: ! wall 0.000097 comb 0.000000 user 0.000000 sys 0.000000 (median of 27276) revision: medium amount; added files: large amount; rename medium amount; c95f1ced15f2 2c68e87c3efe filelog: ! wall 3.228230 comb 3.230000 user 3.110000 sys 0.120000 (median of 4) base: ! wall 0.997640 comb 1.000000 user 0.980000 sys 0.020000 (median of 10) before: ! wall 0.679500 comb 0.680000 user 0.680000 sys 0.000000 (median of 15) after: ! wall 0.596779 comb 0.600000 user 0.600000 sys 0.000000 (median of 17) revision: medium amount; added files: medium amount; rename small amount; d343da0c55a8 d7746d32bf9d filelog: ! wall 1.052501 comb 1.060000 user 1.040000 sys 0.020000 (median of 10 base: ! wall 0.214519 comb 0.220000 user 0.220000 sys 0.000000 (median of 45) before: ! wall 0.149675 comb 0.150000 user 0.150000 sys 0.000000 (median of 66) after: ! wall 0.130786 comb 0.130000 user 0.130000 sys 0.000000 (median of 75) Differential Revision: https://phab.mercurial-scm.org/D7072

File last commit:

r43551:675c776f default
r43551:675c776f default
Show More
copies.py
968 lines | 32.2 KiB | text/x-python | PythonLexer
Matt Mackall
copies: move findcopies code to its own module...
r6274 # copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Matt Mackall
copies: move findcopies code to its own module...
r6274
Gregory Szorc
copies: use absolute_import
r25924 from __future__ import absolute_import
Pulkit Goyal
copytrace: move fast heuristic copytracing algorithm to core...
r34180 import collections
Simon Heimberg
separate import lines from mercurial and general python modules
r8312 import heapq
Pulkit Goyal
copytrace: move fast heuristic copytracing algorithm to core...
r34180 import os
Matt Mackall
copies: move findcopies code to its own module...
r6274
Pulkit Goyal
copies: add a config to limit the number of candidates to check in heuristics...
r34847 from .i18n import _
sidedatacopies: deal with upgrading and downgrading to that format...
r43418
from .revlogutils.flagutil import REVIDX_SIDEDATA
Gregory Szorc
copies: use absolute_import
r25924 from . import (
sidedatacopies: move various copies related function to the copies modules...
r43417 error,
Yuya Nishihara
copies: use intersectmatchers() in non-merge p1 optimization...
r33869 match as matchmod,
Durham Goode
copies: optimize forward copy detection logic for rebases...
r28000 node,
Gregory Szorc
copies: use absolute_import
r25924 pathutil,
Gregory Szorc
py3: finish porting iteritems() to pycompat and remove source transformer...
r43376 pycompat,
Gregory Szorc
copies: use absolute_import
r25924 util,
)
sidedatacopies: deal with upgrading and downgrading to that format...
r43418
from .revlogutils import sidedata as sidedatamod
Augie Fackler
formatting: blacken the codebase...
r43346 from .utils import stringutil
Gregory Szorc
copies: use absolute_import
r25924
Martin von Zweigbergk
copies: inline _chainandfilter() to prepare for next patch...
def _filter(src, dst, t):
    """filters out invalid copies after chaining

    ``t`` is a ``{destination: source}`` mapping and is modified in place;
    nothing is returned. ``src`` and ``dst`` only need to support the ``in``
    operator for file names.
    """

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    #   case src mid dst result
    #   1     x   y   -    -
    #   2     x   y   y   x->y
    #   3     x   y   x    -
    #   4     x   y   z   x->z
    #   5     -   x   y    -
    #   6     x   x   y   x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    # Iterate over a snapshot because entries are deleted while looping.
    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]
Augie Fackler
formatting: blacken the codebase...
r43346
Martin von Zweigbergk
copies: split up _chain() in naive chaining and filtering steps...
def _chain(a, b):
    """chain two sets of copies 'a' and 'b'

    Both arguments are ``{destination: source}`` mappings. A copy of ``a`` is
    returned in which every entry of ``b`` has been added; when the source of
    an entry in ``b`` is itself a destination in the result, the entry is
    rewritten to point at that earlier source. Neither input is modified.
    """
    t = a.copy()
    for k, v in pycompat.iteritems(b):
        if v in t:
            # 'v' was itself copied from somewhere: follow the chain
            t[k] = t[v]
        else:
            t[k] = v
    return t
Augie Fackler
formatting: blacken the codebase...
r43346
copies: drop the findlimit logic...
def _tracefile(fctx, am, basemf):
    """return the path of the ancestor of fctx present in ancestor manifest am

    The ancestors of ``fctx`` are walked in the order ``fctx.ancestors()``
    yields them; the path of the first one whose file node matches the entry
    for that path in ``am`` (or in ``basemf``, when it is provided and
    non-empty) is returned. ``None`` is returned when no ancestor matches.

    Note: we used to try and stop after a given limit, however checking if that
    limit is reached turned out to be very expensive. we are better off
    disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        fnode = f.filenode()
        if am.get(path, None) == fnode:
            return path
        if basemf and basemf.get(path, None) == fnode:
            return path
    return None
Matt Mackall
copies: rewrite copy detection for non-merge users...
r15775
Augie Fackler
formatting: blacken the codebase...
r43346
Martin von Zweigbergk
copies: respect narrowmatcher in "parent -> working dir" case...
def _dirstatecopies(repo, match=None):
    """return a filtered copy of the copies recorded in the dirstate

    An entry is dropped when the dirstate state of its key is not one of
    ``a``, ``n`` or ``m``, or when ``match`` is given and rejects the key.
    The mapping held by the dirstate itself is left untouched.
    """
    ds = repo.dirstate
    c = ds.copies().copy()
    # Iterate over a snapshot of the keys because entries are deleted below.
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c
Augie Fackler
formatting: blacken the codebase...
r43346
Durham Goode
copies: add matcher parameter to copy logic...
def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.

    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.

    The result is whatever ``b.manifest().filesnotin(a.manifest(),
    match=match)`` returns.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)
Durham Goode
copy: move _forwardcopies file logic to a function...
r24011
Augie Fackler
formatting: blacken the codebase...
r43346
Martin von Zweigbergk
copies: extract function for deciding whether to use changeset-centric algos...
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms

    Returns True when the repository stores copies in changeset sidedata, or
    when ``experimental.copies.read-from`` is set to ``changeset-only`` or
    ``compatibility``; False otherwise.
    """
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource
Martin von Zweigbergk
copies: extract function for deciding whether to use changeset-centric algos...
r42284
Augie Fackler
formatting: blacken the codebase...
r43346
Martin von Zweigbergk
copies: follow copies across merge base without source file (issue6163)...
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a ``{destination: source}`` dict. When the changeset-centric
    algorithm is enabled the work is delegated to _changesetforwardcopies();
    otherwise every file present in ``b`` but missing from ``a`` is traced
    back through its file ancestors with _tracefile().
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    # NOTE(review): the text this block was reconstructed from had its
    # whitespace collapsed; confirm the spacing inside the debug message
    # literals below against the canonical file.
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # Shared by every file context traced below.
    ancestrycontext = repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    # Sorted so that files are processed in a deterministic order.
    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
Augie Fackler
formatting: blacken the codebase...
r43346
copies: extract data extraction into a `revinfo` function...
def _revinfogetter(repo):
    """return a function that returns multiple data given a <rev>

    The returned function yields a 5-tuple:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    """
    cl = repo.changelog
    parents = cl.parentrevs

    if repo.filecopiesmode == b'changeset-sidedata':
        # Read everything from the changelog revision object; no changectx
        # is instantiated on this path.
        changelogrevision = cl.changelogrevision

        def revinfo(rev):
            p1, p2 = parents(rev)
            c = changelogrevision(rev)
            p1copies = c.p1copies
            p2copies = c.p2copies
            removed = c.filesremoved
            return p1, p2, p1copies, p2copies, removed

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed

    return revinfo
Martin von Zweigbergk
copies: do copy tracing based on ctx.p[12]copies() if configured...
def _changesetforwardcopies(a, b, match):
    """find {dst@b: src@a} copies using per-changeset copy information

    Revisions between ``a`` and ``b`` are visited in increasing revision
    number order (via a heap); the copies accumulated for a revision are
    chained with the copies each child recorded against that parent, and
    files removed by the child are dropped. An empty dict is returned when
    ``a`` is the null revision or the same revision as ``b``.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    # Build a parent -> [children] map restricted to the missing revisions.
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            children.setdefault(p, []).append(r)

    # Parents that are not themselves missing are where the walk starts.
    roots = set(children) - set(missingrevs)
    work = list(roots)
    all_copies = {r: {} for r in roots}
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        r = heapq.heappop(work)
        copies = all_copies.pop(r)
        if r == b.rev():
            return copies
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            for f in removed:
                if f in newcopies:
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                heapq.heappush(work, c)
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    # Presumably unreachable: the loop is expected to return once it pops
    # b.rev() from the heap.
    assert False
Augie Fackler
formatting: blacken the codebase...
r43346
Martin von Zweigbergk
copies: follow copies across merge base without source file (issue6163)...
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b

    ``base`` defaults to ``a``. ``match`` is passed through the repository's
    ``narrowmatch()`` before use. When ``b`` is the working copy
    (``b.rev() is None``), committed copies up to ``b.p1()`` are chained with
    the copies recorded in the dirstate.
    """
    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies
Matt Mackall
copies: rewrite copy detection for non-merge users...
r15775
Augie Fackler
formatting: blacken the codebase...
r43346
Martin von Zweigbergk
copies: make _backwardrenames() filter out copies by destination...
def _backwardrenames(a, b, match):
    """find {dst@b: src@a} renames where b is an ancestor of a

    The forward copies from ``b`` to ``a`` are computed and reversed. Entries
    whose original source is still present in ``a`` are plain copies rather
    than renames and are skipped. An empty dict is returned when
    ``experimental.copytrace`` is set to ``off``.
    """
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    # Sorted so that, on a 1:n rename, the entry that wins is deterministic.
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
Augie Fackler
formatting: blacken the codebase...
r43346
Durham Goode
copies: add matcher parameter to copy logic...
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Depending on how ``x`` and ``y`` relate through ``y.ancestor(x)``, the
    search is done forward (``x`` is the ancestor), backward (``y`` is the
    ancestor) or as a combination of both through the common ancestor.
    An empty dict is returned when ``x == y`` or when either context is falsy.
    """
    repo = x._repo
    # NOTE(review): the text this block was reconstructed from had its
    # whitespace collapsed; confirm the spacing inside the debug message
    # literals below against the canonical file.
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    # Invalid entries are only filtered once, after all chaining is done.
    _filter(x, y, copies)
    return copies
Matt Mackall
copies: split the copies api for "normal" and merge cases (API)
r15774
Augie Fackler
formatting: blacken the codebase...
r43346
Pierre-Yves David
mergecopies: rename 'ca' to 'base'...
r30186 def mergecopies(repo, c1, c2, base):
Matt Mackall
copies: move findcopies code to its own module...
r6274 """
Martin von Zweigbergk
copies: move comment about implementation of mergecopies() to end...
r42287 Finds moves and copies between context c1 and c2 that are relevant for
Pulkit Goyal
copytrace: move the default copytracing algorithm in a new function...
r34080 merging. 'base' will be used as the merge base.
Copytracing is used in commands like rebase, merge, unshelve, etc to merge
files that were moved/ copied in one merge parent and modified in another.
For example:
Pulkit Goyal
copies: add more details to the documentation of mergecopies()...
r33821
o ---> 4 another commit
|
| o ---> 3 commit that modifies a.txt
| /
o / ---> 2 commit that moves a.txt to b.txt
|/
o ---> 1 merge base
If we try to rebase revision 3 on revision 4, since there is no a.txt in
revision 4, and if user have copytrace disabled, we prints the following
message:
```other changed <file> which local deleted```
Gábor Stefanik
graft: support grafting changes to new file in renamed directory (issue5436)
r30581 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
"dirmove".
Matt Mackall
copies: add docstring for mergecopies
r16168
Matt Mackall
copies: fix mergecopies doc mapping direction
r16177 "copy" is a mapping from destination name -> source name,
Matt Mackall
copies: add docstring for mergecopies
r16168 where source is in c1 and destination is in c2 or vice-versa.
Siddharth Agarwal
copies: separate moves via directory renames from explicit copies...
r18134 "movewithdir" is a mapping from source name -> destination name,
where the file at source present in one context but not the other
needs to be moved to destination by the merge process, because the
other context moved the directory it is in.
Matt Mackall
copies: add docstring for mergecopies
r16168 "diverge" is a mapping of source name -> list of destination names
for divergent renames.
Thomas Arendsen Hein
merge: warn about file deleted in one branch and renamed in other (issue3074)...
r16794
"renamedelete" is a mapping of source name -> list of destination
names for files deleted in c1 that were renamed in c2 or vice-versa.
Gábor Stefanik
graft: support grafting changes to new file in renamed directory (issue5436)
r30581
"dirmove" is a mapping of detected source dir -> destination dir renames.
This is needed for handling changes to new files previously grafted into
renamed directories.
Martin von Zweigbergk
copies: move comment about implementation of mergecopies() to end...
r42287
This function calls different copytracing algorithms based on config.
Matt Mackall
copies: move findcopies code to its own module...
r6274 """
# avoid silly behavior for update from empty dir
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 if not c1 or not c2 or c1 == c2:
Gábor Stefanik
graft: support grafting changes to new file in renamed directory (issue5436)
r30581 return {}, {}, {}, {}, {}
Matt Mackall
copies: move findcopies code to its own module...
r6274
Martin von Zweigbergk
copies: respect narrowmatcher in "parent -> working dir" case...
r41918 narrowmatch = c1.repo().narrowmatch()
Matt Mackall
copies: teach copies about dirstate.copies...
r6646 # avoid silly behavior for parent -> working dir
Matt Mackall
misc: replace .parents()[0] with p1()
r13878 if c2.node() is None and c1.node() == repo.dirstate.p1():
Martin von Zweigbergk
copies: respect narrowmatcher in "parent -> working dir" case...
r41918 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
Matt Mackall
copies: teach copies about dirstate.copies...
r6646
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 copytracing = repo.ui.config(b'experimental', b'copytrace')
Martin von Zweigbergk
copies: move check for experimental.copytrace==<falsy> earlier...
r42411 if stringutil.parsebool(copytracing) is False:
# stringutil.parsebool() returns None when it is unable to parse the
# value, so we should rely on making sure copytracing is on such cases
return {}, {}, {}, {}, {}
Pulkit Goyal
copytrace: move the default copytracing algorithm in a new function...
r34080
Martin von Zweigbergk
copies: ignore heuristics copytracing when using changeset-centric algos...
r42412 if usechangesetcentricalgo(repo):
# The heuristics don't make sense when we need changeset-centric algos
return _fullcopytracing(repo, c1, c2, base)
Durham Goode
copy: add flag for disabling copy tracing...
r26013 # Copy trace disabling is explicitly below the node == p1 logic above
# because the logic above is required for a simple copy to be kept across a
# rebase.
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if copytracing == b'heuristics':
Yuya Nishihara
copytrace: use ctx.mutable() instead of adhoc constant of non-public phases
r34365 # Do full copytracing if only non-public revisions are involved as
# that will be fast enough and will also cover the copies which could
# be missed by heuristics
Pulkit Goyal
copytrace: add a a new config to limit the number of drafts in heuristics...
r34312 if _isfullcopytraceable(repo, c1, base):
Pulkit Goyal
copytrace: use the full copytracing method if only drafts are involved...
r34289 return _fullcopytracing(repo, c1, c2, base)
Pulkit Goyal
copytrace: move fast heuristic copytracing algorithm to core...
r34180 return _heuristicscopytracing(repo, c1, c2, base)
Pulkit Goyal
copytrace: move the default copytracing algorithm in a new function...
r34080 else:
return _fullcopytracing(repo, c1, c2, base)
Durham Goode
copy: add flag for disabling copy tracing...
r26013
Augie Fackler
formatting: blacken the codebase...
r43346
Pulkit Goyal
copytrace: add a a new config to limit the number of drafts in heuristics...
def _isfullcopytraceable(repo, c1, base):
    """Tell whether the full copytracing algorithm should be used.

    Returns True only when both the source context and ``base`` report
    themselves as mutable and the number of revisions matched by
    ``base::source`` is strictly below the configured limit.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    # A context whose rev() is None has no revision number to put in the
    # revset below, so its first parent stands in for it.
    source = c1.p1() if c1.rev() is None else c1
    if not (source.mutable() and base.mutable()):
        return False
    limit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    between = repo.revs(b'%d::%d', base.rev(), source.rev())
    return len(between) < limit
Augie Fackler
formatting: blacken the codebase...
r43346
def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    """Classify a source that was copied/renamed on one side only.

    ``src`` is the copy source and ``dsts1`` the destinations recorded on the
    side that performed the copy ("side 1"). ``m1``, ``m2`` and ``mb`` are
    the manifests of side 1, side 2 and the base; ``c2`` and ``base`` are
    indexed by file name to obtain the objects handed to ``_related``.

    The function returns nothing; it records its findings in the two dicts
    passed by the caller:

    - ``renamedelete[src] = dsts1`` when ``src`` is absent from both ``m1``
      and ``m2`` (renamed on side 1, deleted on side 2);
    - ``copy[dst] = src`` for each destination missing from ``m2`` when
      ``src`` differs between ``m2`` and ``mb`` and the two file revisions
      are related.
    """
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
        return
    if src not in mb:
        # Indexing mb[src] below would fail when the base manifest has no
        # entry for the source; there is nothing to compare against, so no
        # merge information is recorded.
        return
    if m2[src] == mb[src]:
        # untouched on side 2: nothing to merge into the copies
        return
    if not _related(c2[src], base[src]):
        return
    # modified on side 2
    for dst in dsts1:
        if dst not in m2:
            # dst not added on side 2 (handle as regular
            # "both created" case in manifestmerge otherwise)
            copy[dst] = src
Augie Fackler
formatting: blacken the codebase...
r43346
Pulkit Goyal
copytrace: move the default copytracing algorithm in a new function...
def _logallcopies(repo, fullcopy, copy, diverge, renamedelete):
    """Write the annotated list of every copy found to the debug output."""
    # These lookup sets only serve the listing below.
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    renamedeleteset = set()
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    repo.ui.debug(
        b" all copies found (* = to merge, ! = divergent, "
        b"% = renamed and deleted):\n"
    )
    for f in sorted(fullcopy):
        note = b""
        if f in copy:
            note += b"*"
        if f in divergeset:
            note += b"!"
        if f in renamedeleteset:
            note += b"%"
        repo.ui.debug(
            b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
        )


def _finddirmoves(fullcopy, d1, d2):
    """Build the source dir -> destination dir map implied by file copies.

    ``fullcopy`` maps destination file -> source file; ``d1`` and ``d2`` are
    the directory collections of both sides. A source directory is kept only
    if it was never seen together with its destination on the same side and
    all its copies point to a single destination directory.
    """
    invalid = set()
    dirmove = {}
    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    # a directory may have been recorded before it was found invalid
    for d in invalid:
        dirmove.pop(d, None)
    return dirmove


def _fullcopytracing(repo, c1, c2, base):
    """Full copytracing between ``c1`` and ``c2`` relative to ``base``.

    The copies on each side are obtained from ``pathcopies(base, cN)`` and
    then combined. This is pretty slow when a lot of changesets are involved
    but will track all the copies.

    Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``:

    - ``copy``: destination -> source for copies to merge;
    - ``movewithdir``: file -> new location for files added on one side
      inside a directory that was detected as moved;
    - ``diverge``: source -> sorted list of destinations for sources renamed
      on both sides without any common destination;
    - ``renamedelete``: source -> destinations, as filled in by
      ``_checksinglesidecopies``;
    - ``dirmove``: source dir -> destination dir, both with a trailing ``/``.
    """
    # NOTE(review): message literals below are reproduced exactly as found
    # in this copy of the file; confirm their leading whitespace against the
    # canonical source.
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert "destination -> source" into "source -> [destinations]"
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                common = dsts1 & dsts2
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if common:
                    for dst in common:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                for dst in set(dsts1) & set(dsts2):
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # find interesting file sets from manifests
    narrowmatch = repo.narrowmatch()
    addedinm1 = m1.filesnotin(mb, narrowmatch)
    addedinm2 = m2.filesnotin(mb, narrowmatch)
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    # side 2 wins when both sides recorded a copy for the same destination
    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        _logallcopies(repo, fullcopy, copy, diverge, renamedelete)

    repo.ui.debug(b" checking for directory renames\n")

    # generate a directory move map
    dirmove = _finddirmoves(fullcopy, c1.dirs(), c2.dirs())
    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # the trailing slash makes the prefix test below match whole directory
    # names only
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f in fullcopy:
            continue
        for d in dirmove:
            if f.startswith(d):
                # new file added in a directory that was moved, move it
                df = dirmove[d] + f[len(d) :]
                if df not in copy:
                    movewithdir[f] = df
                    repo.ui.debug(
                        b" pending file src: '%s' -> dst: '%s'\n"
                        % (f, df)
                    )
                break

    return copy, movewithdir, diverge, renamedelete, dirmove
Durham Goode
copies: refactor checkcopies() into a top level method...
r19178
Augie Fackler
formatting: blacken the codebase...
r43346
Pulkit Goyal
copytrace: move fast heuristic copytracing algorithm to core...
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
                    (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple shape as _fullcopytracing(); when the heuristics
    path is taken only the first element (destination -> source) is filled.
    """
    # contexts without a revision number are replaced by their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}
    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # walk first parents from c2 down to base, collecting touched files
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]
    if not missingfiles:
        return copies, {}, {}, {}, {}

    # index the files that exist in c1 but not in base by basename and by
    # directory name; these are the possible move targets
    basenametofilename = collections.defaultdict(list)
    dirnametofilename = collections.defaultdict(list)
    for f in m1.filesnotin(base.manifest()):
        basenametofilename[os.path.basename(f)].append(f)
        dirnametofilename[os.path.dirname(f)].append(f)

    # we can have a lot of candidates which can slow down the heuristics
    # config value to limit the number of candidates moves to check
    # (read once: it does not depend on the file being examined)
    maxcandidates = repo.ui.configint(
        b'experimental', b'copytrace.movecandidateslimit'
    )

    for f in missingfiles:
        samebasename = basenametofilename[os.path.basename(f)]
        samedirname = dirnametofilename[os.path.dirname(f)]
        movecandidates = samebasename + samedirname
        # f is guaranteed to be present in c2, that's why
        # c2.filectx(f) won't fail
        f2 = c2.filectx(f)

        if len(movecandidates) > maxcandidates:
            repo.ui.status(
                _(
                    b"skipping copytracing for '%s', more "
                    b"candidates than the limit: %d\n"
                )
                % (f, len(movecandidates))
            )
            continue

        for candidate in movecandidates:
            f1 = c1.filectx(candidate)
            if _related(f1, f2):
                # if there are a few related copies then we'll merge
                # changes into all of them. This matches the behaviour
                # of upstream copytracing
                copies[candidate] = f

    return copies, {}, {}, {}, {}
Augie Fackler
formatting: blacken the codebase...
r43346
Gábor Stefanik
copies: clean up _related logic...
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Both ancestor chains are walked in lockstep, always advancing the side
    whose linkrev is larger, until both sides sit on the same linkrev; the
    answer is then whether they are the same file revision. Running out of
    ancestors on either side means the files are unrelated.

    A linkrev of None (f1 and f2 can only be workingfilectx's initially)
    would break the integer comparison, so such a starting point is replaced
    by its first ancestor before the walk begins.
    """
    if f1 == f2:
        return True  # a match

    ancestors1 = f1.ancestors()
    ancestors2 = f2.ancestors()
    try:
        rev1, rev2 = f1.linkrev(), f2.linkrev()
        if rev1 is None:
            f1 = next(ancestors1)
        if rev2 is None:
            f2 = next(ancestors2)

        while True:
            rev1, rev2 = f1.linkrev(), f2.linkrev()
            if rev1 > rev2:
                f1 = next(ancestors1)
                continue
            if rev2 > rev1:
                f2 = next(ancestors2)
                continue
            # f1 and f2 point to files in the same linkrev:
            # true if they point to the same file
            return f1 == f2
    except StopIteration:
        return False
Augie Fackler
formatting: blacken the codebase...
r43346
Phil Cohen
context: add workingfilectx.markcopied...
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    Every copy reported by ``pathcopies(repo[fromrev], repo[rev])`` whose
    destination is present in ``wctx`` is marked as copied there.

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    copytraceconfig = repo.ui.config(b'experimental', b'copytrace')
    parsed = stringutil.parsebool(copytraceconfig)
    tracingenabled = (
        copytraceconfig == b'heuristics' or parsed or parsed is None
    )

    if skiprev is not None and tracingenabled:
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        skipped = pathcopies(repo[fromrev], repo[skiprev])
    else:
        skipped = {}

    wanted = pathcopies(repo[fromrev], repo[rev])
    for dst, src in pycompat.iteritems(wanted):
        if dst not in skipped and dst in wctx:
            wctx[dst].markcopied(src)
copies: extract an explicit `computechangesetcopie` method from context...
r42935
Augie Fackler
formatting: blacken the codebase...
r43346
sidedatacopies: move various copies related function to the copies modules...
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset

    A file listed by ``ctx.files()`` counts as added when none of the
    contexts returned by ``ctx.parents()`` contains it. The result keeps the
    order of ``ctx.files()``.
    """
    # the parents are the same for every file: fetch them once rather than
    # once per file
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset

    These are the entries of ``ctx.files()`` that ``ctx`` itself does not
    contain, in the order ``ctx.files()`` lists them.
    """
    return [name for name in ctx.files() if name not in ctx]
copies: extract an explicit `computechangesetcopie` method from context...
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries
    (p1copies, p2copies), each in the form ``{newname: oldname}``.

    Only files listed by ``ctx.files()`` that are accepted by the repository
    narrow matcher and still present in ``ctx`` are examined. A recorded
    rename is attributed to the first parent when that parent holds the
    source at the recorded file node, otherwise to the second parent under
    the same condition; a rename matching neither parent is left out.
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    isnarrowed = ctx._repo.narrowmatch()

    def holds(parent, name, node):
        # the parent has this file at exactly the recorded file node
        return name in parent and parent[name].filenode() == node

    for dst in ctx.files():
        if not (isnarrowed(dst) and dst in ctx):
            continue
        renameinfo = ctx[dst].renamed()
        if not renameinfo:
            continue
        src, srcnode = renameinfo
        if holds(p1, src, srcnode):
            p1copies[dst] = src
        elif holds(p2, src, srcnode):
            p2copies[dst] = src
    return p1copies, p2copies
sidedatacopies: move various copies related function to the copies modules...
r43417
def encodecopies(files, copies):
    """Serialize a ``{destination: source}`` mapping against a file list.

    Each copy becomes ``<index>\\0<source>`` where ``<index>`` is the
    position of the destination in ``files``; entries are joined with
    newlines. Raises ProgrammingError when the number of encoded entries
    does not match the number of copies.
    """
    entries = [
        b'%d\0%s' % (position, copies[name])
        for position, name in enumerate(files)
        if name in copies
    ]
    if len(entries) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
def decodecopies(files, data):
    """Decode data produced by ``encodecopies`` back into a mapping.

    ``data`` holds newline-separated ``<index>\\0<source>`` entries, where
    ``<index>`` is a position in ``files``. Returns ``{destination: source}``
    (empty when ``data`` is empty), or None when the data cannot be decoded:
    malformed entry, non-numeric index, or index outside ``files``.
    """
    copies = {}
    if not data:
        return copies
    try:
        for line in data.split(b'\n'):
            strindex, src = line.split(b'\0')
            index = int(strindex)
            if index < 0:
                # A negative index would silently pick a file counted from
                # the end of the list; the encoder never emits one, so treat
                # it as undecodable, like decodefileindices does.
                return None
            copies[files[index]] = src
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
    return copies
def encodefileindices(files, subset):
    """Encode ``subset`` as the newline-joined positions of its members.

    Positions refer to ``files`` and are emitted in the order of ``files``;
    members of ``subset`` that are not in ``files`` are ignored.
    """
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % position
        for position, name in enumerate(files)
        if name in wanted
    )
def decodefileindices(files, data):
    """Decode newline-separated positions into the matching ``files`` entries.

    Returns the list of selected files (empty when ``data`` is empty), or
    None when an entry is not a number or lies outside ``files``.
    """
    selected = []
    if not data:
        return selected
    try:
        for token in data.split(b'\n'):
            position = int(token)
            if not 0 <= position < len(files):
                return None
            selected.append(files[position])
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
    return selected
sidedatacopies: deal with upgrading and downgrading to that format...
r43418
def _getsidedata(srcrepo, rev):
    """Compute the copy-related sidedata entries for one revision.

    Looks up ``srcrepo[rev]``, computes its copies against both parents and
    its added and removed files, and returns a dict keyed by the
    ``sidedatamod.SD_*`` constants. An entry is present only when its encoded
    value is non-empty, so the dict is empty when there is nothing to record.
    """
    ctx = srcrepo[rev]
    p1copies, p2copies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # Test the individual collections: the (p1copies, p2copies) pair itself
    # is a two-element tuple and therefore always true.
    if not (p1copies or p2copies or filesadded or filesremoved):
        return sidedata

    # indices stored in the sidedata refer to the sorted file list
    sortedfiles = sorted(ctx.files())
    entries = (
        (sidedatamod.SD_P1COPIES, encodecopies(sortedfiles, p1copies)),
        (sidedatamod.SD_P2COPIES, encodecopies(sortedfiles, p2copies)),
        (sidedatamod.SD_FILESADDED, encodefileindices(sortedfiles, filesadded)),
        (
            sidedatamod.SD_FILESREMOVED,
            encodefileindices(sortedfiles, filesremoved),
        ),
    )
    for key, value in entries:
        if value:
            sidedata[key] = value
    return sidedata
def getsidedataadder(srcrepo, destrepo):
    """Build the callback that supplies copy sidedata for a revision.

    The returned ``sidedatacompanion(revlog, rev)`` yields a
    ``(False, (), sidedata)`` triple. ``sidedata`` is computed from
    ``srcrepo`` when the revlog has a ``filteredrevs`` attribute (which
    marks it as a changelog) and is empty otherwise. ``destrepo`` is not
    used here.
    """

    def sidedatacompanion(revlog, rev):
        ischangelog = util.safehasattr(revlog, 'filteredrevs')
        newsidedata = _getsidedata(srcrepo, rev) if ischangelog else {}
        return False, (), newsidedata

    return sidedatacompanion
def getsidedataremover(srcrepo, destrepo):
    """Build the callback that lists copy sidedata keys to drop.

    The returned ``sidedatacompanion(revlog, rev)`` yields a
    ``(False, keys, {})`` triple. ``keys`` names the four copy-related
    sidedata entries when the revlog has a ``filteredrevs`` attribute (which
    marks it as a changelog) and the revision carries the REVIDX_SIDEDATA
    flag; otherwise it is empty. Neither ``srcrepo`` nor ``destrepo`` is
    used here.
    """

    def sidedatacompanion(revlog, rev):
        ischangelog = util.safehasattr(revlog, 'filteredrevs')
        if not (ischangelog and revlog.flags(rev) & REVIDX_SIDEDATA):
            return False, (), {}
        toremove = (
            sidedatamod.SD_P1COPIES,
            sidedatamod.SD_P2COPIES,
            sidedatamod.SD_FILESADDED,
            sidedatamod.SD_FILESREMOVED,
        )
        return False, toremove, {}

    return sidedatacompanion