copies.py
1307 lines
| 45.9 KiB
| text/x-python
|
PythonLexer
/ mercurial / copies.py
r46763 | # coding: utf8 | |||
Matt Mackall
|
r6274 | # copies.py - copy detection for Mercurial | ||
# | ||||
Raphaël Gomès
|
r47575 | # Copyright 2008 Olivia Mackall <olivia@selenic.com> | ||
Matt Mackall
|
r6274 | # | ||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Matt Mackall
|
r6274 | |||
Gregory Szorc
|
r25924 | |||
Pulkit Goyal
|
r34180 | import collections | ||
import os | ||||
Matt Mackall
|
r6274 | |||
Pulkit Goyal
|
r34847 | from .i18n import _ | ||
Joerg Sonnenberger
|
r47771 | from .node import nullrev | ||
r43418 | ||||
Gregory Szorc
|
r25924 | from . import ( | ||
Yuya Nishihara
|
r33869 | match as matchmod, | ||
Gregory Szorc
|
r25924 | pathutil, | ||
r46576 | policy, | |||
Gregory Szorc
|
r25924 | util, | ||
) | ||||
r43418 | ||||
Augie Fackler
|
r43346 | from .utils import stringutil | ||
r46674 | from .revlogutils import ( | |||
flagutil, | ||||
sidedata as sidedatamod, | ||||
) | ||||
r46264 | ||||
r46576 | rustmod = policy.importrust("copy_tracing") | |||
Gregory Szorc
|
r25924 | |||
Martin von Zweigbergk
|
def _filter(src, dst, t):
    """Drop invalid entries from the chained copy mapping ``t`` (in place).

    ``t`` maps destination name -> source name. ``src`` and ``dst`` only need
    to support the ``in`` operator for file names. Nothing is returned.
    """

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), the following cases can show
    # up (trivial cases left out). Case 6, for instance, is a file that
    # existed in 'src', kept its name in 'mid' and was then renamed between
    # 'mid' and 'dst'.
    #
    # case src mid dst result
    #   1   x   y   -    -
    #   2   x   y   y   x->y
    #   3   x   y   x    -
    #   4   x   y   z   x->z
    #   5   -   x   y    -
    #   6   x   x   y   x->y
    #
    # _chain() cannot distinguish case 1 from 2, 3 from 4, or 5 from 6, so it
    # keeps all of them. Cases 1, 3 and 5 are the ones discarded here.
    stale = [
        dest
        for dest, source in t.items()
        if dest == source  # case 3
        or source not in src  # case 5: copied from a file that did not exist
        or dest not in dst  # case 1: copied to a file that was then removed
    ]
    for dest in stale:
        del t[dest]
Augie Fackler
|
r43346 | |||
def _chain(prefix, suffix):
    """Combine two copy mappings, 'prefix' followed by 'suffix'.

    Both map destination -> source. The result starts as a copy of 'prefix';
    every entry of 'suffix' is then added with its source resolved through
    'prefix' when 'prefix' knows it. Neither input is modified.
    """
    chained = prefix.copy()
    chained.update(
        (dest, prefix.get(source, source)) for dest, source in suffix.items()
    )
    return chained
Matt Mackall
|
r15775 | |||
Augie Fackler
|
r43346 | |||
def _tracefile(fctx, am, basemf):
    """return the path of the first ancestor of fctx found in a manifest

    The ancestors of ``fctx`` are walked in the order ``fctx.ancestors()``
    yields them. The path of the first ancestor whose (path, filenode) pair
    is present in the ancestor manifest ``am`` is returned; when ``basemf``
    is set, a match in ``basemf`` is accepted as well. ``None`` is returned
    when no ancestor matches.

    Note: we used to try and stop after a given limit, however checking if
    that limit is reached turned out to be very expensive. we are better off
    disabling that feature."""

    for ancestor in fctx.ancestors():
        path = ancestor.path()
        if am.get(path, None) == ancestor.filenode():
            return path
        if basemf and basemf.get(path, None) == ancestor.filenode():
            return path
    return None
Matt Mackall
|
r15775 | |||
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
def _dirstatecopies(repo, match=None):
    """Return the dirstate copy records, limited to tracked, matching files.

    Works on a copy of ``repo.dirstate.copies()``. An entry is kept only when
    its destination is tracked in the dirstate and, if ``match`` is given,
    accepted by ``match``.
    """
    dirstate = repo.dirstate
    copies = dirstate.copies().copy()
    for dest in list(copies):
        if dirstate.get_entry(dest).tracked and (not match or match(dest)):
            continue
        del copies[dest]
    return copies
Augie Fackler
|
r43346 | |||
Durham Goode
|
def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.

    Kept as a separate function so that extensions can wrap it and observe
    the files _forwardcopies is about to process.
    """
    source_manifest = a.manifest()
    target_manifest = b.manifest()
    missing = target_manifest.filesnotin(source_manifest, match=match)
    return missing
Durham Goode
|
r24011 | |||
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms

    True when the repository stores copies in changeset sidedata, or when
    ``experimental.copies.read-from`` is ``changeset-only`` or
    ``compatibility``.
    """
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    source = repo.ui.config(b'experimental', b'copies.read-from')
    return source in (b'changeset-only', b'compatibility')
Martin von Zweigbergk
|
r42284 | |||
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {destination in b: source} mapping. When the changeset-centric
    algorithm is enabled the work is delegated to _changesetforwardcopies();
    otherwise every candidate file of ``b`` is traced back through its file
    ancestors with _tracefile().
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    # NOTE(review): the spacing inside the debug messages below is reproduced
    # as found; it may have been collapsed by extraction -- confirm against
    # the expected debug output.
    ui = repo.ui
    debug = ui.debugflag and ui.configbool(b'devel', b'debug.copies')
    dbg = ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))

    source_manifest = a.manifest()
    base_manifest = base.manifest() if base is not None else None

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. The common case of comparing a
    # commit against its only parent (rebase, histedit, ...) is optimized by
    # restricting the search to the files touched by that commit. Merge
    # commits are excluded because their ctx.files() is not correct for this
    # comparison.
    missing_matcher = match
    if b.p1() == a and b.p2().rev() == nullrev:
        touched = matchmod.exact(b.files())
        missing_matcher = matchmod.intersectmatchers(match, touched)

    if ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
        missing = list(b.walk(match))
        if debug:
            dbg(b'debug.copies: searching all files: %d\n' % len(missing))
    else:
        missing = _computeforwardmissing(a, b, match=missing_matcher)
        if debug:
            dbg(
                b'debug.copies: missing files to search: %d\n'
                % len(missing)
            )

    ancestrycontext = repo.changelog.ancestors([b.rev()], inclusive=True)

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    found = {}
    for path in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % path)
        fctx = b[path]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            # only needed for the timing message emitted below
            started = util.timer()
        origin = _tracefile(fctx, source_manifest, base_manifest)
        if origin:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % origin)
            found[path] = origin
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - started)
            )
    return found
Augie Fackler
|
r43346 | |||
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: the copy-related data of the revision, or None when the
      revision is not flagged as carrying copy information. Without the Rust
      extension this is ``changelogrevision(rev).changes``; with it, this is
      the raw ``SD_FILES`` sidedata entry of the revision.

    ``match`` is accepted for interface compatibility and is not used here.
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags
    changelogrevision = cl.changelogrevision
    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    if rustmod is not None:

        def revinfo(rev):
            p1, p2 = parents(rev)
            raw = None
            if flags(rev) & HASCOPIESINFO:
                # hand the undecoded sidedata entry over to the Rust code
                sidedata = changelogrevision(rev)._sidedata
                raw = sidedata.get(sidedatamod.SD_FILES)
            return (p1, p2, raw)

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            changes = None
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            return (p1, p2, changes)

    return revinfo
def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor

    The wrapper answers the trivial cases itself (a higher revision is never
    an ancestor of a lower one, a revision is its own ancestor) and memoizes
    every other answer per (anc, desc) pair.
    """
    known = {}

    def _is_ancestor(anc, desc):
        if anc >= desc:
            return anc == desc
        pair = (anc, desc)
        answer = known.get(pair)
        if answer is not None:
            return answer
        answer = known[pair] = is_ancestor(anc, desc)
        return answer

    return _is_ancestor
Martin von Zweigbergk
|
def _changesetforwardcopies(a, b, match):
    """Compute the {dst@b: src@a} mapping from changeset-level copy data.

    Collects the revisions and parent -> child edges lying between ``a`` and
    ``b`` and hands them to _combine_changeset_copies() (sidedata storage) or
    _combine_changeset_copies_extra() (other storage). Returns an empty dict
    when ``a`` is the null revision, when ``a`` and ``b`` are the same
    revision, or when no common revision can be found.
    """
    if a.rev() in (nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track renames from "A" to "B", every parent -> child edge contained
    # in `::B` but not in `::A` has to be gathered.
    #
    # To do so, gather all revisions exclusive[1] to "B" (i.e. `::b - ::a`)
    # plus the "root points": the parents of that exclusive set which belong
    # to `::a`. These are exactly the revisions needed to express the
    # parent -> child edges to combine.
    #
    # [1] strictly speaking the edges needed are those within `(::a)::b`,
    # i.e. paths leading to roots that are not ancestors of `a` must be
    # excluded. This is left out of the explanation above because it is hard
    # enough to follow without that special case.
    parents = cl._uncheckedparentrevs
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False
    multi_thread = repo.ui.configbool(b'devel', b'copy-tracing.multi-thread')

    # iterate over `only(B, A)`
    for rev in revs:
        rev_parents = parents(rev)
        if rev_parents == graph_roots:
            has_graph_roots = True
            continue
        # find all the "root points" (see larger comment above)
        for parent in rev_parents:
            if parent != nullrev and parent in ancestors:
                roots.add(parent)

    if not roots:
        # no common revision to track copies from
        return {}

    if has_graph_roots:
        # This deals with the special case from footnote [1]: revisions
        # leading to non-common graph roots must be filtered out.
        roots = list(roots)
        reachable = set(
            cl.reachableroots(min(roots), [b.rev()], roots, includepath=True)
        )
        revs = [rev for rev in revs if rev in reachable]

    if repo.filecopiesmode == b'changeset-sidedata':
        # With sidedata, edges are processed "from" the children: we iterate
        # over the children and pick up the data previously collected for
        # their parents. To know when a parent's data is no longer needed, a
        # counter of how many children each revision has is maintained.
        #
        # A useful property of `children_count` is that it only contains
        # revisions relevant for an edge of the graph. If a child has a
        # parent that is not in `children_count`, that edge must not be
        # processed.
        children_count = {root: 0 for root in roots}
        for rev in revs:
            for parent in cl.parentrevs(rev):
                if parent == nullrev:
                    continue
                children_count[rev] = 0
                if parent in children_count:
                    children_count[parent] += 1
        revinfo = _revinfo_getter(repo, match)
        with repo.changelog.reading():
            return _combine_changeset_copies(
                revs,
                children_count,
                b.rev(),
                revinfo,
                match,
                isancestor,
                multi_thread,
            )

    # Without sidedata, edges are processed "from" the parent, so a full
    # parent -> children mapping is required.
    children = {root: [] for root in roots}
    for rev in revs:
        for parent in cl.parentrevs(rev):
            if parent == nullrev:
                continue
            children[rev] = []
            if parent in children:
                children[parent].append(rev)
    # the target revision is the last one listed; it is only ever visited as
    # a child, while the roots have to be visited as parents
    last = revs.pop()
    assert last == b.rev()
    revs.extend(roots)
    revs.sort()

    revinfo = _revinfo_getter_extra(repo)
    return _combine_changeset_copies_extra(
        revs, children, b.rev(), revinfo, match, isancestor
    )
r44225 | ||||
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor, multi_thread
):
    """combine the copies information for each item of revs

    revs: sorted iterable of revision to visit
    children_count: a {parent: <number-of-relevant-children>} mapping. The
                    pure Python implementation decrements it in place.
    targetrev: the final copies destination revision; its entry is read from
               the accumulated data once all of ``revs`` has been processed
    revinfo(rev): a function that return (p1, p2, changes). In the pure
                  Python implementation ``changes`` is either None or an
                  object providing ``copied_from_p1``, ``copied_from_p2`` and
                  ``removed``.
    match: a matcher, applied to the destinations of the final result
    isancestor(low_rev, high_rev): ancestry test used when the data coming
                                   from two parents must be merged (pure
                                   Python implementation only)
    multi_thread: forwarded to the Rust implementation when it is available

    It returns the aggregated copies information for `targetrev` as a
    {destination: source} mapping.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        final_copies = rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, multi_thread
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        # {rev: {dest: (rev-introducing-the-value, source-or-None)}}
        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent -> child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    # this edge is not relevant
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    copies = all_copies.get(parent_rev, None)
                else:
                    # last child to consume the parent data: release it
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    newcopies = copies = {}
                elif remaining_children:
                    # other children still need the parent data untouched
                    newcopies = copies.copy()
                else:
                    newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in childcopies.items():
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                # the source was itself copied: follow it
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting
                                    # potential other branches. when there
                                    # are no other branches, this could be
                                    # avoided.
                                    newcopies = copies.copy()
                                # a None source records the removal
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another
                # parent (for that child).
                if current_copies is None:
                    current_copies = newcopies
                else:
                    # we are the second parent to work on c, we need to merge
                    # our work with the other.
                    #
                    # In case of conflict, parent 1 take precedence over
                    # parent 2. This is an arbitrary choice made anew when
                    # implementing changeset based copies. It was made
                    # without regards with potential filelog related
                    # behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies,
                        current_copies,
                        isancestor,
                        changes,
                        current_rev,
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source mapping}
        final_copies = {}

        targetrev_items = all_copies[targetrev]
        assert targetrev_items is not None  # help pytype

        for dest, (_tt, source) in targetrev_items.items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        for filename in list(final_copies):
            if not match(filename):
                del final_copies[filename]
    return final_copies
# Which side _merge_copies_dict should keep for a destination, as decided by
# _compare_values: the "minor" value, the "major" value, or either one when
# both are equivalent.
PICK_MINOR, PICK_MAJOR, PICK_EITHER = range(3)
def _merge_copies_dict(minor, major, isancestor, changes, current_merge):
    """merge two copies-mapping together, minor and major

    Both mappings are {dest: (rev, source)}. Destinations only present in
    "major" are added to "minor"; for destinations present on both sides
    _compare_values() decides which value is kept.

    - `isancestor(low_rev, high_rev)`: callable return True if `low_rev` is
      an ancestors of `high_rev`,

    - `changes`: the changes object of the merge revision (or None), passed
      on to _compare_values(),

    - `current_merge`: revision recorded as the origin of a value whenever
      _compare_values() reports that the conflict had to be resolved
      (overwrite).

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
            continue
        pick, overwrite = _compare_values(
            changes, isancestor, dest, other, value
        )
        if overwrite:
            # the merge itself settles the value: stamp it with the merge
            # revision while keeping the source of the selected side
            chosen = value if pick == PICK_MAJOR else other
            minor[dest] = (current_merge, chosen[1])
        elif pick == PICK_MAJOR:
            minor[dest] = value
    return minor
Martin von Zweigbergk
|
r41922 | |||
Augie Fackler
|
r43346 | |||
def _compare_values(changes, isancestor, dest, minor, major):
    """compare two value within a _merge_copies_dict loop iteration

    ``minor`` and ``major`` are (rev, source) pairs recorded for ``dest``.

    return (pick, overwrite).

    - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
    - overwrite is True if pick is a return of an ambiguity that needs
      resolution.
    """
    major_tt, major_value = major
    minor_tt, minor_value = minor

    if major_tt == minor_tt:
        # if it comes from the same revision it must be the same value
        assert major_value == minor_value
        return PICK_EITHER, False

    if changes is not None:
        exactly_one_deleted = (minor_value is None) != (major_value is None)
        if exactly_one_deleted and dest in changes.salvaged:
            # In this case, a deletion was reverted, the "alive" value
            # overwrite the deleted one.
            if minor_value is None:
                return PICK_MAJOR, True
            return PICK_MINOR, True

    if isancestor(minor_tt, major_tt):
        # change to dest happened on the branch without copy-source change,
        # so both source are valid and "major" wins.
        merged = changes is not None and dest in changes.merged
        return PICK_MAJOR, merged

    if isancestor(major_tt, minor_tt):
        if changes is not None and dest in changes.merged:
            # change to dest happened on the branch without copy-source
            # change, so both source are valid and "major" wins.
            return PICK_MAJOR, True
        return PICK_MINOR, False

    if minor_value is not None and major_value is None:
        # in case of conflict, the "alive" side wins.
        return PICK_MINOR, True
    # either "minor" is the deleted side (the "alive" side wins), or both
    # sides are alive and major wins.
    return PICK_MAJOR, True
r46776 | ||||
def _revinfo_getter_extra(repo):
    """return a function that return multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        ctx = repo[rev]

        def ismerged(path):
            # a file untouched by the changeset cannot have been merged in it
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            fileparents = fctx._filelog.parents(fctx._filenode)
            # merged means the file revision has two non-null parents
            return sum(1 for n in fileparents if n != repo.nullid) >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information

    ``revs`` are visited as parents, ``children`` maps each of them to the
    list of its children and ``revinfo(rev)`` returns
    (p1, p2, p1copies, p2copies, removed, ismerged). The result is the
    {destination: source} mapping accumulated for ``targetrev``.
    """
    # {rev: {dest: (rev-introducing-the-value, source-or-None)}}
    all_copies = {}
    alwaysmatch = match.always()
    for rev in revs:
        copies = all_copies.pop(rev, None)
        if copies is None:
            # this is a root
            copies = {}
        for child in children[rev]:
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(child)
            if rev == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert rev == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in childcopies.items():
                    previous = copies.get(source)
                    if previous is not None and previous[1] is not None:
                        # the source was itself copied: follow it
                        source = previous[1]
                    newcopies[dest] = (child, source)
                assert newcopies is not copies
            for removed_file in removed:
                if removed_file in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # a None source records the removal
                    newcopies[removed_file] = (child, None)
            othercopies = all_copies.get(child)
            if othercopies is None:
                all_copies[child] = newcopies
            elif parent == 1:
                # we are the second parent to work on the child, we need to
                # merge our work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                _merge_copies_dict_extra(
                    othercopies, newcopies, isancestor, ismerged
                )
            else:
                # NOTE(review): when the edge added nothing, `newcopies` is
                # still the very dict shared with the other children of
                # `rev`, and it is updated in place here -- confirm this
                # aliasing is intended.
                _merge_copies_dict_extra(
                    newcopies, othercopies, isancestor, ismerged
                )
                all_copies[child] = newcopies

    final_copies = {}
    for dest, (_tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
    """version of `_merge_copies_dict` that works with the Google
    specific "extra" based storage for copy information

    ``minor`` is updated in place with the entries of ``major``; nothing is
    returned.
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
            continue
        if value[1] == other[1]:
            # both sides agree on the source, nothing to resolve
            continue
        new_tt = value[0]
        other_tt = other[0]
        # content from "major" wins, unless it is older
        # than the branch point or there is a merge
        major_wins = (
            new_tt == other_tt
            or not isancestor(new_tt, other_tt)
            or ismerged(dest)
        )
        if major_wins:
            minor[dest] = value
Martin von Zweigbergk
|
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b

    ``base`` defaults to ``a``. ``match`` is narrowed with the repository's
    narrowmatch before use. When ``b`` is the working copy, the committed
    copies up to its first parent are chained with the dirstate copies.
    """
    if base is None:
        base = a
    match = a.repo().narrowmatch(match)

    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)

    # b is the working copy: trace up to its first parent, then combine with
    # the copies recorded in the dirstate
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
Matt Mackall
|
r15775 | |||
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
def _backwardrenames(a, b, match):
    """find renames from a to b

    Returns an empty mapping when ``experimental.copytrace`` is ``off``.
    """
    copytrace = a._repo.ui.config(b'experimental', b'copytrace')
    if copytrace == b'off':
        return {}

    # "match" is deliberately not passed to _forwardcopies(): it would filter
    # by destination there. Since the copies get reversed, the filtering has
    # to apply to the source instead, which _reverse_renames() takes care of.
    forward = _forwardcopies(b, a)
    return _reverse_renames(forward, a, match)
def _reverse_renames(copies, dst, match):
    """given copies to context 'dst', finds renames from that context

    ``copies`` maps destination -> source; the result maps source ->
    destination. Sources rejected by ``match`` (when given) are skipped, and
    so are sources still present in ``dst``, since those are copies rather
    than renames.
    """
    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames: entries are visited in sorted
    # order and the last one seen for a given source is kept.
    return {
        source: dest
        for dest, source in sorted(copies.items())
        if not (match and not match(source)) and source not in dst
    }
Augie Fackler
|
r43346 | |||
Durham Goode
|
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Depending on how ``x`` and ``y`` relate, copies are searched from the
    dirstate only, forward, backward, or through their common ancestor
    ("combined" mode). An empty mapping is returned when ``x`` and ``y`` are
    equal or when either of them is falsy.
    """
    repo = x._repo
    ui = repo.ui
    # NOTE(review): the spacing inside the debug messages below is reproduced
    # as found; it may have been collapsed by extraction -- confirm against
    # the expected debug output.
    debug = ui.debugflag and ui.configbool(b'devel', b'debug.copies')
    if debug:
        ui.debug(b'debug.copies: searching copies from %s to %s\n' % (x, y))

    if x == y or not x or not y:
        return {}

    if y.rev() is None and x == y.p1():
        if debug:
            ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)

    ancestor = y.ancestor(x)
    if ancestor == x:
        if debug:
            ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif ancestor == y:
        if debug:
            ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            ui.debug(b'debug.copies: search mode: combined\n')
        base = x if ancestor.rev() != nullrev else None

        x_copies = _forwardcopies(ancestor, x)
        y_copies = _forwardcopies(ancestor, y, base, match=match)
        # a copy recorded identically on both sides is not a difference
        # between x and y: drop it from both mappings
        for dest in set(x_copies) & set(y_copies):
            if x_copies.get(dest) == y_copies.get(dest):
                del x_copies[dest]
                del y_copies[dest]
        x_backward_renames = _reverse_renames(x_copies, x, match)
        copies = _chain(x_backward_renames, y_copies)

    _filter(x, y, copies)
    return copies
Matt Mackall
|
r15774 | |||
Augie Fackler
|
r43346 | |||
Pierre-Yves David
|
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the
    following message:

    ```other changed <file> which local deleted```

    Returns a 3-tuple ``(branch_copies, branch_copies, diverge)`` where:

    each "branch_copies" is an instance of branch_copies (presumably one per
    side of the merge -- confirm against the copytracing implementations).

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across
    # a rebase.
    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so copytracing stays enabled for any non-boolean setting
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
Durham Goode
|
r26013 | |||
Augie Fackler
|
r43346 | |||
Pulkit Goyal
|
def _isfullcopytraceable(repo, c1, base):
    """Tell whether the full copytracing algorithm is cheap enough to use.

    That is the case when both the source (``c1``, or its first parent when
    ``c1`` is the working copy) and ``base`` are mutable, i.e. non-public:
    the full algorithm is then fast enough and offers increased capabilities.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    source = c1.p1() if c1.rev() is None else c1
    if not (source.mutable() and base.mutable()):
        return False
    limit = repo.ui.configint(b'experimental', b'copytrace.sourcecommitlimit')
    between = repo.revs(b'%d::%d', base.rev(), source.rev())
    return len(between) < limit
Augie Fackler
|
r43346 | |||
def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    """Classify a file that was copied/renamed on one side only.

    src: the copy source
    dsts1: the destinations ``src`` was copied to on the copying side
    m1: manifest of the copying side
    m2: manifest of the other side
    mb: manifest of the merge base
    c2: context of the other side
    base: context of the merge base
    copy: dict updated in place with ``dst -> src`` for copies to merge
    renamedelete: dict updated in place with ``src -> dsts1`` when the file
        was renamed on the copying side and deleted on the other side
    """
    if src not in m2:
        # deleted on the other side ...
        if src not in m1:
            # ... and renamed on the copying side
            renamedelete[src] = dsts1
        return

    if src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        return

    content_changed = mb[src] != m2[src]
    if content_changed and not _related(c2[src], base[src]):
        # the other side holds an unrelated file under the same name
        return

    if content_changed or mb.flags(src) != m2.flags(src):
        # modified on the other side: its changes must follow the copies
        copy.update(dict.fromkeys(dsts1, src))
Martin von Zweigbergk
|
r42408 | |||
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
class branch_copies(object):
    """Copy information gathered for one side of a merge/graft.

    "copy" maps destination name -> source name, where the source is in c1
    and the destination is in c2 or vice-versa.

    "movewithdir" maps source name -> destination name, for files present at
    source in one context but not the other that the merge process has to
    move to destination because the other context moved the directory they
    live in.

    "renamedelete" maps source name -> list of destination names for files
    deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" maps detected source dir -> destination dir renames. It is
    needed to handle changes to new files previously grafted into renamed
    directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # every instance gets its own fresh dict for any omitted argument
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        fields = (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
        template = (
            '<branch_copies\n copy=%r\n renamedelete=%r\n'
            ' dirmove=%r\n movewithdir=%r\n>'
        )
        return template % fields
Martin von Zweigbergk
|
r44681 | |||
Pulkit Goyal
|
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 3-tuple ``(branch_copies1, branch_copies2, diverge)``. The two
    ``branch_copies`` objects hold the copy, renamedelete, dirmove and
    movewithdir information computed for the ``c1`` and ``c2`` sides
    respectively. ``diverge`` maps a source file to the sorted list of all
    its destinations when both sides renamed it and the two sets of
    destinations do not overlap.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        # no copies on either side: nothing to analyse
        return branch_copies(), branch_copies(), {}

    # pathcopies() results map dst -> src; invert them into src -> [dsts] so
    # that every source can be classified once, looking at both sides
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2 (same helper, roles swapped)
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    # `cache` stays empty until the first request, then holds [u1, u2]; this
    # way the manifest comparison runs at most once and only when needed
    cache = []

    def _get_addedfiles(idx):
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            # keep only the files reported for exactly one of the two sides
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

    # lazy accessors handed to _dir_renames() below
    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        # everything in this branch only builds and emits debug output
        u1 = u1fn()
        u2 = u2fn()

        header = b" unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
            )

        # flatten the destination lists so each copy can be annotated below
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # Note the crossed pairing: the directory moves detected on one side are
    # matched against the files added on the *other* side, so the resulting
    # movewithdir mapping belongs to that other side.
    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
Martin von Zweigbergk
|
r44624 | |||
Kyle Lippincott
|
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Detect moved directories and the files that must follow them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
              merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared
                  to ctx)

    Returns ``(dirmove, movewithdir)``. ``dirmove`` maps a source directory
    prefix to its destination directory prefix, both ending with "/".
    ``movewithdir`` maps a file added on the other side to the path it
    should take inside the moved directory. Both are empty dicts when no
    directory move was detected.
    """
    # A directory counts as moved when all the files copied out of it went
    # to one single new directory; collect candidates and the rejected ones.
    rejected = set()
    candidates = {}
    for dst, src in fullcopy.items():
        srcdir = pathutil.dirname(src)
        dstdir = pathutil.dirname(dst)
        if srcdir in rejected:
            # already seen to be uninteresting
            continue
        if ctx.hasdir(srcdir) and ctx.hasdir(dstdir):
            # directory wasn't entirely moved locally
            rejected.add(srcdir)
            continue
        if candidates.get(srcdir, dstdir) != dstdir:
            # files from the same directory moved to two different places
            rejected.add(srcdir)
            continue
        # looks good so far
        candidates[srcdir] = dstdir

    # a directory may have been accepted before it got rejected
    for srcdir in rejected:
        candidates.pop(srcdir, None)

    if not candidates:
        return {}, {}

    dirmove = {}
    for srcdir, dstdir in candidates.items():
        dirmove[srcdir + b"/"] = dstdir + b"/"

    for srcprefix, dstprefix in dirmove.items():
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n"
            % (srcprefix, dstprefix)
        )

    # Sort the directories in reverse order, so we find children first
    # For example, if dir1/ was renamed to dir2/, and dir1/subdir1/
    # was renamed to dir2/subdir2/, we want to move dir1/subdir1/file
    # to dir2/subdir2/file (not dir2/subdir1/file)
    prefixes = sorted(dirmove, reverse=True)

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for prefix in prefixes:
            if not f.startswith(prefix):
                continue
            # new file added in a directory that was moved, move it
            moved = dirmove[prefix] + f[len(prefix) :]
            if moved not in copy:
                movewithdir[f] = moved
                repo.ui.debug(
                    b" pending file src: '%s' -> dst: '%s'\n" % (f, moved)
                )
            # only the first (deepest) matching directory is considered
            break

    return dirmove, movewithdir
Durham Goode
|
r19178 | |||
Augie Fackler
|
r43346 | |||
Pulkit Goyal
|
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing based on filename heuristics.

    Only two kinds of moves or renames are considered:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    This only works when the "source branch" contains no merge commits, the
    source branch being the commits from base up to c2, base excluded. When
    a merge is involved, or when base is not an ancestor of c2, it falls
    back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    The heuristics can yield a very large number of copy/move candidates,
    which makes the algorithm slow. The number of candidates checked per
    file can be limited with the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    m1 = c1.manifest()

    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base and c2, following first
    # parents
    touched = set()
    cursor = c2
    while cursor != base:
        if len(cursor.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        touched.update(cursor.files())
        cursor = cursor.p1()

    # copies recorded on the c2 side whose source still exists in c1
    copies2 = {
        dst: src
        for dst, src in _forwardcopies(base, c2).items()
        if src in m1
    }

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missing = [
        f for f in touched if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missing:
        # index the files added in c1 by basename and by directory name
        by_basename = collections.defaultdict(list)
        by_dirname = collections.defaultdict(list)
        for added in m1.filesnotin(base.manifest()):
            by_basename[os.path.basename(added)].append(added)
            by_dirname[os.path.dirname(added)].append(added)

        for f in missing:
            same_basename = by_basename[os.path.basename(f)]
            same_dirname = by_dirname[os.path.dirname(f)]
            candidates = same_basename + same_dirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            limit = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(candidates) > limit:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(candidates))
                )
                continue

            for candidate in candidates:
                if _related(c1.filectx(candidate), f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
Pulkit Goyal
|
r34180 | |||
Augie Fackler
|
r43346 | |||
Gábor Stefanik
|
def _related(f1, f2):
    """Return True if the f1 and f2 filectx share a common ancestor.

    Both histories are walked backwards, always stepping the side with the
    higher linkrev, until the two sides sit on the same linkrev; they are
    related if they are the same file at that point. If either history runs
    out first, the files are unrelated.

    A workingfilectx has a linkrev() of None, which would break the integer
    comparison, so such a context is first replaced by its first ancestor
    (only the initial f1 and f2 can be workingfilectx's).
    """
    if f1 == f2:
        return True  # a match

    walk1 = f1.ancestors()
    walk2 = f2.ancestors()
    try:
        if f1.linkrev() is None:
            f1 = next(walk1)
        if f2.linkrev() is None:
            f2 = next(walk2)

        while True:
            rev1 = f1.linkrev()
            rev2 = f2.linkrev()
            if rev1 == rev2:
                # same linkrev: related only if it is the very same file
                return f1 == f2
            if rev1 > rev2:
                f1 = next(walk1)
            else:
                f2 = next(walk2)
    except StopIteration:
        # one history was exhausted before the two sides met
        return False
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
def graftcopies(wctx, ctx, base):
    """Reproduce in wctx the copies made between base and ctx.

    Unlike mergecopies(), only the copies between base and ctx are
    considered; copies between base and wctx are ignored. Also unlike
    mergecopies(), the copies are applied to the working copy instead of
    being returned as information. That makes this function cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it only marks
    a copy if it thinks the source files are related (see merge._related()),
    and it does not mark copies for files that weren't modified on the local
    side. This function adds the copies that were "missed" by
    merge.update().
    """
    new_copies = pathcopies(base, ctx)
    parent = wctx.p1()
    _filter(parent, wctx, new_copies)

    # Extra filtering to drop copy information for files that existed before
    # the graft. This is to handle the case of grafting a rename onto a commit
    # that already has the rename. Otherwise the presence of copy information
    # would result in the creation of an empty commit where we would prefer to
    # not create one.
    already_present = [dest for dest in new_copies if dest in parent]
    for dest in already_present:
        del new_copies[dest]

    for dst, src in new_copies.items():
        wctx[dst].markcopied(src)