##// END OF EJS Templates
copies: expand `_chain` variable name to make the function easier to read...
marmoute -
r44223:995066c4 default
parent child Browse files
Show More
@@ -1,1014 +1,1014 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import os
11 import os
12
12
13 from .i18n import _
13 from .i18n import _
14
14
15
15
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17
17
18 from . import (
18 from . import (
19 error,
19 error,
20 match as matchmod,
20 match as matchmod,
21 node,
21 node,
22 pathutil,
22 pathutil,
23 pycompat,
23 pycompat,
24 util,
24 util,
25 )
25 )
26
26
27 from .revlogutils import sidedata as sidedatamod
27 from .revlogutils import sidedata as sidedatamod
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31
31
32 def _filter(src, dst, t):
32 def _filter(src, dst, t):
33 """filters out invalid copies after chaining"""
33 """filters out invalid copies after chaining"""
34
34
35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # in the following table (not including trivial cases). For example, case 2
37 # in the following table (not including trivial cases). For example, case 2
38 # is where a file existed in 'src' and remained under that name in 'mid' and
38 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # then was renamed between 'mid' and 'dst'.
39 # then was renamed between 'mid' and 'dst'.
40 #
40 #
41 # case src mid dst result
41 # case src mid dst result
42 # 1 x y - -
42 # 1 x y - -
43 # 2 x y y x->y
43 # 2 x y y x->y
44 # 3 x y x -
44 # 3 x y x -
45 # 4 x y z x->z
45 # 4 x y z x->z
46 # 5 - x y -
46 # 5 - x y -
47 # 6 x x y x->y
47 # 6 x x y x->y
48 #
48 #
49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # between 5 and 6, so it includes all cases in its result.
51 # between 5 and 6, so it includes all cases in its result.
52 # Cases 1, 3, and 5 are then removed by _filter().
52 # Cases 1, 3, and 5 are then removed by _filter().
53
53
54 for k, v in list(t.items()):
54 for k, v in list(t.items()):
55 # remove copies from files that didn't exist
55 # remove copies from files that didn't exist
56 if v not in src:
56 if v not in src:
57 del t[k]
57 del t[k]
58 # remove criss-crossed copies
58 # remove criss-crossed copies
59 elif k in src and v in dst:
59 elif k in src and v in dst:
60 del t[k]
60 del t[k]
61 # remove copies to files that were then removed
61 # remove copies to files that were then removed
62 elif k not in dst:
62 elif k not in dst:
63 del t[k]
63 del t[k]
64
64
65
65
66 def _chain(a, b):
66 def _chain(prefix, suffix):
67 """chain two sets of copies 'a' and 'b'"""
67 """chain two sets of copies 'prefix' and 'suffix'"""
68 t = a.copy()
68 result = prefix.copy()
69 for k, v in pycompat.iteritems(b):
69 for key, value in pycompat.iteritems(suffix):
70 t[k] = a.get(v, v)
70 result[key] = prefix.get(value, value)
71 return t
71 return result
72
72
73
73
74 def _tracefile(fctx, am, basemf):
74 def _tracefile(fctx, am, basemf):
75 """return file context that is the ancestor of fctx present in ancestor
75 """return file context that is the ancestor of fctx present in ancestor
76 manifest am
76 manifest am
77
77
78 Note: we used to try and stop after a given limit, however checking if that
78 Note: we used to try and stop after a given limit, however checking if that
79 limit is reached turned out to be very expensive. we are better off
79 limit is reached turned out to be very expensive. we are better off
80 disabling that feature."""
80 disabling that feature."""
81
81
82 for f in fctx.ancestors():
82 for f in fctx.ancestors():
83 path = f.path()
83 path = f.path()
84 if am.get(path, None) == f.filenode():
84 if am.get(path, None) == f.filenode():
85 return path
85 return path
86 if basemf and basemf.get(path, None) == f.filenode():
86 if basemf and basemf.get(path, None) == f.filenode():
87 return path
87 return path
88
88
89
89
90 def _dirstatecopies(repo, match=None):
90 def _dirstatecopies(repo, match=None):
91 ds = repo.dirstate
91 ds = repo.dirstate
92 c = ds.copies().copy()
92 c = ds.copies().copy()
93 for k in list(c):
93 for k in list(c):
94 if ds[k] not in b'anm' or (match and not match(k)):
94 if ds[k] not in b'anm' or (match and not match(k)):
95 del c[k]
95 del c[k]
96 return c
96 return c
97
97
98
98
99 def _computeforwardmissing(a, b, match=None):
99 def _computeforwardmissing(a, b, match=None):
100 """Computes which files are in b but not a.
100 """Computes which files are in b but not a.
101 This is its own function so extensions can easily wrap this call to see what
101 This is its own function so extensions can easily wrap this call to see what
102 files _forwardcopies is about to process.
102 files _forwardcopies is about to process.
103 """
103 """
104 ma = a.manifest()
104 ma = a.manifest()
105 mb = b.manifest()
105 mb = b.manifest()
106 return mb.filesnotin(ma, match=match)
106 return mb.filesnotin(ma, match=match)
107
107
108
108
109 def usechangesetcentricalgo(repo):
109 def usechangesetcentricalgo(repo):
110 """Checks if we should use changeset-centric copy algorithms"""
110 """Checks if we should use changeset-centric copy algorithms"""
111 if repo.filecopiesmode == b'changeset-sidedata':
111 if repo.filecopiesmode == b'changeset-sidedata':
112 return True
112 return True
113 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
113 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
114 changesetsource = (b'changeset-only', b'compatibility')
114 changesetsource = (b'changeset-only', b'compatibility')
115 return readfrom in changesetsource
115 return readfrom in changesetsource
116
116
117
117
118 def _committedforwardcopies(a, b, base, match):
118 def _committedforwardcopies(a, b, base, match):
119 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
119 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
120 # files might have to be traced back to the fctx parent of the last
120 # files might have to be traced back to the fctx parent of the last
121 # one-side-only changeset, but not further back than that
121 # one-side-only changeset, but not further back than that
122 repo = a._repo
122 repo = a._repo
123
123
124 if usechangesetcentricalgo(repo):
124 if usechangesetcentricalgo(repo):
125 return _changesetforwardcopies(a, b, match)
125 return _changesetforwardcopies(a, b, match)
126
126
127 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
127 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
128 dbg = repo.ui.debug
128 dbg = repo.ui.debug
129 if debug:
129 if debug:
130 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
130 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
131 am = a.manifest()
131 am = a.manifest()
132 basemf = None if base is None else base.manifest()
132 basemf = None if base is None else base.manifest()
133
133
134 # find where new files came from
134 # find where new files came from
135 # we currently don't try to find where old files went, too expensive
135 # we currently don't try to find where old files went, too expensive
136 # this means we can miss a case like 'hg rm b; hg cp a b'
136 # this means we can miss a case like 'hg rm b; hg cp a b'
137 cm = {}
137 cm = {}
138
138
139 # Computing the forward missing is quite expensive on large manifests, since
139 # Computing the forward missing is quite expensive on large manifests, since
140 # it compares the entire manifests. We can optimize it in the common use
140 # it compares the entire manifests. We can optimize it in the common use
141 # case of computing what copies are in a commit versus its parent (like
141 # case of computing what copies are in a commit versus its parent (like
142 # during a rebase or histedit). Note, we exclude merge commits from this
142 # during a rebase or histedit). Note, we exclude merge commits from this
143 # optimization, since the ctx.files() for a merge commit is not correct for
143 # optimization, since the ctx.files() for a merge commit is not correct for
144 # this comparison.
144 # this comparison.
145 forwardmissingmatch = match
145 forwardmissingmatch = match
146 if b.p1() == a and b.p2().node() == node.nullid:
146 if b.p1() == a and b.p2().node() == node.nullid:
147 filesmatcher = matchmod.exact(b.files())
147 filesmatcher = matchmod.exact(b.files())
148 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
148 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
149 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
149 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
150
150
151 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
151 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
152
152
153 if debug:
153 if debug:
154 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
154 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
155
155
156 for f in sorted(missing):
156 for f in sorted(missing):
157 if debug:
157 if debug:
158 dbg(b'debug.copies: tracing file: %s\n' % f)
158 dbg(b'debug.copies: tracing file: %s\n' % f)
159 fctx = b[f]
159 fctx = b[f]
160 fctx._ancestrycontext = ancestrycontext
160 fctx._ancestrycontext = ancestrycontext
161
161
162 if debug:
162 if debug:
163 start = util.timer()
163 start = util.timer()
164 opath = _tracefile(fctx, am, basemf)
164 opath = _tracefile(fctx, am, basemf)
165 if opath:
165 if opath:
166 if debug:
166 if debug:
167 dbg(b'debug.copies: rename of: %s\n' % opath)
167 dbg(b'debug.copies: rename of: %s\n' % opath)
168 cm[f] = opath
168 cm[f] = opath
169 if debug:
169 if debug:
170 dbg(
170 dbg(
171 b'debug.copies: time: %f seconds\n'
171 b'debug.copies: time: %f seconds\n'
172 % (util.timer() - start)
172 % (util.timer() - start)
173 )
173 )
174 return cm
174 return cm
175
175
176
176
177 def _revinfogetter(repo):
177 def _revinfogetter(repo):
178 """return a function that return multiple data given a <rev>"i
178 """return a function that return multiple data given a <rev>"i
179
179
180 * p1: revision number of first parent
180 * p1: revision number of first parent
181 * p2: revision number of first parent
181 * p2: revision number of first parent
182 * p1copies: mapping of copies from p1
182 * p1copies: mapping of copies from p1
183 * p2copies: mapping of copies from p2
183 * p2copies: mapping of copies from p2
184 * removed: a list of removed files
184 * removed: a list of removed files
185 """
185 """
186 cl = repo.changelog
186 cl = repo.changelog
187 parents = cl.parentrevs
187 parents = cl.parentrevs
188
188
189 if repo.filecopiesmode == b'changeset-sidedata':
189 if repo.filecopiesmode == b'changeset-sidedata':
190 changelogrevision = cl.changelogrevision
190 changelogrevision = cl.changelogrevision
191 flags = cl.flags
191 flags = cl.flags
192
192
193 # A small cache to avoid doing the work twice for merges
193 # A small cache to avoid doing the work twice for merges
194 #
194 #
195 # In the vast majority of cases, if we ask information for a revision
195 # In the vast majority of cases, if we ask information for a revision
196 # about 1 parent, we'll later ask it for the other. So it make sense to
196 # about 1 parent, we'll later ask it for the other. So it make sense to
197 # keep the information around when reaching the first parent of a merge
197 # keep the information around when reaching the first parent of a merge
198 # and dropping it after it was provided for the second parents.
198 # and dropping it after it was provided for the second parents.
199 #
199 #
200 # It exists cases were only one parent of the merge will be walked. It
200 # It exists cases were only one parent of the merge will be walked. It
201 # happens when the "destination" the copy tracing is descendant from a
201 # happens when the "destination" the copy tracing is descendant from a
202 # new root, not common with the "source". In that case, we will only walk
202 # new root, not common with the "source". In that case, we will only walk
203 # through merge parents that are descendant of changesets common
203 # through merge parents that are descendant of changesets common
204 # between "source" and "destination".
204 # between "source" and "destination".
205 #
205 #
206 # With the current case implementation if such changesets have a copy
206 # With the current case implementation if such changesets have a copy
207 # information, we'll keep them in memory until the end of
207 # information, we'll keep them in memory until the end of
208 # _changesetforwardcopies. We don't expect the case to be frequent
208 # _changesetforwardcopies. We don't expect the case to be frequent
209 # enough to matters.
209 # enough to matters.
210 #
210 #
211 # In addition, it would be possible to reach pathological case, were
211 # In addition, it would be possible to reach pathological case, were
212 # many first parent are met before any second parent is reached. In
212 # many first parent are met before any second parent is reached. In
213 # that case the cache could grow. If this even become an issue one can
213 # that case the cache could grow. If this even become an issue one can
214 # safely introduce a maximum cache size. This would trade extra CPU/IO
214 # safely introduce a maximum cache size. This would trade extra CPU/IO
215 # time to save memory.
215 # time to save memory.
216 merge_caches = {}
216 merge_caches = {}
217
217
218 def revinfo(rev):
218 def revinfo(rev):
219 p1, p2 = parents(rev)
219 p1, p2 = parents(rev)
220 if flags(rev) & REVIDX_SIDEDATA:
220 if flags(rev) & REVIDX_SIDEDATA:
221 e = merge_caches.pop(rev, None)
221 e = merge_caches.pop(rev, None)
222 if e is not None:
222 if e is not None:
223 return e
223 return e
224 c = changelogrevision(rev)
224 c = changelogrevision(rev)
225 p1copies = c.p1copies
225 p1copies = c.p1copies
226 p2copies = c.p2copies
226 p2copies = c.p2copies
227 removed = c.filesremoved
227 removed = c.filesremoved
228 if p1 != node.nullrev and p2 != node.nullrev:
228 if p1 != node.nullrev and p2 != node.nullrev:
229 # XXX some case we over cache, IGNORE
229 # XXX some case we over cache, IGNORE
230 merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
230 merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
231 else:
231 else:
232 p1copies = {}
232 p1copies = {}
233 p2copies = {}
233 p2copies = {}
234 removed = []
234 removed = []
235 return p1, p2, p1copies, p2copies, removed
235 return p1, p2, p1copies, p2copies, removed
236
236
237 else:
237 else:
238
238
239 def revinfo(rev):
239 def revinfo(rev):
240 p1, p2 = parents(rev)
240 p1, p2 = parents(rev)
241 ctx = repo[rev]
241 ctx = repo[rev]
242 p1copies, p2copies = ctx._copies
242 p1copies, p2copies = ctx._copies
243 removed = ctx.filesremoved()
243 removed = ctx.filesremoved()
244 return p1, p2, p1copies, p2copies, removed
244 return p1, p2, p1copies, p2copies, removed
245
245
246 return revinfo
246 return revinfo
247
247
248
248
249 def _changesetforwardcopies(a, b, match):
249 def _changesetforwardcopies(a, b, match):
250 if a.rev() in (node.nullrev, b.rev()):
250 if a.rev() in (node.nullrev, b.rev()):
251 return {}
251 return {}
252
252
253 repo = a.repo().unfiltered()
253 repo = a.repo().unfiltered()
254 children = {}
254 children = {}
255 revinfo = _revinfogetter(repo)
255 revinfo = _revinfogetter(repo)
256
256
257 cl = repo.changelog
257 cl = repo.changelog
258 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
258 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
259 mrset = set(missingrevs)
259 mrset = set(missingrevs)
260 roots = set()
260 roots = set()
261 for r in missingrevs:
261 for r in missingrevs:
262 for p in cl.parentrevs(r):
262 for p in cl.parentrevs(r):
263 if p == node.nullrev:
263 if p == node.nullrev:
264 continue
264 continue
265 if p not in children:
265 if p not in children:
266 children[p] = [r]
266 children[p] = [r]
267 else:
267 else:
268 children[p].append(r)
268 children[p].append(r)
269 if p not in mrset:
269 if p not in mrset:
270 roots.add(p)
270 roots.add(p)
271 if not roots:
271 if not roots:
272 # no common revision to track copies from
272 # no common revision to track copies from
273 return {}
273 return {}
274 min_root = min(roots)
274 min_root = min(roots)
275
275
276 from_head = set(
276 from_head = set(
277 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
277 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
278 )
278 )
279
279
280 iterrevs = set(from_head)
280 iterrevs = set(from_head)
281 iterrevs &= mrset
281 iterrevs &= mrset
282 iterrevs.update(roots)
282 iterrevs.update(roots)
283 iterrevs.remove(b.rev())
283 iterrevs.remove(b.rev())
284 all_copies = {r: {} for r in roots}
284 all_copies = {r: {} for r in roots}
285 alwaysmatch = match.always()
285 alwaysmatch = match.always()
286 for r in sorted(iterrevs):
286 for r in sorted(iterrevs):
287 copies = all_copies.pop(r)
287 copies = all_copies.pop(r)
288 for i, c in enumerate(children[r]):
288 for i, c in enumerate(children[r]):
289 p1, p2, p1copies, p2copies, removed = revinfo(c)
289 p1, p2, p1copies, p2copies, removed = revinfo(c)
290 if r == p1:
290 if r == p1:
291 parent = 1
291 parent = 1
292 childcopies = p1copies
292 childcopies = p1copies
293 else:
293 else:
294 assert r == p2
294 assert r == p2
295 parent = 2
295 parent = 2
296 childcopies = p2copies
296 childcopies = p2copies
297 if not alwaysmatch:
297 if not alwaysmatch:
298 childcopies = {
298 childcopies = {
299 dst: src for dst, src in childcopies.items() if match(dst)
299 dst: src for dst, src in childcopies.items() if match(dst)
300 }
300 }
301 newcopies = copies
301 newcopies = copies
302 if childcopies:
302 if childcopies:
303 newcopies = _chain(newcopies, childcopies)
303 newcopies = _chain(newcopies, childcopies)
304 # _chain makes a copies, we can avoid doing so in some
304 # _chain makes a copies, we can avoid doing so in some
305 # simple/linear cases.
305 # simple/linear cases.
306 assert newcopies is not copies
306 assert newcopies is not copies
307 for f in removed:
307 for f in removed:
308 if f in newcopies:
308 if f in newcopies:
309 if newcopies is copies:
309 if newcopies is copies:
310 # copy on write to avoid affecting potential other
310 # copy on write to avoid affecting potential other
311 # branches. when there are no other branches, this
311 # branches. when there are no other branches, this
312 # could be avoided.
312 # could be avoided.
313 newcopies = copies.copy()
313 newcopies = copies.copy()
314 del newcopies[f]
314 del newcopies[f]
315 othercopies = all_copies.get(c)
315 othercopies = all_copies.get(c)
316 if othercopies is None:
316 if othercopies is None:
317 all_copies[c] = newcopies
317 all_copies[c] = newcopies
318 else:
318 else:
319 # we are the second parent to work on c, we need to merge our
319 # we are the second parent to work on c, we need to merge our
320 # work with the other.
320 # work with the other.
321 #
321 #
322 # Unlike when copies are stored in the filelog, we consider
322 # Unlike when copies are stored in the filelog, we consider
323 # it a copy even if the destination already existed on the
323 # it a copy even if the destination already existed on the
324 # other branch. It's simply too expensive to check if the
324 # other branch. It's simply too expensive to check if the
325 # file existed in the manifest.
325 # file existed in the manifest.
326 #
326 #
327 # In case of conflict, parent 1 take precedence over parent 2.
327 # In case of conflict, parent 1 take precedence over parent 2.
328 # This is an arbitrary choice made anew when implementing
328 # This is an arbitrary choice made anew when implementing
329 # changeset based copies. It was made without regards with
329 # changeset based copies. It was made without regards with
330 # potential filelog related behavior.
330 # potential filelog related behavior.
331 if parent == 1:
331 if parent == 1:
332 othercopies.update(newcopies)
332 othercopies.update(newcopies)
333 else:
333 else:
334 newcopies.update(othercopies)
334 newcopies.update(othercopies)
335 all_copies[c] = newcopies
335 all_copies[c] = newcopies
336 return all_copies[b.rev()]
336 return all_copies[b.rev()]
337
337
338
338
339 def _forwardcopies(a, b, base=None, match=None):
339 def _forwardcopies(a, b, base=None, match=None):
340 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
340 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
341
341
342 if base is None:
342 if base is None:
343 base = a
343 base = a
344 match = a.repo().narrowmatch(match)
344 match = a.repo().narrowmatch(match)
345 # check for working copy
345 # check for working copy
346 if b.rev() is None:
346 if b.rev() is None:
347 cm = _committedforwardcopies(a, b.p1(), base, match)
347 cm = _committedforwardcopies(a, b.p1(), base, match)
348 # combine copies from dirstate if necessary
348 # combine copies from dirstate if necessary
349 copies = _chain(cm, _dirstatecopies(b._repo, match))
349 copies = _chain(cm, _dirstatecopies(b._repo, match))
350 else:
350 else:
351 copies = _committedforwardcopies(a, b, base, match)
351 copies = _committedforwardcopies(a, b, base, match)
352 return copies
352 return copies
353
353
354
354
355 def _backwardrenames(a, b, match):
355 def _backwardrenames(a, b, match):
356 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
356 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
357 return {}
357 return {}
358
358
359 # Even though we're not taking copies into account, 1:n rename situations
359 # Even though we're not taking copies into account, 1:n rename situations
360 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
360 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
361 # arbitrarily pick one of the renames.
361 # arbitrarily pick one of the renames.
362 # We don't want to pass in "match" here, since that would filter
362 # We don't want to pass in "match" here, since that would filter
363 # the destination by it. Since we're reversing the copies, we want
363 # the destination by it. Since we're reversing the copies, we want
364 # to filter the source instead.
364 # to filter the source instead.
365 f = _forwardcopies(b, a)
365 f = _forwardcopies(b, a)
366 r = {}
366 r = {}
367 for k, v in sorted(pycompat.iteritems(f)):
367 for k, v in sorted(pycompat.iteritems(f)):
368 if match and not match(v):
368 if match and not match(v):
369 continue
369 continue
370 # remove copies
370 # remove copies
371 if v in a:
371 if v in a:
372 continue
372 continue
373 r[v] = k
373 r[v] = k
374 return r
374 return r
375
375
376
376
377 def pathcopies(x, y, match=None):
377 def pathcopies(x, y, match=None):
378 """find {dst@y: src@x} copy mapping for directed compare"""
378 """find {dst@y: src@x} copy mapping for directed compare"""
379 repo = x._repo
379 repo = x._repo
380 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
380 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
381 if debug:
381 if debug:
382 repo.ui.debug(
382 repo.ui.debug(
383 b'debug.copies: searching copies from %s to %s\n' % (x, y)
383 b'debug.copies: searching copies from %s to %s\n' % (x, y)
384 )
384 )
385 if x == y or not x or not y:
385 if x == y or not x or not y:
386 return {}
386 return {}
387 a = y.ancestor(x)
387 a = y.ancestor(x)
388 if a == x:
388 if a == x:
389 if debug:
389 if debug:
390 repo.ui.debug(b'debug.copies: search mode: forward\n')
390 repo.ui.debug(b'debug.copies: search mode: forward\n')
391 if y.rev() is None and x == y.p1():
391 if y.rev() is None and x == y.p1():
392 # short-circuit to avoid issues with merge states
392 # short-circuit to avoid issues with merge states
393 return _dirstatecopies(repo, match)
393 return _dirstatecopies(repo, match)
394 copies = _forwardcopies(x, y, match=match)
394 copies = _forwardcopies(x, y, match=match)
395 elif a == y:
395 elif a == y:
396 if debug:
396 if debug:
397 repo.ui.debug(b'debug.copies: search mode: backward\n')
397 repo.ui.debug(b'debug.copies: search mode: backward\n')
398 copies = _backwardrenames(x, y, match=match)
398 copies = _backwardrenames(x, y, match=match)
399 else:
399 else:
400 if debug:
400 if debug:
401 repo.ui.debug(b'debug.copies: search mode: combined\n')
401 repo.ui.debug(b'debug.copies: search mode: combined\n')
402 base = None
402 base = None
403 if a.rev() != node.nullrev:
403 if a.rev() != node.nullrev:
404 base = x
404 base = x
405 copies = _chain(
405 copies = _chain(
406 _backwardrenames(x, a, match=match),
406 _backwardrenames(x, a, match=match),
407 _forwardcopies(a, y, base, match=match),
407 _forwardcopies(a, y, base, match=match),
408 )
408 )
409 _filter(x, y, copies)
409 _filter(x, y, copies)
410 return copies
410 return copies
411
411
412
412
413 def mergecopies(repo, c1, c2, base):
413 def mergecopies(repo, c1, c2, base):
414 """
414 """
415 Finds moves and copies between context c1 and c2 that are relevant for
415 Finds moves and copies between context c1 and c2 that are relevant for
416 merging. 'base' will be used as the merge base.
416 merging. 'base' will be used as the merge base.
417
417
418 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
418 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
419 files that were moved/ copied in one merge parent and modified in another.
419 files that were moved/ copied in one merge parent and modified in another.
420 For example:
420 For example:
421
421
422 o ---> 4 another commit
422 o ---> 4 another commit
423 |
423 |
424 | o ---> 3 commit that modifies a.txt
424 | o ---> 3 commit that modifies a.txt
425 | /
425 | /
426 o / ---> 2 commit that moves a.txt to b.txt
426 o / ---> 2 commit that moves a.txt to b.txt
427 |/
427 |/
428 o ---> 1 merge base
428 o ---> 1 merge base
429
429
430 If we try to rebase revision 3 on revision 4, since there is no a.txt in
430 If we try to rebase revision 3 on revision 4, since there is no a.txt in
431 revision 4, and if user have copytrace disabled, we prints the following
431 revision 4, and if user have copytrace disabled, we prints the following
432 message:
432 message:
433
433
434 ```other changed <file> which local deleted```
434 ```other changed <file> which local deleted```
435
435
436 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
436 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
437 "dirmove".
437 "dirmove".
438
438
439 "copy" is a mapping from destination name -> source name,
439 "copy" is a mapping from destination name -> source name,
440 where source is in c1 and destination is in c2 or vice-versa.
440 where source is in c1 and destination is in c2 or vice-versa.
441
441
442 "movewithdir" is a mapping from source name -> destination name,
442 "movewithdir" is a mapping from source name -> destination name,
443 where the file at source present in one context but not the other
443 where the file at source present in one context but not the other
444 needs to be moved to destination by the merge process, because the
444 needs to be moved to destination by the merge process, because the
445 other context moved the directory it is in.
445 other context moved the directory it is in.
446
446
447 "diverge" is a mapping of source name -> list of destination names
447 "diverge" is a mapping of source name -> list of destination names
448 for divergent renames.
448 for divergent renames.
449
449
450 "renamedelete" is a mapping of source name -> list of destination
450 "renamedelete" is a mapping of source name -> list of destination
451 names for files deleted in c1 that were renamed in c2 or vice-versa.
451 names for files deleted in c1 that were renamed in c2 or vice-versa.
452
452
453 "dirmove" is a mapping of detected source dir -> destination dir renames.
453 "dirmove" is a mapping of detected source dir -> destination dir renames.
454 This is needed for handling changes to new files previously grafted into
454 This is needed for handling changes to new files previously grafted into
455 renamed directories.
455 renamed directories.
456
456
457 This function calls different copytracing algorithms based on config.
457 This function calls different copytracing algorithms based on config.
458 """
458 """
459 # avoid silly behavior for update from empty dir
459 # avoid silly behavior for update from empty dir
460 if not c1 or not c2 or c1 == c2:
460 if not c1 or not c2 or c1 == c2:
461 return {}, {}, {}, {}, {}
461 return {}, {}, {}, {}, {}
462
462
463 narrowmatch = c1.repo().narrowmatch()
463 narrowmatch = c1.repo().narrowmatch()
464
464
465 # avoid silly behavior for parent -> working dir
465 # avoid silly behavior for parent -> working dir
466 if c2.node() is None and c1.node() == repo.dirstate.p1():
466 if c2.node() is None and c1.node() == repo.dirstate.p1():
467 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
467 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
468
468
469 copytracing = repo.ui.config(b'experimental', b'copytrace')
469 copytracing = repo.ui.config(b'experimental', b'copytrace')
470 if stringutil.parsebool(copytracing) is False:
470 if stringutil.parsebool(copytracing) is False:
471 # stringutil.parsebool() returns None when it is unable to parse the
471 # stringutil.parsebool() returns None when it is unable to parse the
472 # value, so we should rely on making sure copytracing is on such cases
472 # value, so we should rely on making sure copytracing is on such cases
473 return {}, {}, {}, {}, {}
473 return {}, {}, {}, {}, {}
474
474
475 if usechangesetcentricalgo(repo):
475 if usechangesetcentricalgo(repo):
476 # The heuristics don't make sense when we need changeset-centric algos
476 # The heuristics don't make sense when we need changeset-centric algos
477 return _fullcopytracing(repo, c1, c2, base)
477 return _fullcopytracing(repo, c1, c2, base)
478
478
479 # Copy trace disabling is explicitly below the node == p1 logic above
479 # Copy trace disabling is explicitly below the node == p1 logic above
480 # because the logic above is required for a simple copy to be kept across a
480 # because the logic above is required for a simple copy to be kept across a
481 # rebase.
481 # rebase.
482 if copytracing == b'heuristics':
482 if copytracing == b'heuristics':
483 # Do full copytracing if only non-public revisions are involved as
483 # Do full copytracing if only non-public revisions are involved as
484 # that will be fast enough and will also cover the copies which could
484 # that will be fast enough and will also cover the copies which could
485 # be missed by heuristics
485 # be missed by heuristics
486 if _isfullcopytraceable(repo, c1, base):
486 if _isfullcopytraceable(repo, c1, base):
487 return _fullcopytracing(repo, c1, c2, base)
487 return _fullcopytracing(repo, c1, c2, base)
488 return _heuristicscopytracing(repo, c1, c2, base)
488 return _heuristicscopytracing(repo, c1, c2, base)
489 else:
489 else:
490 return _fullcopytracing(repo, c1, c2, base)
490 return _fullcopytracing(repo, c1, c2, base)
491
491
492
492
493 def _isfullcopytraceable(repo, c1, base):
493 def _isfullcopytraceable(repo, c1, base):
494 """ Checks that if base, source and destination are all no-public branches,
494 """ Checks that if base, source and destination are all no-public branches,
495 if yes let's use the full copytrace algorithm for increased capabilities
495 if yes let's use the full copytrace algorithm for increased capabilities
496 since it will be fast enough.
496 since it will be fast enough.
497
497
498 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
498 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
499 number of changesets from c1 to base such that if number of changesets are
499 number of changesets from c1 to base such that if number of changesets are
500 more than the limit, full copytracing algorithm won't be used.
500 more than the limit, full copytracing algorithm won't be used.
501 """
501 """
502 if c1.rev() is None:
502 if c1.rev() is None:
503 c1 = c1.p1()
503 c1 = c1.p1()
504 if c1.mutable() and base.mutable():
504 if c1.mutable() and base.mutable():
505 sourcecommitlimit = repo.ui.configint(
505 sourcecommitlimit = repo.ui.configint(
506 b'experimental', b'copytrace.sourcecommitlimit'
506 b'experimental', b'copytrace.sourcecommitlimit'
507 )
507 )
508 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
508 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
509 return commits < sourcecommitlimit
509 return commits < sourcecommitlimit
510 return False
510 return False
511
511
512
512
513 def _checksinglesidecopies(
513 def _checksinglesidecopies(
514 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
514 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
515 ):
515 ):
516 if src not in m2:
516 if src not in m2:
517 # deleted on side 2
517 # deleted on side 2
518 if src not in m1:
518 if src not in m1:
519 # renamed on side 1, deleted on side 2
519 # renamed on side 1, deleted on side 2
520 renamedelete[src] = dsts1
520 renamedelete[src] = dsts1
521 elif m2[src] != mb[src]:
521 elif m2[src] != mb[src]:
522 if not _related(c2[src], base[src]):
522 if not _related(c2[src], base[src]):
523 return
523 return
524 # modified on side 2
524 # modified on side 2
525 for dst in dsts1:
525 for dst in dsts1:
526 if dst not in m2:
526 if dst not in m2:
527 # dst not added on side 2 (handle as regular
527 # dst not added on side 2 (handle as regular
528 # "both created" case in manifestmerge otherwise)
528 # "both created" case in manifestmerge otherwise)
529 copy[dst] = src
529 copy[dst] = src
530
530
531
531
532 def _fullcopytracing(repo, c1, c2, base):
532 def _fullcopytracing(repo, c1, c2, base):
533 """ The full copytracing algorithm which finds all the new files that were
533 """ The full copytracing algorithm which finds all the new files that were
534 added from merge base up to the top commit and for each file it checks if
534 added from merge base up to the top commit and for each file it checks if
535 this file was copied from another file.
535 this file was copied from another file.
536
536
537 This is pretty slow when a lot of changesets are involved but will track all
537 This is pretty slow when a lot of changesets are involved but will track all
538 the copies.
538 the copies.
539 """
539 """
540 m1 = c1.manifest()
540 m1 = c1.manifest()
541 m2 = c2.manifest()
541 m2 = c2.manifest()
542 mb = base.manifest()
542 mb = base.manifest()
543
543
544 copies1 = pathcopies(base, c1)
544 copies1 = pathcopies(base, c1)
545 copies2 = pathcopies(base, c2)
545 copies2 = pathcopies(base, c2)
546
546
547 inversecopies1 = {}
547 inversecopies1 = {}
548 inversecopies2 = {}
548 inversecopies2 = {}
549 for dst, src in copies1.items():
549 for dst, src in copies1.items():
550 inversecopies1.setdefault(src, []).append(dst)
550 inversecopies1.setdefault(src, []).append(dst)
551 for dst, src in copies2.items():
551 for dst, src in copies2.items():
552 inversecopies2.setdefault(src, []).append(dst)
552 inversecopies2.setdefault(src, []).append(dst)
553
553
554 copy = {}
554 copy = {}
555 diverge = {}
555 diverge = {}
556 renamedelete = {}
556 renamedelete = {}
557 allsources = set(inversecopies1) | set(inversecopies2)
557 allsources = set(inversecopies1) | set(inversecopies2)
558 for src in allsources:
558 for src in allsources:
559 dsts1 = inversecopies1.get(src)
559 dsts1 = inversecopies1.get(src)
560 dsts2 = inversecopies2.get(src)
560 dsts2 = inversecopies2.get(src)
561 if dsts1 and dsts2:
561 if dsts1 and dsts2:
562 # copied/renamed on both sides
562 # copied/renamed on both sides
563 if src not in m1 and src not in m2:
563 if src not in m1 and src not in m2:
564 # renamed on both sides
564 # renamed on both sides
565 dsts1 = set(dsts1)
565 dsts1 = set(dsts1)
566 dsts2 = set(dsts2)
566 dsts2 = set(dsts2)
567 # If there's some overlap in the rename destinations, we
567 # If there's some overlap in the rename destinations, we
568 # consider it not divergent. For example, if side 1 copies 'a'
568 # consider it not divergent. For example, if side 1 copies 'a'
569 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
569 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
570 # and 'd' and deletes 'a'.
570 # and 'd' and deletes 'a'.
571 if dsts1 & dsts2:
571 if dsts1 & dsts2:
572 for dst in dsts1 & dsts2:
572 for dst in dsts1 & dsts2:
573 copy[dst] = src
573 copy[dst] = src
574 else:
574 else:
575 diverge[src] = sorted(dsts1 | dsts2)
575 diverge[src] = sorted(dsts1 | dsts2)
576 elif src in m1 and src in m2:
576 elif src in m1 and src in m2:
577 # copied on both sides
577 # copied on both sides
578 dsts1 = set(dsts1)
578 dsts1 = set(dsts1)
579 dsts2 = set(dsts2)
579 dsts2 = set(dsts2)
580 for dst in dsts1 & dsts2:
580 for dst in dsts1 & dsts2:
581 copy[dst] = src
581 copy[dst] = src
582 # TODO: Handle cases where it was renamed on one side and copied
582 # TODO: Handle cases where it was renamed on one side and copied
583 # on the other side
583 # on the other side
584 elif dsts1:
584 elif dsts1:
585 # copied/renamed only on side 1
585 # copied/renamed only on side 1
586 _checksinglesidecopies(
586 _checksinglesidecopies(
587 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
587 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
588 )
588 )
589 elif dsts2:
589 elif dsts2:
590 # copied/renamed only on side 2
590 # copied/renamed only on side 2
591 _checksinglesidecopies(
591 _checksinglesidecopies(
592 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
592 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
593 )
593 )
594
594
595 renamedeleteset = set()
595 renamedeleteset = set()
596 divergeset = set()
596 divergeset = set()
597 for dsts in diverge.values():
597 for dsts in diverge.values():
598 divergeset.update(dsts)
598 divergeset.update(dsts)
599 for dsts in renamedelete.values():
599 for dsts in renamedelete.values():
600 renamedeleteset.update(dsts)
600 renamedeleteset.update(dsts)
601
601
602 # find interesting file sets from manifests
602 # find interesting file sets from manifests
603 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
603 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
604 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
604 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
605 u1 = sorted(addedinm1 - addedinm2)
605 u1 = sorted(addedinm1 - addedinm2)
606 u2 = sorted(addedinm2 - addedinm1)
606 u2 = sorted(addedinm2 - addedinm1)
607
607
608 header = b" unmatched files in %s"
608 header = b" unmatched files in %s"
609 if u1:
609 if u1:
610 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
610 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
611 if u2:
611 if u2:
612 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
612 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
613
613
614 fullcopy = copies1.copy()
614 fullcopy = copies1.copy()
615 fullcopy.update(copies2)
615 fullcopy.update(copies2)
616 if not fullcopy:
616 if not fullcopy:
617 return copy, {}, diverge, renamedelete, {}
617 return copy, {}, diverge, renamedelete, {}
618
618
619 if repo.ui.debugflag:
619 if repo.ui.debugflag:
620 repo.ui.debug(
620 repo.ui.debug(
621 b" all copies found (* = to merge, ! = divergent, "
621 b" all copies found (* = to merge, ! = divergent, "
622 b"% = renamed and deleted):\n"
622 b"% = renamed and deleted):\n"
623 )
623 )
624 for f in sorted(fullcopy):
624 for f in sorted(fullcopy):
625 note = b""
625 note = b""
626 if f in copy:
626 if f in copy:
627 note += b"*"
627 note += b"*"
628 if f in divergeset:
628 if f in divergeset:
629 note += b"!"
629 note += b"!"
630 if f in renamedeleteset:
630 if f in renamedeleteset:
631 note += b"%"
631 note += b"%"
632 repo.ui.debug(
632 repo.ui.debug(
633 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
633 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
634 )
634 )
635 del divergeset
635 del divergeset
636
636
637 repo.ui.debug(b" checking for directory renames\n")
637 repo.ui.debug(b" checking for directory renames\n")
638
638
639 # generate a directory move map
639 # generate a directory move map
640 d1, d2 = c1.dirs(), c2.dirs()
640 d1, d2 = c1.dirs(), c2.dirs()
641 invalid = set()
641 invalid = set()
642 dirmove = {}
642 dirmove = {}
643
643
644 # examine each file copy for a potential directory move, which is
644 # examine each file copy for a potential directory move, which is
645 # when all the files in a directory are moved to a new directory
645 # when all the files in a directory are moved to a new directory
646 for dst, src in pycompat.iteritems(fullcopy):
646 for dst, src in pycompat.iteritems(fullcopy):
647 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
647 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
648 if dsrc in invalid:
648 if dsrc in invalid:
649 # already seen to be uninteresting
649 # already seen to be uninteresting
650 continue
650 continue
651 elif dsrc in d1 and ddst in d1:
651 elif dsrc in d1 and ddst in d1:
652 # directory wasn't entirely moved locally
652 # directory wasn't entirely moved locally
653 invalid.add(dsrc)
653 invalid.add(dsrc)
654 elif dsrc in d2 and ddst in d2:
654 elif dsrc in d2 and ddst in d2:
655 # directory wasn't entirely moved remotely
655 # directory wasn't entirely moved remotely
656 invalid.add(dsrc)
656 invalid.add(dsrc)
657 elif dsrc in dirmove and dirmove[dsrc] != ddst:
657 elif dsrc in dirmove and dirmove[dsrc] != ddst:
658 # files from the same directory moved to two different places
658 # files from the same directory moved to two different places
659 invalid.add(dsrc)
659 invalid.add(dsrc)
660 else:
660 else:
661 # looks good so far
661 # looks good so far
662 dirmove[dsrc] = ddst
662 dirmove[dsrc] = ddst
663
663
664 for i in invalid:
664 for i in invalid:
665 if i in dirmove:
665 if i in dirmove:
666 del dirmove[i]
666 del dirmove[i]
667 del d1, d2, invalid
667 del d1, d2, invalid
668
668
669 if not dirmove:
669 if not dirmove:
670 return copy, {}, diverge, renamedelete, {}
670 return copy, {}, diverge, renamedelete, {}
671
671
672 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
672 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
673
673
674 for d in dirmove:
674 for d in dirmove:
675 repo.ui.debug(
675 repo.ui.debug(
676 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
676 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
677 )
677 )
678
678
679 movewithdir = {}
679 movewithdir = {}
680 # check unaccounted nonoverlapping files against directory moves
680 # check unaccounted nonoverlapping files against directory moves
681 for f in u1 + u2:
681 for f in u1 + u2:
682 if f not in fullcopy:
682 if f not in fullcopy:
683 for d in dirmove:
683 for d in dirmove:
684 if f.startswith(d):
684 if f.startswith(d):
685 # new file added in a directory that was moved, move it
685 # new file added in a directory that was moved, move it
686 df = dirmove[d] + f[len(d) :]
686 df = dirmove[d] + f[len(d) :]
687 if df not in copy:
687 if df not in copy:
688 movewithdir[f] = df
688 movewithdir[f] = df
689 repo.ui.debug(
689 repo.ui.debug(
690 b" pending file src: '%s' -> dst: '%s'\n"
690 b" pending file src: '%s' -> dst: '%s'\n"
691 % (f, df)
691 % (f, df)
692 )
692 )
693 break
693 break
694
694
695 return copy, movewithdir, diverge, renamedelete, dirmove
695 return copy, movewithdir, diverge, renamedelete, dirmove
696
696
697
697
698 def _heuristicscopytracing(repo, c1, c2, base):
698 def _heuristicscopytracing(repo, c1, c2, base):
699 """ Fast copytracing using filename heuristics
699 """ Fast copytracing using filename heuristics
700
700
701 Assumes that moves or renames are of following two types:
701 Assumes that moves or renames are of following two types:
702
702
703 1) Inside a directory only (same directory name but different filenames)
703 1) Inside a directory only (same directory name but different filenames)
704 2) Move from one directory to another
704 2) Move from one directory to another
705 (same filenames but different directory names)
705 (same filenames but different directory names)
706
706
707 Works only when there are no merge commits in the "source branch".
707 Works only when there are no merge commits in the "source branch".
708 Source branch is commits from base up to c2 not including base.
708 Source branch is commits from base up to c2 not including base.
709
709
710 If merge is involved it fallbacks to _fullcopytracing().
710 If merge is involved it fallbacks to _fullcopytracing().
711
711
712 Can be used by setting the following config:
712 Can be used by setting the following config:
713
713
714 [experimental]
714 [experimental]
715 copytrace = heuristics
715 copytrace = heuristics
716
716
717 In some cases the copy/move candidates found by heuristics can be very large
717 In some cases the copy/move candidates found by heuristics can be very large
718 in number and that will make the algorithm slow. The number of possible
718 in number and that will make the algorithm slow. The number of possible
719 candidates to check can be limited by using the config
719 candidates to check can be limited by using the config
720 `experimental.copytrace.movecandidateslimit` which defaults to 100.
720 `experimental.copytrace.movecandidateslimit` which defaults to 100.
721 """
721 """
722
722
723 if c1.rev() is None:
723 if c1.rev() is None:
724 c1 = c1.p1()
724 c1 = c1.p1()
725 if c2.rev() is None:
725 if c2.rev() is None:
726 c2 = c2.p1()
726 c2 = c2.p1()
727
727
728 copies = {}
728 copies = {}
729
729
730 changedfiles = set()
730 changedfiles = set()
731 m1 = c1.manifest()
731 m1 = c1.manifest()
732 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
732 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
733 # If base is not in c2 branch, we switch to fullcopytracing
733 # If base is not in c2 branch, we switch to fullcopytracing
734 repo.ui.debug(
734 repo.ui.debug(
735 b"switching to full copytracing as base is not "
735 b"switching to full copytracing as base is not "
736 b"an ancestor of c2\n"
736 b"an ancestor of c2\n"
737 )
737 )
738 return _fullcopytracing(repo, c1, c2, base)
738 return _fullcopytracing(repo, c1, c2, base)
739
739
740 ctx = c2
740 ctx = c2
741 while ctx != base:
741 while ctx != base:
742 if len(ctx.parents()) == 2:
742 if len(ctx.parents()) == 2:
743 # To keep things simple let's not handle merges
743 # To keep things simple let's not handle merges
744 repo.ui.debug(b"switching to full copytracing because of merges\n")
744 repo.ui.debug(b"switching to full copytracing because of merges\n")
745 return _fullcopytracing(repo, c1, c2, base)
745 return _fullcopytracing(repo, c1, c2, base)
746 changedfiles.update(ctx.files())
746 changedfiles.update(ctx.files())
747 ctx = ctx.p1()
747 ctx = ctx.p1()
748
748
749 cp = _forwardcopies(base, c2)
749 cp = _forwardcopies(base, c2)
750 for dst, src in pycompat.iteritems(cp):
750 for dst, src in pycompat.iteritems(cp):
751 if src in m1:
751 if src in m1:
752 copies[dst] = src
752 copies[dst] = src
753
753
754 # file is missing if it isn't present in the destination, but is present in
754 # file is missing if it isn't present in the destination, but is present in
755 # the base and present in the source.
755 # the base and present in the source.
756 # Presence in the base is important to exclude added files, presence in the
756 # Presence in the base is important to exclude added files, presence in the
757 # source is important to exclude removed files.
757 # source is important to exclude removed files.
758 filt = lambda f: f not in m1 and f in base and f in c2
758 filt = lambda f: f not in m1 and f in base and f in c2
759 missingfiles = [f for f in changedfiles if filt(f)]
759 missingfiles = [f for f in changedfiles if filt(f)]
760
760
761 if missingfiles:
761 if missingfiles:
762 basenametofilename = collections.defaultdict(list)
762 basenametofilename = collections.defaultdict(list)
763 dirnametofilename = collections.defaultdict(list)
763 dirnametofilename = collections.defaultdict(list)
764
764
765 for f in m1.filesnotin(base.manifest()):
765 for f in m1.filesnotin(base.manifest()):
766 basename = os.path.basename(f)
766 basename = os.path.basename(f)
767 dirname = os.path.dirname(f)
767 dirname = os.path.dirname(f)
768 basenametofilename[basename].append(f)
768 basenametofilename[basename].append(f)
769 dirnametofilename[dirname].append(f)
769 dirnametofilename[dirname].append(f)
770
770
771 for f in missingfiles:
771 for f in missingfiles:
772 basename = os.path.basename(f)
772 basename = os.path.basename(f)
773 dirname = os.path.dirname(f)
773 dirname = os.path.dirname(f)
774 samebasename = basenametofilename[basename]
774 samebasename = basenametofilename[basename]
775 samedirname = dirnametofilename[dirname]
775 samedirname = dirnametofilename[dirname]
776 movecandidates = samebasename + samedirname
776 movecandidates = samebasename + samedirname
777 # f is guaranteed to be present in c2, that's why
777 # f is guaranteed to be present in c2, that's why
778 # c2.filectx(f) won't fail
778 # c2.filectx(f) won't fail
779 f2 = c2.filectx(f)
779 f2 = c2.filectx(f)
780 # we can have a lot of candidates which can slow down the heuristics
780 # we can have a lot of candidates which can slow down the heuristics
781 # config value to limit the number of candidates moves to check
781 # config value to limit the number of candidates moves to check
782 maxcandidates = repo.ui.configint(
782 maxcandidates = repo.ui.configint(
783 b'experimental', b'copytrace.movecandidateslimit'
783 b'experimental', b'copytrace.movecandidateslimit'
784 )
784 )
785
785
786 if len(movecandidates) > maxcandidates:
786 if len(movecandidates) > maxcandidates:
787 repo.ui.status(
787 repo.ui.status(
788 _(
788 _(
789 b"skipping copytracing for '%s', more "
789 b"skipping copytracing for '%s', more "
790 b"candidates than the limit: %d\n"
790 b"candidates than the limit: %d\n"
791 )
791 )
792 % (f, len(movecandidates))
792 % (f, len(movecandidates))
793 )
793 )
794 continue
794 continue
795
795
796 for candidate in movecandidates:
796 for candidate in movecandidates:
797 f1 = c1.filectx(candidate)
797 f1 = c1.filectx(candidate)
798 if _related(f1, f2):
798 if _related(f1, f2):
799 # if there are a few related copies then we'll merge
799 # if there are a few related copies then we'll merge
800 # changes into all of them. This matches the behaviour
800 # changes into all of them. This matches the behaviour
801 # of upstream copytracing
801 # of upstream copytracing
802 copies[candidate] = f
802 copies[candidate] = f
803
803
804 return copies, {}, {}, {}, {}
804 return copies, {}, {}, {}, {}
805
805
806
806
807 def _related(f1, f2):
807 def _related(f1, f2):
808 """return True if f1 and f2 filectx have a common ancestor
808 """return True if f1 and f2 filectx have a common ancestor
809
809
810 Walk back to common ancestor to see if the two files originate
810 Walk back to common ancestor to see if the two files originate
811 from the same file. Since workingfilectx's rev() is None it messes
811 from the same file. Since workingfilectx's rev() is None it messes
812 up the integer comparison logic, hence the pre-step check for
812 up the integer comparison logic, hence the pre-step check for
813 None (f1 and f2 can only be workingfilectx's initially).
813 None (f1 and f2 can only be workingfilectx's initially).
814 """
814 """
815
815
816 if f1 == f2:
816 if f1 == f2:
817 return True # a match
817 return True # a match
818
818
819 g1, g2 = f1.ancestors(), f2.ancestors()
819 g1, g2 = f1.ancestors(), f2.ancestors()
820 try:
820 try:
821 f1r, f2r = f1.linkrev(), f2.linkrev()
821 f1r, f2r = f1.linkrev(), f2.linkrev()
822
822
823 if f1r is None:
823 if f1r is None:
824 f1 = next(g1)
824 f1 = next(g1)
825 if f2r is None:
825 if f2r is None:
826 f2 = next(g2)
826 f2 = next(g2)
827
827
828 while True:
828 while True:
829 f1r, f2r = f1.linkrev(), f2.linkrev()
829 f1r, f2r = f1.linkrev(), f2.linkrev()
830 if f1r > f2r:
830 if f1r > f2r:
831 f1 = next(g1)
831 f1 = next(g1)
832 elif f2r > f1r:
832 elif f2r > f1r:
833 f2 = next(g2)
833 f2 = next(g2)
834 else: # f1 and f2 point to files in the same linkrev
834 else: # f1 and f2 point to files in the same linkrev
835 return f1 == f2 # true if they point to the same file
835 return f1 == f2 # true if they point to the same file
836 except StopIteration:
836 except StopIteration:
837 return False
837 return False
838
838
839
839
840 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
840 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
841 """reproduce copies from fromrev to rev in the dirstate
841 """reproduce copies from fromrev to rev in the dirstate
842
842
843 If skiprev is specified, it's a revision that should be used to
843 If skiprev is specified, it's a revision that should be used to
844 filter copy records. Any copies that occur between fromrev and
844 filter copy records. Any copies that occur between fromrev and
845 skiprev will not be duplicated, even if they appear in the set of
845 skiprev will not be duplicated, even if they appear in the set of
846 copies between fromrev and rev.
846 copies between fromrev and rev.
847 """
847 """
848 exclude = {}
848 exclude = {}
849 ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
849 ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
850 bctrace = stringutil.parsebool(ctraceconfig)
850 bctrace = stringutil.parsebool(ctraceconfig)
851 if skiprev is not None and (
851 if skiprev is not None and (
852 ctraceconfig == b'heuristics' or bctrace or bctrace is None
852 ctraceconfig == b'heuristics' or bctrace or bctrace is None
853 ):
853 ):
854 # copytrace='off' skips this line, but not the entire function because
854 # copytrace='off' skips this line, but not the entire function because
855 # the line below is O(size of the repo) during a rebase, while the rest
855 # the line below is O(size of the repo) during a rebase, while the rest
856 # of the function is much faster (and is required for carrying copy
856 # of the function is much faster (and is required for carrying copy
857 # metadata across the rebase anyway).
857 # metadata across the rebase anyway).
858 exclude = pathcopies(repo[fromrev], repo[skiprev])
858 exclude = pathcopies(repo[fromrev], repo[skiprev])
859 for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
859 for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
860 if dst in exclude:
860 if dst in exclude:
861 continue
861 continue
862 if dst in wctx:
862 if dst in wctx:
863 wctx[dst].markcopied(src)
863 wctx[dst].markcopied(src)
864
864
865
865
866 def computechangesetfilesadded(ctx):
866 def computechangesetfilesadded(ctx):
867 """return the list of files added in a changeset
867 """return the list of files added in a changeset
868 """
868 """
869 added = []
869 added = []
870 for f in ctx.files():
870 for f in ctx.files():
871 if not any(f in p for p in ctx.parents()):
871 if not any(f in p for p in ctx.parents()):
872 added.append(f)
872 added.append(f)
873 return added
873 return added
874
874
875
875
876 def computechangesetfilesremoved(ctx):
876 def computechangesetfilesremoved(ctx):
877 """return the list of files removed in a changeset
877 """return the list of files removed in a changeset
878 """
878 """
879 removed = []
879 removed = []
880 for f in ctx.files():
880 for f in ctx.files():
881 if f not in ctx:
881 if f not in ctx:
882 removed.append(f)
882 removed.append(f)
883 return removed
883 return removed
884
884
885
885
886 def computechangesetcopies(ctx):
886 def computechangesetcopies(ctx):
887 """return the copies data for a changeset
887 """return the copies data for a changeset
888
888
889 The copies data are returned as a pair of dictionnary (p1copies, p2copies).
889 The copies data are returned as a pair of dictionnary (p1copies, p2copies).
890
890
891 Each dictionnary are in the form: `{newname: oldname}`
891 Each dictionnary are in the form: `{newname: oldname}`
892 """
892 """
893 p1copies = {}
893 p1copies = {}
894 p2copies = {}
894 p2copies = {}
895 p1 = ctx.p1()
895 p1 = ctx.p1()
896 p2 = ctx.p2()
896 p2 = ctx.p2()
897 narrowmatch = ctx._repo.narrowmatch()
897 narrowmatch = ctx._repo.narrowmatch()
898 for dst in ctx.files():
898 for dst in ctx.files():
899 if not narrowmatch(dst) or dst not in ctx:
899 if not narrowmatch(dst) or dst not in ctx:
900 continue
900 continue
901 copied = ctx[dst].renamed()
901 copied = ctx[dst].renamed()
902 if not copied:
902 if not copied:
903 continue
903 continue
904 src, srcnode = copied
904 src, srcnode = copied
905 if src in p1 and p1[src].filenode() == srcnode:
905 if src in p1 and p1[src].filenode() == srcnode:
906 p1copies[dst] = src
906 p1copies[dst] = src
907 elif src in p2 and p2[src].filenode() == srcnode:
907 elif src in p2 and p2[src].filenode() == srcnode:
908 p2copies[dst] = src
908 p2copies[dst] = src
909 return p1copies, p2copies
909 return p1copies, p2copies
910
910
911
911
912 def encodecopies(files, copies):
912 def encodecopies(files, copies):
913 items = []
913 items = []
914 for i, dst in enumerate(files):
914 for i, dst in enumerate(files):
915 if dst in copies:
915 if dst in copies:
916 items.append(b'%d\0%s' % (i, copies[dst]))
916 items.append(b'%d\0%s' % (i, copies[dst]))
917 if len(items) != len(copies):
917 if len(items) != len(copies):
918 raise error.ProgrammingError(
918 raise error.ProgrammingError(
919 b'some copy targets missing from file list'
919 b'some copy targets missing from file list'
920 )
920 )
921 return b"\n".join(items)
921 return b"\n".join(items)
922
922
923
923
924 def decodecopies(files, data):
924 def decodecopies(files, data):
925 try:
925 try:
926 copies = {}
926 copies = {}
927 if not data:
927 if not data:
928 return copies
928 return copies
929 for l in data.split(b'\n'):
929 for l in data.split(b'\n'):
930 strindex, src = l.split(b'\0')
930 strindex, src = l.split(b'\0')
931 i = int(strindex)
931 i = int(strindex)
932 dst = files[i]
932 dst = files[i]
933 copies[dst] = src
933 copies[dst] = src
934 return copies
934 return copies
935 except (ValueError, IndexError):
935 except (ValueError, IndexError):
936 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
936 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
937 # used different syntax for the value.
937 # used different syntax for the value.
938 return None
938 return None
939
939
940
940
941 def encodefileindices(files, subset):
941 def encodefileindices(files, subset):
942 subset = set(subset)
942 subset = set(subset)
943 indices = []
943 indices = []
944 for i, f in enumerate(files):
944 for i, f in enumerate(files):
945 if f in subset:
945 if f in subset:
946 indices.append(b'%d' % i)
946 indices.append(b'%d' % i)
947 return b'\n'.join(indices)
947 return b'\n'.join(indices)
948
948
949
949
950 def decodefileindices(files, data):
950 def decodefileindices(files, data):
951 try:
951 try:
952 subset = []
952 subset = []
953 if not data:
953 if not data:
954 return subset
954 return subset
955 for strindex in data.split(b'\n'):
955 for strindex in data.split(b'\n'):
956 i = int(strindex)
956 i = int(strindex)
957 if i < 0 or i >= len(files):
957 if i < 0 or i >= len(files):
958 return None
958 return None
959 subset.append(files[i])
959 subset.append(files[i])
960 return subset
960 return subset
961 except (ValueError, IndexError):
961 except (ValueError, IndexError):
962 # Perhaps someone had chosen the same key name (e.g. "added") and
962 # Perhaps someone had chosen the same key name (e.g. "added") and
963 # used different syntax for the value.
963 # used different syntax for the value.
964 return None
964 return None
965
965
966
966
967 def _getsidedata(srcrepo, rev):
967 def _getsidedata(srcrepo, rev):
968 ctx = srcrepo[rev]
968 ctx = srcrepo[rev]
969 filescopies = computechangesetcopies(ctx)
969 filescopies = computechangesetcopies(ctx)
970 filesadded = computechangesetfilesadded(ctx)
970 filesadded = computechangesetfilesadded(ctx)
971 filesremoved = computechangesetfilesremoved(ctx)
971 filesremoved = computechangesetfilesremoved(ctx)
972 sidedata = {}
972 sidedata = {}
973 if any([filescopies, filesadded, filesremoved]):
973 if any([filescopies, filesadded, filesremoved]):
974 sortedfiles = sorted(ctx.files())
974 sortedfiles = sorted(ctx.files())
975 p1copies, p2copies = filescopies
975 p1copies, p2copies = filescopies
976 p1copies = encodecopies(sortedfiles, p1copies)
976 p1copies = encodecopies(sortedfiles, p1copies)
977 p2copies = encodecopies(sortedfiles, p2copies)
977 p2copies = encodecopies(sortedfiles, p2copies)
978 filesadded = encodefileindices(sortedfiles, filesadded)
978 filesadded = encodefileindices(sortedfiles, filesadded)
979 filesremoved = encodefileindices(sortedfiles, filesremoved)
979 filesremoved = encodefileindices(sortedfiles, filesremoved)
980 if p1copies:
980 if p1copies:
981 sidedata[sidedatamod.SD_P1COPIES] = p1copies
981 sidedata[sidedatamod.SD_P1COPIES] = p1copies
982 if p2copies:
982 if p2copies:
983 sidedata[sidedatamod.SD_P2COPIES] = p2copies
983 sidedata[sidedatamod.SD_P2COPIES] = p2copies
984 if filesadded:
984 if filesadded:
985 sidedata[sidedatamod.SD_FILESADDED] = filesadded
985 sidedata[sidedatamod.SD_FILESADDED] = filesadded
986 if filesremoved:
986 if filesremoved:
987 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
987 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
988 return sidedata
988 return sidedata
989
989
990
990
991 def getsidedataadder(srcrepo, destrepo):
991 def getsidedataadder(srcrepo, destrepo):
992 def sidedatacompanion(revlog, rev):
992 def sidedatacompanion(revlog, rev):
993 sidedata = {}
993 sidedata = {}
994 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
994 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
995 sidedata = _getsidedata(srcrepo, rev)
995 sidedata = _getsidedata(srcrepo, rev)
996 return False, (), sidedata
996 return False, (), sidedata
997
997
998 return sidedatacompanion
998 return sidedatacompanion
999
999
1000
1000
1001 def getsidedataremover(srcrepo, destrepo):
1001 def getsidedataremover(srcrepo, destrepo):
1002 def sidedatacompanion(revlog, rev):
1002 def sidedatacompanion(revlog, rev):
1003 f = ()
1003 f = ()
1004 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1004 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1005 if revlog.flags(rev) & REVIDX_SIDEDATA:
1005 if revlog.flags(rev) & REVIDX_SIDEDATA:
1006 f = (
1006 f = (
1007 sidedatamod.SD_P1COPIES,
1007 sidedatamod.SD_P1COPIES,
1008 sidedatamod.SD_P2COPIES,
1008 sidedatamod.SD_P2COPIES,
1009 sidedatamod.SD_FILESADDED,
1009 sidedatamod.SD_FILESADDED,
1010 sidedatamod.SD_FILESREMOVED,
1010 sidedatamod.SD_FILESREMOVED,
1011 )
1011 )
1012 return False, f, {}
1012 return False, f, {}
1013
1013
1014 return sidedatacompanion
1014 return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now