##// END OF EJS Templates
copies: use an unfiltered repository for the changeset centric algorithm...
marmoute -
r43550:00de32aa default
parent child Browse files
Show More
@@ -1,955 +1,955 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16
16
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18
18
19 from . import (
19 from . import (
20 error,
20 error,
21 match as matchmod,
21 match as matchmod,
22 node,
22 node,
23 pathutil,
23 pathutil,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28 from .revlogutils import sidedata as sidedatamod
28 from .revlogutils import sidedata as sidedatamod
29
29
30 from .utils import stringutil
30 from .utils import stringutil
31
31
32
32
33 def _filter(src, dst, t):
33 def _filter(src, dst, t):
34 """filters out invalid copies after chaining"""
34 """filters out invalid copies after chaining"""
35
35
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 # in the following table (not including trivial cases). For example, case 2
38 # in the following table (not including trivial cases). For example, case 2
39 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 # then was renamed between 'mid' and 'dst'.
40 # then was renamed between 'mid' and 'dst'.
41 #
41 #
42 # case src mid dst result
42 # case src mid dst result
43 # 1 x y - -
43 # 1 x y - -
44 # 2 x y y x->y
44 # 2 x y y x->y
45 # 3 x y x -
45 # 3 x y x -
46 # 4 x y z x->z
46 # 4 x y z x->z
47 # 5 - x y -
47 # 5 - x y -
48 # 6 x x y x->y
48 # 6 x x y x->y
49 #
49 #
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 # between 5 and 6, so it includes all cases in its result.
52 # between 5 and 6, so it includes all cases in its result.
53 # Cases 1, 3, and 5 are then removed by _filter().
53 # Cases 1, 3, and 5 are then removed by _filter().
54
54
55 for k, v in list(t.items()):
55 for k, v in list(t.items()):
56 # remove copies from files that didn't exist
56 # remove copies from files that didn't exist
57 if v not in src:
57 if v not in src:
58 del t[k]
58 del t[k]
59 # remove criss-crossed copies
59 # remove criss-crossed copies
60 elif k in src and v in dst:
60 elif k in src and v in dst:
61 del t[k]
61 del t[k]
62 # remove copies to files that were then removed
62 # remove copies to files that were then removed
63 elif k not in dst:
63 elif k not in dst:
64 del t[k]
64 del t[k]
65
65
66
66
def _chain(a, b):
    """chain two sets of copies 'a' and 'b'

    Returns a new mapping in which every destination recorded in 'b' is
    resolved one step further back: when the source of a 'b' entry is
    itself a destination recorded so far, the entry points at that
    earlier origin instead.
    """
    chained = dict(a)
    for dest, source in pycompat.iteritems(b):
        # dict.get(source, source) follows the link when 'source' is a
        # known destination, and keeps 'source' as-is otherwise
        chained[dest] = chained.get(source, source)
    return chained
76
76
77
77
78 def _tracefile(fctx, am, basemf):
78 def _tracefile(fctx, am, basemf):
79 """return file context that is the ancestor of fctx present in ancestor
79 """return file context that is the ancestor of fctx present in ancestor
80 manifest am
80 manifest am
81
81
82 Note: we used to try and stop after a given limit, however checking if that
82 Note: we used to try and stop after a given limit, however checking if that
83 limit is reached turned out to be very expensive. we are better off
83 limit is reached turned out to be very expensive. we are better off
84 disabling that feature."""
84 disabling that feature."""
85
85
86 for f in fctx.ancestors():
86 for f in fctx.ancestors():
87 path = f.path()
87 path = f.path()
88 if am.get(path, None) == f.filenode():
88 if am.get(path, None) == f.filenode():
89 return path
89 return path
90 if basemf and basemf.get(path, None) == f.filenode():
90 if basemf and basemf.get(path, None) == f.filenode():
91 return path
91 return path
92
92
93
93
94 def _dirstatecopies(repo, match=None):
94 def _dirstatecopies(repo, match=None):
95 ds = repo.dirstate
95 ds = repo.dirstate
96 c = ds.copies().copy()
96 c = ds.copies().copy()
97 for k in list(c):
97 for k in list(c):
98 if ds[k] not in b'anm' or (match and not match(k)):
98 if ds[k] not in b'anm' or (match and not match(k)):
99 del c[k]
99 del c[k]
100 return c
100 return c
101
101
102
102
103 def _computeforwardmissing(a, b, match=None):
103 def _computeforwardmissing(a, b, match=None):
104 """Computes which files are in b but not a.
104 """Computes which files are in b but not a.
105 This is its own function so extensions can easily wrap this call to see what
105 This is its own function so extensions can easily wrap this call to see what
106 files _forwardcopies is about to process.
106 files _forwardcopies is about to process.
107 """
107 """
108 ma = a.manifest()
108 ma = a.manifest()
109 mb = b.manifest()
109 mb = b.manifest()
110 return mb.filesnotin(ma, match=match)
110 return mb.filesnotin(ma, match=match)
111
111
112
112
def usechangesetcentricalgo(repo):
    """Report whether changeset-centric copy algorithms should be used."""
    # sidedata storage always implies changeset-centric copies
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
120
120
121
121
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst-path: src-path} mapping of the copies found between
    ``a`` and ``b``.  ``base``, when not None, supplies an extra manifest
    that traced ancestors are also matched against; ``match`` limits the
    files considered.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    # dispatch to the changeset-centric implementation when configured
    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        # share one precomputed ancestry set across all traced files
        # (NOTE(review): assumes fctx.ancestors() honors _ancestrycontext —
        # confirm against context.py)
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            # 'start' is only bound under 'debug', matching this guard
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
179
179
180
180
181 def _revinfogetter(repo):
181 def _revinfogetter(repo):
182 """return a function that return multiple data given a <rev>"i
182 """return a function that return multiple data given a <rev>"i
183
183
184 * p1: revision number of first parent
184 * p1: revision number of first parent
185 * p2: revision number of first parent
185 * p2: revision number of first parent
186 * p1copies: mapping of copies from p1
186 * p1copies: mapping of copies from p1
187 * p2copies: mapping of copies from p2
187 * p2copies: mapping of copies from p2
188 * removed: a list of removed files
188 * removed: a list of removed files
189 """
189 """
190 cl = repo.changelog
190 cl = repo.changelog
191 parents = cl.parentrevs
191 parents = cl.parentrevs
192
192
193 def revinfo(rev):
193 def revinfo(rev):
194 p1, p2 = parents(rev)
194 p1, p2 = parents(rev)
195 ctx = repo[rev]
195 ctx = repo[rev]
196 p1copies, p2copies = ctx._copies
196 p1copies, p2copies = ctx._copies
197 removed = ctx.filesremoved()
197 removed = ctx.filesremoved()
198 return p1, p2, p1copies, p2copies, removed
198 return p1, p2, p1copies, p2copies, removed
199
199
200 return revinfo
200 return revinfo
201
201
202
202
def _changesetforwardcopies(a, b, match):
    """find {dst@b: src@a} copies using changeset-stored copy metadata

    ``a`` is expected to be an ancestor of ``b``.  Revisions between the
    two are walked in increasing revision-number order, chaining copy
    information along each parent->child edge.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    # NOTE: deliberately unfiltered; the walk below addresses the revisions
    # between a and b directly by number
    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    # build a parent-rev -> [child-revs] map restricted to the walked range
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    roots = set(children) - set(missingrevs)
    work = list(roots)
    all_copies = {r: {} for r in roots}
    # a min-heap processes revisions in increasing order, so every parent is
    # handled before any of its children
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        r = heapq.heappop(work)
        copies = all_copies.pop(r)
        if r == b.rev():
            return copies
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            for f in removed:
                if f in newcopies:
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                heapq.heappush(work, c)
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    # unreachable: b.rev() is part of the walked set, so the loop above is
    # expected to return via the 'r == b.rev()' branch
    assert False
278
278
279
279
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)
    # b is the working copy: trace committed copies up to its first parent,
    # then layer the dirstate-recorded copies on top
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
294
294
295
295
def _backwardrenames(a, b, match):
    """compute renames going backward, by reversing _forwardcopies(b, a)

    Pure copies — entries whose source is still present in ``a`` — are
    skipped, so only renames remain.  ``match``, when given, restricts the
    entries kept (it is applied to the forward-copy source, which becomes
    a key of the reversed mapping).
    """
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename
    # situations can still exist (e.g. hg cp a b; hg mv a c). In those
    # cases we arbitrarily pick one of the renames.
    # We don't pass "match" into _forwardcopies(), since that would filter
    # the destination by it. Since we're reversing the copies, we want to
    # filter the source instead.
    forward = _forwardcopies(b, a)
    reversed_copies = {}
    for fwd_dst, fwd_src in sorted(pycompat.iteritems(forward)):
        if match and not match(fwd_src):
            continue
        if fwd_src in a:
            # source still exists in 'a': a copy, not a rename — drop it
            continue
        reversed_copies[fwd_src] = fwd_dst
    return reversed_copies
316
316
317
317
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: searching copies from %s to %s\n' % (x, y))
    if x == y or not x or not y:
        # trivial comparison: nothing can have been copied
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: trace forward
        if debug:
            dbg(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif anc == y:
        # y is an ancestor of x: trace backward
        if debug:
            dbg(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated sides: back from x to the common ancestor, then forward
        if debug:
            dbg(b'debug.copies: search mode: combined\n')
        base = x if anc.rev() != node.nullrev else None
        copies = _chain(
            _backwardrenames(x, anc, match=match),
            _forwardcopies(anc, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
352
352
353
353
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    | /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # NOTE(review): narrowmatch comes from c1.repo() while the dirstate below
    # comes from the 'repo' argument — presumably the same repository; confirm
    # with callers.
    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
432
432
433
433
434 def _isfullcopytraceable(repo, c1, base):
434 def _isfullcopytraceable(repo, c1, base):
435 """ Checks that if base, source and destination are all no-public branches,
435 """ Checks that if base, source and destination are all no-public branches,
436 if yes let's use the full copytrace algorithm for increased capabilities
436 if yes let's use the full copytrace algorithm for increased capabilities
437 since it will be fast enough.
437 since it will be fast enough.
438
438
439 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
439 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
440 number of changesets from c1 to base such that if number of changesets are
440 number of changesets from c1 to base such that if number of changesets are
441 more than the limit, full copytracing algorithm won't be used.
441 more than the limit, full copytracing algorithm won't be used.
442 """
442 """
443 if c1.rev() is None:
443 if c1.rev() is None:
444 c1 = c1.p1()
444 c1 = c1.p1()
445 if c1.mutable() and base.mutable():
445 if c1.mutable() and base.mutable():
446 sourcecommitlimit = repo.ui.configint(
446 sourcecommitlimit = repo.ui.configint(
447 b'experimental', b'copytrace.sourcecommitlimit'
447 b'experimental', b'copytrace.sourcecommitlimit'
448 )
448 )
449 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
449 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
450 return commits < sourcecommitlimit
450 return commits < sourcecommitlimit
451 return False
451 return False
452
452
453
453
454 def _checksinglesidecopies(
454 def _checksinglesidecopies(
455 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
455 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
456 ):
456 ):
457 if src not in m2:
457 if src not in m2:
458 # deleted on side 2
458 # deleted on side 2
459 if src not in m1:
459 if src not in m1:
460 # renamed on side 1, deleted on side 2
460 # renamed on side 1, deleted on side 2
461 renamedelete[src] = dsts1
461 renamedelete[src] = dsts1
462 elif m2[src] != mb[src]:
462 elif m2[src] != mb[src]:
463 if not _related(c2[src], base[src]):
463 if not _related(c2[src], base[src]):
464 return
464 return
465 # modified on side 2
465 # modified on side 2
466 for dst in dsts1:
466 for dst in dsts1:
467 if dst not in m2:
467 if dst not in m2:
468 # dst not added on side 2 (handle as regular
468 # dst not added on side 2 (handle as regular
469 # "both created" case in manifestmerge otherwise)
469 # "both created" case in manifestmerge otherwise)
470 copy[dst] = src
470 copy[dst] = src
471
471
472
472
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``:
    - copy: {dst: src} copies to merge
    - movewithdir: {src: dst} files moved because their directory moved
    - diverge: {src: [dsts]} sources renamed to different names on each side
    - renamedelete: {src: [dsts]} renamed on one side, deleted on the other
    - dirmove: {srcdir/: dstdir/} detected whole-directory moves
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # per-side copy information relative to the merge base: {dst: src}
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert to {src: [dst, ...]} so we can reason about each copy source
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flat sets of destinations, used only for debug annotations below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    # files added on exactly one side; candidates for directory-move handling
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        # no copies at all: nothing further to detect
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    # drop any candidate later disqualified by one of the checks above
    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
637
637
638
638
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple shape as _fullcopytracing(); the last four
    elements are always empty dicts here.
    """

    # working contexts have rev() None; substitute their first parent so
    # revision-number based queries below work
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # walk c2's first-parent chain back to base, collecting touched files
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep recorded copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index files added on the c1 side by basename and by directory, the
        # two heuristics described in the docstring
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
746
746
747
747
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        # step past working-context file revisions (linkrev is None) before
        # entering the integer comparison loop below
        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        # advance whichever side has the higher linkrev until both point at
        # the same linkrev, then compare the file revisions themselves
        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        # one ancestor chain ran out before the linkrevs met: unrelated
        return False
779
779
780
780
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    # build the exclusion set unless copytracing is explicitly disabled:
    # 'heuristics', any true value, or an unparsable/unset value all enable it
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            continue
        # only mark copies whose destination exists in the working context
        if dst in wctx:
            wctx[dst].markcopied(src)
805
805
806
806
def computechangesetfilesadded(ctx):
    """Return the list of files added in a changeset.

    A file counts as added when the changeset touches it and no parent
    of the changeset contains it.
    """
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
815
815
816
816
def computechangesetfilesremoved(ctx):
    """Return the list of files removed in a changeset.

    A file counts as removed when the changeset touches it but the file
    is no longer present in the changeset itself.
    """
    return [f for f in ctx.files() if f not in ctx]
825
825
826
826
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries
    (p1copies, p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrowspec and files removed by this
        # changeset (a removed dst has no filectx to query)
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        # attribute the copy to whichever parent holds the exact source
        # file revision recorded in the rename metadata
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
851
851
852
852
def encodecopies(files, copies):
    """Encode a ``{dst: src}`` copy mapping against the ``files`` list.

    Each entry is serialized as ``<index-of-dst>\\0<src>``; entries are
    joined with newlines. Raises ProgrammingError if a copy destination
    is absent from ``files``.
    """
    items = [
        b'%d\0%s' % (idx, copies[name])
        for idx, name in enumerate(files)
        if name in copies
    ]
    # every destination must appear in `files`; otherwise entries were
    # silently skipped by the filter above
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)
863
863
864
864
def decodecopies(files, data):
    """Decode copy data produced by encodecopies().

    Returns a ``{dst: src}`` dict, or None when the payload cannot be
    parsed against ``files``.
    """
    copies = {}
    if not data:
        return copies
    try:
        for line in data.split(b'\n'):
            strindex, src = line.split(b'\0')
            copies[files[int(strindex)]] = src
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
    return copies
880
880
881
881
def encodefileindices(files, subset):
    """Encode ``subset`` as the newline-joined decimal indices of its
    members within the ``files`` list (in ``files`` order)."""
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % pos for pos, name in enumerate(files) if name in wanted
    )
889
889
890
890
def decodefileindices(files, data):
    """Decode data produced by encodefileindices().

    Returns the corresponding sublist of ``files``, or None when the
    payload is malformed or an index is out of range.
    """
    if not data:
        return []
    subset = []
    try:
        for token in data.split(b'\n'):
            pos = int(token)
            if not (0 <= pos < len(files)):
                # out-of-range index: treat the whole payload as invalid
                return None
            subset.append(files[pos])
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
    return subset
906
906
907
907
def _getsidedata(srcrepo, rev):
    """Compute the copy-tracing sidedata entries for changelog revision `rev`.

    Returns a dict mapping sidedata keys (SD_P1COPIES, SD_P2COPIES,
    SD_FILESADDED, SD_FILESREMOVED) to their encoded values; entries that
    encode to an empty payload are omitted.
    """
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # NOTE(review): filescopies is always a 2-tuple, so this any() is always
    # true; the per-entry truthiness checks below do the real filtering.
    if any([filescopies, filesadded, filesremoved]):
        # all encodings are indices into the sorted list of touched files
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
930
930
931
931
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion that attaches copy-tracing sidedata.

    The companion computes sidedata from ``srcrepo`` for changelog
    revisions only, and returns the ``(drop, prune, sidedata)`` triple
    expected by the revlog upgrade machinery.
    """

    def sidedatacompanion(revlog, rev):
        # only the changelog carries a `filteredrevs` attribute; other
        # revlogs get no sidedata
        if util.safehasattr(revlog, 'filteredrevs'):
            return False, (), _getsidedata(srcrepo, rev)
        return False, (), {}

    return sidedatacompanion
940
940
941
941
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion that strips copy-tracing sidedata.

    For changelog revisions flagged with REVIDX_SIDEDATA, the companion
    lists the four copy-tracing sidedata keys for removal; everything
    else is left untouched.
    """

    def sidedatacompanion(revlog, rev):
        toprune = ()
        # only the changelog carries a `filteredrevs` attribute
        if util.safehasattr(revlog, 'filteredrevs') and (
            revlog.flags(rev) & REVIDX_SIDEDATA
        ):
            toprune = (
                sidedatamod.SD_P1COPIES,
                sidedatamod.SD_P2COPIES,
                sidedatamod.SD_FILESADDED,
                sidedatamod.SD_FILESREMOVED,
            )
        return False, toprune, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now