##// END OF EJS Templates
copies: compute the exact set of revision to walk...
marmoute -
r43593:83bb1e89 default
parent child Browse files
Show More
@@ -1,974 +1,982 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
12 import os
11 import os
13
12
14 from .i18n import _
13 from .i18n import _
15
14
16
15
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
18
17
19 from . import (
18 from . import (
20 error,
19 error,
21 match as matchmod,
20 match as matchmod,
22 node,
21 node,
23 pathutil,
22 pathutil,
24 pycompat,
23 pycompat,
25 util,
24 util,
26 )
25 )
27
26
28 from .revlogutils import sidedata as sidedatamod
27 from .revlogutils import sidedata as sidedatamod
29
28
30 from .utils import stringutil
29 from .utils import stringutil
31
30
32
31
33 def _filter(src, dst, t):
32 def _filter(src, dst, t):
34 """filters out invalid copies after chaining"""
33 """filters out invalid copies after chaining"""
35
34
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 # in the following table (not including trivial cases). For example, case 2
37 # in the following table (not including trivial cases). For example, case 2
39 # is where a file existed in 'src' and remained under that name in 'mid' and
38 # is where a file existed in 'src' and remained under that name in 'mid' and
40 # then was renamed between 'mid' and 'dst'.
39 # then was renamed between 'mid' and 'dst'.
41 #
40 #
42 # case src mid dst result
41 # case src mid dst result
43 # 1 x y - -
42 # 1 x y - -
44 # 2 x y y x->y
43 # 2 x y y x->y
45 # 3 x y x -
44 # 3 x y x -
46 # 4 x y z x->z
45 # 4 x y z x->z
47 # 5 - x y -
46 # 5 - x y -
48 # 6 x x y x->y
47 # 6 x x y x->y
49 #
48 #
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 # between 5 and 6, so it includes all cases in its result.
51 # between 5 and 6, so it includes all cases in its result.
53 # Cases 1, 3, and 5 are then removed by _filter().
52 # Cases 1, 3, and 5 are then removed by _filter().
54
53
55 for k, v in list(t.items()):
54 for k, v in list(t.items()):
56 # remove copies from files that didn't exist
55 # remove copies from files that didn't exist
57 if v not in src:
56 if v not in src:
58 del t[k]
57 del t[k]
59 # remove criss-crossed copies
58 # remove criss-crossed copies
60 elif k in src and v in dst:
59 elif k in src and v in dst:
61 del t[k]
60 del t[k]
62 # remove copies to files that were then removed
61 # remove copies to files that were then removed
63 elif k not in dst:
62 elif k not in dst:
64 del t[k]
63 del t[k]
65
64
66
65
67 def _chain(a, b):
66 def _chain(a, b):
68 """chain two sets of copies 'a' and 'b'"""
67 """chain two sets of copies 'a' and 'b'"""
69 t = a.copy()
68 t = a.copy()
70 for k, v in pycompat.iteritems(b):
69 for k, v in pycompat.iteritems(b):
71 if v in t:
70 if v in t:
72 t[k] = t[v]
71 t[k] = t[v]
73 else:
72 else:
74 t[k] = v
73 t[k] = v
75 return t
74 return t
76
75
77
76
78 def _tracefile(fctx, am, basemf):
77 def _tracefile(fctx, am, basemf):
79 """return file context that is the ancestor of fctx present in ancestor
78 """return file context that is the ancestor of fctx present in ancestor
80 manifest am
79 manifest am
81
80
82 Note: we used to try and stop after a given limit, however checking if that
81 Note: we used to try and stop after a given limit, however checking if that
83 limit is reached turned out to be very expensive. we are better off
82 limit is reached turned out to be very expensive. we are better off
84 disabling that feature."""
83 disabling that feature."""
85
84
86 for f in fctx.ancestors():
85 for f in fctx.ancestors():
87 path = f.path()
86 path = f.path()
88 if am.get(path, None) == f.filenode():
87 if am.get(path, None) == f.filenode():
89 return path
88 return path
90 if basemf and basemf.get(path, None) == f.filenode():
89 if basemf and basemf.get(path, None) == f.filenode():
91 return path
90 return path
92
91
93
92
94 def _dirstatecopies(repo, match=None):
93 def _dirstatecopies(repo, match=None):
95 ds = repo.dirstate
94 ds = repo.dirstate
96 c = ds.copies().copy()
95 c = ds.copies().copy()
97 for k in list(c):
96 for k in list(c):
98 if ds[k] not in b'anm' or (match and not match(k)):
97 if ds[k] not in b'anm' or (match and not match(k)):
99 del c[k]
98 del c[k]
100 return c
99 return c
101
100
102
101
103 def _computeforwardmissing(a, b, match=None):
102 def _computeforwardmissing(a, b, match=None):
104 """Computes which files are in b but not a.
103 """Computes which files are in b but not a.
105 This is its own function so extensions can easily wrap this call to see what
104 This is its own function so extensions can easily wrap this call to see what
106 files _forwardcopies is about to process.
105 files _forwardcopies is about to process.
107 """
106 """
108 ma = a.manifest()
107 ma = a.manifest()
109 mb = b.manifest()
108 mb = b.manifest()
110 return mb.filesnotin(ma, match=match)
109 return mb.filesnotin(ma, match=match)
111
110
112
111
113 def usechangesetcentricalgo(repo):
112 def usechangesetcentricalgo(repo):
114 """Checks if we should use changeset-centric copy algorithms"""
113 """Checks if we should use changeset-centric copy algorithms"""
115 if repo.filecopiesmode == b'changeset-sidedata':
114 if repo.filecopiesmode == b'changeset-sidedata':
116 return True
115 return True
117 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
116 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
118 changesetsource = (b'changeset-only', b'compatibility')
117 changesetsource = (b'changeset-only', b'compatibility')
119 return readfrom in changesetsource
118 return readfrom in changesetsource
120
119
121
120
122 def _committedforwardcopies(a, b, base, match):
121 def _committedforwardcopies(a, b, base, match):
123 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
122 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
124 # files might have to be traced back to the fctx parent of the last
123 # files might have to be traced back to the fctx parent of the last
125 # one-side-only changeset, but not further back than that
124 # one-side-only changeset, but not further back than that
126 repo = a._repo
125 repo = a._repo
127
126
128 if usechangesetcentricalgo(repo):
127 if usechangesetcentricalgo(repo):
129 return _changesetforwardcopies(a, b, match)
128 return _changesetforwardcopies(a, b, match)
130
129
131 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
130 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
132 dbg = repo.ui.debug
131 dbg = repo.ui.debug
133 if debug:
132 if debug:
134 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
133 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
135 am = a.manifest()
134 am = a.manifest()
136 basemf = None if base is None else base.manifest()
135 basemf = None if base is None else base.manifest()
137
136
138 # find where new files came from
137 # find where new files came from
139 # we currently don't try to find where old files went, too expensive
138 # we currently don't try to find where old files went, too expensive
140 # this means we can miss a case like 'hg rm b; hg cp a b'
139 # this means we can miss a case like 'hg rm b; hg cp a b'
141 cm = {}
140 cm = {}
142
141
143 # Computing the forward missing is quite expensive on large manifests, since
142 # Computing the forward missing is quite expensive on large manifests, since
144 # it compares the entire manifests. We can optimize it in the common use
143 # it compares the entire manifests. We can optimize it in the common use
145 # case of computing what copies are in a commit versus its parent (like
144 # case of computing what copies are in a commit versus its parent (like
146 # during a rebase or histedit). Note, we exclude merge commits from this
145 # during a rebase or histedit). Note, we exclude merge commits from this
147 # optimization, since the ctx.files() for a merge commit is not correct for
146 # optimization, since the ctx.files() for a merge commit is not correct for
148 # this comparison.
147 # this comparison.
149 forwardmissingmatch = match
148 forwardmissingmatch = match
150 if b.p1() == a and b.p2().node() == node.nullid:
149 if b.p1() == a and b.p2().node() == node.nullid:
151 filesmatcher = matchmod.exact(b.files())
150 filesmatcher = matchmod.exact(b.files())
152 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
151 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
153 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
152 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
154
153
155 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
154 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
156
155
157 if debug:
156 if debug:
158 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
157 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
159
158
160 for f in sorted(missing):
159 for f in sorted(missing):
161 if debug:
160 if debug:
162 dbg(b'debug.copies: tracing file: %s\n' % f)
161 dbg(b'debug.copies: tracing file: %s\n' % f)
163 fctx = b[f]
162 fctx = b[f]
164 fctx._ancestrycontext = ancestrycontext
163 fctx._ancestrycontext = ancestrycontext
165
164
166 if debug:
165 if debug:
167 start = util.timer()
166 start = util.timer()
168 opath = _tracefile(fctx, am, basemf)
167 opath = _tracefile(fctx, am, basemf)
169 if opath:
168 if opath:
170 if debug:
169 if debug:
171 dbg(b'debug.copies: rename of: %s\n' % opath)
170 dbg(b'debug.copies: rename of: %s\n' % opath)
172 cm[f] = opath
171 cm[f] = opath
173 if debug:
172 if debug:
174 dbg(
173 dbg(
175 b'debug.copies: time: %f seconds\n'
174 b'debug.copies: time: %f seconds\n'
176 % (util.timer() - start)
175 % (util.timer() - start)
177 )
176 )
178 return cm
177 return cm
179
178
180
179
181 def _revinfogetter(repo):
180 def _revinfogetter(repo):
182 """return a function that return multiple data given a <rev>"i
181 """return a function that return multiple data given a <rev>"i
183
182
184 * p1: revision number of first parent
183 * p1: revision number of first parent
185 * p2: revision number of first parent
184 * p2: revision number of first parent
186 * p1copies: mapping of copies from p1
185 * p1copies: mapping of copies from p1
187 * p2copies: mapping of copies from p2
186 * p2copies: mapping of copies from p2
188 * removed: a list of removed files
187 * removed: a list of removed files
189 """
188 """
190 cl = repo.changelog
189 cl = repo.changelog
191 parents = cl.parentrevs
190 parents = cl.parentrevs
192
191
193 if repo.filecopiesmode == b'changeset-sidedata':
192 if repo.filecopiesmode == b'changeset-sidedata':
194 changelogrevision = cl.changelogrevision
193 changelogrevision = cl.changelogrevision
195 flags = cl.flags
194 flags = cl.flags
196
195
197 def revinfo(rev):
196 def revinfo(rev):
198 p1, p2 = parents(rev)
197 p1, p2 = parents(rev)
199 if flags(rev) & REVIDX_SIDEDATA:
198 if flags(rev) & REVIDX_SIDEDATA:
200 c = changelogrevision(rev)
199 c = changelogrevision(rev)
201 p1copies = c.p1copies
200 p1copies = c.p1copies
202 p2copies = c.p2copies
201 p2copies = c.p2copies
203 removed = c.filesremoved
202 removed = c.filesremoved
204 else:
203 else:
205 p1copies = {}
204 p1copies = {}
206 p2copies = {}
205 p2copies = {}
207 removed = ()
206 removed = ()
208 return p1, p2, p1copies, p2copies, removed
207 return p1, p2, p1copies, p2copies, removed
209
208
210 else:
209 else:
211
210
212 def revinfo(rev):
211 def revinfo(rev):
213 p1, p2 = parents(rev)
212 p1, p2 = parents(rev)
214 ctx = repo[rev]
213 ctx = repo[rev]
215 p1copies, p2copies = ctx._copies
214 p1copies, p2copies = ctx._copies
216 removed = ctx.filesremoved()
215 removed = ctx.filesremoved()
217 return p1, p2, p1copies, p2copies, removed
216 return p1, p2, p1copies, p2copies, removed
218
217
219 return revinfo
218 return revinfo
220
219
221
220
222 def _changesetforwardcopies(a, b, match):
221 def _changesetforwardcopies(a, b, match):
223 if a.rev() in (node.nullrev, b.rev()):
222 if a.rev() in (node.nullrev, b.rev()):
224 return {}
223 return {}
225
224
226 repo = a.repo().unfiltered()
225 repo = a.repo().unfiltered()
227 children = {}
226 children = {}
228 revinfo = _revinfogetter(repo)
227 revinfo = _revinfogetter(repo)
229
228
230 cl = repo.changelog
229 cl = repo.changelog
231 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
230 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
231 mrset = set(missingrevs)
232 roots = set()
232 for r in missingrevs:
233 for r in missingrevs:
233 for p in cl.parentrevs(r):
234 for p in cl.parentrevs(r):
234 if p == node.nullrev:
235 if p == node.nullrev:
235 continue
236 continue
236 if p not in children:
237 if p not in children:
237 children[p] = [r]
238 children[p] = [r]
238 else:
239 else:
239 children[p].append(r)
240 children[p].append(r)
241 if p not in mrset:
242 roots.add(p)
243 if not roots:
244 # no common revision to track copies from
245 return {}
246 min_root = min(roots)
240
247
241 roots = set(children) - set(missingrevs)
248 from_head = set(
242 work = list(roots)
249 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
250 )
251
252 iterrevs = set(from_head)
253 iterrevs &= mrset
254 iterrevs.update(roots)
255 iterrevs.remove(b.rev())
243 all_copies = {r: {} for r in roots}
256 all_copies = {r: {} for r in roots}
244 heapq.heapify(work)
245 alwaysmatch = match.always()
257 alwaysmatch = match.always()
246 while work:
258 for r in sorted(iterrevs):
247 r = heapq.heappop(work)
248 copies = all_copies.pop(r)
259 copies = all_copies.pop(r)
249 if r == b.rev():
250 return copies
251 for i, c in enumerate(children[r]):
260 for i, c in enumerate(children[r]):
252 p1, p2, p1copies, p2copies, removed = revinfo(c)
261 p1, p2, p1copies, p2copies, removed = revinfo(c)
253 if r == p1:
262 if r == p1:
254 parent = 1
263 parent = 1
255 childcopies = p1copies
264 childcopies = p1copies
256 else:
265 else:
257 assert r == p2
266 assert r == p2
258 parent = 2
267 parent = 2
259 childcopies = p2copies
268 childcopies = p2copies
260 if not alwaysmatch:
269 if not alwaysmatch:
261 childcopies = {
270 childcopies = {
262 dst: src for dst, src in childcopies.items() if match(dst)
271 dst: src for dst, src in childcopies.items() if match(dst)
263 }
272 }
264 # Copy the dict only if later iterations will also need it
273 # Copy the dict only if later iterations will also need it
265 if i != len(children[r]) - 1:
274 if i != len(children[r]) - 1:
266 newcopies = copies.copy()
275 newcopies = copies.copy()
267 else:
276 else:
268 newcopies = copies
277 newcopies = copies
269 if childcopies:
278 if childcopies:
270 newcopies = _chain(newcopies, childcopies)
279 newcopies = _chain(newcopies, childcopies)
271 for f in removed:
280 for f in removed:
272 if f in newcopies:
281 if f in newcopies:
273 del newcopies[f]
282 del newcopies[f]
274 othercopies = all_copies.get(c)
283 othercopies = all_copies.get(c)
275 if othercopies is None:
284 if othercopies is None:
276 heapq.heappush(work, c)
277 all_copies[c] = newcopies
285 all_copies[c] = newcopies
278 else:
286 else:
279 # we are the second parent to work on c, we need to merge our
287 # we are the second parent to work on c, we need to merge our
280 # work with the other.
288 # work with the other.
281 #
289 #
282 # Unlike when copies are stored in the filelog, we consider
290 # Unlike when copies are stored in the filelog, we consider
283 # it a copy even if the destination already existed on the
291 # it a copy even if the destination already existed on the
284 # other branch. It's simply too expensive to check if the
292 # other branch. It's simply too expensive to check if the
285 # file existed in the manifest.
293 # file existed in the manifest.
286 #
294 #
287 # In case of conflict, parent 1 take precedence over parent 2.
295 # In case of conflict, parent 1 take precedence over parent 2.
288 # This is an arbitrary choice made anew when implementing
296 # This is an arbitrary choice made anew when implementing
289 # changeset based copies. It was made without regards with
297 # changeset based copies. It was made without regards with
290 # potential filelog related behavior.
298 # potential filelog related behavior.
291 if parent == 1:
299 if parent == 1:
292 othercopies.update(newcopies)
300 othercopies.update(newcopies)
293 else:
301 else:
294 newcopies.update(othercopies)
302 newcopies.update(othercopies)
295 all_copies[c] = newcopies
303 all_copies[c] = newcopies
296 assert False
304 return all_copies[b.rev()]
297
305
298
306
299 def _forwardcopies(a, b, base=None, match=None):
307 def _forwardcopies(a, b, base=None, match=None):
300 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
308 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
301
309
302 if base is None:
310 if base is None:
303 base = a
311 base = a
304 match = a.repo().narrowmatch(match)
312 match = a.repo().narrowmatch(match)
305 # check for working copy
313 # check for working copy
306 if b.rev() is None:
314 if b.rev() is None:
307 cm = _committedforwardcopies(a, b.p1(), base, match)
315 cm = _committedforwardcopies(a, b.p1(), base, match)
308 # combine copies from dirstate if necessary
316 # combine copies from dirstate if necessary
309 copies = _chain(cm, _dirstatecopies(b._repo, match))
317 copies = _chain(cm, _dirstatecopies(b._repo, match))
310 else:
318 else:
311 copies = _committedforwardcopies(a, b, base, match)
319 copies = _committedforwardcopies(a, b, base, match)
312 return copies
320 return copies
313
321
314
322
315 def _backwardrenames(a, b, match):
323 def _backwardrenames(a, b, match):
316 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
324 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
317 return {}
325 return {}
318
326
319 # Even though we're not taking copies into account, 1:n rename situations
327 # Even though we're not taking copies into account, 1:n rename situations
320 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
328 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
321 # arbitrarily pick one of the renames.
329 # arbitrarily pick one of the renames.
322 # We don't want to pass in "match" here, since that would filter
330 # We don't want to pass in "match" here, since that would filter
323 # the destination by it. Since we're reversing the copies, we want
331 # the destination by it. Since we're reversing the copies, we want
324 # to filter the source instead.
332 # to filter the source instead.
325 f = _forwardcopies(b, a)
333 f = _forwardcopies(b, a)
326 r = {}
334 r = {}
327 for k, v in sorted(pycompat.iteritems(f)):
335 for k, v in sorted(pycompat.iteritems(f)):
328 if match and not match(v):
336 if match and not match(v):
329 continue
337 continue
330 # remove copies
338 # remove copies
331 if v in a:
339 if v in a:
332 continue
340 continue
333 r[v] = k
341 r[v] = k
334 return r
342 return r
335
343
336
344
337 def pathcopies(x, y, match=None):
345 def pathcopies(x, y, match=None):
338 """find {dst@y: src@x} copy mapping for directed compare"""
346 """find {dst@y: src@x} copy mapping for directed compare"""
339 repo = x._repo
347 repo = x._repo
340 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
348 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
341 if debug:
349 if debug:
342 repo.ui.debug(
350 repo.ui.debug(
343 b'debug.copies: searching copies from %s to %s\n' % (x, y)
351 b'debug.copies: searching copies from %s to %s\n' % (x, y)
344 )
352 )
345 if x == y or not x or not y:
353 if x == y or not x or not y:
346 return {}
354 return {}
347 a = y.ancestor(x)
355 a = y.ancestor(x)
348 if a == x:
356 if a == x:
349 if debug:
357 if debug:
350 repo.ui.debug(b'debug.copies: search mode: forward\n')
358 repo.ui.debug(b'debug.copies: search mode: forward\n')
351 if y.rev() is None and x == y.p1():
359 if y.rev() is None and x == y.p1():
352 # short-circuit to avoid issues with merge states
360 # short-circuit to avoid issues with merge states
353 return _dirstatecopies(repo, match)
361 return _dirstatecopies(repo, match)
354 copies = _forwardcopies(x, y, match=match)
362 copies = _forwardcopies(x, y, match=match)
355 elif a == y:
363 elif a == y:
356 if debug:
364 if debug:
357 repo.ui.debug(b'debug.copies: search mode: backward\n')
365 repo.ui.debug(b'debug.copies: search mode: backward\n')
358 copies = _backwardrenames(x, y, match=match)
366 copies = _backwardrenames(x, y, match=match)
359 else:
367 else:
360 if debug:
368 if debug:
361 repo.ui.debug(b'debug.copies: search mode: combined\n')
369 repo.ui.debug(b'debug.copies: search mode: combined\n')
362 base = None
370 base = None
363 if a.rev() != node.nullrev:
371 if a.rev() != node.nullrev:
364 base = x
372 base = x
365 copies = _chain(
373 copies = _chain(
366 _backwardrenames(x, a, match=match),
374 _backwardrenames(x, a, match=match),
367 _forwardcopies(a, y, base, match=match),
375 _forwardcopies(a, y, base, match=match),
368 )
376 )
369 _filter(x, y, copies)
377 _filter(x, y, copies)
370 return copies
378 return copies
371
379
372
380
373 def mergecopies(repo, c1, c2, base):
381 def mergecopies(repo, c1, c2, base):
374 """
382 """
375 Finds moves and copies between context c1 and c2 that are relevant for
383 Finds moves and copies between context c1 and c2 that are relevant for
376 merging. 'base' will be used as the merge base.
384 merging. 'base' will be used as the merge base.
377
385
378 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
386 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
379 files that were moved/ copied in one merge parent and modified in another.
387 files that were moved/ copied in one merge parent and modified in another.
380 For example:
388 For example:
381
389
382 o ---> 4 another commit
390 o ---> 4 another commit
383 |
391 |
384 | o ---> 3 commit that modifies a.txt
392 | o ---> 3 commit that modifies a.txt
385 | /
393 | /
386 o / ---> 2 commit that moves a.txt to b.txt
394 o / ---> 2 commit that moves a.txt to b.txt
387 |/
395 |/
388 o ---> 1 merge base
396 o ---> 1 merge base
389
397
390 If we try to rebase revision 3 on revision 4, since there is no a.txt in
398 If we try to rebase revision 3 on revision 4, since there is no a.txt in
391 revision 4, and if user have copytrace disabled, we prints the following
399 revision 4, and if user have copytrace disabled, we prints the following
392 message:
400 message:
393
401
394 ```other changed <file> which local deleted```
402 ```other changed <file> which local deleted```
395
403
396 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
404 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
397 "dirmove".
405 "dirmove".
398
406
399 "copy" is a mapping from destination name -> source name,
407 "copy" is a mapping from destination name -> source name,
400 where source is in c1 and destination is in c2 or vice-versa.
408 where source is in c1 and destination is in c2 or vice-versa.
401
409
402 "movewithdir" is a mapping from source name -> destination name,
410 "movewithdir" is a mapping from source name -> destination name,
403 where the file at source present in one context but not the other
411 where the file at source present in one context but not the other
404 needs to be moved to destination by the merge process, because the
412 needs to be moved to destination by the merge process, because the
405 other context moved the directory it is in.
413 other context moved the directory it is in.
406
414
407 "diverge" is a mapping of source name -> list of destination names
415 "diverge" is a mapping of source name -> list of destination names
408 for divergent renames.
416 for divergent renames.
409
417
410 "renamedelete" is a mapping of source name -> list of destination
418 "renamedelete" is a mapping of source name -> list of destination
411 names for files deleted in c1 that were renamed in c2 or vice-versa.
419 names for files deleted in c1 that were renamed in c2 or vice-versa.
412
420
413 "dirmove" is a mapping of detected source dir -> destination dir renames.
421 "dirmove" is a mapping of detected source dir -> destination dir renames.
414 This is needed for handling changes to new files previously grafted into
422 This is needed for handling changes to new files previously grafted into
415 renamed directories.
423 renamed directories.
416
424
417 This function calls different copytracing algorithms based on config.
425 This function calls different copytracing algorithms based on config.
418 """
426 """
419 # avoid silly behavior for update from empty dir
427 # avoid silly behavior for update from empty dir
420 if not c1 or not c2 or c1 == c2:
428 if not c1 or not c2 or c1 == c2:
421 return {}, {}, {}, {}, {}
429 return {}, {}, {}, {}, {}
422
430
423 narrowmatch = c1.repo().narrowmatch()
431 narrowmatch = c1.repo().narrowmatch()
424
432
425 # avoid silly behavior for parent -> working dir
433 # avoid silly behavior for parent -> working dir
426 if c2.node() is None and c1.node() == repo.dirstate.p1():
434 if c2.node() is None and c1.node() == repo.dirstate.p1():
427 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
435 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
428
436
429 copytracing = repo.ui.config(b'experimental', b'copytrace')
437 copytracing = repo.ui.config(b'experimental', b'copytrace')
430 if stringutil.parsebool(copytracing) is False:
438 if stringutil.parsebool(copytracing) is False:
431 # stringutil.parsebool() returns None when it is unable to parse the
439 # stringutil.parsebool() returns None when it is unable to parse the
432 # value, so we should rely on making sure copytracing is on such cases
440 # value, so we should rely on making sure copytracing is on such cases
433 return {}, {}, {}, {}, {}
441 return {}, {}, {}, {}, {}
434
442
435 if usechangesetcentricalgo(repo):
443 if usechangesetcentricalgo(repo):
436 # The heuristics don't make sense when we need changeset-centric algos
444 # The heuristics don't make sense when we need changeset-centric algos
437 return _fullcopytracing(repo, c1, c2, base)
445 return _fullcopytracing(repo, c1, c2, base)
438
446
439 # Copy trace disabling is explicitly below the node == p1 logic above
447 # Copy trace disabling is explicitly below the node == p1 logic above
440 # because the logic above is required for a simple copy to be kept across a
448 # because the logic above is required for a simple copy to be kept across a
441 # rebase.
449 # rebase.
442 if copytracing == b'heuristics':
450 if copytracing == b'heuristics':
443 # Do full copytracing if only non-public revisions are involved as
451 # Do full copytracing if only non-public revisions are involved as
444 # that will be fast enough and will also cover the copies which could
452 # that will be fast enough and will also cover the copies which could
445 # be missed by heuristics
453 # be missed by heuristics
446 if _isfullcopytraceable(repo, c1, base):
454 if _isfullcopytraceable(repo, c1, base):
447 return _fullcopytracing(repo, c1, c2, base)
455 return _fullcopytracing(repo, c1, c2, base)
448 return _heuristicscopytracing(repo, c1, c2, base)
456 return _heuristicscopytracing(repo, c1, c2, base)
449 else:
457 else:
450 return _fullcopytracing(repo, c1, c2, base)
458 return _fullcopytracing(repo, c1, c2, base)
451
459
452
460
453 def _isfullcopytraceable(repo, c1, base):
461 def _isfullcopytraceable(repo, c1, base):
454 """ Checks that if base, source and destination are all no-public branches,
462 """ Checks that if base, source and destination are all no-public branches,
455 if yes let's use the full copytrace algorithm for increased capabilities
463 if yes let's use the full copytrace algorithm for increased capabilities
456 since it will be fast enough.
464 since it will be fast enough.
457
465
458 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
466 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
459 number of changesets from c1 to base such that if number of changesets are
467 number of changesets from c1 to base such that if number of changesets are
460 more than the limit, full copytracing algorithm won't be used.
468 more than the limit, full copytracing algorithm won't be used.
461 """
469 """
462 if c1.rev() is None:
470 if c1.rev() is None:
463 c1 = c1.p1()
471 c1 = c1.p1()
464 if c1.mutable() and base.mutable():
472 if c1.mutable() and base.mutable():
465 sourcecommitlimit = repo.ui.configint(
473 sourcecommitlimit = repo.ui.configint(
466 b'experimental', b'copytrace.sourcecommitlimit'
474 b'experimental', b'copytrace.sourcecommitlimit'
467 )
475 )
468 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
476 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
469 return commits < sourcecommitlimit
477 return commits < sourcecommitlimit
470 return False
478 return False
471
479
472
480
473 def _checksinglesidecopies(
481 def _checksinglesidecopies(
474 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
482 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
475 ):
483 ):
476 if src not in m2:
484 if src not in m2:
477 # deleted on side 2
485 # deleted on side 2
478 if src not in m1:
486 if src not in m1:
479 # renamed on side 1, deleted on side 2
487 # renamed on side 1, deleted on side 2
480 renamedelete[src] = dsts1
488 renamedelete[src] = dsts1
481 elif m2[src] != mb[src]:
489 elif m2[src] != mb[src]:
482 if not _related(c2[src], base[src]):
490 if not _related(c2[src], base[src]):
483 return
491 return
484 # modified on side 2
492 # modified on side 2
485 for dst in dsts1:
493 for dst in dsts1:
486 if dst not in m2:
494 if dst not in m2:
487 # dst not added on side 2 (handle as regular
495 # dst not added on side 2 (handle as regular
488 # "both created" case in manifestmerge otherwise)
496 # "both created" case in manifestmerge otherwise)
489 copy[dst] = src
497 copy[dst] = src
490
498
491
499
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``
    where ``copy`` maps destination -> source for copies to merge,
    ``movewithdir`` maps new files to their location after a directory move,
    ``diverge`` maps a source to the several destinations it diverged into,
    ``renamedelete`` maps a source renamed on one side and deleted on the
    other to its rename destinations, and ``dirmove`` maps moved source
    directories to their destination directories.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # dst -> src copy maps from the base to each side of the merge
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert them into src -> [dst, ...] so each copy source can be
    # examined once with all of its destinations at hand
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flat sets of divergent / rename+delete destinations; only used to
    # annotate the debug output below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        # no copies at all: nothing further to compute
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so the prefix matching below cannot
    # confuse e.g. 'dir' with 'dir2'
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
656
664
657
665
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working-directory contexts have rev() None; use their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base and c2 (base excluded),
    # bailing out to the full algorithm if we hit a merge commit
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only the recorded copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index the files added on the c1 side by basename and by directory
        # so candidates sharing either with a missing file are cheap to find
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
765
773
766
774
767 def _related(f1, f2):
775 def _related(f1, f2):
768 """return True if f1 and f2 filectx have a common ancestor
776 """return True if f1 and f2 filectx have a common ancestor
769
777
770 Walk back to common ancestor to see if the two files originate
778 Walk back to common ancestor to see if the two files originate
771 from the same file. Since workingfilectx's rev() is None it messes
779 from the same file. Since workingfilectx's rev() is None it messes
772 up the integer comparison logic, hence the pre-step check for
780 up the integer comparison logic, hence the pre-step check for
773 None (f1 and f2 can only be workingfilectx's initially).
781 None (f1 and f2 can only be workingfilectx's initially).
774 """
782 """
775
783
776 if f1 == f2:
784 if f1 == f2:
777 return True # a match
785 return True # a match
778
786
779 g1, g2 = f1.ancestors(), f2.ancestors()
787 g1, g2 = f1.ancestors(), f2.ancestors()
780 try:
788 try:
781 f1r, f2r = f1.linkrev(), f2.linkrev()
789 f1r, f2r = f1.linkrev(), f2.linkrev()
782
790
783 if f1r is None:
791 if f1r is None:
784 f1 = next(g1)
792 f1 = next(g1)
785 if f2r is None:
793 if f2r is None:
786 f2 = next(g2)
794 f2 = next(g2)
787
795
788 while True:
796 while True:
789 f1r, f2r = f1.linkrev(), f2.linkrev()
797 f1r, f2r = f1.linkrev(), f2.linkrev()
790 if f1r > f2r:
798 if f1r > f2r:
791 f1 = next(g1)
799 f1 = next(g1)
792 elif f2r > f1r:
800 elif f2r > f1r:
793 f2 = next(g2)
801 f2 = next(g2)
794 else: # f1 and f2 point to files in the same linkrev
802 else: # f1 and f2 point to files in the same linkrev
795 return f1 == f2 # true if they point to the same file
803 return f1 == f2 # true if they point to the same file
796 except StopIteration:
804 except StopIteration:
797 return False
805 return False
798
806
799
807
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    # only compute the (expensive) exclusion set when copytracing is
    # enabled: 'heuristics', an explicit truthy value, or unset
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            continue
        # only mark destinations that actually exist in the working context
        if dst in wctx:
            wctx[dst].markcopied(src)
824
832
825
833
def computechangesetfilesadded(ctx):
    """Return the list of files added in changeset ``ctx``.

    A touched file counts as added when no parent of the changeset
    contains it.
    """
    return [
        f
        for f in ctx.files()
        if not any(f in p for p in ctx.parents())
    ]
834
842
835
843
def computechangesetfilesremoved(ctx):
    """Return the list of files removed in changeset ``ctx``.

    ``ctx.files()`` lists every file touched by the changeset; those no
    longer present in the changeset itself are the removals.
    """
    return [f for f in ctx.files() if f not in ctx]
844
852
845
853
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries (p1copies, p2copies).

    Each dictionary is in the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    # respect narrow clones: skip files outside the narrowspec
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        # attribute the copy to whichever parent actually holds the source
        # at the recorded file node
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
870
878
871
879
def encodecopies(files, copies):
    """Encode a ``{dst: src}`` copy dict against the ``files`` list.

    Each entry becomes ``<index-of-dst>\\0<src>`` and entries are joined
    with newlines.  Raises ProgrammingError when a destination in
    ``copies`` does not appear in ``files``.
    """
    entries = [
        b'%d\0%s' % (idx, copies[name])
        for idx, name in enumerate(files)
        if name in copies
    ]
    if len(entries) != len(copies):
        # some destination never showed up while scanning ``files``
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
882
890
883
891
def decodecopies(files, data):
    """Decode ``encodecopies`` output back into a ``{dst: src}`` dict.

    Returns None when the payload does not parse, e.g. when an extension
    used the same key (such as "p1copies") with a different value syntax.
    """
    if not data:
        return {}
    try:
        decoded = {}
        for line in data.split(b'\n'):
            strindex, src = line.split(b'\0')
            decoded[files[int(strindex)]] = src
        return decoded
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
899
907
900
908
def encodefileindices(files, subset):
    """Encode ``subset`` as the newline-joined indices of its members
    within ``files``.
    """
    members = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, name in enumerate(files) if name in members
    )
908
916
909
917
def decodefileindices(files, data):
    """Decode ``encodefileindices`` output back into a list of file names.

    Returns None on malformed or out-of-range input, e.g. when an
    extension used the same key (such as "added") with different syntax.
    """
    if not data:
        return []
    try:
        names = []
        for token in data.split(b'\n'):
            idx = int(token)
            if not 0 <= idx < len(files):
                return None
            names.append(files[idx])
        return names
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
925
933
926
934
def _getsidedata(srcrepo, rev):
    """Compute the copy-tracing sidedata entries for revision ``rev``.

    Returns a dict mapping sidedata keys (SD_P1COPIES, SD_P2COPIES,
    SD_FILESADDED, SD_FILESREMOVED) to their encoded values; keys with
    empty payloads are omitted.
    """
    ctx = srcrepo[rev]
    p1copies, p2copies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # The previous test was ``any([filescopies, filesadded, filesremoved])``
    # with ``filescopies`` being the (p1copies, p2copies) 2-tuple; a
    # non-empty tuple is always truthy, so the guard never skipped the
    # encoding work.  Testing the individual containers restores the
    # intended short-circuit; the returned dict is unchanged either way
    # since empty encodings were already filtered out below.
    if any([p1copies, p2copies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
949
957
950
958
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion that injects copy sidedata.

    The companion computes copies/added/removed sidedata for changelog
    revisions and leaves other revlogs untouched.
    """

    def sidedatacompanion(revlog, rev):
        # only the changelog (recognized by its ``filteredrevs``
        # attribute) carries copy sidedata
        if not util.safehasattr(revlog, 'filteredrevs'):
            return False, (), {}
        return False, (), _getsidedata(srcrepo, rev)

    return sidedatacompanion
959
967
960
968
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion that strips copy sidedata entries."""

    def sidedatacompanion(revlog, rev):
        removed = ()
        # only changelog revisions (detected via ``filteredrevs``) with
        # the sidedata flag set can carry copy information to strip
        if util.safehasattr(revlog, 'filteredrevs'):
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                removed = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, removed, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now