copies: simplify chain loop...

marmoute
r43596:f0a2c197 default
@@ -1,1017 +1,1014 @@
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import os

from .i18n import _


from .revlogutils.flagutil import REVIDX_SIDEDATA

from . import (
    error,
    match as matchmod,
    node,
    pathutil,
    pycompat,
    util,
)

from .revlogutils import sidedata as sidedatamod

from .utils import stringutil


def _filter(src, dst, t):
    """filters out invalid copies after chaining"""

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    # case  src  mid  dst  result
    #   1    x    y    -     -
    #   2    x    y    y    x->y
    #   3    x    y    x     -
    #   4    x    y    z    x->z
    #   5    -    x    y     -
    #   6    x    x    y    x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]
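
# Illustrative example (added, not part of the original module): if src
# contains 'a', dst contains 'b', and chaining produced
# t == {b'b': b'a', b'c': b'a'} where 'c' no longer exists in dst, then
# _filter(src, dst, t) drops the b'c' entry (case 1) and keeps
# {b'b': b'a'}.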


def _chain(a, b):
    """chain two sets of copies 'a' and 'b'"""
    t = a.copy()
    for k, v in pycompat.iteritems(b):
-        if v in t:
-            t[k] = t[v]
-        else:
-            t[k] = v
+        t[k] = t.get(v, v)
    return t
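
# Illustrative example (added): chaining a rename a->b with a later
# rename b->c yields a direct a->c entry. The simplified `t.get(v, v)`
# line behaves exactly like the removed if/else: it looks the source up
# in 't' and falls back to the source itself.
#
#     _chain({b'b': b'a'}, {b'c': b'b'})  ==  {b'b': b'a', b'c': b'a'}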


def _tracefile(fctx, am, basemf):
    """return the path of the ancestor of fctx present in ancestor
    manifest am

    Note: we used to try and stop after a given limit, however checking if
    that limit is reached turned out to be very expensive. We are better off
    disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path


def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
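        # Added note: ds[k] is the dirstate state of the copy destination;
        # b'anm' keeps added/normal/merged entries and drops untracked or
        # removed ones (and anything excluded by the matcher).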
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c


def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)


def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource
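
# Illustrative note (added): the changeset-centric path is taken either
# when the repository requires it (filecopiesmode == b'changeset-sidedata')
# or with a configuration such as:
#
#     [experimental]
#     copies.read-from = compatibility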


def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm


def _revinfogetter(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    """
    cl = repo.changelog
    parents = cl.parentrevs

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        # A small cache to avoid doing the work twice for merges
        #
        # In the vast majority of cases, if we ask information for a revision
        # about 1 parent, we'll later ask it for the other. So it makes sense
        # to keep the information around when reaching the first parent of a
        # merge and dropping it after it was provided for the second parent.
        #
        # There are cases where only one parent of the merge will be walked.
        # It happens when the "destination" of the copy tracing is a
        # descendant of a new root, not common with the "source". In that
        # case, we will only walk through merge parents that are descendants
        # of changesets common between "source" and "destination".
        #
        # With the current implementation, if such changesets have copy
        # information, we'll keep them in memory until the end of
        # _changesetforwardcopies. We don't expect the case to be frequent
        # enough to matter.
        #
        # In addition, it would be possible to reach pathological cases where
        # many first parents are met before any second parent is reached. In
        # that case the cache could grow. If this ever becomes an issue one
        # can safely introduce a maximum cache size. This would trade extra
        # CPU/IO time to save memory.
        merge_caches = {}

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & REVIDX_SIDEDATA:
                e = merge_caches.pop(rev, None)
                if e is not None:
                    return e
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
                if p1 != node.nullrev and p2 != node.nullrev:
                    # XXX in some cases we over-cache; IGNORE
                    merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
            else:
                p1copies = {}
                p2copies = {}
                removed = ()
            return p1, p2, p1copies, p2copies, removed

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed

    return revinfo


def _changesetforwardcopies(a, b, match):
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    all_copies = {r: {} for r in roots}
    alwaysmatch = match.always()
    for r in sorted(iterrevs):
        copies = all_copies.pop(r)
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
                # _chain makes a copy; we could avoid doing so in some
                # simple/linear cases.
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 takes precedence over parent
                # 2. This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog-related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    return all_copies[b.rev()]


def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies


def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
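
# Illustrative sketch (added): if 'old.txt' was renamed to 'new.txt'
# going forward from b to a, _forwardcopies(b, a) is
# {b'new.txt': b'old.txt'} and the reversed result is
# {b'old.txt': b'new.txt'}. When 'old.txt' still exists in 'a' the pair
# was a copy rather than a rename, so it is skipped.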


def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies


def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    | /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we keep copytracing enabled in such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
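
# Illustrative sketch (added): for a simple case where one side renamed
# 'a' to 'b' and the other side modified 'a', the expected result is
# copy == {b'b': b'a'} with the other four dicts empty.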


def _isfullcopytraceable(repo, c1, base):
    """ Checks whether base, source and destination are all non-public
    branches; if yes, use the full copytrace algorithm for increased
    capabilities, since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    the number of changesets from c1 to base; if the number of changesets is
    more than the limit, the full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False
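
# Illustrative sketch (added, hypothetical value): the limit can be tuned
# via configuration, e.g.
#
#     [experimental]
#     copytrace.sourcecommitlimit = 100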


def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif m2[src] != mb[src]:
        if not _related(c2[src], base[src]):
            return
        # modified on side 2
        for dst in dsts1:
            if dst not in m2:
                # dst not added on side 2 (handle as regular
                # "both created" case in manifestmerge otherwise)
                copy[dst] = src


def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
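
# Illustrative sketch (added): if every file under b'docs/' was moved to
# b'manual/', dirmove maps b'docs/' -> b'manual/', and a file added as
# b'docs/new.txt' on the other side is then scheduled to move with the
# directory via movewithdir[b'docs/new.txt'] == b'manual/new.txt'.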


def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of the following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If a merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # a file is missing if it isn't present in the destination, but is present
    # in the base and present in the source.
    # Presence in the base is important to exclude added files, presence in
    # the source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the
            # heuristics; config value to limit the number of candidate
            # moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
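
# Illustrative sketch (added): a file missing from c1 such as
# b'dir/a.txt' is matched against files newly added in c1 that share
# its basename (e.g. b'newdir/a.txt') or its directory (e.g.
# b'dir/b.txt'); every candidate related to it by _related() is
# recorded as a copy.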


def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        return False


def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            continue
        if dst in wctx:
            wctx[dst].markcopied(src)


869 def computechangesetfilesadded(ctx):
866 def computechangesetfilesadded(ctx):
870 """return the list of files added in a changeset
867 """return the list of files added in a changeset
871 """
868 """
872 added = []
869 added = []
873 for f in ctx.files():
870 for f in ctx.files():
874 if not any(f in p for p in ctx.parents()):
871 if not any(f in p for p in ctx.parents()):
875 added.append(f)
872 added.append(f)
876 return added
873 return added
877
874
878
875
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    removed = []
    for f in ctx.files():
        if f not in ctx:
            removed.append(f)
    return removed


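# Together the two helpers above split ctx.files() by status: a file is
# "added" when no parent contains it, "removed" when the changeset itself
# no longer contains it, and everything else is a plain modification. A
# toy partition with plain sets standing in for manifests (hypothetical
# data, not the Mercurial API):
#
#   >>> parents = [{'a', 'b'}]      # files present in p1
#   >>> manifest = {'a', 'c'}       # files present in the changeset
#   >>> changed = ['b', 'c']        # what ctx.files() would report
#   >>> [f for f in changed if not any(f in p for p in parents)]  # added
#   ['c']
#   >>> [f for f in changed if f not in manifest]                 # removed
#   ['b']

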
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries
    (p1copies, p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies


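# For a changeset that renamed b'old' to b'new' relative to its first
# parent, the pair returned above would look like the following
# (hypothetical values, for illustration only):
#
#   >>> p1copies, p2copies = computechangesetcopies(ctx)
#   >>> p1copies
#   {b'new': b'old'}
#   >>> p2copies
#   {}

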
def encodecopies(files, copies):
    items = []
    for i, dst in enumerate(files):
        if dst in copies:
            items.append(b'%d\0%s' % (i, copies[dst]))
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)


def decodecopies(files, data):
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            dst = files[i]
            copies[dst] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None


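# encodecopies() and decodecopies() are inverses over a shared, sorted
# file list: each copy is stored as "<index of dst>\0<src>", one record
# per line. A round trip with toy values (not repository data):
#
#   >>> files = [b'a', b'b', b'c']
#   >>> encodecopies(files, {b'c': b'a'})
#   b'2\x00a'
#   >>> decodecopies(files, b'2\x00a')
#   {b'c': b'a'}
#   >>> decodecopies([b'a'], b'9\x00a') is None   # bad index -> None
#   True

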
def encodefileindices(files, subset):
    subset = set(subset)
    indices = []
    for i, f in enumerate(files):
        if f in subset:
            indices.append(b'%d' % i)
    return b'\n'.join(indices)


def decodefileindices(files, data):
    try:
        subset = []
        if not data:
            return subset
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if i < 0 or i >= len(files):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None


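# The same index-into-the-file-list trick is used for the added/removed
# sets; note that decoding returns files in file-list order, not in the
# order the subset was originally given. A round trip with toy values:
#
#   >>> files = [b'a', b'b', b'c']
#   >>> encodefileindices(files, [b'c', b'a'])
#   b'0\n2'
#   >>> decodefileindices(files, b'0\n2')
#   [b'a', b'c']

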
def _getsidedata(srcrepo, rev):
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([filescopies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata


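# The resulting sidedata mapping therefore holds up to four byte-string
# entries, all indexing into the same sorted(ctx.files()) list. For a
# changeset whose only change is adding b'b' as a copy of b'a', it might
# look like this (hypothetical values, for illustration only):
#
#   {
#       sidedatamod.SD_P1COPIES: b'0\x00a',  # files[0] (b'b') copied from b'a'
#       sidedatamod.SD_FILESADDED: b'0',     # files[0] was added
#   }

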
def getsidedataadder(srcrepo, destrepo):
    def sidedatacompanion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion


def getsidedataremover(srcrepo, destrepo):
    def sidedatacompanion(revlog, rev):
        f = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                f = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, f, {}

    return sidedatacompanion
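

# Both factories above produce a "sidedata companion" with the same
# shape: called as companion(revlog, rev) while revlogs are being
# rewritten, it returns a 3-tuple which, judging from the two instances
# here, reads as (drop all sidedata?, keys to filter out, new entries to
# store). A hypothetical companion that leaves every revision untouched:
#
#   def noopcompanion(revlog, rev):
#       # drop nothing, filter no keys, add no new sidedata
#       return False, (), {}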