copies: do not initialize the dictionary with root in changeset copies...
marmoute
r44177:fb8f10c0 default draft
@@ -1,1014 +1,1017 @@
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import os

from .i18n import _


from .revlogutils.flagutil import REVIDX_SIDEDATA

from . import (
    error,
    match as matchmod,
    node,
    pathutil,
    pycompat,
    util,
)

from .revlogutils import sidedata as sidedatamod

from .utils import stringutil


def _filter(src, dst, t):
    """filters out invalid copies after chaining"""

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    # case src mid dst result
    #    1  x   y   -    -
    #    2  x   y   y   x->y
    #    3  x   y   x    -
    #    4  x   y   z   x->z
    #    5  -   x   y    -
    #    6  x   x   y   x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]


def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    result = prefix.copy()
    for key, value in pycompat.iteritems(suffix):
        result[key] = prefix.get(value, value)
    return result

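A quick illustration of how _chain() and _filter() interact, using plain
dicts (a hypothetical standalone sketch, not part of this module):

    # 'a' was renamed to 'b' between src and mid, then 'b' to 'c' in dst
    prefix = {'b': 'a'}
    suffix = {'c': 'b'}
    result = dict(prefix)
    for key, value in suffix.items():
        result[key] = prefix.get(value, value)
    assert result == {'b': 'a', 'c': 'a'}  # 'c' traces all the way back to 'a'
    # 'b' no longer exists in dst, so _filter() would drop the 'b': 'a' entry
    # (case 1 in the table above)
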
def _tracefile(fctx, am, basemf):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am

    Note: we used to try and stop after a given limit; however, checking if
    that limit is reached turned out to be very expensive. We are better off
    disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path


def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c


def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)


def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource


def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm


def _revinfogetter(repo):
    """return a function that returns the following data given a <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    """
    cl = repo.changelog
    parents = cl.parentrevs

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        # A small cache to avoid doing the work twice for merges
        #
        # In the vast majority of cases, if we ask information for a revision
        # about 1 parent, we'll later ask it for the other. So it makes sense
        # to keep the information around when reaching the first parent of a
        # merge and dropping it after it was provided for the second parent.
        #
        # There are cases where only one parent of the merge will be walked. It
        # happens when the "destination" of the copy tracing is descended from
        # a new root, not common with the "source". In that case, we will only
        # walk through merge parents that are descendants of changesets common
        # between "source" and "destination".
        #
        # With the current implementation, if such changesets have copy
        # information, we'll keep them in memory until the end of
        # _changesetforwardcopies. We don't expect the case to be frequent
        # enough to matter.
        #
        # In addition, it would be possible to reach a pathological case, where
        # many first parents are met before any second parent is reached. In
        # that case the cache could grow. If this ever becomes an issue one can
        # safely introduce a maximum cache size. This would trade extra CPU/IO
        # time to save memory.
        merge_caches = {}

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & REVIDX_SIDEDATA:
                e = merge_caches.pop(rev, None)
                if e is not None:
                    return e
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
                if p1 != node.nullrev and p2 != node.nullrev:
                    # XXX in some cases we over-cache, IGNORE
                    merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
            else:
                p1copies = {}
                p2copies = {}
                removed = []
            return p1, p2, p1copies, p2copies, removed

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed

    return revinfo

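The merge_caches dance above is worth spelling out: revinfo() caches the
decoded sidedata the first time a merge is reached (from one parent) and
pop()s it when the merge is reached again from the other parent, so each
merge is decoded at most once. A minimal sketch of that pop-once pattern,
with a hypothetical compute() standing in for changelogrevision():

    cache = {}
    calls = []

    def compute(rev):
        calls.append(rev)
        return b'data-for-%d' % rev

    def info(rev, is_merge):
        e = cache.pop(rev, None)  # a second lookup consumes the cached entry
        if e is not None:
            return e
        e = compute(rev)
        if is_merge:
            cache[rev] = e  # keep it around for the other parent's walk
        return e

    info(5, is_merge=True)  # reached via the first parent: computed
    info(5, is_merge=True)  # reached via the second parent: served and evicted
    assert calls == [5] and cache == {}
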
def _changesetforwardcopies(a, b, match):
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
-    all_copies = {r: {} for r in roots}
+    all_copies = {}
     alwaysmatch = match.always()
     for r in sorted(iterrevs):
-        copies = all_copies.pop(r)
+        copies = all_copies.pop(r, None)
+        if copies is None:
+            # this is a root
+            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
                # _chain makes a copy; we could avoid doing so in some
                # simple/linear cases.
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. When there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog-related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    return all_copies[b.rev()]

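This is the change the patch makes: all_copies used to be pre-seeded with an
empty dict for every root ("{r: {} for r in roots}"); it now starts empty,
and a miss from pop(r, None) identifies a root lazily. A stripped-down
comparison with hypothetical data (plain ints and dicts, not Mercurial
objects):

    roots = {0, 1}

    # before: every root got a dict up front, so pop() always succeeded
    all_copies = {r: {} for r in roots}
    all_copies[2] = {'b': 'a'}  # entry propagated from a parent
    assert all_copies.pop(0) == {}

    # after: the dict starts empty and a miss marks the revision as a root
    all_copies = {2: {'b': 'a'}}
    copies = all_copies.pop(0, None)
    if copies is None:
        # this is a root
        copies = {}
    assert copies == {}
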
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies


def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r


def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies


def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source, present in one context but not the other,
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so in such cases we keep copytracing enabled
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)


def _isfullcopytraceable(repo, c1, base):
    """Checks whether base, source and destination are all non-public; if so,
    the full copytrace algorithm can be used for increased capabilities since
    it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    the number of changesets from c1 to base such that if the number of
    changesets is more than the limit, the full copytracing algorithm won't
    be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False

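Both limits mentioned around here are ordinary [experimental] config knobs.
A hedged hgrc sketch (movecandidateslimit's default of 100 is what
_heuristicscopytracing()'s docstring below states; the sourcecommitlimit
value here is illustrative, the real default lives in Mercurial's
configitems):

    [experimental]
    copytrace = heuristics
    # limit checked by _isfullcopytraceable() above
    copytrace.sourcecommitlimit = 100
    # cap on rename candidates compared per missing file
    copytrace.movecandidateslimit = 100
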
def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif m2[src] != mb[src]:
        if not _related(c2[src], base[src]):
            return
        # modified on side 2
        for dst in dsts1:
            if dst not in m2:
                # dst not added on side 2 (handle as regular
                # "both created" case in manifestmerge otherwise)
                copy[dst] = src


def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm: it finds all the new files that were
    added from merge base up to the top commit and, for each file, checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a', the common destination 'c' is
                # recorded as a plain copy and no divergence is flagged.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove

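A concrete illustration of the copy-versus-diverge decision implemented
above, with hypothetical paths and plain dicts standing in for the inverted
copy maps:

    inversecopies1 = {'a': ['b', 'c']}  # side 1: a -> b and a -> c, a deleted
    inversecopies2 = {'a': ['c', 'd']}  # side 2: a -> c and a -> d, a deleted

    copy, diverge = {}, {}
    src = 'a'
    dsts1 = set(inversecopies1[src])
    dsts2 = set(inversecopies2[src])
    if dsts1 & dsts2:
        for dst in dsts1 & dsts2:
            copy[dst] = src
    else:
        diverge[src] = sorted(dsts1 | dsts2)

    assert copy == {'c': 'a'} and diverge == {}
    # with disjoint destinations, e.g. {'b'} and {'d'}, the same code would
    # instead record diverge == {'a': ['b', 'd']}
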
698 def _heuristicscopytracing(repo, c1, c2, base):
701 def _heuristicscopytracing(repo, c1, c2, base):
699 """ Fast copytracing using filename heuristics
702 """ Fast copytracing using filename heuristics
700
703
701 Assumes that moves or renames are of following two types:
704 Assumes that moves or renames are of following two types:
702
705
703 1) Inside a directory only (same directory name but different filenames)
706 1) Inside a directory only (same directory name but different filenames)
704 2) Move from one directory to another
707 2) Move from one directory to another
705 (same filenames but different directory names)
708 (same filenames but different directory names)
706
709
707 Works only when there are no merge commits in the "source branch".
710 Works only when there are no merge commits in the "source branch".
708 Source branch is commits from base up to c2 not including base.
711 Source branch is commits from base up to c2 not including base.
709
712
710 If merge is involved it fallbacks to _fullcopytracing().
713 If merge is involved it fallbacks to _fullcopytracing().
711
714
712 Can be used by setting the following config:
715 Can be used by setting the following config:
713
716
714 [experimental]
717 [experimental]
715 copytrace = heuristics
718 copytrace = heuristics
716
719
717 In some cases the copy/move candidates found by heuristics can be very large
720 In some cases the copy/move candidates found by heuristics can be very large
718 in number and that will make the algorithm slow. The number of possible
721 in number and that will make the algorithm slow. The number of possible
719 candidates to check can be limited by using the config
722 candidates to check can be limited by using the config
720 `experimental.copytrace.movecandidateslimit` which defaults to 100.
723 `experimental.copytrace.movecandidateslimit` which defaults to 100.
721 """
724 """
722
725
723 if c1.rev() is None:
726 if c1.rev() is None:
724 c1 = c1.p1()
727 c1 = c1.p1()
725 if c2.rev() is None:
728 if c2.rev() is None:
726 c2 = c2.p1()
729 c2 = c2.p1()
727
730
728 copies = {}
731 copies = {}
729
732
730 changedfiles = set()
733 changedfiles = set()
731 m1 = c1.manifest()
734 m1 = c1.manifest()
732 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
735 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
733 # If base is not in c2 branch, we switch to fullcopytracing
736 # If base is not in c2 branch, we switch to fullcopytracing
734 repo.ui.debug(
737 repo.ui.debug(
735 b"switching to full copytracing as base is not "
738 b"switching to full copytracing as base is not "
736 b"an ancestor of c2\n"
739 b"an ancestor of c2\n"
737 )
740 )
738 return _fullcopytracing(repo, c1, c2, base)
741 return _fullcopytracing(repo, c1, c2, base)
739
742
740 ctx = c2
743 ctx = c2
741 while ctx != base:
744 while ctx != base:
742 if len(ctx.parents()) == 2:
745 if len(ctx.parents()) == 2:
743 # To keep things simple let's not handle merges
746 # To keep things simple let's not handle merges
744 repo.ui.debug(b"switching to full copytracing because of merges\n")
747 repo.ui.debug(b"switching to full copytracing because of merges\n")
745 return _fullcopytracing(repo, c1, c2, base)
748 return _fullcopytracing(repo, c1, c2, base)
746 changedfiles.update(ctx.files())
749 changedfiles.update(ctx.files())
747 ctx = ctx.p1()
750 ctx = ctx.p1()
748
751
749 cp = _forwardcopies(base, c2)
752 cp = _forwardcopies(base, c2)
750 for dst, src in pycompat.iteritems(cp):
753 for dst, src in pycompat.iteritems(cp):
751 if src in m1:
754 if src in m1:
752 copies[dst] = src
755 copies[dst] = src
753
756
754 # file is missing if it isn't present in the destination, but is present in
757 # file is missing if it isn't present in the destination, but is present in
755 # the base and present in the source.
758 # the base and present in the source.
756 # Presence in the base is important to exclude added files, presence in the
759 # Presence in the base is important to exclude added files, presence in the
757 # source is important to exclude removed files.
760 # source is important to exclude removed files.
758 filt = lambda f: f not in m1 and f in base and f in c2
761 filt = lambda f: f not in m1 and f in base and f in c2
759 missingfiles = [f for f in changedfiles if filt(f)]
762 missingfiles = [f for f in changedfiles if filt(f)]
760
763
761 if missingfiles:
764 if missingfiles:
762 basenametofilename = collections.defaultdict(list)
765 basenametofilename = collections.defaultdict(list)
763 dirnametofilename = collections.defaultdict(list)
766 dirnametofilename = collections.defaultdict(list)
764
767
765 for f in m1.filesnotin(base.manifest()):
768 for f in m1.filesnotin(base.manifest()):
766 basename = os.path.basename(f)
769 basename = os.path.basename(f)
767 dirname = os.path.dirname(f)
770 dirname = os.path.dirname(f)
768 basenametofilename[basename].append(f)
771 basenametofilename[basename].append(f)
769 dirnametofilename[dirname].append(f)
772 dirnametofilename[dirname].append(f)
770
773
771 for f in missingfiles:
774 for f in missingfiles:
772 basename = os.path.basename(f)
775 basename = os.path.basename(f)
773 dirname = os.path.dirname(f)
776 dirname = os.path.dirname(f)
774 samebasename = basenametofilename[basename]
777 samebasename = basenametofilename[basename]
775 samedirname = dirnametofilename[dirname]
778 samedirname = dirnametofilename[dirname]
776 movecandidates = samebasename + samedirname
779 movecandidates = samebasename + samedirname
777 # f is guaranteed to be present in c2, that's why
780 # f is guaranteed to be present in c2, that's why
778 # c2.filectx(f) won't fail
781 # c2.filectx(f) won't fail
779 f2 = c2.filectx(f)
782 f2 = c2.filectx(f)
780 # we can have a lot of candidates which can slow down the heuristics
783 # we can have a lot of candidates which can slow down the heuristics
781 # config value to limit the number of candidates moves to check
784 # config value to limit the number of candidates moves to check
782 maxcandidates = repo.ui.configint(
785 maxcandidates = repo.ui.configint(
783 b'experimental', b'copytrace.movecandidateslimit'
786 b'experimental', b'copytrace.movecandidateslimit'
784 )
787 )
785
788
786 if len(movecandidates) > maxcandidates:
789 if len(movecandidates) > maxcandidates:
787 repo.ui.status(
790 repo.ui.status(
788 _(
791 _(
789 b"skipping copytracing for '%s', more "
792 b"skipping copytracing for '%s', more "
790 b"candidates than the limit: %d\n"
793 b"candidates than the limit: %d\n"
791 )
794 )
792 % (f, len(movecandidates))
795 % (f, len(movecandidates))
793 )
796 )
794 continue
797 continue
795
798
796 for candidate in movecandidates:
799 for candidate in movecandidates:
797 f1 = c1.filectx(candidate)
800 f1 = c1.filectx(candidate)
798 if _related(f1, f2):
801 if _related(f1, f2):
799 # if there are a few related copies then we'll merge
802 # if there are a few related copies then we'll merge
800 # changes into all of them. This matches the behaviour
803 # changes into all of them. This matches the behaviour
801 # of upstream copytracing
804 # of upstream copytracing
802 copies[candidate] = f
805 copies[candidate] = f
803
806
804 return copies, {}, {}, {}, {}
807 return copies, {}, {}, {}, {}
805
808
806
809
807 def _related(f1, f2):
810 def _related(f1, f2):
808 """return True if f1 and f2 filectx have a common ancestor
811 """return True if f1 and f2 filectx have a common ancestor
809
812
810 Walk back to common ancestor to see if the two files originate
813 Walk back to common ancestor to see if the two files originate
811 from the same file. Since workingfilectx's rev() is None it messes
814 from the same file. Since workingfilectx's rev() is None it messes
812 up the integer comparison logic, hence the pre-step check for
815 up the integer comparison logic, hence the pre-step check for
813 None (f1 and f2 can only be workingfilectx's initially).
816 None (f1 and f2 can only be workingfilectx's initially).
814 """
817 """
815
818
816 if f1 == f2:
819 if f1 == f2:
817 return True # a match
820 return True # a match
818
821
819 g1, g2 = f1.ancestors(), f2.ancestors()
822 g1, g2 = f1.ancestors(), f2.ancestors()
820 try:
823 try:
821 f1r, f2r = f1.linkrev(), f2.linkrev()
824 f1r, f2r = f1.linkrev(), f2.linkrev()
822
825
823 if f1r is None:
826 if f1r is None:
824 f1 = next(g1)
827 f1 = next(g1)
825 if f2r is None:
828 if f2r is None:
826 f2 = next(g2)
829 f2 = next(g2)
827
830
828 while True:
831 while True:
829 f1r, f2r = f1.linkrev(), f2.linkrev()
832 f1r, f2r = f1.linkrev(), f2.linkrev()
830 if f1r > f2r:
833 if f1r > f2r:
831 f1 = next(g1)
834 f1 = next(g1)
832 elif f2r > f1r:
835 elif f2r > f1r:
833 f2 = next(g2)
836 f2 = next(g2)
834 else: # f1 and f2 point to files in the same linkrev
837 else: # f1 and f2 point to files in the same linkrev
835 return f1 == f2 # true if they point to the same file
838 return f1 == f2 # true if they point to the same file
836 except StopIteration:
839 except StopIteration:
837 return False
840 return False
838
841
839
842
840 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
843 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
841 """reproduce copies from fromrev to rev in the dirstate
844 """reproduce copies from fromrev to rev in the dirstate
842
845
843 If skiprev is specified, it's a revision that should be used to
846 If skiprev is specified, it's a revision that should be used to
844 filter copy records. Any copies that occur between fromrev and
847 filter copy records. Any copies that occur between fromrev and
845 skiprev will not be duplicated, even if they appear in the set of
848 skiprev will not be duplicated, even if they appear in the set of
846 copies between fromrev and rev.
849 copies between fromrev and rev.
847 """
850 """
848 exclude = {}
851 exclude = {}
849 ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
852 ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
850 bctrace = stringutil.parsebool(ctraceconfig)
853 bctrace = stringutil.parsebool(ctraceconfig)
851 if skiprev is not None and (
854 if skiprev is not None and (
852 ctraceconfig == b'heuristics' or bctrace or bctrace is None
855 ctraceconfig == b'heuristics' or bctrace or bctrace is None
853 ):
856 ):
854 # copytrace='off' skips this line, but not the entire function because
857 # copytrace='off' skips this line, but not the entire function because
855 # the line below is O(size of the repo) during a rebase, while the rest
858 # the line below is O(size of the repo) during a rebase, while the rest
856 # of the function is much faster (and is required for carrying copy
859 # of the function is much faster (and is required for carrying copy
857 # metadata across the rebase anyway).
860 # metadata across the rebase anyway).
858 exclude = pathcopies(repo[fromrev], repo[skiprev])
861 exclude = pathcopies(repo[fromrev], repo[skiprev])
859 for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
862 for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
860 if dst in exclude:
863 if dst in exclude:
861 continue
864 continue
862 if dst in wctx:
865 if dst in wctx:
863 wctx[dst].markcopied(src)
866 wctx[dst].markcopied(src)
864
867
865
868
866 def computechangesetfilesadded(ctx):
869 def computechangesetfilesadded(ctx):
867 """return the list of files added in a changeset
870 """return the list of files added in a changeset
868 """
871 """
869 added = []
872 added = []
870 for f in ctx.files():
873 for f in ctx.files():
871 if not any(f in p for p in ctx.parents()):
874 if not any(f in p for p in ctx.parents()):
872 added.append(f)
875 added.append(f)
873 return added
876 return added


def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    removed = []
    for f in ctx.files():
        if f not in ctx:
            removed.append(f)
    return removed
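
# Worked example (hypothetical changeset, not part of the original module):
# for a changeset whose ctx.files() is [b'a', b'b'], where b'a' exists in the
# changeset but in neither parent, and b'b' exists in a parent but not in the
# changeset:
#
#     computechangesetfilesadded(ctx)    # -> [b'a']
#     computechangesetfilesremoved(ctx)  # -> [b'b']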


def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data is returned as a pair of dictionaries (p1copies, p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
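
# Shape of the result (hypothetical values): for a changeset that renamed
# b'old' to b'new' relative to its first parent, this returns
#
#     ({b'new': b'old'}, {})
#
# i.e. copies against p1 in the first dictionary and copies against p2 (only
# relevant for merge commits) in the second.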


def encodecopies(files, copies):
    items = []
    for i, dst in enumerate(files):
        if dst in copies:
            items.append(b'%d\0%s' % (i, copies[dst]))
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)
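
# Encoding example (hypothetical values, not part of the original module):
# each copy is stored as the index of the destination within 'files', a NUL
# byte, and the source name, with entries joined by newlines:
#
#     encodecopies([b'a', b'b', b'c'], {b'b': b'a', b'c': b'a'})
#     # -> b'1\x00a\n2\x00a'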


def decodecopies(files, data):
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            dst = files[i]
            copies[dst] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
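
# Decoding mirrors the encoding above (hypothetical values):
#
#     decodecopies([b'a', b'b', b'c'], b'1\x00a\n2\x00a')
#     # -> {b'b': b'a', b'c': b'a'}
#
# Malformed data (e.g. written by an unrelated user of the same key) decodes
# to None rather than raising.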


def encodefileindices(files, subset):
    subset = set(subset)
    indices = []
    for i, f in enumerate(files):
        if f in subset:
            indices.append(b'%d' % i)
    return b'\n'.join(indices)
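
# Example (hypothetical values): the subset is stored as newline-separated
# decimal indices into 'files', in file order:
#
#     encodefileindices([b'a', b'b', b'c'], [b'c', b'a'])
#     # -> b'0\n2'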


def decodefileindices(files, data):
    try:
        subset = []
        if not data:
            return subset
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if i < 0 or i >= len(files):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
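
# The decoder reverses the mapping and validates the indices (hypothetical
# values):
#
#     decodefileindices([b'a', b'b', b'c'], b'0\n2')  # -> [b'a', b'c']
#     decodefileindices([b'a', b'b', b'c'], b'0\n7')  # -> None (out of range)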


def _getsidedata(srcrepo, rev):
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([filescopies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
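
# Resulting shape (hypothetical values): for a revision that only renamed
# b'old' to b'new' against p1 (so sortedfiles == [b'new', b'old']), the
# returned mapping would be roughly
#
#     {
#         sidedatamod.SD_P1COPIES: b'0\x00old',
#         sidedatamod.SD_FILESADDED: b'0',
#         sidedatamod.SD_FILESREMOVED: b'1',
#     }
#
# using the index-based encodings defined above.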


def getsidedataadder(srcrepo, destrepo):
    def sidedatacompanion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion
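
# The companion protocol, as used here: for each revision the callback
# returns a (removeflag, removedkeys, addedsidedata) triple. This adder never
# strips anything, hence the constant (False, ()) prefix, and only computes
# copy sidedata for the changelog (identified by its 'filteredrevs'
# attribute), leaving filelogs and manifests untouched.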


def getsidedataremover(srcrepo, destrepo):
    def sidedatacompanion(revlog, rev):
        f = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                f = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, f, {}

    return sidedatacompanion
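
# Conversely, the remover returns (False, keys, {}), where 'keys' names the
# four copy-related sidedata categories to strip from changelog revisions
# that actually carry sidedata (REVIDX_SIDEDATA flag set); all other revlogs
# pass through unchanged.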