##// END OF EJS Templates
sidedatacopies: directly fetch copies information from sidedata...
marmoute -
r43551:675c776f default
parent child Browse files
Show More
@@ -1,955 +1,968 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16
16
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18
18
19 from . import (
19 from . import (
20 error,
20 error,
21 match as matchmod,
21 match as matchmod,
22 node,
22 node,
23 pathutil,
23 pathutil,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28 from .revlogutils import sidedata as sidedatamod
28 from .revlogutils import sidedata as sidedatamod
29
29
30 from .utils import stringutil
30 from .utils import stringutil
31
31
32
32
33 def _filter(src, dst, t):
33 def _filter(src, dst, t):
34 """filters out invalid copies after chaining"""
34 """filters out invalid copies after chaining"""
35
35
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 # in the following table (not including trivial cases). For example, case 2
38 # in the following table (not including trivial cases). For example, case 2
39 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 # then was renamed between 'mid' and 'dst'.
40 # then was renamed between 'mid' and 'dst'.
41 #
41 #
42 # case src mid dst result
42 # case src mid dst result
43 # 1 x y - -
43 # 1 x y - -
44 # 2 x y y x->y
44 # 2 x y y x->y
45 # 3 x y x -
45 # 3 x y x -
46 # 4 x y z x->z
46 # 4 x y z x->z
47 # 5 - x y -
47 # 5 - x y -
48 # 6 x x y x->y
48 # 6 x x y x->y
49 #
49 #
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 # between 5 and 6, so it includes all cases in its result.
52 # between 5 and 6, so it includes all cases in its result.
53 # Cases 1, 3, and 5 are then removed by _filter().
53 # Cases 1, 3, and 5 are then removed by _filter().
54
54
55 for k, v in list(t.items()):
55 for k, v in list(t.items()):
56 # remove copies from files that didn't exist
56 # remove copies from files that didn't exist
57 if v not in src:
57 if v not in src:
58 del t[k]
58 del t[k]
59 # remove criss-crossed copies
59 # remove criss-crossed copies
60 elif k in src and v in dst:
60 elif k in src and v in dst:
61 del t[k]
61 del t[k]
62 # remove copies to files that were then removed
62 # remove copies to files that were then removed
63 elif k not in dst:
63 elif k not in dst:
64 del t[k]
64 del t[k]
65
65
66
66
67 def _chain(a, b):
67 def _chain(a, b):
68 """chain two sets of copies 'a' and 'b'"""
68 """chain two sets of copies 'a' and 'b'"""
69 t = a.copy()
69 t = a.copy()
70 for k, v in pycompat.iteritems(b):
70 for k, v in pycompat.iteritems(b):
71 if v in t:
71 if v in t:
72 t[k] = t[v]
72 t[k] = t[v]
73 else:
73 else:
74 t[k] = v
74 t[k] = v
75 return t
75 return t
76
76
77
77
78 def _tracefile(fctx, am, basemf):
78 def _tracefile(fctx, am, basemf):
79 """return file context that is the ancestor of fctx present in ancestor
79 """return file context that is the ancestor of fctx present in ancestor
80 manifest am
80 manifest am
81
81
82 Note: we used to try and stop after a given limit, however checking if that
82 Note: we used to try and stop after a given limit, however checking if that
83 limit is reached turned out to be very expensive. we are better off
83 limit is reached turned out to be very expensive. we are better off
84 disabling that feature."""
84 disabling that feature."""
85
85
86 for f in fctx.ancestors():
86 for f in fctx.ancestors():
87 path = f.path()
87 path = f.path()
88 if am.get(path, None) == f.filenode():
88 if am.get(path, None) == f.filenode():
89 return path
89 return path
90 if basemf and basemf.get(path, None) == f.filenode():
90 if basemf and basemf.get(path, None) == f.filenode():
91 return path
91 return path
92
92
93
93
94 def _dirstatecopies(repo, match=None):
94 def _dirstatecopies(repo, match=None):
95 ds = repo.dirstate
95 ds = repo.dirstate
96 c = ds.copies().copy()
96 c = ds.copies().copy()
97 for k in list(c):
97 for k in list(c):
98 if ds[k] not in b'anm' or (match and not match(k)):
98 if ds[k] not in b'anm' or (match and not match(k)):
99 del c[k]
99 del c[k]
100 return c
100 return c
101
101
102
102
103 def _computeforwardmissing(a, b, match=None):
103 def _computeforwardmissing(a, b, match=None):
104 """Computes which files are in b but not a.
104 """Computes which files are in b but not a.
105 This is its own function so extensions can easily wrap this call to see what
105 This is its own function so extensions can easily wrap this call to see what
106 files _forwardcopies is about to process.
106 files _forwardcopies is about to process.
107 """
107 """
108 ma = a.manifest()
108 ma = a.manifest()
109 mb = b.manifest()
109 mb = b.manifest()
110 return mb.filesnotin(ma, match=match)
110 return mb.filesnotin(ma, match=match)
111
111
112
112
113 def usechangesetcentricalgo(repo):
113 def usechangesetcentricalgo(repo):
114 """Checks if we should use changeset-centric copy algorithms"""
114 """Checks if we should use changeset-centric copy algorithms"""
115 if repo.filecopiesmode == b'changeset-sidedata':
115 if repo.filecopiesmode == b'changeset-sidedata':
116 return True
116 return True
117 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
117 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
118 changesetsource = (b'changeset-only', b'compatibility')
118 changesetsource = (b'changeset-only', b'compatibility')
119 return readfrom in changesetsource
119 return readfrom in changesetsource
120
120
121
121
122 def _committedforwardcopies(a, b, base, match):
122 def _committedforwardcopies(a, b, base, match):
123 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
123 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
124 # files might have to be traced back to the fctx parent of the last
124 # files might have to be traced back to the fctx parent of the last
125 # one-side-only changeset, but not further back than that
125 # one-side-only changeset, but not further back than that
126 repo = a._repo
126 repo = a._repo
127
127
128 if usechangesetcentricalgo(repo):
128 if usechangesetcentricalgo(repo):
129 return _changesetforwardcopies(a, b, match)
129 return _changesetforwardcopies(a, b, match)
130
130
131 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
131 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
132 dbg = repo.ui.debug
132 dbg = repo.ui.debug
133 if debug:
133 if debug:
134 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
134 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
135 am = a.manifest()
135 am = a.manifest()
136 basemf = None if base is None else base.manifest()
136 basemf = None if base is None else base.manifest()
137
137
138 # find where new files came from
138 # find where new files came from
139 # we currently don't try to find where old files went, too expensive
139 # we currently don't try to find where old files went, too expensive
140 # this means we can miss a case like 'hg rm b; hg cp a b'
140 # this means we can miss a case like 'hg rm b; hg cp a b'
141 cm = {}
141 cm = {}
142
142
143 # Computing the forward missing is quite expensive on large manifests, since
143 # Computing the forward missing is quite expensive on large manifests, since
144 # it compares the entire manifests. We can optimize it in the common use
144 # it compares the entire manifests. We can optimize it in the common use
145 # case of computing what copies are in a commit versus its parent (like
145 # case of computing what copies are in a commit versus its parent (like
146 # during a rebase or histedit). Note, we exclude merge commits from this
146 # during a rebase or histedit). Note, we exclude merge commits from this
147 # optimization, since the ctx.files() for a merge commit is not correct for
147 # optimization, since the ctx.files() for a merge commit is not correct for
148 # this comparison.
148 # this comparison.
149 forwardmissingmatch = match
149 forwardmissingmatch = match
150 if b.p1() == a and b.p2().node() == node.nullid:
150 if b.p1() == a and b.p2().node() == node.nullid:
151 filesmatcher = matchmod.exact(b.files())
151 filesmatcher = matchmod.exact(b.files())
152 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
152 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
153 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
153 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
154
154
155 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
155 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
156
156
157 if debug:
157 if debug:
158 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
158 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
159
159
160 for f in sorted(missing):
160 for f in sorted(missing):
161 if debug:
161 if debug:
162 dbg(b'debug.copies: tracing file: %s\n' % f)
162 dbg(b'debug.copies: tracing file: %s\n' % f)
163 fctx = b[f]
163 fctx = b[f]
164 fctx._ancestrycontext = ancestrycontext
164 fctx._ancestrycontext = ancestrycontext
165
165
166 if debug:
166 if debug:
167 start = util.timer()
167 start = util.timer()
168 opath = _tracefile(fctx, am, basemf)
168 opath = _tracefile(fctx, am, basemf)
169 if opath:
169 if opath:
170 if debug:
170 if debug:
171 dbg(b'debug.copies: rename of: %s\n' % opath)
171 dbg(b'debug.copies: rename of: %s\n' % opath)
172 cm[f] = opath
172 cm[f] = opath
173 if debug:
173 if debug:
174 dbg(
174 dbg(
175 b'debug.copies: time: %f seconds\n'
175 b'debug.copies: time: %f seconds\n'
176 % (util.timer() - start)
176 % (util.timer() - start)
177 )
177 )
178 return cm
178 return cm
179
179
180
180
181 def _revinfogetter(repo):
181 def _revinfogetter(repo):
182 """return a function that returns multiple data given a <rev>
182 """return a function that returns multiple data given a <rev>
183
183
184 * p1: revision number of first parent
184 * p1: revision number of first parent
185 * p2: revision number of second parent
185 * p2: revision number of second parent
186 * p1copies: mapping of copies from p1
186 * p1copies: mapping of copies from p1
187 * p2copies: mapping of copies from p2
187 * p2copies: mapping of copies from p2
188 * removed: a list of removed files
188 * removed: a list of removed files
189 """
189 """
190 cl = repo.changelog
190 cl = repo.changelog
191 parents = cl.parentrevs
191 parents = cl.parentrevs
192
192
193 def revinfo(rev):
193 if repo.filecopiesmode == b'changeset-sidedata':
194 p1, p2 = parents(rev)
194 changelogrevision = cl.changelogrevision
195 ctx = repo[rev]
195
196 p1copies, p2copies = ctx._copies
196 def revinfo(rev):
197 removed = ctx.filesremoved()
197 p1, p2 = parents(rev)
198 return p1, p2, p1copies, p2copies, removed
198 c = changelogrevision(rev)
199 p1copies = c.p1copies
200 p2copies = c.p2copies
201 removed = c.filesremoved
202 return p1, p2, p1copies, p2copies, removed
203
204 else:
205
206 def revinfo(rev):
207 p1, p2 = parents(rev)
208 ctx = repo[rev]
209 p1copies, p2copies = ctx._copies
210 removed = ctx.filesremoved()
211 return p1, p2, p1copies, p2copies, removed
199
212
200 return revinfo
213 return revinfo
201
214
202
215
203 def _changesetforwardcopies(a, b, match):
216 def _changesetforwardcopies(a, b, match):
204 if a.rev() in (node.nullrev, b.rev()):
217 if a.rev() in (node.nullrev, b.rev()):
205 return {}
218 return {}
206
219
207 repo = a.repo().unfiltered()
220 repo = a.repo().unfiltered()
208 children = {}
221 children = {}
209 revinfo = _revinfogetter(repo)
222 revinfo = _revinfogetter(repo)
210
223
211 cl = repo.changelog
224 cl = repo.changelog
212 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
225 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
213 for r in missingrevs:
226 for r in missingrevs:
214 for p in cl.parentrevs(r):
227 for p in cl.parentrevs(r):
215 if p == node.nullrev:
228 if p == node.nullrev:
216 continue
229 continue
217 if p not in children:
230 if p not in children:
218 children[p] = [r]
231 children[p] = [r]
219 else:
232 else:
220 children[p].append(r)
233 children[p].append(r)
221
234
222 roots = set(children) - set(missingrevs)
235 roots = set(children) - set(missingrevs)
223 work = list(roots)
236 work = list(roots)
224 all_copies = {r: {} for r in roots}
237 all_copies = {r: {} for r in roots}
225 heapq.heapify(work)
238 heapq.heapify(work)
226 alwaysmatch = match.always()
239 alwaysmatch = match.always()
227 while work:
240 while work:
228 r = heapq.heappop(work)
241 r = heapq.heappop(work)
229 copies = all_copies.pop(r)
242 copies = all_copies.pop(r)
230 if r == b.rev():
243 if r == b.rev():
231 return copies
244 return copies
232 for i, c in enumerate(children[r]):
245 for i, c in enumerate(children[r]):
233 p1, p2, p1copies, p2copies, removed = revinfo(c)
246 p1, p2, p1copies, p2copies, removed = revinfo(c)
234 if r == p1:
247 if r == p1:
235 parent = 1
248 parent = 1
236 childcopies = p1copies
249 childcopies = p1copies
237 else:
250 else:
238 assert r == p2
251 assert r == p2
239 parent = 2
252 parent = 2
240 childcopies = p2copies
253 childcopies = p2copies
241 if not alwaysmatch:
254 if not alwaysmatch:
242 childcopies = {
255 childcopies = {
243 dst: src for dst, src in childcopies.items() if match(dst)
256 dst: src for dst, src in childcopies.items() if match(dst)
244 }
257 }
245 # Copy the dict only if later iterations will also need it
258 # Copy the dict only if later iterations will also need it
246 if i != len(children[r]) - 1:
259 if i != len(children[r]) - 1:
247 newcopies = copies.copy()
260 newcopies = copies.copy()
248 else:
261 else:
249 newcopies = copies
262 newcopies = copies
250 if childcopies:
263 if childcopies:
251 newcopies = _chain(newcopies, childcopies)
264 newcopies = _chain(newcopies, childcopies)
252 for f in removed:
265 for f in removed:
253 if f in newcopies:
266 if f in newcopies:
254 del newcopies[f]
267 del newcopies[f]
255 othercopies = all_copies.get(c)
268 othercopies = all_copies.get(c)
256 if othercopies is None:
269 if othercopies is None:
257 heapq.heappush(work, c)
270 heapq.heappush(work, c)
258 all_copies[c] = newcopies
271 all_copies[c] = newcopies
259 else:
272 else:
260 # we are the second parent to work on c, we need to merge our
273 # we are the second parent to work on c, we need to merge our
261 # work with the other.
274 # work with the other.
262 #
275 #
263 # Unlike when copies are stored in the filelog, we consider
276 # Unlike when copies are stored in the filelog, we consider
264 # it a copy even if the destination already existed on the
277 # it a copy even if the destination already existed on the
265 # other branch. It's simply too expensive to check if the
278 # other branch. It's simply too expensive to check if the
266 # file existed in the manifest.
279 # file existed in the manifest.
267 #
280 #
268 # In case of conflict, parent 1 takes precedence over parent 2.
281 # In case of conflict, parent 1 takes precedence over parent 2.
269 # This is an arbitrary choice made anew when implementing
282 # This is an arbitrary choice made anew when implementing
270 # changeset based copies. It was made without regard to
283 # changeset based copies. It was made without regard to
271 # potential filelog related behavior.
284 # potential filelog related behavior.
272 if parent == 1:
285 if parent == 1:
273 othercopies.update(newcopies)
286 othercopies.update(newcopies)
274 else:
287 else:
275 newcopies.update(othercopies)
288 newcopies.update(othercopies)
276 all_copies[c] = newcopies
289 all_copies[c] = newcopies
277 assert False
290 assert False
278
291
279
292
280 def _forwardcopies(a, b, base=None, match=None):
293 def _forwardcopies(a, b, base=None, match=None):
281 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
294 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
282
295
283 if base is None:
296 if base is None:
284 base = a
297 base = a
285 match = a.repo().narrowmatch(match)
298 match = a.repo().narrowmatch(match)
286 # check for working copy
299 # check for working copy
287 if b.rev() is None:
300 if b.rev() is None:
288 cm = _committedforwardcopies(a, b.p1(), base, match)
301 cm = _committedforwardcopies(a, b.p1(), base, match)
289 # combine copies from dirstate if necessary
302 # combine copies from dirstate if necessary
290 copies = _chain(cm, _dirstatecopies(b._repo, match))
303 copies = _chain(cm, _dirstatecopies(b._repo, match))
291 else:
304 else:
292 copies = _committedforwardcopies(a, b, base, match)
305 copies = _committedforwardcopies(a, b, base, match)
293 return copies
306 return copies
294
307
295
308
296 def _backwardrenames(a, b, match):
309 def _backwardrenames(a, b, match):
297 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
310 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
298 return {}
311 return {}
299
312
300 # Even though we're not taking copies into account, 1:n rename situations
313 # Even though we're not taking copies into account, 1:n rename situations
301 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
314 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
302 # arbitrarily pick one of the renames.
315 # arbitrarily pick one of the renames.
303 # We don't want to pass in "match" here, since that would filter
316 # We don't want to pass in "match" here, since that would filter
304 # the destination by it. Since we're reversing the copies, we want
317 # the destination by it. Since we're reversing the copies, we want
305 # to filter the source instead.
318 # to filter the source instead.
306 f = _forwardcopies(b, a)
319 f = _forwardcopies(b, a)
307 r = {}
320 r = {}
308 for k, v in sorted(pycompat.iteritems(f)):
321 for k, v in sorted(pycompat.iteritems(f)):
309 if match and not match(v):
322 if match and not match(v):
310 continue
323 continue
311 # remove copies
324 # remove copies
312 if v in a:
325 if v in a:
313 continue
326 continue
314 r[v] = k
327 r[v] = k
315 return r
328 return r
316
329
317
330
318 def pathcopies(x, y, match=None):
331 def pathcopies(x, y, match=None):
319 """find {dst@y: src@x} copy mapping for directed compare"""
332 """find {dst@y: src@x} copy mapping for directed compare"""
320 repo = x._repo
333 repo = x._repo
321 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
334 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
322 if debug:
335 if debug:
323 repo.ui.debug(
336 repo.ui.debug(
324 b'debug.copies: searching copies from %s to %s\n' % (x, y)
337 b'debug.copies: searching copies from %s to %s\n' % (x, y)
325 )
338 )
326 if x == y or not x or not y:
339 if x == y or not x or not y:
327 return {}
340 return {}
328 a = y.ancestor(x)
341 a = y.ancestor(x)
329 if a == x:
342 if a == x:
330 if debug:
343 if debug:
331 repo.ui.debug(b'debug.copies: search mode: forward\n')
344 repo.ui.debug(b'debug.copies: search mode: forward\n')
332 if y.rev() is None and x == y.p1():
345 if y.rev() is None and x == y.p1():
333 # short-circuit to avoid issues with merge states
346 # short-circuit to avoid issues with merge states
334 return _dirstatecopies(repo, match)
347 return _dirstatecopies(repo, match)
335 copies = _forwardcopies(x, y, match=match)
348 copies = _forwardcopies(x, y, match=match)
336 elif a == y:
349 elif a == y:
337 if debug:
350 if debug:
338 repo.ui.debug(b'debug.copies: search mode: backward\n')
351 repo.ui.debug(b'debug.copies: search mode: backward\n')
339 copies = _backwardrenames(x, y, match=match)
352 copies = _backwardrenames(x, y, match=match)
340 else:
353 else:
341 if debug:
354 if debug:
342 repo.ui.debug(b'debug.copies: search mode: combined\n')
355 repo.ui.debug(b'debug.copies: search mode: combined\n')
343 base = None
356 base = None
344 if a.rev() != node.nullrev:
357 if a.rev() != node.nullrev:
345 base = x
358 base = x
346 copies = _chain(
359 copies = _chain(
347 _backwardrenames(x, a, match=match),
360 _backwardrenames(x, a, match=match),
348 _forwardcopies(a, y, base, match=match),
361 _forwardcopies(a, y, base, match=match),
349 )
362 )
350 _filter(x, y, copies)
363 _filter(x, y, copies)
351 return copies
364 return copies
352
365
353
366
354 def mergecopies(repo, c1, c2, base):
367 def mergecopies(repo, c1, c2, base):
355 """
368 """
356 Finds moves and copies between context c1 and c2 that are relevant for
369 Finds moves and copies between context c1 and c2 that are relevant for
357 merging. 'base' will be used as the merge base.
370 merging. 'base' will be used as the merge base.
358
371
359 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
372 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
360 files that were moved/ copied in one merge parent and modified in another.
373 files that were moved/ copied in one merge parent and modified in another.
361 For example:
374 For example:
362
375
363 o ---> 4 another commit
376 o ---> 4 another commit
364 |
377 |
365 | o ---> 3 commit that modifies a.txt
378 | o ---> 3 commit that modifies a.txt
366 | /
379 | /
367 o / ---> 2 commit that moves a.txt to b.txt
380 o / ---> 2 commit that moves a.txt to b.txt
368 |/
381 |/
369 o ---> 1 merge base
382 o ---> 1 merge base
370
383
371 If we try to rebase revision 3 on revision 4, since there is no a.txt in
384 If we try to rebase revision 3 on revision 4, since there is no a.txt in
372 revision 4, and if the user has copytrace disabled, we print the following
385 revision 4, and if the user has copytrace disabled, we print the following
373 message:
386 message:
374
387
375 ```other changed <file> which local deleted```
388 ```other changed <file> which local deleted```
376
389
377 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
390 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
378 "dirmove".
391 "dirmove".
379
392
380 "copy" is a mapping from destination name -> source name,
393 "copy" is a mapping from destination name -> source name,
381 where source is in c1 and destination is in c2 or vice-versa.
394 where source is in c1 and destination is in c2 or vice-versa.
382
395
383 "movewithdir" is a mapping from source name -> destination name,
396 "movewithdir" is a mapping from source name -> destination name,
384 where the file at source is present in one context but not the other
397 where the file at source is present in one context but not the other
385 needs to be moved to destination by the merge process, because the
398 needs to be moved to destination by the merge process, because the
386 other context moved the directory it is in.
399 other context moved the directory it is in.
387
400
388 "diverge" is a mapping of source name -> list of destination names
401 "diverge" is a mapping of source name -> list of destination names
389 for divergent renames.
402 for divergent renames.
390
403
391 "renamedelete" is a mapping of source name -> list of destination
404 "renamedelete" is a mapping of source name -> list of destination
392 names for files deleted in c1 that were renamed in c2 or vice-versa.
405 names for files deleted in c1 that were renamed in c2 or vice-versa.
393
406
394 "dirmove" is a mapping of detected source dir -> destination dir renames.
407 "dirmove" is a mapping of detected source dir -> destination dir renames.
395 This is needed for handling changes to new files previously grafted into
408 This is needed for handling changes to new files previously grafted into
396 renamed directories.
409 renamed directories.
397
410
398 This function calls different copytracing algorithms based on config.
411 This function calls different copytracing algorithms based on config.
399 """
412 """
400 # avoid silly behavior for update from empty dir
413 # avoid silly behavior for update from empty dir
401 if not c1 or not c2 or c1 == c2:
414 if not c1 or not c2 or c1 == c2:
402 return {}, {}, {}, {}, {}
415 return {}, {}, {}, {}, {}
403
416
404 narrowmatch = c1.repo().narrowmatch()
417 narrowmatch = c1.repo().narrowmatch()
405
418
406 # avoid silly behavior for parent -> working dir
419 # avoid silly behavior for parent -> working dir
407 if c2.node() is None and c1.node() == repo.dirstate.p1():
420 if c2.node() is None and c1.node() == repo.dirstate.p1():
408 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
421 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
409
422
410 copytracing = repo.ui.config(b'experimental', b'copytrace')
423 copytracing = repo.ui.config(b'experimental', b'copytrace')
411 if stringutil.parsebool(copytracing) is False:
424 if stringutil.parsebool(copytracing) is False:
412 # stringutil.parsebool() returns None when it is unable to parse the
425 # stringutil.parsebool() returns None when it is unable to parse the
413 # value, so we should rely on making sure copytracing is on such cases
426 # value, so we should rely on making sure copytracing is on such cases
414 return {}, {}, {}, {}, {}
427 return {}, {}, {}, {}, {}
415
428
416 if usechangesetcentricalgo(repo):
429 if usechangesetcentricalgo(repo):
417 # The heuristics don't make sense when we need changeset-centric algos
430 # The heuristics don't make sense when we need changeset-centric algos
418 return _fullcopytracing(repo, c1, c2, base)
431 return _fullcopytracing(repo, c1, c2, base)
419
432
420 # Copy trace disabling is explicitly below the node == p1 logic above
433 # Copy trace disabling is explicitly below the node == p1 logic above
421 # because the logic above is required for a simple copy to be kept across a
434 # because the logic above is required for a simple copy to be kept across a
422 # rebase.
435 # rebase.
423 if copytracing == b'heuristics':
436 if copytracing == b'heuristics':
424 # Do full copytracing if only non-public revisions are involved as
437 # Do full copytracing if only non-public revisions are involved as
425 # that will be fast enough and will also cover the copies which could
438 # that will be fast enough and will also cover the copies which could
426 # be missed by heuristics
439 # be missed by heuristics
427 if _isfullcopytraceable(repo, c1, base):
440 if _isfullcopytraceable(repo, c1, base):
428 return _fullcopytracing(repo, c1, c2, base)
441 return _fullcopytracing(repo, c1, c2, base)
429 return _heuristicscopytracing(repo, c1, c2, base)
442 return _heuristicscopytracing(repo, c1, c2, base)
430 else:
443 else:
431 return _fullcopytracing(repo, c1, c2, base)
444 return _fullcopytracing(repo, c1, c2, base)
432
445
433
446
434 def _isfullcopytraceable(repo, c1, base):
447 def _isfullcopytraceable(repo, c1, base):
435 """ Checks that if base, source and destination are all non-public branches,
448 """ Checks that if base, source and destination are all non-public branches,
436 if yes let's use the full copytrace algorithm for increased capabilities
449 if yes let's use the full copytrace algorithm for increased capabilities
437 since it will be fast enough.
450 since it will be fast enough.
438
451
439 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
452 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
440 number of changesets from c1 to base such that if the number of changesets is
453 number of changesets from c1 to base such that if the number of changesets is
441 more than the limit, full copytracing algorithm won't be used.
454 more than the limit, full copytracing algorithm won't be used.
442 """
455 """
443 if c1.rev() is None:
456 if c1.rev() is None:
444 c1 = c1.p1()
457 c1 = c1.p1()
445 if c1.mutable() and base.mutable():
458 if c1.mutable() and base.mutable():
446 sourcecommitlimit = repo.ui.configint(
459 sourcecommitlimit = repo.ui.configint(
447 b'experimental', b'copytrace.sourcecommitlimit'
460 b'experimental', b'copytrace.sourcecommitlimit'
448 )
461 )
449 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
462 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
450 return commits < sourcecommitlimit
463 return commits < sourcecommitlimit
451 return False
464 return False
452
465
453
466
454 def _checksinglesidecopies(
467 def _checksinglesidecopies(
455 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
468 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
456 ):
469 ):
457 if src not in m2:
470 if src not in m2:
458 # deleted on side 2
471 # deleted on side 2
459 if src not in m1:
472 if src not in m1:
460 # renamed on side 1, deleted on side 2
473 # renamed on side 1, deleted on side 2
461 renamedelete[src] = dsts1
474 renamedelete[src] = dsts1
462 elif m2[src] != mb[src]:
475 elif m2[src] != mb[src]:
463 if not _related(c2[src], base[src]):
476 if not _related(c2[src], base[src]):
464 return
477 return
465 # modified on side 2
478 # modified on side 2
466 for dst in dsts1:
479 for dst in dsts1:
467 if dst not in m2:
480 if dst not in m2:
468 # dst not added on side 2 (handle as regular
481 # dst not added on side 2 (handle as regular
469 # "both created" case in manifestmerge otherwise)
482 # "both created" case in manifestmerge otherwise)
470 copy[dst] = src
483 copy[dst] = src
471
484
472
485
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple of dicts:

    - copy: {dst: src} copies to merge (dst in c1/c2, src in base)
    - movewithdir: {file: newpath} files to move because their directory moved
    - diverge: {src: [dsts]} sources renamed to different names on each side
    - renamedelete: {src: [dsts]} renamed on one side, deleted on the other
    - dirmove: {srcdir + "/": dstdir + "/"} detected directory renames
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert each side's {dst: src} mapping into {src: [dsts]} so a source
    # copied to several destinations can be classified in one pass
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flattened destination sets, used only for the debug annotations below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        # no copies at all: nothing further to detect
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so prefix matching below is unambiguous
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
637
650
638
651
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple shape as _fullcopytracing(); only the first
    (copies) dict is ever populated by the heuristic path.
    """

    # a rev of None indicates a working context; substitute its first parent
    # so revision-number based queries below work
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index the files added on side 1 by basename and by directory so
        # rename candidates can be looked up cheaply per missing file
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
746
759
747
760
748 def _related(f1, f2):
761 def _related(f1, f2):
749 """return True if f1 and f2 filectx have a common ancestor
762 """return True if f1 and f2 filectx have a common ancestor
750
763
751 Walk back to common ancestor to see if the two files originate
764 Walk back to common ancestor to see if the two files originate
752 from the same file. Since workingfilectx's rev() is None it messes
765 from the same file. Since workingfilectx's rev() is None it messes
753 up the integer comparison logic, hence the pre-step check for
766 up the integer comparison logic, hence the pre-step check for
754 None (f1 and f2 can only be workingfilectx's initially).
767 None (f1 and f2 can only be workingfilectx's initially).
755 """
768 """
756
769
757 if f1 == f2:
770 if f1 == f2:
758 return True # a match
771 return True # a match
759
772
760 g1, g2 = f1.ancestors(), f2.ancestors()
773 g1, g2 = f1.ancestors(), f2.ancestors()
761 try:
774 try:
762 f1r, f2r = f1.linkrev(), f2.linkrev()
775 f1r, f2r = f1.linkrev(), f2.linkrev()
763
776
764 if f1r is None:
777 if f1r is None:
765 f1 = next(g1)
778 f1 = next(g1)
766 if f2r is None:
779 if f2r is None:
767 f2 = next(g2)
780 f2 = next(g2)
768
781
769 while True:
782 while True:
770 f1r, f2r = f1.linkrev(), f2.linkrev()
783 f1r, f2r = f1.linkrev(), f2.linkrev()
771 if f1r > f2r:
784 if f1r > f2r:
772 f1 = next(g1)
785 f1 = next(g1)
773 elif f2r > f1r:
786 elif f2r > f1r:
774 f2 = next(g2)
787 f2 = next(g2)
775 else: # f1 and f2 point to files in the same linkrev
788 else: # f1 and f2 point to files in the same linkrev
776 return f1 == f2 # true if they point to the same file
789 return f1 == f2 # true if they point to the same file
777 except StopIteration:
790 except StopIteration:
778 return False
791 return False
779
792
780
793
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    # only build the exclusion set when copytracing is effectively enabled:
    # 'heuristics', an explicit true value, or unset (None, the default)
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            # copy already present between fromrev and skiprev: do not repeat
            continue
        if dst in wctx:
            # record the copy in the working context's dirstate
            wctx[dst].markcopied(src)
805
818
806
819
def computechangesetfilesadded(ctx):
    """Return the list of files added in a changeset.

    A file counts as added when no parent of ``ctx`` contains it.
    """
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
815
828
816
829
def computechangesetfilesremoved(ctx):
    """Return the list of files removed in a changeset.

    A file counts as removed when it is listed in ``ctx.files()`` but is
    no longer present in ``ctx`` itself.
    """
    return [f for f in ctx.files() if f not in ctx]
825
838
826
839
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrowspec and files not present in ctx
        # (e.g. removed files carry no copy information)
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        # attribute the copy to whichever parent actually contains the
        # source file at the recorded node
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
851
864
852
865
def encodecopies(files, copies):
    """Encode a ``{dest: source}`` copy mapping against a file list.

    Each entry becomes ``<index>\\0<source>`` where ``<index>`` is the
    position of the destination in ``files``; entries are joined with
    newlines. Raises error.ProgrammingError when a copy destination does
    not appear in ``files``.
    """
    entries = [
        b'%d\0%s' % (idx, copies[dst])
        for idx, dst in enumerate(files)
        if dst in copies
    ]
    if len(entries) != len(copies):
        # at least one copy target was not found in the file list
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
863
876
864
877
def decodecopies(files, data):
    """Decode ``encodecopies()`` output back into a ``{dest: source}`` dict.

    Returns None when the payload cannot be parsed.
    """
    try:
        copies = {}
        if data:
            for line in data.split(b'\n'):
                strindex, src = line.split(b'\0')
                copies[files[int(strindex)]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
880
893
881
894
def encodefileindices(files, subset):
    """Encode ``subset`` as newline-separated decimal indices into ``files``.

    Files in ``subset`` that do not occur in ``files`` are silently ignored.
    """
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, f in enumerate(files) if f in wanted
    )
889
902
890
903
def decodefileindices(files, data):
    """Decode ``encodefileindices()`` output back into a list of files.

    Returns None for out-of-range indices or an unparsable payload.
    """
    subset = []
    if not data:
        return subset
    try:
        for token in data.split(b'\n'):
            idx = int(token)
            if not (0 <= idx < len(files)):
                # reject indices outside the file list
                return None
            subset.append(files[idx])
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
    return subset
906
919
907
920
def _getsidedata(srcrepo, rev):
    """Compute the changelog sidedata for ``rev`` from its file metadata.

    Gathers the copies, added files and removed files of the changeset and
    encodes them under the dedicated sidedata keys. Returns a (possibly
    empty) dict mapping sidedata keys to encoded byte payloads.
    """
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([filescopies, filesadded, filesremoved]):
        # all encodings are index-based against the sorted file list
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        # only store non-empty payloads
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
930
943
931
944
def getsidedataadder(srcrepo, destrepo):
    """Build a sidedata companion that adds copy metadata to the changelog.

    The returned callable yields a ``(False, (), sidedata)`` triple where
    ``sidedata`` holds the encoded copy information for changelog revisions
    and stays empty for filelogs and manifests.
    """

    def sidedatacompanion(revlog, rev):
        newsidedata = {}
        # only the changelog (recognized by its filteredrevs attribute)
        # carries copy-tracing sidedata
        if util.safehasattr(revlog, 'filteredrevs'):
            newsidedata = _getsidedata(srcrepo, rev)
        return False, (), newsidedata

    return sidedatacompanion
940
953
941
954
def getsidedataremover(srcrepo, destrepo):
    """Build a sidedata companion that strips copy metadata sidedata.

    The returned callable yields a ``(False, keys, {})`` triple where
    ``keys`` lists the copy-tracing sidedata keys to drop for changelog
    revisions that actually carry sidedata, and is empty otherwise.
    """

    def sidedatacompanion(revlog, rev):
        keys = ()
        # only the changelog (recognized by its filteredrevs attribute)
        # stores copy-tracing sidedata
        if util.safehasattr(revlog, 'filteredrevs'):
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                keys = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, keys, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now