##// END OF EJS Templates
sidedatacopies: fast path data fetching if revision has no sidedata...
marmoute -
r43591:8a292526 default
parent child Browse files
Show More
@@ -1,968 +1,974 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16
16
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18
18
19 from . import (
19 from . import (
20 error,
20 error,
21 match as matchmod,
21 match as matchmod,
22 node,
22 node,
23 pathutil,
23 pathutil,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28 from .revlogutils import sidedata as sidedatamod
28 from .revlogutils import sidedata as sidedatamod
29
29
30 from .utils import stringutil
30 from .utils import stringutil
31
31
32
32
33 def _filter(src, dst, t):
33 def _filter(src, dst, t):
34 """filters out invalid copies after chaining"""
34 """filters out invalid copies after chaining"""
35
35
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 # in the following table (not including trivial cases). For example, case 2
38 # in the following table (not including trivial cases). For example, case 2
39 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 # then was renamed between 'mid' and 'dst'.
40 # then was renamed between 'mid' and 'dst'.
41 #
41 #
42 # case src mid dst result
42 # case src mid dst result
43 # 1 x y - -
43 # 1 x y - -
44 # 2 x y y x->y
44 # 2 x y y x->y
45 # 3 x y x -
45 # 3 x y x -
46 # 4 x y z x->z
46 # 4 x y z x->z
47 # 5 - x y -
47 # 5 - x y -
48 # 6 x x y x->y
48 # 6 x x y x->y
49 #
49 #
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 # between 5 and 6, so it includes all cases in its result.
52 # between 5 and 6, so it includes all cases in its result.
53 # Cases 1, 3, and 5 are then removed by _filter().
53 # Cases 1, 3, and 5 are then removed by _filter().
54
54
55 for k, v in list(t.items()):
55 for k, v in list(t.items()):
56 # remove copies from files that didn't exist
56 # remove copies from files that didn't exist
57 if v not in src:
57 if v not in src:
58 del t[k]
58 del t[k]
59 # remove criss-crossed copies
59 # remove criss-crossed copies
60 elif k in src and v in dst:
60 elif k in src and v in dst:
61 del t[k]
61 del t[k]
62 # remove copies to files that were then removed
62 # remove copies to files that were then removed
63 elif k not in dst:
63 elif k not in dst:
64 del t[k]
64 del t[k]
65
65
66
66
def _chain(a, b):
    """Compose the copy mapping ``b`` on top of the copy mapping ``a``."""
    composed = a.copy()
    for dest, source in pycompat.iteritems(b):
        # If 'b' copies from a destination produced by 'a', follow the
        # chain back to the original source; otherwise keep the copy as-is.
        composed[dest] = composed[source] if source in composed else source
    return composed
76
76
77
77
78 def _tracefile(fctx, am, basemf):
78 def _tracefile(fctx, am, basemf):
79 """return file context that is the ancestor of fctx present in ancestor
79 """return file context that is the ancestor of fctx present in ancestor
80 manifest am
80 manifest am
81
81
82 Note: we used to try and stop after a given limit, however checking if that
82 Note: we used to try and stop after a given limit, however checking if that
83 limit is reached turned out to be very expensive. we are better off
83 limit is reached turned out to be very expensive. we are better off
84 disabling that feature."""
84 disabling that feature."""
85
85
86 for f in fctx.ancestors():
86 for f in fctx.ancestors():
87 path = f.path()
87 path = f.path()
88 if am.get(path, None) == f.filenode():
88 if am.get(path, None) == f.filenode():
89 return path
89 return path
90 if basemf and basemf.get(path, None) == f.filenode():
90 if basemf and basemf.get(path, None) == f.filenode():
91 return path
91 return path
92
92
93
93
94 def _dirstatecopies(repo, match=None):
94 def _dirstatecopies(repo, match=None):
95 ds = repo.dirstate
95 ds = repo.dirstate
96 c = ds.copies().copy()
96 c = ds.copies().copy()
97 for k in list(c):
97 for k in list(c):
98 if ds[k] not in b'anm' or (match and not match(k)):
98 if ds[k] not in b'anm' or (match and not match(k)):
99 del c[k]
99 del c[k]
100 return c
100 return c
101
101
102
102
103 def _computeforwardmissing(a, b, match=None):
103 def _computeforwardmissing(a, b, match=None):
104 """Computes which files are in b but not a.
104 """Computes which files are in b but not a.
105 This is its own function so extensions can easily wrap this call to see what
105 This is its own function so extensions can easily wrap this call to see what
106 files _forwardcopies is about to process.
106 files _forwardcopies is about to process.
107 """
107 """
108 ma = a.manifest()
108 ma = a.manifest()
109 mb = b.manifest()
109 mb = b.manifest()
110 return mb.filesnotin(ma, match=match)
110 return mb.filesnotin(ma, match=match)
111
111
112
112
def usechangesetcentricalgo(repo):
    """Return True if changeset-centric copy algorithms should be used."""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    # both of these read-from sources rely on changeset-stored copy data
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
120
120
121
121
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy).

    Returns a {dst-path: src-path} mapping of copies from `a` to `b`.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared ancestry context speeds up the repeated fctx.ancestors() walks
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
179
179
180
180
def _revinfogetter(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    """
    cl = repo.changelog
    parents = cl.parentrevs

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & REVIDX_SIDEDATA:
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
            else:
                # fast path: a revision without the sidedata flag cannot
                # carry any copy information, so skip the (expensive)
                # changelogrevision lookup entirely
                p1copies = {}
                p2copies = {}
                removed = ()
            return p1, p2, p1copies, p2copies, removed

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed

    return revinfo
214
220
215
221
def _changesetforwardcopies(a, b, match):
    """Find {dst@b: src@a} copies using changeset-stored copy metadata.

    Walks every revision that is an ancestor of `b` but not of `a`,
    parents before children, chaining per-changeset copy information
    down until `b` itself is reached.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    # build a parent-rev -> list-of-children map over the missing range
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    roots = set(children) - set(missingrevs)
    work = list(roots)
    all_copies = {r: {} for r in roots}
    # a min-heap of revision numbers guarantees parents are processed
    # before their children (child revs are always numerically larger)
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        r = heapq.heappop(work)
        copies = all_copies.pop(r)
        if r == b.rev():
            return copies
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            # pick the copy data relative to the parent we came from
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            # a file removed in 'c' breaks any copy chain through it
            for f in removed:
                if f in newcopies:
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                heapq.heappush(work, c)
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    # unreachable: the loop must return once b.rev() is popped
    assert False
291
297
292
298
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is None:
        # working copy: combine the committed copies up to b's first
        # parent with the pending copies recorded in the dirstate
        committed = _committedforwardcopies(a, b.p1(), base, match)
        return _chain(committed, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, base, match)
307
313
308
314
def _backwardrenames(a, b, match):
    """Find renames from ``b`` back to its descendant ``a``."""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    backward = {}
    for dest, source in sorted(pycompat.iteritems(forward)):
        if match and not match(source):
            continue
        if source in a:
            # the source still exists in 'a': a copy, not a rename
            continue
        backward[source] = dest
    return backward
329
335
330
336
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    # trivial cases: identical endpoints or an empty side
    if x == y or not x or not y:
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: plain forward search
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        result = _forwardcopies(x, y, match=match)
    elif anc == y:
        # y is an ancestor of x: trace renames backwards
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        result = _backwardrenames(x, y, match=match)
    else:
        # unrelated lineages: walk back to the common ancestor,
        # then forward to y
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = x if anc.rev() != node.nullrev else None
        result = _chain(
            _backwardrenames(x, anc, match=match),
            _forwardcopies(anc, y, base, match=match),
        )
    _filter(x, y, result)
    return result
365
371
366
372
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
445
451
446
452
447 def _isfullcopytraceable(repo, c1, base):
453 def _isfullcopytraceable(repo, c1, base):
448 """ Checks that if base, source and destination are all no-public branches,
454 """ Checks that if base, source and destination are all no-public branches,
449 if yes let's use the full copytrace algorithm for increased capabilities
455 if yes let's use the full copytrace algorithm for increased capabilities
450 since it will be fast enough.
456 since it will be fast enough.
451
457
452 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
458 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
453 number of changesets from c1 to base such that if number of changesets are
459 number of changesets from c1 to base such that if number of changesets are
454 more than the limit, full copytracing algorithm won't be used.
460 more than the limit, full copytracing algorithm won't be used.
455 """
461 """
456 if c1.rev() is None:
462 if c1.rev() is None:
457 c1 = c1.p1()
463 c1 = c1.p1()
458 if c1.mutable() and base.mutable():
464 if c1.mutable() and base.mutable():
459 sourcecommitlimit = repo.ui.configint(
465 sourcecommitlimit = repo.ui.configint(
460 b'experimental', b'copytrace.sourcecommitlimit'
466 b'experimental', b'copytrace.sourcecommitlimit'
461 )
467 )
462 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
468 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
463 return commits < sourcecommitlimit
469 return commits < sourcecommitlimit
464 return False
470 return False
465
471
466
472
467 def _checksinglesidecopies(
473 def _checksinglesidecopies(
468 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
474 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
469 ):
475 ):
470 if src not in m2:
476 if src not in m2:
471 # deleted on side 2
477 # deleted on side 2
472 if src not in m1:
478 if src not in m1:
473 # renamed on side 1, deleted on side 2
479 # renamed on side 1, deleted on side 2
474 renamedelete[src] = dsts1
480 renamedelete[src] = dsts1
475 elif m2[src] != mb[src]:
481 elif m2[src] != mb[src]:
476 if not _related(c2[src], base[src]):
482 if not _related(c2[src], base[src]):
477 return
483 return
478 # modified on side 2
484 # modified on side 2
479 for dst in dsts1:
485 for dst in dsts1:
480 if dst not in m2:
486 if dst not in m2:
481 # dst not added on side 2 (handle as regular
487 # dst not added on side 2 (handle as regular
482 # "both created" case in manifestmerge otherwise)
488 # "both created" case in manifestmerge otherwise)
483 copy[dst] = src
489 copy[dst] = src
484
490
485
491
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``:

    - ``copy``: mapping of destination file -> source file to merge with
    - ``movewithdir``: files that should follow a detected directory rename
    - ``diverge``: source -> sorted list of divergent rename destinations
    - ``renamedelete``: source renamed on one side, deleted on the other
    - ``dirmove``: directory rename map (``b'src/'`` -> ``b'dst/'``)
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert the dst -> src copy maps so each source lists its destinations
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flattened destination sets, used only for the debug output below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        # no copies at all: nothing further to analyze
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
650
656
651
657
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple as _fullcopytracing(); only the first element
    (the copy map) is ever populated by the heuristics.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (read once here: the value is loop-invariant, so re-reading it for
        # every missing file was pure overhead)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
759
765
760
766
761 def _related(f1, f2):
767 def _related(f1, f2):
762 """return True if f1 and f2 filectx have a common ancestor
768 """return True if f1 and f2 filectx have a common ancestor
763
769
764 Walk back to common ancestor to see if the two files originate
770 Walk back to common ancestor to see if the two files originate
765 from the same file. Since workingfilectx's rev() is None it messes
771 from the same file. Since workingfilectx's rev() is None it messes
766 up the integer comparison logic, hence the pre-step check for
772 up the integer comparison logic, hence the pre-step check for
767 None (f1 and f2 can only be workingfilectx's initially).
773 None (f1 and f2 can only be workingfilectx's initially).
768 """
774 """
769
775
770 if f1 == f2:
776 if f1 == f2:
771 return True # a match
777 return True # a match
772
778
773 g1, g2 = f1.ancestors(), f2.ancestors()
779 g1, g2 = f1.ancestors(), f2.ancestors()
774 try:
780 try:
775 f1r, f2r = f1.linkrev(), f2.linkrev()
781 f1r, f2r = f1.linkrev(), f2.linkrev()
776
782
777 if f1r is None:
783 if f1r is None:
778 f1 = next(g1)
784 f1 = next(g1)
779 if f2r is None:
785 if f2r is None:
780 f2 = next(g2)
786 f2 = next(g2)
781
787
782 while True:
788 while True:
783 f1r, f2r = f1.linkrev(), f2.linkrev()
789 f1r, f2r = f1.linkrev(), f2.linkrev()
784 if f1r > f2r:
790 if f1r > f2r:
785 f1 = next(g1)
791 f1 = next(g1)
786 elif f2r > f1r:
792 elif f2r > f1r:
787 f2 = next(g2)
793 f2 = next(g2)
788 else: # f1 and f2 point to files in the same linkrev
794 else: # f1 and f2 point to files in the same linkrev
789 return f1 == f2 # true if they point to the same file
795 return f1 == f2 # true if they point to the same file
790 except StopIteration:
796 except StopIteration:
791 return False
797 return False
792
798
793
799
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    # copytrace is considered enabled for 'heuristics', any truthy value,
    # or when the setting is absent/unparsable (None)
    tracingenabled = (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    )

    exclude = {}
    if skiprev is not None and tracingenabled:
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])

    allcopies = pathcopies(repo[fromrev], repo[rev])
    for dst, src in pycompat.iteritems(allcopies):
        if dst in exclude or dst not in wctx:
            continue
        wctx[dst].markcopied(src)
818
824
819
825
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset
    """
    # a file is "added" when no parent of the changeset contains it
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
828
834
829
835
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    # a touched file that is absent from the changeset itself was removed
    return [f for f in ctx.files() if f not in ctx]
838
844
839
845
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    copiesfromp1 = {}
    copiesfromp2 = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for newname in ctx.files():
        if not narrowmatch(newname):
            # outside the narrow spec: ignore
            continue
        if newname not in ctx:
            # file was removed in this changeset
            continue
        rename = ctx[newname].renamed()
        if not rename:
            continue
        oldname, oldnode = rename
        # attribute the copy to whichever parent actually holds the
        # recorded source filenode
        if oldname in parent1 and parent1[oldname].filenode() == oldnode:
            copiesfromp1[newname] = oldname
        elif oldname in parent2 and parent2[oldname].filenode() == oldnode:
            copiesfromp2[newname] = oldname
    return copiesfromp1, copiesfromp2
864
870
865
871
def encodecopies(files, copies):
    """Encode the `{dst: src}` map *copies* against the sorted *files* list.

    Each entry becomes ``b'<index>\\0<source>'`` where index is the
    position of the destination in *files*; entries are newline-joined.
    Raises ProgrammingError if a copy target is absent from *files*.
    """
    entries = [
        b'%d\0%s' % (index, copies[dst])
        for index, dst in enumerate(files)
        if dst in copies
    ]
    if len(entries) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
876
882
877
883
def decodecopies(files, data):
    """Decode *data* produced by encodecopies() back into a `{dst: src}` map.

    Returns None when the payload does not parse, since an extension may
    have stored something else under the same sidedata key.
    """
    try:
        copies = {}
        if not data:
            return copies
        for entry in data.split(b'\n'):
            strindex, src = entry.split(b'\0')
            copies[files[int(strindex)]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
893
899
894
900
def encodefileindices(files, subset):
    """Encode *subset* as newline-separated indices into *files*."""
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % index for index, name in enumerate(files) if name in wanted
    )
902
908
903
909
def decodefileindices(files, data):
    """Decode data produced by encodefileindices() into a file-name list.

    Returns None when the payload does not parse or an index is out of
    range, since an extension may have stored something else under the
    same sidedata key.
    """
    try:
        if not data:
            return []
        subset = []
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if 0 <= i < len(files):
                subset.append(files[i])
            else:
                # out-of-range index: treat the whole payload as foreign
                return None
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
919
925
920
926
def _getsidedata(srcrepo, rev):
    """Compute the copy-tracing sidedata for revision *rev* of *srcrepo*.

    Returns a dict mapping sidedata keys (SD_P1COPIES, SD_P2COPIES,
    SD_FILESADDED, SD_FILESREMOVED) to encoded values. Keys whose encoded
    value is empty are omitted, so a revision with no copy information
    yields an empty dict.
    """
    ctx = srcrepo[rev]
    p1copies, p2copies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # Bug fix: the previous guard tested `any([filescopies, ...])` where
    # filescopies was the (p1copies, p2copies) tuple — a non-empty tuple
    # is always truthy, so the guard never skipped anything. Testing the
    # actual containers skips the encoding work when nothing is recorded;
    # the returned mapping is unchanged either way.
    if any([p1copies, p2copies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
943
949
944
950
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion that (re)computes copy-tracing data.

    The companion fills in sidedata from *srcrepo* for changelog
    revisions and leaves every other revlog untouched.
    """

    def sidedatacompanion(revlog, rev):
        # only the changelog carries a `filteredrevs` attribute
        ischangelog = util.safehasattr(revlog, 'filteredrevs')
        if ischangelog:
            sidedata = _getsidedata(srcrepo, rev)
        else:
            sidedata = {}
        return False, (), sidedata

    return sidedatacompanion
953
959
954
960
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion that strips copy-tracing sidedata.

    For changelog revisions that actually carry sidedata, the companion
    requests removal of the four copy-related keys; everything else is
    passed through unchanged.
    """

    def sidedatacompanion(revlog, rev):
        removals = ()
        # only the changelog carries a `filteredrevs` attribute
        if util.safehasattr(revlog, 'filteredrevs'):
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                removals = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, removals, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now