##// END OF EJS Templates
copies: simplify the handling of merges...
marmoute -
r43527:0aef9177 default draft
parent child Browse files
Show More
@@ -1,927 +1,931 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16
16
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18
18
19 from . import (
19 from . import (
20 error,
20 error,
21 match as matchmod,
21 match as matchmod,
22 node,
22 node,
23 pathutil,
23 pathutil,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28 from .revlogutils import sidedata as sidedatamod
28 from .revlogutils import sidedata as sidedatamod
29
29
30 from .utils import stringutil
30 from .utils import stringutil
31
31
32
32
33 def _filter(src, dst, t):
33 def _filter(src, dst, t):
34 """filters out invalid copies after chaining"""
34 """filters out invalid copies after chaining"""
35
35
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 # in the following table (not including trivial cases). For example, case 2
38 # in the following table (not including trivial cases). For example, case 2
39 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 # then was renamed between 'mid' and 'dst'.
40 # then was renamed between 'mid' and 'dst'.
41 #
41 #
42 # case src mid dst result
42 # case src mid dst result
43 # 1 x y - -
43 # 1 x y - -
44 # 2 x y y x->y
44 # 2 x y y x->y
45 # 3 x y x -
45 # 3 x y x -
46 # 4 x y z x->z
46 # 4 x y z x->z
47 # 5 - x y -
47 # 5 - x y -
48 # 6 x x y x->y
48 # 6 x x y x->y
49 #
49 #
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 # between 5 and 6, so it includes all cases in its result.
52 # between 5 and 6, so it includes all cases in its result.
53 # Cases 1, 3, and 5 are then removed by _filter().
53 # Cases 1, 3, and 5 are then removed by _filter().
54
54
55 for k, v in list(t.items()):
55 for k, v in list(t.items()):
56 # remove copies from files that didn't exist
56 # remove copies from files that didn't exist
57 if v not in src:
57 if v not in src:
58 del t[k]
58 del t[k]
59 # remove criss-crossed copies
59 # remove criss-crossed copies
60 elif k in src and v in dst:
60 elif k in src and v in dst:
61 del t[k]
61 del t[k]
62 # remove copies to files that were then removed
62 # remove copies to files that were then removed
63 elif k not in dst:
63 elif k not in dst:
64 del t[k]
64 del t[k]
65
65
66
66
67 def _chain(a, b):
67 def _chain(a, b):
68 """chain two sets of copies 'a' and 'b'"""
68 """chain two sets of copies 'a' and 'b'"""
69 t = a.copy()
69 t = a.copy()
70 for k, v in pycompat.iteritems(b):
70 for k, v in pycompat.iteritems(b):
71 if v in t:
71 if v in t:
72 t[k] = t[v]
72 t[k] = t[v]
73 else:
73 else:
74 t[k] = v
74 t[k] = v
75 return t
75 return t
76
76
77
77
78 def _tracefile(fctx, am, basemf):
78 def _tracefile(fctx, am, basemf):
79 """return file context that is the ancestor of fctx present in ancestor
79 """return file context that is the ancestor of fctx present in ancestor
80 manifest am
80 manifest am
81
81
82 Note: we used to try and stop after a given limit, however checking if that
82 Note: we used to try and stop after a given limit, however checking if that
83 limit is reached turned out to be very expensive. we are better off
83 limit is reached turned out to be very expensive. we are better off
84 disabling that feature."""
84 disabling that feature."""
85
85
86 for f in fctx.ancestors():
86 for f in fctx.ancestors():
87 path = f.path()
87 path = f.path()
88 if am.get(path, None) == f.filenode():
88 if am.get(path, None) == f.filenode():
89 return path
89 return path
90 if basemf and basemf.get(path, None) == f.filenode():
90 if basemf and basemf.get(path, None) == f.filenode():
91 return path
91 return path
92
92
93
93
94 def _dirstatecopies(repo, match=None):
94 def _dirstatecopies(repo, match=None):
95 ds = repo.dirstate
95 ds = repo.dirstate
96 c = ds.copies().copy()
96 c = ds.copies().copy()
97 for k in list(c):
97 for k in list(c):
98 if ds[k] not in b'anm' or (match and not match(k)):
98 if ds[k] not in b'anm' or (match and not match(k)):
99 del c[k]
99 del c[k]
100 return c
100 return c
101
101
102
102
103 def _computeforwardmissing(a, b, match=None):
103 def _computeforwardmissing(a, b, match=None):
104 """Computes which files are in b but not a.
104 """Computes which files are in b but not a.
105 This is its own function so extensions can easily wrap this call to see what
105 This is its own function so extensions can easily wrap this call to see what
106 files _forwardcopies is about to process.
106 files _forwardcopies is about to process.
107 """
107 """
108 ma = a.manifest()
108 ma = a.manifest()
109 mb = b.manifest()
109 mb = b.manifest()
110 return mb.filesnotin(ma, match=match)
110 return mb.filesnotin(ma, match=match)
111
111
112
112
113 def usechangesetcentricalgo(repo):
113 def usechangesetcentricalgo(repo):
114 """Checks if we should use changeset-centric copy algorithms"""
114 """Checks if we should use changeset-centric copy algorithms"""
115 if repo.filecopiesmode == b'changeset-sidedata':
115 if repo.filecopiesmode == b'changeset-sidedata':
116 return True
116 return True
117 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
117 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
118 changesetsource = (b'changeset-only', b'compatibility')
118 changesetsource = (b'changeset-only', b'compatibility')
119 return readfrom in changesetsource
119 return readfrom in changesetsource
120
120
121
121
122 def _committedforwardcopies(a, b, base, match):
122 def _committedforwardcopies(a, b, base, match):
123 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
123 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
124 # files might have to be traced back to the fctx parent of the last
124 # files might have to be traced back to the fctx parent of the last
125 # one-side-only changeset, but not further back than that
125 # one-side-only changeset, but not further back than that
126 repo = a._repo
126 repo = a._repo
127
127
128 if usechangesetcentricalgo(repo):
128 if usechangesetcentricalgo(repo):
129 return _changesetforwardcopies(a, b, match)
129 return _changesetforwardcopies(a, b, match)
130
130
131 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
131 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
132 dbg = repo.ui.debug
132 dbg = repo.ui.debug
133 if debug:
133 if debug:
134 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
134 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
135 am = a.manifest()
135 am = a.manifest()
136 basemf = None if base is None else base.manifest()
136 basemf = None if base is None else base.manifest()
137
137
138 # find where new files came from
138 # find where new files came from
139 # we currently don't try to find where old files went, too expensive
139 # we currently don't try to find where old files went, too expensive
140 # this means we can miss a case like 'hg rm b; hg cp a b'
140 # this means we can miss a case like 'hg rm b; hg cp a b'
141 cm = {}
141 cm = {}
142
142
143 # Computing the forward missing is quite expensive on large manifests, since
143 # Computing the forward missing is quite expensive on large manifests, since
144 # it compares the entire manifests. We can optimize it in the common use
144 # it compares the entire manifests. We can optimize it in the common use
145 # case of computing what copies are in a commit versus its parent (like
145 # case of computing what copies are in a commit versus its parent (like
146 # during a rebase or histedit). Note, we exclude merge commits from this
146 # during a rebase or histedit). Note, we exclude merge commits from this
147 # optimization, since the ctx.files() for a merge commit is not correct for
147 # optimization, since the ctx.files() for a merge commit is not correct for
148 # this comparison.
148 # this comparison.
149 forwardmissingmatch = match
149 forwardmissingmatch = match
150 if b.p1() == a and b.p2().node() == node.nullid:
150 if b.p1() == a and b.p2().node() == node.nullid:
151 filesmatcher = matchmod.exact(b.files())
151 filesmatcher = matchmod.exact(b.files())
152 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
152 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
153 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
153 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
154
154
155 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
155 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
156
156
157 if debug:
157 if debug:
158 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
158 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
159
159
160 for f in sorted(missing):
160 for f in sorted(missing):
161 if debug:
161 if debug:
162 dbg(b'debug.copies: tracing file: %s\n' % f)
162 dbg(b'debug.copies: tracing file: %s\n' % f)
163 fctx = b[f]
163 fctx = b[f]
164 fctx._ancestrycontext = ancestrycontext
164 fctx._ancestrycontext = ancestrycontext
165
165
166 if debug:
166 if debug:
167 start = util.timer()
167 start = util.timer()
168 opath = _tracefile(fctx, am, basemf)
168 opath = _tracefile(fctx, am, basemf)
169 if opath:
169 if opath:
170 if debug:
170 if debug:
171 dbg(b'debug.copies: rename of: %s\n' % opath)
171 dbg(b'debug.copies: rename of: %s\n' % opath)
172 cm[f] = opath
172 cm[f] = opath
173 if debug:
173 if debug:
174 dbg(
174 dbg(
175 b'debug.copies: time: %f seconds\n'
175 b'debug.copies: time: %f seconds\n'
176 % (util.timer() - start)
176 % (util.timer() - start)
177 )
177 )
178 return cm
178 return cm
179
179
180
180
181 def _changesetforwardcopies(a, b, match):
181 def _changesetforwardcopies(a, b, match):
182 if a.rev() in (node.nullrev, b.rev()):
182 if a.rev() in (node.nullrev, b.rev()):
183 return {}
183 return {}
184
184
185 repo = a.repo()
185 repo = a.repo()
186 children = {}
186 children = {}
187 cl = repo.changelog
187 cl = repo.changelog
188 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
188 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
189 for r in missingrevs:
189 for r in missingrevs:
190 for p in cl.parentrevs(r):
190 for p in cl.parentrevs(r):
191 if p == node.nullrev:
191 if p == node.nullrev:
192 continue
192 continue
193 if p not in children:
193 if p not in children:
194 children[p] = [r]
194 children[p] = [r]
195 else:
195 else:
196 children[p].append(r)
196 children[p].append(r)
197
197
198 roots = set(children) - set(missingrevs)
198 roots = set(children) - set(missingrevs)
199 # 'work' contains 3-tuples of a (revision number, parent number, copies).
199 work = list(roots)
200 # The parent number is only used for knowing which parent the copies dict
200 all_copies = {r: {} for r in roots}
201 # came from.
202 # NOTE: To reduce costly copying the 'copies' dicts, we reuse the same
203 # instance for *one* of the child nodes (the last one). Once an instance
204 # has been put on the queue, it is thus no longer safe to modify it.
205 # Conversely, it *is* safe to modify an instance popped off the queue.
206 work = [(r, 1, {}) for r in roots]
207 heapq.heapify(work)
201 heapq.heapify(work)
208 alwaysmatch = match.always()
202 alwaysmatch = match.always()
209 while work:
203 while work:
210 r, i1, copies = heapq.heappop(work)
204 r = heapq.heappop(work)
211 if work and work[0][0] == r:
205 copies = all_copies.pop(r)
212 # We are tracing copies from both parents
213 r, i2, copies2 = heapq.heappop(work)
214 for dst, src in copies2.items():
215 # Unlike when copies are stored in the filelog, we consider
216 # it a copy even if the destination already existed on the
217 # other branch. It's simply too expensive to check if the
218 # file existed in the manifest.
219 if dst not in copies:
220 # If it was copied on the p1 side, leave it as copied from
221 # that side, even if it was also copied on the p2 side.
222 copies[dst] = copies2[dst]
223 if r == b.rev():
206 if r == b.rev():
224 return copies
207 return copies
225 for i, c in enumerate(children[r]):
208 for i, c in enumerate(children[r]):
226 childctx = repo[c]
209 childctx = repo[c]
227 if r == childctx.p1().rev():
210 if r == childctx.p1().rev():
228 parent = 1
211 parent = 1
229 childcopies = childctx.p1copies()
212 childcopies = childctx.p1copies()
230 else:
213 else:
231 assert r == childctx.p2().rev()
214 assert r == childctx.p2().rev()
232 parent = 2
215 parent = 2
233 childcopies = childctx.p2copies()
216 childcopies = childctx.p2copies()
234 if not alwaysmatch:
217 if not alwaysmatch:
235 childcopies = {
218 childcopies = {
236 dst: src for dst, src in childcopies.items() if match(dst)
219 dst: src for dst, src in childcopies.items() if match(dst)
237 }
220 }
238 # Copy the dict only if later iterations will also need it
221 # Copy the dict only if later iterations will also need it
239 if i != len(children[r]) - 1:
222 if i != len(children[r]) - 1:
240 newcopies = copies.copy()
223 newcopies = copies.copy()
241 else:
224 else:
242 newcopies = copies
225 newcopies = copies
243 if childcopies:
226 if childcopies:
244 newcopies = _chain(newcopies, childcopies)
227 newcopies = _chain(newcopies, childcopies)
245 for f in childctx.filesremoved():
228 for f in childctx.filesremoved():
246 if f in newcopies:
229 if f in newcopies:
247 del newcopies[f]
230 del newcopies[f]
248 heapq.heappush(work, (c, parent, newcopies))
231 othercopies = all_copies.get(c)
232 if othercopies is None:
233 heapq.heappush(work, c)
234 all_copies[c] = newcopies
235 else:
236 # we are the second parent to work on c, we need to merge our
237 # work with the other.
238 #
239 # Unlike when copies are stored in the filelog, we consider
240 # it a copy even if the destination already existed on the
241 # other branch. It's simply too expensive to check if the
242 # file existed in the manifest.
243 #
244 # In case of conflict, parent 1 take precedence over parent 2.
245 # This is an arbitrary choice made anew when implementing
246 # changeset based copies. It was made without regards with
247 # potential filelog related behavior.
248 if parent == 1:
249 othercopies.update(newcopies)
250 else:
251 newcopies.update(othercopies)
252 all_copies[c] = newcopies
249 assert False
253 assert False
250
254
251
255
252 def _forwardcopies(a, b, base=None, match=None):
256 def _forwardcopies(a, b, base=None, match=None):
253 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
257 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
254
258
255 if base is None:
259 if base is None:
256 base = a
260 base = a
257 match = a.repo().narrowmatch(match)
261 match = a.repo().narrowmatch(match)
258 # check for working copy
262 # check for working copy
259 if b.rev() is None:
263 if b.rev() is None:
260 cm = _committedforwardcopies(a, b.p1(), base, match)
264 cm = _committedforwardcopies(a, b.p1(), base, match)
261 # combine copies from dirstate if necessary
265 # combine copies from dirstate if necessary
262 copies = _chain(cm, _dirstatecopies(b._repo, match))
266 copies = _chain(cm, _dirstatecopies(b._repo, match))
263 else:
267 else:
264 copies = _committedforwardcopies(a, b, base, match)
268 copies = _committedforwardcopies(a, b, base, match)
265 return copies
269 return copies
266
270
267
271
268 def _backwardrenames(a, b, match):
272 def _backwardrenames(a, b, match):
269 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
273 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
270 return {}
274 return {}
271
275
272 # Even though we're not taking copies into account, 1:n rename situations
276 # Even though we're not taking copies into account, 1:n rename situations
273 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
277 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
274 # arbitrarily pick one of the renames.
278 # arbitrarily pick one of the renames.
275 # We don't want to pass in "match" here, since that would filter
279 # We don't want to pass in "match" here, since that would filter
276 # the destination by it. Since we're reversing the copies, we want
280 # the destination by it. Since we're reversing the copies, we want
277 # to filter the source instead.
281 # to filter the source instead.
278 f = _forwardcopies(b, a)
282 f = _forwardcopies(b, a)
279 r = {}
283 r = {}
280 for k, v in sorted(pycompat.iteritems(f)):
284 for k, v in sorted(pycompat.iteritems(f)):
281 if match and not match(v):
285 if match and not match(v):
282 continue
286 continue
283 # remove copies
287 # remove copies
284 if v in a:
288 if v in a:
285 continue
289 continue
286 r[v] = k
290 r[v] = k
287 return r
291 return r
288
292
289
293
290 def pathcopies(x, y, match=None):
294 def pathcopies(x, y, match=None):
291 """find {dst@y: src@x} copy mapping for directed compare"""
295 """find {dst@y: src@x} copy mapping for directed compare"""
292 repo = x._repo
296 repo = x._repo
293 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
297 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
294 if debug:
298 if debug:
295 repo.ui.debug(
299 repo.ui.debug(
296 b'debug.copies: searching copies from %s to %s\n' % (x, y)
300 b'debug.copies: searching copies from %s to %s\n' % (x, y)
297 )
301 )
298 if x == y or not x or not y:
302 if x == y or not x or not y:
299 return {}
303 return {}
300 a = y.ancestor(x)
304 a = y.ancestor(x)
301 if a == x:
305 if a == x:
302 if debug:
306 if debug:
303 repo.ui.debug(b'debug.copies: search mode: forward\n')
307 repo.ui.debug(b'debug.copies: search mode: forward\n')
304 if y.rev() is None and x == y.p1():
308 if y.rev() is None and x == y.p1():
305 # short-circuit to avoid issues with merge states
309 # short-circuit to avoid issues with merge states
306 return _dirstatecopies(repo, match)
310 return _dirstatecopies(repo, match)
307 copies = _forwardcopies(x, y, match=match)
311 copies = _forwardcopies(x, y, match=match)
308 elif a == y:
312 elif a == y:
309 if debug:
313 if debug:
310 repo.ui.debug(b'debug.copies: search mode: backward\n')
314 repo.ui.debug(b'debug.copies: search mode: backward\n')
311 copies = _backwardrenames(x, y, match=match)
315 copies = _backwardrenames(x, y, match=match)
312 else:
316 else:
313 if debug:
317 if debug:
314 repo.ui.debug(b'debug.copies: search mode: combined\n')
318 repo.ui.debug(b'debug.copies: search mode: combined\n')
315 base = None
319 base = None
316 if a.rev() != node.nullrev:
320 if a.rev() != node.nullrev:
317 base = x
321 base = x
318 copies = _chain(
322 copies = _chain(
319 _backwardrenames(x, a, match=match),
323 _backwardrenames(x, a, match=match),
320 _forwardcopies(a, y, base, match=match),
324 _forwardcopies(a, y, base, match=match),
321 )
325 )
322 _filter(x, y, copies)
326 _filter(x, y, copies)
323 return copies
327 return copies
324
328
325
329
326 def mergecopies(repo, c1, c2, base):
330 def mergecopies(repo, c1, c2, base):
327 """
331 """
328 Finds moves and copies between context c1 and c2 that are relevant for
332 Finds moves and copies between context c1 and c2 that are relevant for
329 merging. 'base' will be used as the merge base.
333 merging. 'base' will be used as the merge base.
330
334
331 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
335 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
332 files that were moved/ copied in one merge parent and modified in another.
336 files that were moved/ copied in one merge parent and modified in another.
333 For example:
337 For example:
334
338
335 o ---> 4 another commit
339 o ---> 4 another commit
336 |
340 |
337 | o ---> 3 commit that modifies a.txt
341 | o ---> 3 commit that modifies a.txt
338 | /
342 | /
339 o / ---> 2 commit that moves a.txt to b.txt
343 o / ---> 2 commit that moves a.txt to b.txt
340 |/
344 |/
341 o ---> 1 merge base
345 o ---> 1 merge base
342
346
343 If we try to rebase revision 3 on revision 4, since there is no a.txt in
347 If we try to rebase revision 3 on revision 4, since there is no a.txt in
344 revision 4, and if user have copytrace disabled, we prints the following
348 revision 4, and if user have copytrace disabled, we prints the following
345 message:
349 message:
346
350
347 ```other changed <file> which local deleted```
351 ```other changed <file> which local deleted```
348
352
349 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
353 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
350 "dirmove".
354 "dirmove".
351
355
352 "copy" is a mapping from destination name -> source name,
356 "copy" is a mapping from destination name -> source name,
353 where source is in c1 and destination is in c2 or vice-versa.
357 where source is in c1 and destination is in c2 or vice-versa.
354
358
355 "movewithdir" is a mapping from source name -> destination name,
359 "movewithdir" is a mapping from source name -> destination name,
356 where the file at source present in one context but not the other
360 where the file at source present in one context but not the other
357 needs to be moved to destination by the merge process, because the
361 needs to be moved to destination by the merge process, because the
358 other context moved the directory it is in.
362 other context moved the directory it is in.
359
363
360 "diverge" is a mapping of source name -> list of destination names
364 "diverge" is a mapping of source name -> list of destination names
361 for divergent renames.
365 for divergent renames.
362
366
363 "renamedelete" is a mapping of source name -> list of destination
367 "renamedelete" is a mapping of source name -> list of destination
364 names for files deleted in c1 that were renamed in c2 or vice-versa.
368 names for files deleted in c1 that were renamed in c2 or vice-versa.
365
369
366 "dirmove" is a mapping of detected source dir -> destination dir renames.
370 "dirmove" is a mapping of detected source dir -> destination dir renames.
367 This is needed for handling changes to new files previously grafted into
371 This is needed for handling changes to new files previously grafted into
368 renamed directories.
372 renamed directories.
369
373
370 This function calls different copytracing algorithms based on config.
374 This function calls different copytracing algorithms based on config.
371 """
375 """
372 # avoid silly behavior for update from empty dir
376 # avoid silly behavior for update from empty dir
373 if not c1 or not c2 or c1 == c2:
377 if not c1 or not c2 or c1 == c2:
374 return {}, {}, {}, {}, {}
378 return {}, {}, {}, {}, {}
375
379
376 narrowmatch = c1.repo().narrowmatch()
380 narrowmatch = c1.repo().narrowmatch()
377
381
378 # avoid silly behavior for parent -> working dir
382 # avoid silly behavior for parent -> working dir
379 if c2.node() is None and c1.node() == repo.dirstate.p1():
383 if c2.node() is None and c1.node() == repo.dirstate.p1():
380 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
384 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
381
385
382 copytracing = repo.ui.config(b'experimental', b'copytrace')
386 copytracing = repo.ui.config(b'experimental', b'copytrace')
383 if stringutil.parsebool(copytracing) is False:
387 if stringutil.parsebool(copytracing) is False:
384 # stringutil.parsebool() returns None when it is unable to parse the
388 # stringutil.parsebool() returns None when it is unable to parse the
385 # value, so we should rely on making sure copytracing is on such cases
389 # value, so we should rely on making sure copytracing is on such cases
386 return {}, {}, {}, {}, {}
390 return {}, {}, {}, {}, {}
387
391
388 if usechangesetcentricalgo(repo):
392 if usechangesetcentricalgo(repo):
389 # The heuristics don't make sense when we need changeset-centric algos
393 # The heuristics don't make sense when we need changeset-centric algos
390 return _fullcopytracing(repo, c1, c2, base)
394 return _fullcopytracing(repo, c1, c2, base)
391
395
392 # Copy trace disabling is explicitly below the node == p1 logic above
396 # Copy trace disabling is explicitly below the node == p1 logic above
393 # because the logic above is required for a simple copy to be kept across a
397 # because the logic above is required for a simple copy to be kept across a
394 # rebase.
398 # rebase.
395 if copytracing == b'heuristics':
399 if copytracing == b'heuristics':
396 # Do full copytracing if only non-public revisions are involved as
400 # Do full copytracing if only non-public revisions are involved as
397 # that will be fast enough and will also cover the copies which could
401 # that will be fast enough and will also cover the copies which could
398 # be missed by heuristics
402 # be missed by heuristics
399 if _isfullcopytraceable(repo, c1, base):
403 if _isfullcopytraceable(repo, c1, base):
400 return _fullcopytracing(repo, c1, c2, base)
404 return _fullcopytracing(repo, c1, c2, base)
401 return _heuristicscopytracing(repo, c1, c2, base)
405 return _heuristicscopytracing(repo, c1, c2, base)
402 else:
406 else:
403 return _fullcopytracing(repo, c1, c2, base)
407 return _fullcopytracing(repo, c1, c2, base)
404
408
405
409
406 def _isfullcopytraceable(repo, c1, base):
410 def _isfullcopytraceable(repo, c1, base):
407 """ Checks that if base, source and destination are all no-public branches,
411 """ Checks that if base, source and destination are all no-public branches,
408 if yes let's use the full copytrace algorithm for increased capabilities
412 if yes let's use the full copytrace algorithm for increased capabilities
409 since it will be fast enough.
413 since it will be fast enough.
410
414
411 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
415 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
412 number of changesets from c1 to base such that if number of changesets are
416 number of changesets from c1 to base such that if number of changesets are
413 more than the limit, full copytracing algorithm won't be used.
417 more than the limit, full copytracing algorithm won't be used.
414 """
418 """
415 if c1.rev() is None:
419 if c1.rev() is None:
416 c1 = c1.p1()
420 c1 = c1.p1()
417 if c1.mutable() and base.mutable():
421 if c1.mutable() and base.mutable():
418 sourcecommitlimit = repo.ui.configint(
422 sourcecommitlimit = repo.ui.configint(
419 b'experimental', b'copytrace.sourcecommitlimit'
423 b'experimental', b'copytrace.sourcecommitlimit'
420 )
424 )
421 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
425 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
422 return commits < sourcecommitlimit
426 return commits < sourcecommitlimit
423 return False
427 return False
424
428
425
429
426 def _checksinglesidecopies(
430 def _checksinglesidecopies(
427 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
431 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
428 ):
432 ):
429 if src not in m2:
433 if src not in m2:
430 # deleted on side 2
434 # deleted on side 2
431 if src not in m1:
435 if src not in m1:
432 # renamed on side 1, deleted on side 2
436 # renamed on side 1, deleted on side 2
433 renamedelete[src] = dsts1
437 renamedelete[src] = dsts1
434 elif m2[src] != mb[src]:
438 elif m2[src] != mb[src]:
435 if not _related(c2[src], base[src]):
439 if not _related(c2[src], base[src]):
436 return
440 return
437 # modified on side 2
441 # modified on side 2
438 for dst in dsts1:
442 for dst in dsts1:
439 if dst not in m2:
443 if dst not in m2:
440 # dst not added on side 2 (handle as regular
444 # dst not added on side 2 (handle as regular
441 # "both created" case in manifestmerge otherwise)
445 # "both created" case in manifestmerge otherwise)
442 copy[dst] = src
446 copy[dst] = src
443
447
444
448
445 def _fullcopytracing(repo, c1, c2, base):
449 def _fullcopytracing(repo, c1, c2, base):
446 """ The full copytracing algorithm which finds all the new files that were
450 """ The full copytracing algorithm which finds all the new files that were
447 added from merge base up to the top commit and for each file it checks if
451 added from merge base up to the top commit and for each file it checks if
448 this file was copied from another file.
452 this file was copied from another file.
449
453
450 This is pretty slow when a lot of changesets are involved but will track all
454 This is pretty slow when a lot of changesets are involved but will track all
451 the copies.
455 the copies.
452 """
456 """
453 m1 = c1.manifest()
457 m1 = c1.manifest()
454 m2 = c2.manifest()
458 m2 = c2.manifest()
455 mb = base.manifest()
459 mb = base.manifest()
456
460
457 copies1 = pathcopies(base, c1)
461 copies1 = pathcopies(base, c1)
458 copies2 = pathcopies(base, c2)
462 copies2 = pathcopies(base, c2)
459
463
460 inversecopies1 = {}
464 inversecopies1 = {}
461 inversecopies2 = {}
465 inversecopies2 = {}
462 for dst, src in copies1.items():
466 for dst, src in copies1.items():
463 inversecopies1.setdefault(src, []).append(dst)
467 inversecopies1.setdefault(src, []).append(dst)
464 for dst, src in copies2.items():
468 for dst, src in copies2.items():
465 inversecopies2.setdefault(src, []).append(dst)
469 inversecopies2.setdefault(src, []).append(dst)
466
470
467 copy = {}
471 copy = {}
468 diverge = {}
472 diverge = {}
469 renamedelete = {}
473 renamedelete = {}
470 allsources = set(inversecopies1) | set(inversecopies2)
474 allsources = set(inversecopies1) | set(inversecopies2)
471 for src in allsources:
475 for src in allsources:
472 dsts1 = inversecopies1.get(src)
476 dsts1 = inversecopies1.get(src)
473 dsts2 = inversecopies2.get(src)
477 dsts2 = inversecopies2.get(src)
474 if dsts1 and dsts2:
478 if dsts1 and dsts2:
475 # copied/renamed on both sides
479 # copied/renamed on both sides
476 if src not in m1 and src not in m2:
480 if src not in m1 and src not in m2:
477 # renamed on both sides
481 # renamed on both sides
478 dsts1 = set(dsts1)
482 dsts1 = set(dsts1)
479 dsts2 = set(dsts2)
483 dsts2 = set(dsts2)
480 # If there's some overlap in the rename destinations, we
484 # If there's some overlap in the rename destinations, we
481 # consider it not divergent. For example, if side 1 copies 'a'
485 # consider it not divergent. For example, if side 1 copies 'a'
482 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
486 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
483 # and 'd' and deletes 'a'.
487 # and 'd' and deletes 'a'.
484 if dsts1 & dsts2:
488 if dsts1 & dsts2:
485 for dst in dsts1 & dsts2:
489 for dst in dsts1 & dsts2:
486 copy[dst] = src
490 copy[dst] = src
487 else:
491 else:
488 diverge[src] = sorted(dsts1 | dsts2)
492 diverge[src] = sorted(dsts1 | dsts2)
489 elif src in m1 and src in m2:
493 elif src in m1 and src in m2:
490 # copied on both sides
494 # copied on both sides
491 dsts1 = set(dsts1)
495 dsts1 = set(dsts1)
492 dsts2 = set(dsts2)
496 dsts2 = set(dsts2)
493 for dst in dsts1 & dsts2:
497 for dst in dsts1 & dsts2:
494 copy[dst] = src
498 copy[dst] = src
495 # TODO: Handle cases where it was renamed on one side and copied
499 # TODO: Handle cases where it was renamed on one side and copied
496 # on the other side
500 # on the other side
497 elif dsts1:
501 elif dsts1:
498 # copied/renamed only on side 1
502 # copied/renamed only on side 1
499 _checksinglesidecopies(
503 _checksinglesidecopies(
500 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
504 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
501 )
505 )
502 elif dsts2:
506 elif dsts2:
503 # copied/renamed only on side 2
507 # copied/renamed only on side 2
504 _checksinglesidecopies(
508 _checksinglesidecopies(
505 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
509 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
506 )
510 )
507
511
508 renamedeleteset = set()
512 renamedeleteset = set()
509 divergeset = set()
513 divergeset = set()
510 for dsts in diverge.values():
514 for dsts in diverge.values():
511 divergeset.update(dsts)
515 divergeset.update(dsts)
512 for dsts in renamedelete.values():
516 for dsts in renamedelete.values():
513 renamedeleteset.update(dsts)
517 renamedeleteset.update(dsts)
514
518
515 # find interesting file sets from manifests
519 # find interesting file sets from manifests
516 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
520 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
517 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
521 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
518 u1 = sorted(addedinm1 - addedinm2)
522 u1 = sorted(addedinm1 - addedinm2)
519 u2 = sorted(addedinm2 - addedinm1)
523 u2 = sorted(addedinm2 - addedinm1)
520
524
521 header = b" unmatched files in %s"
525 header = b" unmatched files in %s"
522 if u1:
526 if u1:
523 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
527 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
524 if u2:
528 if u2:
525 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
529 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
526
530
527 fullcopy = copies1.copy()
531 fullcopy = copies1.copy()
528 fullcopy.update(copies2)
532 fullcopy.update(copies2)
529 if not fullcopy:
533 if not fullcopy:
530 return copy, {}, diverge, renamedelete, {}
534 return copy, {}, diverge, renamedelete, {}
531
535
532 if repo.ui.debugflag:
536 if repo.ui.debugflag:
533 repo.ui.debug(
537 repo.ui.debug(
534 b" all copies found (* = to merge, ! = divergent, "
538 b" all copies found (* = to merge, ! = divergent, "
535 b"% = renamed and deleted):\n"
539 b"% = renamed and deleted):\n"
536 )
540 )
537 for f in sorted(fullcopy):
541 for f in sorted(fullcopy):
538 note = b""
542 note = b""
539 if f in copy:
543 if f in copy:
540 note += b"*"
544 note += b"*"
541 if f in divergeset:
545 if f in divergeset:
542 note += b"!"
546 note += b"!"
543 if f in renamedeleteset:
547 if f in renamedeleteset:
544 note += b"%"
548 note += b"%"
545 repo.ui.debug(
549 repo.ui.debug(
546 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
550 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
547 )
551 )
548 del divergeset
552 del divergeset
549
553
550 repo.ui.debug(b" checking for directory renames\n")
554 repo.ui.debug(b" checking for directory renames\n")
551
555
552 # generate a directory move map
556 # generate a directory move map
553 d1, d2 = c1.dirs(), c2.dirs()
557 d1, d2 = c1.dirs(), c2.dirs()
554 invalid = set()
558 invalid = set()
555 dirmove = {}
559 dirmove = {}
556
560
557 # examine each file copy for a potential directory move, which is
561 # examine each file copy for a potential directory move, which is
558 # when all the files in a directory are moved to a new directory
562 # when all the files in a directory are moved to a new directory
559 for dst, src in pycompat.iteritems(fullcopy):
563 for dst, src in pycompat.iteritems(fullcopy):
560 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
564 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
561 if dsrc in invalid:
565 if dsrc in invalid:
562 # already seen to be uninteresting
566 # already seen to be uninteresting
563 continue
567 continue
564 elif dsrc in d1 and ddst in d1:
568 elif dsrc in d1 and ddst in d1:
565 # directory wasn't entirely moved locally
569 # directory wasn't entirely moved locally
566 invalid.add(dsrc)
570 invalid.add(dsrc)
567 elif dsrc in d2 and ddst in d2:
571 elif dsrc in d2 and ddst in d2:
568 # directory wasn't entirely moved remotely
572 # directory wasn't entirely moved remotely
569 invalid.add(dsrc)
573 invalid.add(dsrc)
570 elif dsrc in dirmove and dirmove[dsrc] != ddst:
574 elif dsrc in dirmove and dirmove[dsrc] != ddst:
571 # files from the same directory moved to two different places
575 # files from the same directory moved to two different places
572 invalid.add(dsrc)
576 invalid.add(dsrc)
573 else:
577 else:
574 # looks good so far
578 # looks good so far
575 dirmove[dsrc] = ddst
579 dirmove[dsrc] = ddst
576
580
577 for i in invalid:
581 for i in invalid:
578 if i in dirmove:
582 if i in dirmove:
579 del dirmove[i]
583 del dirmove[i]
580 del d1, d2, invalid
584 del d1, d2, invalid
581
585
582 if not dirmove:
586 if not dirmove:
583 return copy, {}, diverge, renamedelete, {}
587 return copy, {}, diverge, renamedelete, {}
584
588
585 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
589 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
586
590
587 for d in dirmove:
591 for d in dirmove:
588 repo.ui.debug(
592 repo.ui.debug(
589 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
593 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
590 )
594 )
591
595
592 movewithdir = {}
596 movewithdir = {}
593 # check unaccounted nonoverlapping files against directory moves
597 # check unaccounted nonoverlapping files against directory moves
594 for f in u1 + u2:
598 for f in u1 + u2:
595 if f not in fullcopy:
599 if f not in fullcopy:
596 for d in dirmove:
600 for d in dirmove:
597 if f.startswith(d):
601 if f.startswith(d):
598 # new file added in a directory that was moved, move it
602 # new file added in a directory that was moved, move it
599 df = dirmove[d] + f[len(d) :]
603 df = dirmove[d] + f[len(d) :]
600 if df not in copy:
604 if df not in copy:
601 movewithdir[f] = df
605 movewithdir[f] = df
602 repo.ui.debug(
606 repo.ui.debug(
603 b" pending file src: '%s' -> dst: '%s'\n"
607 b" pending file src: '%s' -> dst: '%s'\n"
604 % (f, df)
608 % (f, df)
605 )
609 )
606 break
610 break
607
611
608 return copy, movewithdir, diverge, renamedelete, dirmove
612 return copy, movewithdir, diverge, renamedelete, dirmove
609
613
610
614
611 def _heuristicscopytracing(repo, c1, c2, base):
615 def _heuristicscopytracing(repo, c1, c2, base):
612 """ Fast copytracing using filename heuristics
616 """ Fast copytracing using filename heuristics
613
617
614 Assumes that moves or renames are of following two types:
618 Assumes that moves or renames are of following two types:
615
619
616 1) Inside a directory only (same directory name but different filenames)
620 1) Inside a directory only (same directory name but different filenames)
617 2) Move from one directory to another
621 2) Move from one directory to another
618 (same filenames but different directory names)
622 (same filenames but different directory names)
619
623
620 Works only when there are no merge commits in the "source branch".
624 Works only when there are no merge commits in the "source branch".
621 Source branch is commits from base up to c2 not including base.
625 Source branch is commits from base up to c2 not including base.
622
626
623 If merge is involved it fallbacks to _fullcopytracing().
627 If merge is involved it fallbacks to _fullcopytracing().
624
628
625 Can be used by setting the following config:
629 Can be used by setting the following config:
626
630
627 [experimental]
631 [experimental]
628 copytrace = heuristics
632 copytrace = heuristics
629
633
630 In some cases the copy/move candidates found by heuristics can be very large
634 In some cases the copy/move candidates found by heuristics can be very large
631 in number and that will make the algorithm slow. The number of possible
635 in number and that will make the algorithm slow. The number of possible
632 candidates to check can be limited by using the config
636 candidates to check can be limited by using the config
633 `experimental.copytrace.movecandidateslimit` which defaults to 100.
637 `experimental.copytrace.movecandidateslimit` which defaults to 100.
634 """
638 """
635
639
636 if c1.rev() is None:
640 if c1.rev() is None:
637 c1 = c1.p1()
641 c1 = c1.p1()
638 if c2.rev() is None:
642 if c2.rev() is None:
639 c2 = c2.p1()
643 c2 = c2.p1()
640
644
641 copies = {}
645 copies = {}
642
646
643 changedfiles = set()
647 changedfiles = set()
644 m1 = c1.manifest()
648 m1 = c1.manifest()
645 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
649 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
646 # If base is not in c2 branch, we switch to fullcopytracing
650 # If base is not in c2 branch, we switch to fullcopytracing
647 repo.ui.debug(
651 repo.ui.debug(
648 b"switching to full copytracing as base is not "
652 b"switching to full copytracing as base is not "
649 b"an ancestor of c2\n"
653 b"an ancestor of c2\n"
650 )
654 )
651 return _fullcopytracing(repo, c1, c2, base)
655 return _fullcopytracing(repo, c1, c2, base)
652
656
653 ctx = c2
657 ctx = c2
654 while ctx != base:
658 while ctx != base:
655 if len(ctx.parents()) == 2:
659 if len(ctx.parents()) == 2:
656 # To keep things simple let's not handle merges
660 # To keep things simple let's not handle merges
657 repo.ui.debug(b"switching to full copytracing because of merges\n")
661 repo.ui.debug(b"switching to full copytracing because of merges\n")
658 return _fullcopytracing(repo, c1, c2, base)
662 return _fullcopytracing(repo, c1, c2, base)
659 changedfiles.update(ctx.files())
663 changedfiles.update(ctx.files())
660 ctx = ctx.p1()
664 ctx = ctx.p1()
661
665
662 cp = _forwardcopies(base, c2)
666 cp = _forwardcopies(base, c2)
663 for dst, src in pycompat.iteritems(cp):
667 for dst, src in pycompat.iteritems(cp):
664 if src in m1:
668 if src in m1:
665 copies[dst] = src
669 copies[dst] = src
666
670
667 # file is missing if it isn't present in the destination, but is present in
671 # file is missing if it isn't present in the destination, but is present in
668 # the base and present in the source.
672 # the base and present in the source.
669 # Presence in the base is important to exclude added files, presence in the
673 # Presence in the base is important to exclude added files, presence in the
670 # source is important to exclude removed files.
674 # source is important to exclude removed files.
671 filt = lambda f: f not in m1 and f in base and f in c2
675 filt = lambda f: f not in m1 and f in base and f in c2
672 missingfiles = [f for f in changedfiles if filt(f)]
676 missingfiles = [f for f in changedfiles if filt(f)]
673
677
674 if missingfiles:
678 if missingfiles:
675 basenametofilename = collections.defaultdict(list)
679 basenametofilename = collections.defaultdict(list)
676 dirnametofilename = collections.defaultdict(list)
680 dirnametofilename = collections.defaultdict(list)
677
681
678 for f in m1.filesnotin(base.manifest()):
682 for f in m1.filesnotin(base.manifest()):
679 basename = os.path.basename(f)
683 basename = os.path.basename(f)
680 dirname = os.path.dirname(f)
684 dirname = os.path.dirname(f)
681 basenametofilename[basename].append(f)
685 basenametofilename[basename].append(f)
682 dirnametofilename[dirname].append(f)
686 dirnametofilename[dirname].append(f)
683
687
684 for f in missingfiles:
688 for f in missingfiles:
685 basename = os.path.basename(f)
689 basename = os.path.basename(f)
686 dirname = os.path.dirname(f)
690 dirname = os.path.dirname(f)
687 samebasename = basenametofilename[basename]
691 samebasename = basenametofilename[basename]
688 samedirname = dirnametofilename[dirname]
692 samedirname = dirnametofilename[dirname]
689 movecandidates = samebasename + samedirname
693 movecandidates = samebasename + samedirname
690 # f is guaranteed to be present in c2, that's why
694 # f is guaranteed to be present in c2, that's why
691 # c2.filectx(f) won't fail
695 # c2.filectx(f) won't fail
692 f2 = c2.filectx(f)
696 f2 = c2.filectx(f)
693 # we can have a lot of candidates which can slow down the heuristics
697 # we can have a lot of candidates which can slow down the heuristics
694 # config value to limit the number of candidates moves to check
698 # config value to limit the number of candidates moves to check
695 maxcandidates = repo.ui.configint(
699 maxcandidates = repo.ui.configint(
696 b'experimental', b'copytrace.movecandidateslimit'
700 b'experimental', b'copytrace.movecandidateslimit'
697 )
701 )
698
702
699 if len(movecandidates) > maxcandidates:
703 if len(movecandidates) > maxcandidates:
700 repo.ui.status(
704 repo.ui.status(
701 _(
705 _(
702 b"skipping copytracing for '%s', more "
706 b"skipping copytracing for '%s', more "
703 b"candidates than the limit: %d\n"
707 b"candidates than the limit: %d\n"
704 )
708 )
705 % (f, len(movecandidates))
709 % (f, len(movecandidates))
706 )
710 )
707 continue
711 continue
708
712
709 for candidate in movecandidates:
713 for candidate in movecandidates:
710 f1 = c1.filectx(candidate)
714 f1 = c1.filectx(candidate)
711 if _related(f1, f2):
715 if _related(f1, f2):
712 # if there are a few related copies then we'll merge
716 # if there are a few related copies then we'll merge
713 # changes into all of them. This matches the behaviour
717 # changes into all of them. This matches the behaviour
714 # of upstream copytracing
718 # of upstream copytracing
715 copies[candidate] = f
719 copies[candidate] = f
716
720
717 return copies, {}, {}, {}, {}
721 return copies, {}, {}, {}, {}
718
722
719
723
720 def _related(f1, f2):
724 def _related(f1, f2):
721 """return True if f1 and f2 filectx have a common ancestor
725 """return True if f1 and f2 filectx have a common ancestor
722
726
723 Walk back to common ancestor to see if the two files originate
727 Walk back to common ancestor to see if the two files originate
724 from the same file. Since workingfilectx's rev() is None it messes
728 from the same file. Since workingfilectx's rev() is None it messes
725 up the integer comparison logic, hence the pre-step check for
729 up the integer comparison logic, hence the pre-step check for
726 None (f1 and f2 can only be workingfilectx's initially).
730 None (f1 and f2 can only be workingfilectx's initially).
727 """
731 """
728
732
729 if f1 == f2:
733 if f1 == f2:
730 return True # a match
734 return True # a match
731
735
732 g1, g2 = f1.ancestors(), f2.ancestors()
736 g1, g2 = f1.ancestors(), f2.ancestors()
733 try:
737 try:
734 f1r, f2r = f1.linkrev(), f2.linkrev()
738 f1r, f2r = f1.linkrev(), f2.linkrev()
735
739
736 if f1r is None:
740 if f1r is None:
737 f1 = next(g1)
741 f1 = next(g1)
738 if f2r is None:
742 if f2r is None:
739 f2 = next(g2)
743 f2 = next(g2)
740
744
741 while True:
745 while True:
742 f1r, f2r = f1.linkrev(), f2.linkrev()
746 f1r, f2r = f1.linkrev(), f2.linkrev()
743 if f1r > f2r:
747 if f1r > f2r:
744 f1 = next(g1)
748 f1 = next(g1)
745 elif f2r > f1r:
749 elif f2r > f1r:
746 f2 = next(g2)
750 f2 = next(g2)
747 else: # f1 and f2 point to files in the same linkrev
751 else: # f1 and f2 point to files in the same linkrev
748 return f1 == f2 # true if they point to the same file
752 return f1 == f2 # true if they point to the same file
749 except StopIteration:
753 except StopIteration:
750 return False
754 return False
751
755
752
756
753 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
757 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
754 """reproduce copies from fromrev to rev in the dirstate
758 """reproduce copies from fromrev to rev in the dirstate
755
759
756 If skiprev is specified, it's a revision that should be used to
760 If skiprev is specified, it's a revision that should be used to
757 filter copy records. Any copies that occur between fromrev and
761 filter copy records. Any copies that occur between fromrev and
758 skiprev will not be duplicated, even if they appear in the set of
762 skiprev will not be duplicated, even if they appear in the set of
759 copies between fromrev and rev.
763 copies between fromrev and rev.
760 """
764 """
761 exclude = {}
765 exclude = {}
762 ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
766 ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
763 bctrace = stringutil.parsebool(ctraceconfig)
767 bctrace = stringutil.parsebool(ctraceconfig)
764 if skiprev is not None and (
768 if skiprev is not None and (
765 ctraceconfig == b'heuristics' or bctrace or bctrace is None
769 ctraceconfig == b'heuristics' or bctrace or bctrace is None
766 ):
770 ):
767 # copytrace='off' skips this line, but not the entire function because
771 # copytrace='off' skips this line, but not the entire function because
768 # the line below is O(size of the repo) during a rebase, while the rest
772 # the line below is O(size of the repo) during a rebase, while the rest
769 # of the function is much faster (and is required for carrying copy
773 # of the function is much faster (and is required for carrying copy
770 # metadata across the rebase anyway).
774 # metadata across the rebase anyway).
771 exclude = pathcopies(repo[fromrev], repo[skiprev])
775 exclude = pathcopies(repo[fromrev], repo[skiprev])
772 for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
776 for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
773 if dst in exclude:
777 if dst in exclude:
774 continue
778 continue
775 if dst in wctx:
779 if dst in wctx:
776 wctx[dst].markcopied(src)
780 wctx[dst].markcopied(src)
777
781
778
782
779 def computechangesetfilesadded(ctx):
783 def computechangesetfilesadded(ctx):
780 """return the list of files added in a changeset
784 """return the list of files added in a changeset
781 """
785 """
782 added = []
786 added = []
783 for f in ctx.files():
787 for f in ctx.files():
784 if not any(f in p for p in ctx.parents()):
788 if not any(f in p for p in ctx.parents()):
785 added.append(f)
789 added.append(f)
786 return added
790 return added
787
791
788
792
789 def computechangesetfilesremoved(ctx):
793 def computechangesetfilesremoved(ctx):
790 """return the list of files removed in a changeset
794 """return the list of files removed in a changeset
791 """
795 """
792 removed = []
796 removed = []
793 for f in ctx.files():
797 for f in ctx.files():
794 if f not in ctx:
798 if f not in ctx:
795 removed.append(f)
799 removed.append(f)
796 return removed
800 return removed
797
801
798
802
799 def computechangesetcopies(ctx):
803 def computechangesetcopies(ctx):
800 """return the copies data for a changeset
804 """return the copies data for a changeset
801
805
802 The copies data are returned as a pair of dictionnary (p1copies, p2copies).
806 The copies data are returned as a pair of dictionnary (p1copies, p2copies).
803
807
804 Each dictionnary are in the form: `{newname: oldname}`
808 Each dictionnary are in the form: `{newname: oldname}`
805 """
809 """
806 p1copies = {}
810 p1copies = {}
807 p2copies = {}
811 p2copies = {}
808 p1 = ctx.p1()
812 p1 = ctx.p1()
809 p2 = ctx.p2()
813 p2 = ctx.p2()
810 narrowmatch = ctx._repo.narrowmatch()
814 narrowmatch = ctx._repo.narrowmatch()
811 for dst in ctx.files():
815 for dst in ctx.files():
812 if not narrowmatch(dst) or dst not in ctx:
816 if not narrowmatch(dst) or dst not in ctx:
813 continue
817 continue
814 copied = ctx[dst].renamed()
818 copied = ctx[dst].renamed()
815 if not copied:
819 if not copied:
816 continue
820 continue
817 src, srcnode = copied
821 src, srcnode = copied
818 if src in p1 and p1[src].filenode() == srcnode:
822 if src in p1 and p1[src].filenode() == srcnode:
819 p1copies[dst] = src
823 p1copies[dst] = src
820 elif src in p2 and p2[src].filenode() == srcnode:
824 elif src in p2 and p2[src].filenode() == srcnode:
821 p2copies[dst] = src
825 p2copies[dst] = src
822 return p1copies, p2copies
826 return p1copies, p2copies
823
827
824
828
825 def encodecopies(files, copies):
829 def encodecopies(files, copies):
826 items = []
830 items = []
827 for i, dst in enumerate(files):
831 for i, dst in enumerate(files):
828 if dst in copies:
832 if dst in copies:
829 items.append(b'%d\0%s' % (i, copies[dst]))
833 items.append(b'%d\0%s' % (i, copies[dst]))
830 if len(items) != len(copies):
834 if len(items) != len(copies):
831 raise error.ProgrammingError(
835 raise error.ProgrammingError(
832 b'some copy targets missing from file list'
836 b'some copy targets missing from file list'
833 )
837 )
834 return b"\n".join(items)
838 return b"\n".join(items)
835
839
836
840
837 def decodecopies(files, data):
841 def decodecopies(files, data):
838 try:
842 try:
839 copies = {}
843 copies = {}
840 if not data:
844 if not data:
841 return copies
845 return copies
842 for l in data.split(b'\n'):
846 for l in data.split(b'\n'):
843 strindex, src = l.split(b'\0')
847 strindex, src = l.split(b'\0')
844 i = int(strindex)
848 i = int(strindex)
845 dst = files[i]
849 dst = files[i]
846 copies[dst] = src
850 copies[dst] = src
847 return copies
851 return copies
848 except (ValueError, IndexError):
852 except (ValueError, IndexError):
849 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
853 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
850 # used different syntax for the value.
854 # used different syntax for the value.
851 return None
855 return None
852
856
853
857
854 def encodefileindices(files, subset):
858 def encodefileindices(files, subset):
855 subset = set(subset)
859 subset = set(subset)
856 indices = []
860 indices = []
857 for i, f in enumerate(files):
861 for i, f in enumerate(files):
858 if f in subset:
862 if f in subset:
859 indices.append(b'%d' % i)
863 indices.append(b'%d' % i)
860 return b'\n'.join(indices)
864 return b'\n'.join(indices)
861
865
862
866
863 def decodefileindices(files, data):
867 def decodefileindices(files, data):
864 try:
868 try:
865 subset = []
869 subset = []
866 if not data:
870 if not data:
867 return subset
871 return subset
868 for strindex in data.split(b'\n'):
872 for strindex in data.split(b'\n'):
869 i = int(strindex)
873 i = int(strindex)
870 if i < 0 or i >= len(files):
874 if i < 0 or i >= len(files):
871 return None
875 return None
872 subset.append(files[i])
876 subset.append(files[i])
873 return subset
877 return subset
874 except (ValueError, IndexError):
878 except (ValueError, IndexError):
875 # Perhaps someone had chosen the same key name (e.g. "added") and
879 # Perhaps someone had chosen the same key name (e.g. "added") and
876 # used different syntax for the value.
880 # used different syntax for the value.
877 return None
881 return None
878
882
879
883
880 def _getsidedata(srcrepo, rev):
884 def _getsidedata(srcrepo, rev):
881 ctx = srcrepo[rev]
885 ctx = srcrepo[rev]
882 filescopies = computechangesetcopies(ctx)
886 filescopies = computechangesetcopies(ctx)
883 filesadded = computechangesetfilesadded(ctx)
887 filesadded = computechangesetfilesadded(ctx)
884 filesremoved = computechangesetfilesremoved(ctx)
888 filesremoved = computechangesetfilesremoved(ctx)
885 sidedata = {}
889 sidedata = {}
886 if any([filescopies, filesadded, filesremoved]):
890 if any([filescopies, filesadded, filesremoved]):
887 sortedfiles = sorted(ctx.files())
891 sortedfiles = sorted(ctx.files())
888 p1copies, p2copies = filescopies
892 p1copies, p2copies = filescopies
889 p1copies = encodecopies(sortedfiles, p1copies)
893 p1copies = encodecopies(sortedfiles, p1copies)
890 p2copies = encodecopies(sortedfiles, p2copies)
894 p2copies = encodecopies(sortedfiles, p2copies)
891 filesadded = encodefileindices(sortedfiles, filesadded)
895 filesadded = encodefileindices(sortedfiles, filesadded)
892 filesremoved = encodefileindices(sortedfiles, filesremoved)
896 filesremoved = encodefileindices(sortedfiles, filesremoved)
893 if p1copies:
897 if p1copies:
894 sidedata[sidedatamod.SD_P1COPIES] = p1copies
898 sidedata[sidedatamod.SD_P1COPIES] = p1copies
895 if p2copies:
899 if p2copies:
896 sidedata[sidedatamod.SD_P2COPIES] = p2copies
900 sidedata[sidedatamod.SD_P2COPIES] = p2copies
897 if filesadded:
901 if filesadded:
898 sidedata[sidedatamod.SD_FILESADDED] = filesadded
902 sidedata[sidedatamod.SD_FILESADDED] = filesadded
899 if filesremoved:
903 if filesremoved:
900 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
904 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
901 return sidedata
905 return sidedata
902
906
903
907
904 def getsidedataadder(srcrepo, destrepo):
908 def getsidedataadder(srcrepo, destrepo):
905 def sidedatacompanion(revlog, rev):
909 def sidedatacompanion(revlog, rev):
906 sidedata = {}
910 sidedata = {}
907 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
911 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
908 sidedata = _getsidedata(srcrepo, rev)
912 sidedata = _getsidedata(srcrepo, rev)
909 return False, (), sidedata
913 return False, (), sidedata
910
914
911 return sidedatacompanion
915 return sidedatacompanion
912
916
913
917
914 def getsidedataremover(srcrepo, destrepo):
918 def getsidedataremover(srcrepo, destrepo):
915 def sidedatacompanion(revlog, rev):
919 def sidedatacompanion(revlog, rev):
916 f = ()
920 f = ()
917 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
921 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
918 if revlog.flags(rev) & REVIDX_SIDEDATA:
922 if revlog.flags(rev) & REVIDX_SIDEDATA:
919 f = (
923 f = (
920 sidedatamod.SD_P1COPIES,
924 sidedatamod.SD_P1COPIES,
921 sidedatamod.SD_P2COPIES,
925 sidedatamod.SD_P2COPIES,
922 sidedatamod.SD_FILESADDED,
926 sidedatamod.SD_FILESADDED,
923 sidedatamod.SD_FILESREMOVED,
927 sidedatamod.SD_FILESREMOVED,
924 )
928 )
925 return False, f, {}
929 return False, f, {}
926
930
927 return sidedatacompanion
931 return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now