##// END OF EJS Templates
copies: get copies information directly from _copies...
marmoute -
r43547:82dabad5 default
parent child Browse files
Show More
@@ -1,931 +1,932 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16
16
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18
18
19 from . import (
19 from . import (
20 error,
20 error,
21 match as matchmod,
21 match as matchmod,
22 node,
22 node,
23 pathutil,
23 pathutil,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28 from .revlogutils import sidedata as sidedatamod
28 from .revlogutils import sidedata as sidedatamod
29
29
30 from .utils import stringutil
30 from .utils import stringutil
31
31
32
32
33 def _filter(src, dst, t):
33 def _filter(src, dst, t):
34 """filters out invalid copies after chaining"""
34 """filters out invalid copies after chaining"""
35
35
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 # in the following table (not including trivial cases). For example, case 2
38 # in the following table (not including trivial cases). For example, case 2
39 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 # then was renamed between 'mid' and 'dst'.
40 # then was renamed between 'mid' and 'dst'.
41 #
41 #
42 # case src mid dst result
42 # case src mid dst result
43 # 1 x y - -
43 # 1 x y - -
44 # 2 x y y x->y
44 # 2 x y y x->y
45 # 3 x y x -
45 # 3 x y x -
46 # 4 x y z x->z
46 # 4 x y z x->z
47 # 5 - x y -
47 # 5 - x y -
48 # 6 x x y x->y
48 # 6 x x y x->y
49 #
49 #
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 # between 5 and 6, so it includes all cases in its result.
52 # between 5 and 6, so it includes all cases in its result.
53 # Cases 1, 3, and 5 are then removed by _filter().
53 # Cases 1, 3, and 5 are then removed by _filter().
54
54
55 for k, v in list(t.items()):
55 for k, v in list(t.items()):
56 # remove copies from files that didn't exist
56 # remove copies from files that didn't exist
57 if v not in src:
57 if v not in src:
58 del t[k]
58 del t[k]
59 # remove criss-crossed copies
59 # remove criss-crossed copies
60 elif k in src and v in dst:
60 elif k in src and v in dst:
61 del t[k]
61 del t[k]
62 # remove copies to files that were then removed
62 # remove copies to files that were then removed
63 elif k not in dst:
63 elif k not in dst:
64 del t[k]
64 del t[k]
65
65
66
66
def _chain(a, b):
    """chain two sets of copies 'a' and 'b'

    'a' maps {dst: src} from an earlier interval, 'b' from a later one;
    the result maps each destination in 'b' back through 'a' when the
    intermediate name was itself a copy destination.
    """
    t = a.copy()
    for k, v in pycompat.iteritems(b):
        if v in t:
            # 'v' was itself copied from t[v]: collapse the chain
            t[k] = t[v]
        else:
            t[k] = v
    return t
76
76
77
77
78 def _tracefile(fctx, am, basemf):
78 def _tracefile(fctx, am, basemf):
79 """return file context that is the ancestor of fctx present in ancestor
79 """return file context that is the ancestor of fctx present in ancestor
80 manifest am
80 manifest am
81
81
82 Note: we used to try and stop after a given limit, however checking if that
82 Note: we used to try and stop after a given limit, however checking if that
83 limit is reached turned out to be very expensive. we are better off
83 limit is reached turned out to be very expensive. we are better off
84 disabling that feature."""
84 disabling that feature."""
85
85
86 for f in fctx.ancestors():
86 for f in fctx.ancestors():
87 path = f.path()
87 path = f.path()
88 if am.get(path, None) == f.filenode():
88 if am.get(path, None) == f.filenode():
89 return path
89 return path
90 if basemf and basemf.get(path, None) == f.filenode():
90 if basemf and basemf.get(path, None) == f.filenode():
91 return path
91 return path
92
92
93
93
94 def _dirstatecopies(repo, match=None):
94 def _dirstatecopies(repo, match=None):
95 ds = repo.dirstate
95 ds = repo.dirstate
96 c = ds.copies().copy()
96 c = ds.copies().copy()
97 for k in list(c):
97 for k in list(c):
98 if ds[k] not in b'anm' or (match and not match(k)):
98 if ds[k] not in b'anm' or (match and not match(k)):
99 del c[k]
99 del c[k]
100 return c
100 return c
101
101
102
102
103 def _computeforwardmissing(a, b, match=None):
103 def _computeforwardmissing(a, b, match=None):
104 """Computes which files are in b but not a.
104 """Computes which files are in b but not a.
105 This is its own function so extensions can easily wrap this call to see what
105 This is its own function so extensions can easily wrap this call to see what
106 files _forwardcopies is about to process.
106 files _forwardcopies is about to process.
107 """
107 """
108 ma = a.manifest()
108 ma = a.manifest()
109 mb = b.manifest()
109 mb = b.manifest()
110 return mb.filesnotin(ma, match=match)
110 return mb.filesnotin(ma, match=match)
111
111
112
112
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage implies changeset-centric copy information
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource
120
120
121
121
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        # share one ancestry context across all traced files to avoid
        # recomputing ancestors for each of them
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
179
179
180
180
def _changesetforwardcopies(a, b, match):
    """find {dst@b: src@a} copy mapping using changeset-stored copy data

    Walks the changesets between `a` and `b`, chaining the per-changeset
    copy information recorded against the relevant parent.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo()
    children = {}
    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    # build a parent -> children map restricted to the revs we must walk
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    roots = set(children) - set(missingrevs)
    # 'work' contains shared ancestor of all copies, walked in revision order
    work = list(roots)
    all_copies = {r: {} for r in roots}
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        r = heapq.heappop(work)
        copies = all_copies.pop(r)
        if r == b.rev():
            return copies
        for i, c in enumerate(children[r]):
            childctx = repo[c]
            # fetch both parents' copy info in one go, directly from _copies
            p1copies, p2copies = childctx._copies
            if r == childctx.p1().rev():
                parent = 1
                childcopies = p1copies
            else:
                assert r == childctx.p2().rev()
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            for f in childctx.filesremoved():
                if f in newcopies:
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                heapq.heappush(work, c)
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    # b.rev() is always reachable from the roots, so we must return above
    assert False
254
255
255
256
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies
270
271
271
272
def _backwardrenames(a, b, match):
    """find renames from b back to its ancestor a ({src@a: dst@b} reversed)"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
292
293
293
294
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        # walk back from x to the common ancestor, then forward to y
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
328
329
329
330
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
408
409
409
410
410 def _isfullcopytraceable(repo, c1, base):
411 def _isfullcopytraceable(repo, c1, base):
411 """ Checks that if base, source and destination are all no-public branches,
412 """ Checks that if base, source and destination are all no-public branches,
412 if yes let's use the full copytrace algorithm for increased capabilities
413 if yes let's use the full copytrace algorithm for increased capabilities
413 since it will be fast enough.
414 since it will be fast enough.
414
415
415 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
416 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
416 number of changesets from c1 to base such that if number of changesets are
417 number of changesets from c1 to base such that if number of changesets are
417 more than the limit, full copytracing algorithm won't be used.
418 more than the limit, full copytracing algorithm won't be used.
418 """
419 """
419 if c1.rev() is None:
420 if c1.rev() is None:
420 c1 = c1.p1()
421 c1 = c1.p1()
421 if c1.mutable() and base.mutable():
422 if c1.mutable() and base.mutable():
422 sourcecommitlimit = repo.ui.configint(
423 sourcecommitlimit = repo.ui.configint(
423 b'experimental', b'copytrace.sourcecommitlimit'
424 b'experimental', b'copytrace.sourcecommitlimit'
424 )
425 )
425 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
426 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
426 return commits < sourcecommitlimit
427 return commits < sourcecommitlimit
427 return False
428 return False
428
429
429
430
430 def _checksinglesidecopies(
431 def _checksinglesidecopies(
431 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
432 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
432 ):
433 ):
433 if src not in m2:
434 if src not in m2:
434 # deleted on side 2
435 # deleted on side 2
435 if src not in m1:
436 if src not in m1:
436 # renamed on side 1, deleted on side 2
437 # renamed on side 1, deleted on side 2
437 renamedelete[src] = dsts1
438 renamedelete[src] = dsts1
438 elif m2[src] != mb[src]:
439 elif m2[src] != mb[src]:
439 if not _related(c2[src], base[src]):
440 if not _related(c2[src], base[src]):
440 return
441 return
441 # modified on side 2
442 # modified on side 2
442 for dst in dsts1:
443 for dst in dsts1:
443 if dst not in m2:
444 if dst not in m2:
444 # dst not added on side 2 (handle as regular
445 # dst not added on side 2 (handle as regular
445 # "both created" case in manifestmerge otherwise)
446 # "both created" case in manifestmerge otherwise)
446 copy[dst] = src
447 copy[dst] = src
447
448
448
449
449 def _fullcopytracing(repo, c1, c2, base):
450 def _fullcopytracing(repo, c1, c2, base):
450 """ The full copytracing algorithm which finds all the new files that were
451 """ The full copytracing algorithm which finds all the new files that were
451 added from merge base up to the top commit and for each file it checks if
452 added from merge base up to the top commit and for each file it checks if
452 this file was copied from another file.
453 this file was copied from another file.
453
454
454 This is pretty slow when a lot of changesets are involved but will track all
455 This is pretty slow when a lot of changesets are involved but will track all
455 the copies.
456 the copies.
456 """
457 """
457 m1 = c1.manifest()
458 m1 = c1.manifest()
458 m2 = c2.manifest()
459 m2 = c2.manifest()
459 mb = base.manifest()
460 mb = base.manifest()
460
461
461 copies1 = pathcopies(base, c1)
462 copies1 = pathcopies(base, c1)
462 copies2 = pathcopies(base, c2)
463 copies2 = pathcopies(base, c2)
463
464
464 inversecopies1 = {}
465 inversecopies1 = {}
465 inversecopies2 = {}
466 inversecopies2 = {}
466 for dst, src in copies1.items():
467 for dst, src in copies1.items():
467 inversecopies1.setdefault(src, []).append(dst)
468 inversecopies1.setdefault(src, []).append(dst)
468 for dst, src in copies2.items():
469 for dst, src in copies2.items():
469 inversecopies2.setdefault(src, []).append(dst)
470 inversecopies2.setdefault(src, []).append(dst)
470
471
471 copy = {}
472 copy = {}
472 diverge = {}
473 diverge = {}
473 renamedelete = {}
474 renamedelete = {}
474 allsources = set(inversecopies1) | set(inversecopies2)
475 allsources = set(inversecopies1) | set(inversecopies2)
475 for src in allsources:
476 for src in allsources:
476 dsts1 = inversecopies1.get(src)
477 dsts1 = inversecopies1.get(src)
477 dsts2 = inversecopies2.get(src)
478 dsts2 = inversecopies2.get(src)
478 if dsts1 and dsts2:
479 if dsts1 and dsts2:
479 # copied/renamed on both sides
480 # copied/renamed on both sides
480 if src not in m1 and src not in m2:
481 if src not in m1 and src not in m2:
481 # renamed on both sides
482 # renamed on both sides
482 dsts1 = set(dsts1)
483 dsts1 = set(dsts1)
483 dsts2 = set(dsts2)
484 dsts2 = set(dsts2)
484 # If there's some overlap in the rename destinations, we
485 # If there's some overlap in the rename destinations, we
485 # consider it not divergent. For example, if side 1 copies 'a'
486 # consider it not divergent. For example, if side 1 copies 'a'
486 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
487 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
487 # and 'd' and deletes 'a'.
488 # and 'd' and deletes 'a'.
488 if dsts1 & dsts2:
489 if dsts1 & dsts2:
489 for dst in dsts1 & dsts2:
490 for dst in dsts1 & dsts2:
490 copy[dst] = src
491 copy[dst] = src
491 else:
492 else:
492 diverge[src] = sorted(dsts1 | dsts2)
493 diverge[src] = sorted(dsts1 | dsts2)
493 elif src in m1 and src in m2:
494 elif src in m1 and src in m2:
494 # copied on both sides
495 # copied on both sides
495 dsts1 = set(dsts1)
496 dsts1 = set(dsts1)
496 dsts2 = set(dsts2)
497 dsts2 = set(dsts2)
497 for dst in dsts1 & dsts2:
498 for dst in dsts1 & dsts2:
498 copy[dst] = src
499 copy[dst] = src
499 # TODO: Handle cases where it was renamed on one side and copied
500 # TODO: Handle cases where it was renamed on one side and copied
500 # on the other side
501 # on the other side
501 elif dsts1:
502 elif dsts1:
502 # copied/renamed only on side 1
503 # copied/renamed only on side 1
503 _checksinglesidecopies(
504 _checksinglesidecopies(
504 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
505 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
505 )
506 )
506 elif dsts2:
507 elif dsts2:
507 # copied/renamed only on side 2
508 # copied/renamed only on side 2
508 _checksinglesidecopies(
509 _checksinglesidecopies(
509 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
510 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
510 )
511 )
511
512
512 renamedeleteset = set()
513 renamedeleteset = set()
513 divergeset = set()
514 divergeset = set()
514 for dsts in diverge.values():
515 for dsts in diverge.values():
515 divergeset.update(dsts)
516 divergeset.update(dsts)
516 for dsts in renamedelete.values():
517 for dsts in renamedelete.values():
517 renamedeleteset.update(dsts)
518 renamedeleteset.update(dsts)
518
519
519 # find interesting file sets from manifests
520 # find interesting file sets from manifests
520 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
521 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
521 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
522 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
522 u1 = sorted(addedinm1 - addedinm2)
523 u1 = sorted(addedinm1 - addedinm2)
523 u2 = sorted(addedinm2 - addedinm1)
524 u2 = sorted(addedinm2 - addedinm1)
524
525
525 header = b" unmatched files in %s"
526 header = b" unmatched files in %s"
526 if u1:
527 if u1:
527 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
528 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
528 if u2:
529 if u2:
529 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
530 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
530
531
531 fullcopy = copies1.copy()
532 fullcopy = copies1.copy()
532 fullcopy.update(copies2)
533 fullcopy.update(copies2)
533 if not fullcopy:
534 if not fullcopy:
534 return copy, {}, diverge, renamedelete, {}
535 return copy, {}, diverge, renamedelete, {}
535
536
536 if repo.ui.debugflag:
537 if repo.ui.debugflag:
537 repo.ui.debug(
538 repo.ui.debug(
538 b" all copies found (* = to merge, ! = divergent, "
539 b" all copies found (* = to merge, ! = divergent, "
539 b"% = renamed and deleted):\n"
540 b"% = renamed and deleted):\n"
540 )
541 )
541 for f in sorted(fullcopy):
542 for f in sorted(fullcopy):
542 note = b""
543 note = b""
543 if f in copy:
544 if f in copy:
544 note += b"*"
545 note += b"*"
545 if f in divergeset:
546 if f in divergeset:
546 note += b"!"
547 note += b"!"
547 if f in renamedeleteset:
548 if f in renamedeleteset:
548 note += b"%"
549 note += b"%"
549 repo.ui.debug(
550 repo.ui.debug(
550 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
551 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
551 )
552 )
552 del divergeset
553 del divergeset
553
554
554 repo.ui.debug(b" checking for directory renames\n")
555 repo.ui.debug(b" checking for directory renames\n")
555
556
556 # generate a directory move map
557 # generate a directory move map
557 d1, d2 = c1.dirs(), c2.dirs()
558 d1, d2 = c1.dirs(), c2.dirs()
558 invalid = set()
559 invalid = set()
559 dirmove = {}
560 dirmove = {}
560
561
561 # examine each file copy for a potential directory move, which is
562 # examine each file copy for a potential directory move, which is
562 # when all the files in a directory are moved to a new directory
563 # when all the files in a directory are moved to a new directory
563 for dst, src in pycompat.iteritems(fullcopy):
564 for dst, src in pycompat.iteritems(fullcopy):
564 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
565 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
565 if dsrc in invalid:
566 if dsrc in invalid:
566 # already seen to be uninteresting
567 # already seen to be uninteresting
567 continue
568 continue
568 elif dsrc in d1 and ddst in d1:
569 elif dsrc in d1 and ddst in d1:
569 # directory wasn't entirely moved locally
570 # directory wasn't entirely moved locally
570 invalid.add(dsrc)
571 invalid.add(dsrc)
571 elif dsrc in d2 and ddst in d2:
572 elif dsrc in d2 and ddst in d2:
572 # directory wasn't entirely moved remotely
573 # directory wasn't entirely moved remotely
573 invalid.add(dsrc)
574 invalid.add(dsrc)
574 elif dsrc in dirmove and dirmove[dsrc] != ddst:
575 elif dsrc in dirmove and dirmove[dsrc] != ddst:
575 # files from the same directory moved to two different places
576 # files from the same directory moved to two different places
576 invalid.add(dsrc)
577 invalid.add(dsrc)
577 else:
578 else:
578 # looks good so far
579 # looks good so far
579 dirmove[dsrc] = ddst
580 dirmove[dsrc] = ddst
580
581
581 for i in invalid:
582 for i in invalid:
582 if i in dirmove:
583 if i in dirmove:
583 del dirmove[i]
584 del dirmove[i]
584 del d1, d2, invalid
585 del d1, d2, invalid
585
586
586 if not dirmove:
587 if not dirmove:
587 return copy, {}, diverge, renamedelete, {}
588 return copy, {}, diverge, renamedelete, {}
588
589
589 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
590 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
590
591
591 for d in dirmove:
592 for d in dirmove:
592 repo.ui.debug(
593 repo.ui.debug(
593 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
594 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
594 )
595 )
595
596
596 movewithdir = {}
597 movewithdir = {}
597 # check unaccounted nonoverlapping files against directory moves
598 # check unaccounted nonoverlapping files against directory moves
598 for f in u1 + u2:
599 for f in u1 + u2:
599 if f not in fullcopy:
600 if f not in fullcopy:
600 for d in dirmove:
601 for d in dirmove:
601 if f.startswith(d):
602 if f.startswith(d):
602 # new file added in a directory that was moved, move it
603 # new file added in a directory that was moved, move it
603 df = dirmove[d] + f[len(d) :]
604 df = dirmove[d] + f[len(d) :]
604 if df not in copy:
605 if df not in copy:
605 movewithdir[f] = df
606 movewithdir[f] = df
606 repo.ui.debug(
607 repo.ui.debug(
607 b" pending file src: '%s' -> dst: '%s'\n"
608 b" pending file src: '%s' -> dst: '%s'\n"
608 % (f, df)
609 % (f, df)
609 )
610 )
610 break
611 break
611
612
612 return copy, movewithdir, diverge, renamedelete, dirmove
613 return copy, movewithdir, diverge, renamedelete, dirmove
613
614
614
615
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working-copy contexts have rev() None; fall back to their first parent
    # so the revision-number based checks below work
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # walk from c2 back to base, collecting every file touched along the way;
    # bail out to the full algorithm as soon as a merge commit is seen
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only the copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index the files added on the c1 side by basename and by dirname so
        # we can cheaply find plausible rename targets for each missing file
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
722
723
723
724
724 def _related(f1, f2):
725 def _related(f1, f2):
725 """return True if f1 and f2 filectx have a common ancestor
726 """return True if f1 and f2 filectx have a common ancestor
726
727
727 Walk back to common ancestor to see if the two files originate
728 Walk back to common ancestor to see if the two files originate
728 from the same file. Since workingfilectx's rev() is None it messes
729 from the same file. Since workingfilectx's rev() is None it messes
729 up the integer comparison logic, hence the pre-step check for
730 up the integer comparison logic, hence the pre-step check for
730 None (f1 and f2 can only be workingfilectx's initially).
731 None (f1 and f2 can only be workingfilectx's initially).
731 """
732 """
732
733
733 if f1 == f2:
734 if f1 == f2:
734 return True # a match
735 return True # a match
735
736
736 g1, g2 = f1.ancestors(), f2.ancestors()
737 g1, g2 = f1.ancestors(), f2.ancestors()
737 try:
738 try:
738 f1r, f2r = f1.linkrev(), f2.linkrev()
739 f1r, f2r = f1.linkrev(), f2.linkrev()
739
740
740 if f1r is None:
741 if f1r is None:
741 f1 = next(g1)
742 f1 = next(g1)
742 if f2r is None:
743 if f2r is None:
743 f2 = next(g2)
744 f2 = next(g2)
744
745
745 while True:
746 while True:
746 f1r, f2r = f1.linkrev(), f2.linkrev()
747 f1r, f2r = f1.linkrev(), f2.linkrev()
747 if f1r > f2r:
748 if f1r > f2r:
748 f1 = next(g1)
749 f1 = next(g1)
749 elif f2r > f1r:
750 elif f2r > f1r:
750 f2 = next(g2)
751 f2 = next(g2)
751 else: # f1 and f2 point to files in the same linkrev
752 else: # f1 and f2 point to files in the same linkrev
752 return f1 == f2 # true if they point to the same file
753 return f1 == f2 # true if they point to the same file
753 except StopIteration:
754 except StopIteration:
754 return False
755 return False
755
756
756
757
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    tracingenabled = (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    )
    exclude = {}
    if skiprev is not None and tracingenabled:
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    allcopies = pathcopies(repo[fromrev], repo[rev])
    for dst, src in pycompat.iteritems(allcopies):
        # skip filtered copies and destinations absent from the working ctx
        if dst in exclude or dst not in wctx:
            continue
        wctx[dst].markcopied(src)
781
782
782
783
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset
    """
    # a file is "added" when no parent of the changeset contains it
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
791
792
792
793
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    # a touched file missing from the changeset itself was removed by it
    return [f for f in ctx.files() if f not in ctx]
801
802
802
803
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # ignore files outside the narrowspec
        if not narrowmatch(dst):
            continue
        # files removed by the changeset have no copy information
        if dst not in ctx:
            continue
        rename = ctx[dst].renamed()
        if not rename:
            continue
        src, srcnode = rename
        # attribute the copy to whichever parent actually holds the exact
        # source filenode; otherwise the record is ignored
        if src in parent1 and parent1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in parent2 and parent2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
827
828
828
829
def encodecopies(files, copies):
    """encode `copies` as "<index>\\0<source>" lines relative to `files`

    Every copy destination must appear in `files`; a mismatch means the
    caller built an inconsistent file list and is a programming error.
    """
    entries = []
    for index, dst in enumerate(files):
        if dst in copies:
            entries.append(b'%d\0%s' % (index, copies[dst]))
    if len(entries) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
839
840
840
841
def decodecopies(files, data):
    """decode "<index>\\0<source>" lines into a {dst: src} dict

    Returns None when `data` does not parse, since another extension may
    have stored an unrelated value under the same sidedata/extra key.
    """
    copies = {}
    if not data:
        return copies
    try:
        for line in data.split(b'\n'):
            strindex, src = line.split(b'\0')
            copies[files[int(strindex)]] = src
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
    return copies
856
857
857
858
def encodefileindices(files, subset):
    """encode the members of `subset` as newline-separated indices into
    `files` (in `files` order)"""
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % index for index, f in enumerate(files) if f in wanted
    )
865
866
866
867
def decodefileindices(files, data):
    """decode newline-separated indices back into the matching `files`

    Returns None when `data` does not parse or holds an out-of-range
    index, since another extension may have stored an unrelated value
    under the same key.
    """
    subset = []
    if not data:
        return subset
    try:
        for token in data.split(b'\n'):
            index = int(token)
            if not (0 <= index < len(files)):
                return None
            subset.append(files[index])
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
    return subset
882
883
883
884
def _getsidedata(srcrepo, rev):
    """build the copy-tracing sidedata dict for revision `rev` of `srcrepo`

    Only non-empty encoded values are stored, so an empty dict means the
    revision carries no copy information.
    """
    ctx = srcrepo[rev]
    copiesdata = computechangesetcopies(ctx)
    added = computechangesetfilesadded(ctx)
    removed = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([copiesdata, added, removed]):
        allfiles = sorted(ctx.files())
        p1copies, p2copies = copiesdata
        encoded = [
            (sidedatamod.SD_P1COPIES, encodecopies(allfiles, p1copies)),
            (sidedatamod.SD_P2COPIES, encodecopies(allfiles, p2copies)),
            (sidedatamod.SD_FILESADDED, encodefileindices(allfiles, added)),
            (
                sidedatamod.SD_FILESREMOVED,
                encodefileindices(allfiles, removed),
            ),
        ]
        for key, value in encoded:
            if value:
                sidedata[key] = value
    return sidedata
906
907
907
908
def getsidedataadder(srcrepo, destrepo):
    """return a sidedata companion that adds copy sidedata to changelog revs"""

    def sidedatacompanion(revlog, rev):
        # only changelogs carry a `filteredrevs` attribute; other revlogs
        # get no sidedata from this companion
        if not util.safehasattr(revlog, 'filteredrevs'):
            return False, (), {}
        return False, (), _getsidedata(srcrepo, rev)

    return sidedatacompanion
916
917
917
918
def getsidedataremover(srcrepo, destrepo):
    """return a sidedata companion that strips copy sidedata from changelog
    revisions that carry the sidedata flag"""
    copieskeys = (
        sidedatamod.SD_P1COPIES,
        sidedatamod.SD_P2COPIES,
        sidedatamod.SD_FILESADDED,
        sidedatamod.SD_FILESREMOVED,
    )

    def sidedatacompanion(revlog, rev):
        removedkeys = ()
        # only changelogs carry a `filteredrevs` attribute
        if util.safehasattr(revlog, 'filteredrevs'):
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                removedkeys = copieskeys
        return False, removedkeys, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now