##// END OF EJS Templates
copies: avoid instancing more changectx to access parent revisions...
marmoute -
r43548:181d28ba default
parent child Browse files
Show More
@@ -1,932 +1,933 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16
16
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18
18
19 from . import (
19 from . import (
20 error,
20 error,
21 match as matchmod,
21 match as matchmod,
22 node,
22 node,
23 pathutil,
23 pathutil,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28 from .revlogutils import sidedata as sidedatamod
28 from .revlogutils import sidedata as sidedatamod
29
29
30 from .utils import stringutil
30 from .utils import stringutil
31
31
32
32
33 def _filter(src, dst, t):
33 def _filter(src, dst, t):
34 """filters out invalid copies after chaining"""
34 """filters out invalid copies after chaining"""
35
35
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 # in the following table (not including trivial cases). For example, case 2
38 # in the following table (not including trivial cases). For example, case 2
39 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 # then was renamed between 'mid' and 'dst'.
40 # then was renamed between 'mid' and 'dst'.
41 #
41 #
42 # case src mid dst result
42 # case src mid dst result
43 # 1 x y - -
43 # 1 x y - -
44 # 2 x y y x->y
44 # 2 x y y x->y
45 # 3 x y x -
45 # 3 x y x -
46 # 4 x y z x->z
46 # 4 x y z x->z
47 # 5 - x y -
47 # 5 - x y -
48 # 6 x x y x->y
48 # 6 x x y x->y
49 #
49 #
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 # between 5 and 6, so it includes all cases in its result.
52 # between 5 and 6, so it includes all cases in its result.
53 # Cases 1, 3, and 5 are then removed by _filter().
53 # Cases 1, 3, and 5 are then removed by _filter().
54
54
55 for k, v in list(t.items()):
55 for k, v in list(t.items()):
56 # remove copies from files that didn't exist
56 # remove copies from files that didn't exist
57 if v not in src:
57 if v not in src:
58 del t[k]
58 del t[k]
59 # remove criss-crossed copies
59 # remove criss-crossed copies
60 elif k in src and v in dst:
60 elif k in src and v in dst:
61 del t[k]
61 del t[k]
62 # remove copies to files that were then removed
62 # remove copies to files that were then removed
63 elif k not in dst:
63 elif k not in dst:
64 del t[k]
64 del t[k]
65
65
66
66
67 def _chain(a, b):
67 def _chain(a, b):
68 """chain two sets of copies 'a' and 'b'"""
68 """chain two sets of copies 'a' and 'b'"""
69 t = a.copy()
69 t = a.copy()
70 for k, v in pycompat.iteritems(b):
70 for k, v in pycompat.iteritems(b):
71 if v in t:
71 if v in t:
72 t[k] = t[v]
72 t[k] = t[v]
73 else:
73 else:
74 t[k] = v
74 t[k] = v
75 return t
75 return t
76
76
77
77
78 def _tracefile(fctx, am, basemf):
78 def _tracefile(fctx, am, basemf):
79 """return file context that is the ancestor of fctx present in ancestor
79 """return file context that is the ancestor of fctx present in ancestor
80 manifest am
80 manifest am
81
81
82 Note: we used to try and stop after a given limit, however checking if that
82 Note: we used to try and stop after a given limit, however checking if that
83 limit is reached turned out to be very expensive. we are better off
83 limit is reached turned out to be very expensive. We are better off
83 limit is reached turned out to be very expensive. We are better off
84 disabling that feature."""
85
85
86 for f in fctx.ancestors():
86 for f in fctx.ancestors():
87 path = f.path()
87 path = f.path()
88 if am.get(path, None) == f.filenode():
88 if am.get(path, None) == f.filenode():
89 return path
89 return path
90 if basemf and basemf.get(path, None) == f.filenode():
90 if basemf and basemf.get(path, None) == f.filenode():
91 return path
91 return path
92
92
93
93
94 def _dirstatecopies(repo, match=None):
94 def _dirstatecopies(repo, match=None):
95 ds = repo.dirstate
95 ds = repo.dirstate
96 c = ds.copies().copy()
96 c = ds.copies().copy()
97 for k in list(c):
97 for k in list(c):
98 if ds[k] not in b'anm' or (match and not match(k)):
98 if ds[k] not in b'anm' or (match and not match(k)):
99 del c[k]
99 del c[k]
100 return c
100 return c
101
101
102
102
103 def _computeforwardmissing(a, b, match=None):
103 def _computeforwardmissing(a, b, match=None):
104 """Computes which files are in b but not a.
104 """Computes which files are in b but not a.
105 This is its own function so extensions can easily wrap this call to see what
105 This is its own function so extensions can easily wrap this call to see what
106 files _forwardcopies is about to process.
106 files _forwardcopies is about to process.
107 """
107 """
108 ma = a.manifest()
108 ma = a.manifest()
109 mb = b.manifest()
109 mb = b.manifest()
110 return mb.filesnotin(ma, match=match)
110 return mb.filesnotin(ma, match=match)
111
111
112
112
113 def usechangesetcentricalgo(repo):
113 def usechangesetcentricalgo(repo):
114 """Checks if we should use changeset-centric copy algorithms"""
114 """Checks if we should use changeset-centric copy algorithms"""
115 if repo.filecopiesmode == b'changeset-sidedata':
115 if repo.filecopiesmode == b'changeset-sidedata':
116 return True
116 return True
117 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
117 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
118 changesetsource = (b'changeset-only', b'compatibility')
118 changesetsource = (b'changeset-only', b'compatibility')
119 return readfrom in changesetsource
119 return readfrom in changesetsource
120
120
121
121
122 def _committedforwardcopies(a, b, base, match):
122 def _committedforwardcopies(a, b, base, match):
123 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
123 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
124 # files might have to be traced back to the fctx parent of the last
124 # files might have to be traced back to the fctx parent of the last
125 # one-side-only changeset, but not further back than that
125 # one-side-only changeset, but not further back than that
126 repo = a._repo
126 repo = a._repo
127
127
128 if usechangesetcentricalgo(repo):
128 if usechangesetcentricalgo(repo):
129 return _changesetforwardcopies(a, b, match)
129 return _changesetforwardcopies(a, b, match)
130
130
131 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
131 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
132 dbg = repo.ui.debug
132 dbg = repo.ui.debug
133 if debug:
133 if debug:
134 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
134 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
135 am = a.manifest()
135 am = a.manifest()
136 basemf = None if base is None else base.manifest()
136 basemf = None if base is None else base.manifest()
137
137
138 # find where new files came from
138 # find where new files came from
139 # we currently don't try to find where old files went, too expensive
139 # we currently don't try to find where old files went, too expensive
140 # this means we can miss a case like 'hg rm b; hg cp a b'
140 # this means we can miss a case like 'hg rm b; hg cp a b'
141 cm = {}
141 cm = {}
142
142
143 # Computing the forward missing is quite expensive on large manifests, since
143 # Computing the forward missing is quite expensive on large manifests, since
144 # it compares the entire manifests. We can optimize it in the common use
144 # it compares the entire manifests. We can optimize it in the common use
145 # case of computing what copies are in a commit versus its parent (like
145 # case of computing what copies are in a commit versus its parent (like
146 # during a rebase or histedit). Note, we exclude merge commits from this
146 # during a rebase or histedit). Note, we exclude merge commits from this
147 # optimization, since the ctx.files() for a merge commit is not correct for
147 # optimization, since the ctx.files() for a merge commit is not correct for
148 # this comparison.
148 # this comparison.
149 forwardmissingmatch = match
149 forwardmissingmatch = match
150 if b.p1() == a and b.p2().node() == node.nullid:
150 if b.p1() == a and b.p2().node() == node.nullid:
151 filesmatcher = matchmod.exact(b.files())
151 filesmatcher = matchmod.exact(b.files())
152 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
152 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
153 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
153 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
154
154
155 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
155 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
156
156
157 if debug:
157 if debug:
158 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
158 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
159
159
160 for f in sorted(missing):
160 for f in sorted(missing):
161 if debug:
161 if debug:
162 dbg(b'debug.copies: tracing file: %s\n' % f)
162 dbg(b'debug.copies: tracing file: %s\n' % f)
163 fctx = b[f]
163 fctx = b[f]
164 fctx._ancestrycontext = ancestrycontext
164 fctx._ancestrycontext = ancestrycontext
165
165
166 if debug:
166 if debug:
167 start = util.timer()
167 start = util.timer()
168 opath = _tracefile(fctx, am, basemf)
168 opath = _tracefile(fctx, am, basemf)
169 if opath:
169 if opath:
170 if debug:
170 if debug:
171 dbg(b'debug.copies: rename of: %s\n' % opath)
171 dbg(b'debug.copies: rename of: %s\n' % opath)
172 cm[f] = opath
172 cm[f] = opath
173 if debug:
173 if debug:
174 dbg(
174 dbg(
175 b'debug.copies: time: %f seconds\n'
175 b'debug.copies: time: %f seconds\n'
176 % (util.timer() - start)
176 % (util.timer() - start)
177 )
177 )
178 return cm
178 return cm
179
179
180
180
181 def _changesetforwardcopies(a, b, match):
181 def _changesetforwardcopies(a, b, match):
182 if a.rev() in (node.nullrev, b.rev()):
182 if a.rev() in (node.nullrev, b.rev()):
183 return {}
183 return {}
184
184
185 repo = a.repo()
185 repo = a.repo()
186 children = {}
186 children = {}
187 cl = repo.changelog
187 cl = repo.changelog
188 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
188 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
189 for r in missingrevs:
189 for r in missingrevs:
190 for p in cl.parentrevs(r):
190 for p in cl.parentrevs(r):
191 if p == node.nullrev:
191 if p == node.nullrev:
192 continue
192 continue
193 if p not in children:
193 if p not in children:
194 children[p] = [r]
194 children[p] = [r]
195 else:
195 else:
196 children[p].append(r)
196 children[p].append(r)
197
197
198 roots = set(children) - set(missingrevs)
198 roots = set(children) - set(missingrevs)
199 work = list(roots)
199 work = list(roots)
200 all_copies = {r: {} for r in roots}
200 all_copies = {r: {} for r in roots}
201 heapq.heapify(work)
201 heapq.heapify(work)
202 alwaysmatch = match.always()
202 alwaysmatch = match.always()
203 while work:
203 while work:
204 r = heapq.heappop(work)
204 r = heapq.heappop(work)
205 copies = all_copies.pop(r)
205 copies = all_copies.pop(r)
206 if r == b.rev():
206 if r == b.rev():
207 return copies
207 return copies
208 for i, c in enumerate(children[r]):
208 for i, c in enumerate(children[r]):
209 childctx = repo[c]
209 childctx = repo[c]
210 p1, p2 = cl.parentrevs(c)
210 p1copies, p2copies = childctx._copies
211 p1copies, p2copies = childctx._copies
211 if r == childctx.p1().rev():
212 if r == p1:
212 parent = 1
213 parent = 1
213 childcopies = p1copies
214 childcopies = p1copies
214 else:
215 else:
215 assert r == childctx.p2().rev()
216 assert r == p2
216 parent = 2
217 parent = 2
217 childcopies = p2copies
218 childcopies = p2copies
218 if not alwaysmatch:
219 if not alwaysmatch:
219 childcopies = {
220 childcopies = {
220 dst: src for dst, src in childcopies.items() if match(dst)
221 dst: src for dst, src in childcopies.items() if match(dst)
221 }
222 }
222 # Copy the dict only if later iterations will also need it
223 # Copy the dict only if later iterations will also need it
223 if i != len(children[r]) - 1:
224 if i != len(children[r]) - 1:
224 newcopies = copies.copy()
225 newcopies = copies.copy()
225 else:
226 else:
226 newcopies = copies
227 newcopies = copies
227 if childcopies:
228 if childcopies:
228 newcopies = _chain(newcopies, childcopies)
229 newcopies = _chain(newcopies, childcopies)
229 for f in childctx.filesremoved():
230 for f in childctx.filesremoved():
230 if f in newcopies:
231 if f in newcopies:
231 del newcopies[f]
232 del newcopies[f]
232 othercopies = all_copies.get(c)
233 othercopies = all_copies.get(c)
233 if othercopies is None:
234 if othercopies is None:
234 heapq.heappush(work, c)
235 heapq.heappush(work, c)
235 all_copies[c] = newcopies
236 all_copies[c] = newcopies
236 else:
237 else:
237 # we are the second parent to work on c, we need to merge our
238 # we are the second parent to work on c, we need to merge our
238 # work with the other.
239 # work with the other.
239 #
240 #
240 # Unlike when copies are stored in the filelog, we consider
241 # Unlike when copies are stored in the filelog, we consider
241 # it a copy even if the destination already existed on the
242 # it a copy even if the destination already existed on the
242 # other branch. It's simply too expensive to check if the
243 # other branch. It's simply too expensive to check if the
243 # file existed in the manifest.
244 # file existed in the manifest.
244 #
245 #
245 # In case of conflict, parent 1 takes precedence over parent 2.
246 # In case of conflict, parent 1 takes precedence over parent 2.
246 # This is an arbitrary choice made anew when implementing
247 # This is an arbitrary choice made anew when implementing
247 # changeset based copies. It was made without regard to
248 # changeset based copies. It was made without regard to
248 # potential filelog related behavior.
249 # potential filelog related behavior.
249 if parent == 1:
250 if parent == 1:
250 othercopies.update(newcopies)
251 othercopies.update(newcopies)
251 else:
252 else:
252 newcopies.update(othercopies)
253 newcopies.update(othercopies)
253 all_copies[c] = newcopies
254 all_copies[c] = newcopies
254 assert False
255 assert False
255
256
256
257
257 def _forwardcopies(a, b, base=None, match=None):
258 def _forwardcopies(a, b, base=None, match=None):
258 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
259 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
259
260
260 if base is None:
261 if base is None:
261 base = a
262 base = a
262 match = a.repo().narrowmatch(match)
263 match = a.repo().narrowmatch(match)
263 # check for working copy
264 # check for working copy
264 if b.rev() is None:
265 if b.rev() is None:
265 cm = _committedforwardcopies(a, b.p1(), base, match)
266 cm = _committedforwardcopies(a, b.p1(), base, match)
266 # combine copies from dirstate if necessary
267 # combine copies from dirstate if necessary
267 copies = _chain(cm, _dirstatecopies(b._repo, match))
268 copies = _chain(cm, _dirstatecopies(b._repo, match))
268 else:
269 else:
269 copies = _committedforwardcopies(a, b, base, match)
270 copies = _committedforwardcopies(a, b, base, match)
270 return copies
271 return copies
271
272
272
273
273 def _backwardrenames(a, b, match):
274 def _backwardrenames(a, b, match):
274 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
275 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
275 return {}
276 return {}
276
277
277 # Even though we're not taking copies into account, 1:n rename situations
278 # Even though we're not taking copies into account, 1:n rename situations
278 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
279 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
279 # arbitrarily pick one of the renames.
280 # arbitrarily pick one of the renames.
280 # We don't want to pass in "match" here, since that would filter
281 # We don't want to pass in "match" here, since that would filter
281 # the destination by it. Since we're reversing the copies, we want
282 # the destination by it. Since we're reversing the copies, we want
282 # to filter the source instead.
283 # to filter the source instead.
283 f = _forwardcopies(b, a)
284 f = _forwardcopies(b, a)
284 r = {}
285 r = {}
285 for k, v in sorted(pycompat.iteritems(f)):
286 for k, v in sorted(pycompat.iteritems(f)):
286 if match and not match(v):
287 if match and not match(v):
287 continue
288 continue
288 # remove copies
289 # remove copies
289 if v in a:
290 if v in a:
290 continue
291 continue
291 r[v] = k
292 r[v] = k
292 return r
293 return r
293
294
294
295
295 def pathcopies(x, y, match=None):
296 def pathcopies(x, y, match=None):
296 """find {dst@y: src@x} copy mapping for directed compare"""
297 """find {dst@y: src@x} copy mapping for directed compare"""
297 repo = x._repo
298 repo = x._repo
298 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
299 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
299 if debug:
300 if debug:
300 repo.ui.debug(
301 repo.ui.debug(
301 b'debug.copies: searching copies from %s to %s\n' % (x, y)
302 b'debug.copies: searching copies from %s to %s\n' % (x, y)
302 )
303 )
303 if x == y or not x or not y:
304 if x == y or not x or not y:
304 return {}
305 return {}
305 a = y.ancestor(x)
306 a = y.ancestor(x)
306 if a == x:
307 if a == x:
307 if debug:
308 if debug:
308 repo.ui.debug(b'debug.copies: search mode: forward\n')
309 repo.ui.debug(b'debug.copies: search mode: forward\n')
309 if y.rev() is None and x == y.p1():
310 if y.rev() is None and x == y.p1():
310 # short-circuit to avoid issues with merge states
311 # short-circuit to avoid issues with merge states
311 return _dirstatecopies(repo, match)
312 return _dirstatecopies(repo, match)
312 copies = _forwardcopies(x, y, match=match)
313 copies = _forwardcopies(x, y, match=match)
313 elif a == y:
314 elif a == y:
314 if debug:
315 if debug:
315 repo.ui.debug(b'debug.copies: search mode: backward\n')
316 repo.ui.debug(b'debug.copies: search mode: backward\n')
316 copies = _backwardrenames(x, y, match=match)
317 copies = _backwardrenames(x, y, match=match)
317 else:
318 else:
318 if debug:
319 if debug:
319 repo.ui.debug(b'debug.copies: search mode: combined\n')
320 repo.ui.debug(b'debug.copies: search mode: combined\n')
320 base = None
321 base = None
321 if a.rev() != node.nullrev:
322 if a.rev() != node.nullrev:
322 base = x
323 base = x
323 copies = _chain(
324 copies = _chain(
324 _backwardrenames(x, a, match=match),
325 _backwardrenames(x, a, match=match),
325 _forwardcopies(a, y, base, match=match),
326 _forwardcopies(a, y, base, match=match),
326 )
327 )
327 _filter(x, y, copies)
328 _filter(x, y, copies)
328 return copies
329 return copies
329
330
330
331
331 def mergecopies(repo, c1, c2, base):
332 def mergecopies(repo, c1, c2, base):
332 """
333 """
333 Finds moves and copies between context c1 and c2 that are relevant for
334 Finds moves and copies between context c1 and c2 that are relevant for
334 merging. 'base' will be used as the merge base.
335 merging. 'base' will be used as the merge base.
335
336
336 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
337 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
337 files that were moved/ copied in one merge parent and modified in another.
338 files that were moved/ copied in one merge parent and modified in another.
338 For example:
339 For example:
339
340
340 o ---> 4 another commit
341 o ---> 4 another commit
341 |
342 |
342 | o ---> 3 commit that modifies a.txt
343 | o ---> 3 commit that modifies a.txt
343 | /
344 | /
344 o / ---> 2 commit that moves a.txt to b.txt
345 o / ---> 2 commit that moves a.txt to b.txt
345 |/
346 |/
346 o ---> 1 merge base
347 o ---> 1 merge base
347
348
348 If we try to rebase revision 3 on revision 4, since there is no a.txt in
349 If we try to rebase revision 3 on revision 4, since there is no a.txt in
349 revision 4, and if the user has copytrace disabled, we print the following
350 revision 4, and if the user has copytrace disabled, we print the following
350 message:
351 message:
351
352
352 ```other changed <file> which local deleted```
353 ```other changed <file> which local deleted```
353
354
354 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
355 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
355 "dirmove".
356 "dirmove".
356
357
357 "copy" is a mapping from destination name -> source name,
358 "copy" is a mapping from destination name -> source name,
358 where source is in c1 and destination is in c2 or vice-versa.
359 where source is in c1 and destination is in c2 or vice-versa.
359
360
360 "movewithdir" is a mapping from source name -> destination name,
361 "movewithdir" is a mapping from source name -> destination name,
361 where the file at source present in one context but not the other
362 where the file at source present in one context but not the other
362 needs to be moved to destination by the merge process, because the
363 needs to be moved to destination by the merge process, because the
363 other context moved the directory it is in.
364 other context moved the directory it is in.
364
365
365 "diverge" is a mapping of source name -> list of destination names
366 "diverge" is a mapping of source name -> list of destination names
366 for divergent renames.
367 for divergent renames.
367
368
368 "renamedelete" is a mapping of source name -> list of destination
369 "renamedelete" is a mapping of source name -> list of destination
369 names for files deleted in c1 that were renamed in c2 or vice-versa.
370 names for files deleted in c1 that were renamed in c2 or vice-versa.
370
371
371 "dirmove" is a mapping of detected source dir -> destination dir renames.
372 "dirmove" is a mapping of detected source dir -> destination dir renames.
372 This is needed for handling changes to new files previously grafted into
373 This is needed for handling changes to new files previously grafted into
373 renamed directories.
374 renamed directories.
374
375
375 This function calls different copytracing algorithms based on config.
376 This function calls different copytracing algorithms based on config.
376 """
377 """
377 # avoid silly behavior for update from empty dir
378 # avoid silly behavior for update from empty dir
378 if not c1 or not c2 or c1 == c2:
379 if not c1 or not c2 or c1 == c2:
379 return {}, {}, {}, {}, {}
380 return {}, {}, {}, {}, {}
380
381
381 narrowmatch = c1.repo().narrowmatch()
382 narrowmatch = c1.repo().narrowmatch()
382
383
383 # avoid silly behavior for parent -> working dir
384 # avoid silly behavior for parent -> working dir
384 if c2.node() is None and c1.node() == repo.dirstate.p1():
385 if c2.node() is None and c1.node() == repo.dirstate.p1():
385 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
386 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
386
387
387 copytracing = repo.ui.config(b'experimental', b'copytrace')
388 copytracing = repo.ui.config(b'experimental', b'copytrace')
388 if stringutil.parsebool(copytracing) is False:
389 if stringutil.parsebool(copytracing) is False:
389 # stringutil.parsebool() returns None when it is unable to parse the
390 # stringutil.parsebool() returns None when it is unable to parse the
390 # value, so we should rely on making sure copytracing is on such cases
391 # value, so we should rely on making sure copytracing is on such cases
391 return {}, {}, {}, {}, {}
392 return {}, {}, {}, {}, {}
392
393
393 if usechangesetcentricalgo(repo):
394 if usechangesetcentricalgo(repo):
394 # The heuristics don't make sense when we need changeset-centric algos
395 # The heuristics don't make sense when we need changeset-centric algos
395 return _fullcopytracing(repo, c1, c2, base)
396 return _fullcopytracing(repo, c1, c2, base)
396
397
397 # Copy trace disabling is explicitly below the node == p1 logic above
398 # Copy trace disabling is explicitly below the node == p1 logic above
398 # because the logic above is required for a simple copy to be kept across a
399 # because the logic above is required for a simple copy to be kept across a
399 # rebase.
400 # rebase.
400 if copytracing == b'heuristics':
401 if copytracing == b'heuristics':
401 # Do full copytracing if only non-public revisions are involved as
402 # Do full copytracing if only non-public revisions are involved as
402 # that will be fast enough and will also cover the copies which could
403 # that will be fast enough and will also cover the copies which could
403 # be missed by heuristics
404 # be missed by heuristics
404 if _isfullcopytraceable(repo, c1, base):
405 if _isfullcopytraceable(repo, c1, base):
405 return _fullcopytracing(repo, c1, c2, base)
406 return _fullcopytracing(repo, c1, c2, base)
406 return _heuristicscopytracing(repo, c1, c2, base)
407 return _heuristicscopytracing(repo, c1, c2, base)
407 else:
408 else:
408 return _fullcopytracing(repo, c1, c2, base)
409 return _fullcopytracing(repo, c1, c2, base)
409
410
410
411
411 def _isfullcopytraceable(repo, c1, base):
412 def _isfullcopytraceable(repo, c1, base):
412 """ Checks that if base, source and destination are all non-public branches,
413 """ Checks that if base, source and destination are all non-public branches,
413 if yes let's use the full copytrace algorithm for increased capabilities
414 if yes let's use the full copytrace algorithm for increased capabilities
414 since it will be fast enough.
415 since it will be fast enough.
415
416
416 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
417 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
417 number of changesets from c1 to base such that if number of changesets are
418 number of changesets from c1 to base such that if number of changesets are
418 more than the limit, full copytracing algorithm won't be used.
419 more than the limit, full copytracing algorithm won't be used.
419 """
420 """
420 if c1.rev() is None:
421 if c1.rev() is None:
421 c1 = c1.p1()
422 c1 = c1.p1()
422 if c1.mutable() and base.mutable():
423 if c1.mutable() and base.mutable():
423 sourcecommitlimit = repo.ui.configint(
424 sourcecommitlimit = repo.ui.configint(
424 b'experimental', b'copytrace.sourcecommitlimit'
425 b'experimental', b'copytrace.sourcecommitlimit'
425 )
426 )
426 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
427 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
427 return commits < sourcecommitlimit
428 return commits < sourcecommitlimit
428 return False
429 return False
429
430
430
431
431 def _checksinglesidecopies(
432 def _checksinglesidecopies(
432 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
433 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
433 ):
434 ):
434 if src not in m2:
435 if src not in m2:
435 # deleted on side 2
436 # deleted on side 2
436 if src not in m1:
437 if src not in m1:
437 # renamed on side 1, deleted on side 2
438 # renamed on side 1, deleted on side 2
438 renamedelete[src] = dsts1
439 renamedelete[src] = dsts1
439 elif m2[src] != mb[src]:
440 elif m2[src] != mb[src]:
440 if not _related(c2[src], base[src]):
441 if not _related(c2[src], base[src]):
441 return
442 return
442 # modified on side 2
443 # modified on side 2
443 for dst in dsts1:
444 for dst in dsts1:
444 if dst not in m2:
445 if dst not in m2:
445 # dst not added on side 2 (handle as regular
446 # dst not added on side 2 (handle as regular
446 # "both created" case in manifestmerge otherwise)
447 # "both created" case in manifestmerge otherwise)
447 copy[dst] = src
448 copy[dst] = src
448
449
449
450
450 def _fullcopytracing(repo, c1, c2, base):
451 def _fullcopytracing(repo, c1, c2, base):
451 """ The full copytracing algorithm which finds all the new files that were
452 """ The full copytracing algorithm which finds all the new files that were
452 added from merge base up to the top commit and for each file it checks if
453 added from merge base up to the top commit and for each file it checks if
453 this file was copied from another file.
454 this file was copied from another file.
454
455
455 This is pretty slow when a lot of changesets are involved but will track all
456 This is pretty slow when a lot of changesets are involved but will track all
456 the copies.
457 the copies.
457 """
458 """
458 m1 = c1.manifest()
459 m1 = c1.manifest()
459 m2 = c2.manifest()
460 m2 = c2.manifest()
460 mb = base.manifest()
461 mb = base.manifest()
461
462
462 copies1 = pathcopies(base, c1)
463 copies1 = pathcopies(base, c1)
463 copies2 = pathcopies(base, c2)
464 copies2 = pathcopies(base, c2)
464
465
465 inversecopies1 = {}
466 inversecopies1 = {}
466 inversecopies2 = {}
467 inversecopies2 = {}
467 for dst, src in copies1.items():
468 for dst, src in copies1.items():
468 inversecopies1.setdefault(src, []).append(dst)
469 inversecopies1.setdefault(src, []).append(dst)
469 for dst, src in copies2.items():
470 for dst, src in copies2.items():
470 inversecopies2.setdefault(src, []).append(dst)
471 inversecopies2.setdefault(src, []).append(dst)
471
472
472 copy = {}
473 copy = {}
473 diverge = {}
474 diverge = {}
474 renamedelete = {}
475 renamedelete = {}
475 allsources = set(inversecopies1) | set(inversecopies2)
476 allsources = set(inversecopies1) | set(inversecopies2)
476 for src in allsources:
477 for src in allsources:
477 dsts1 = inversecopies1.get(src)
478 dsts1 = inversecopies1.get(src)
478 dsts2 = inversecopies2.get(src)
479 dsts2 = inversecopies2.get(src)
479 if dsts1 and dsts2:
480 if dsts1 and dsts2:
480 # copied/renamed on both sides
481 # copied/renamed on both sides
481 if src not in m1 and src not in m2:
482 if src not in m1 and src not in m2:
482 # renamed on both sides
483 # renamed on both sides
483 dsts1 = set(dsts1)
484 dsts1 = set(dsts1)
484 dsts2 = set(dsts2)
485 dsts2 = set(dsts2)
485 # If there's some overlap in the rename destinations, we
486 # If there's some overlap in the rename destinations, we
486 # consider it not divergent. For example, if side 1 copies 'a'
487 # consider it not divergent. For example, if side 1 copies 'a'
487 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
488 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
488 # and 'd' and deletes 'a'.
489 # and 'd' and deletes 'a'.
489 if dsts1 & dsts2:
490 if dsts1 & dsts2:
490 for dst in dsts1 & dsts2:
491 for dst in dsts1 & dsts2:
491 copy[dst] = src
492 copy[dst] = src
492 else:
493 else:
493 diverge[src] = sorted(dsts1 | dsts2)
494 diverge[src] = sorted(dsts1 | dsts2)
494 elif src in m1 and src in m2:
495 elif src in m1 and src in m2:
495 # copied on both sides
496 # copied on both sides
496 dsts1 = set(dsts1)
497 dsts1 = set(dsts1)
497 dsts2 = set(dsts2)
498 dsts2 = set(dsts2)
498 for dst in dsts1 & dsts2:
499 for dst in dsts1 & dsts2:
499 copy[dst] = src
500 copy[dst] = src
500 # TODO: Handle cases where it was renamed on one side and copied
501 # TODO: Handle cases where it was renamed on one side and copied
501 # on the other side
502 # on the other side
502 elif dsts1:
503 elif dsts1:
503 # copied/renamed only on side 1
504 # copied/renamed only on side 1
504 _checksinglesidecopies(
505 _checksinglesidecopies(
505 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
506 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
506 )
507 )
507 elif dsts2:
508 elif dsts2:
508 # copied/renamed only on side 2
509 # copied/renamed only on side 2
509 _checksinglesidecopies(
510 _checksinglesidecopies(
510 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
511 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
511 )
512 )
512
513
513 renamedeleteset = set()
514 renamedeleteset = set()
514 divergeset = set()
515 divergeset = set()
515 for dsts in diverge.values():
516 for dsts in diverge.values():
516 divergeset.update(dsts)
517 divergeset.update(dsts)
517 for dsts in renamedelete.values():
518 for dsts in renamedelete.values():
518 renamedeleteset.update(dsts)
519 renamedeleteset.update(dsts)
519
520
520 # find interesting file sets from manifests
521 # find interesting file sets from manifests
521 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
522 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
522 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
523 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
523 u1 = sorted(addedinm1 - addedinm2)
524 u1 = sorted(addedinm1 - addedinm2)
524 u2 = sorted(addedinm2 - addedinm1)
525 u2 = sorted(addedinm2 - addedinm1)
525
526
526 header = b" unmatched files in %s"
527 header = b" unmatched files in %s"
527 if u1:
528 if u1:
528 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
529 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
529 if u2:
530 if u2:
530 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
531 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
531
532
532 fullcopy = copies1.copy()
533 fullcopy = copies1.copy()
533 fullcopy.update(copies2)
534 fullcopy.update(copies2)
534 if not fullcopy:
535 if not fullcopy:
535 return copy, {}, diverge, renamedelete, {}
536 return copy, {}, diverge, renamedelete, {}
536
537
537 if repo.ui.debugflag:
538 if repo.ui.debugflag:
538 repo.ui.debug(
539 repo.ui.debug(
539 b" all copies found (* = to merge, ! = divergent, "
540 b" all copies found (* = to merge, ! = divergent, "
540 b"% = renamed and deleted):\n"
541 b"% = renamed and deleted):\n"
541 )
542 )
542 for f in sorted(fullcopy):
543 for f in sorted(fullcopy):
543 note = b""
544 note = b""
544 if f in copy:
545 if f in copy:
545 note += b"*"
546 note += b"*"
546 if f in divergeset:
547 if f in divergeset:
547 note += b"!"
548 note += b"!"
548 if f in renamedeleteset:
549 if f in renamedeleteset:
549 note += b"%"
550 note += b"%"
550 repo.ui.debug(
551 repo.ui.debug(
551 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
552 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
552 )
553 )
553 del divergeset
554 del divergeset
554
555
555 repo.ui.debug(b" checking for directory renames\n")
556 repo.ui.debug(b" checking for directory renames\n")
556
557
557 # generate a directory move map
558 # generate a directory move map
558 d1, d2 = c1.dirs(), c2.dirs()
559 d1, d2 = c1.dirs(), c2.dirs()
559 invalid = set()
560 invalid = set()
560 dirmove = {}
561 dirmove = {}
561
562
562 # examine each file copy for a potential directory move, which is
563 # examine each file copy for a potential directory move, which is
563 # when all the files in a directory are moved to a new directory
564 # when all the files in a directory are moved to a new directory
564 for dst, src in pycompat.iteritems(fullcopy):
565 for dst, src in pycompat.iteritems(fullcopy):
565 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
566 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
566 if dsrc in invalid:
567 if dsrc in invalid:
567 # already seen to be uninteresting
568 # already seen to be uninteresting
568 continue
569 continue
569 elif dsrc in d1 and ddst in d1:
570 elif dsrc in d1 and ddst in d1:
570 # directory wasn't entirely moved locally
571 # directory wasn't entirely moved locally
571 invalid.add(dsrc)
572 invalid.add(dsrc)
572 elif dsrc in d2 and ddst in d2:
573 elif dsrc in d2 and ddst in d2:
573 # directory wasn't entirely moved remotely
574 # directory wasn't entirely moved remotely
574 invalid.add(dsrc)
575 invalid.add(dsrc)
575 elif dsrc in dirmove and dirmove[dsrc] != ddst:
576 elif dsrc in dirmove and dirmove[dsrc] != ddst:
576 # files from the same directory moved to two different places
577 # files from the same directory moved to two different places
577 invalid.add(dsrc)
578 invalid.add(dsrc)
578 else:
579 else:
579 # looks good so far
580 # looks good so far
580 dirmove[dsrc] = ddst
581 dirmove[dsrc] = ddst
581
582
582 for i in invalid:
583 for i in invalid:
583 if i in dirmove:
584 if i in dirmove:
584 del dirmove[i]
585 del dirmove[i]
585 del d1, d2, invalid
586 del d1, d2, invalid
586
587
587 if not dirmove:
588 if not dirmove:
588 return copy, {}, diverge, renamedelete, {}
589 return copy, {}, diverge, renamedelete, {}
589
590
590 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
591 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
591
592
592 for d in dirmove:
593 for d in dirmove:
593 repo.ui.debug(
594 repo.ui.debug(
594 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
595 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
595 )
596 )
596
597
597 movewithdir = {}
598 movewithdir = {}
598 # check unaccounted nonoverlapping files against directory moves
599 # check unaccounted nonoverlapping files against directory moves
599 for f in u1 + u2:
600 for f in u1 + u2:
600 if f not in fullcopy:
601 if f not in fullcopy:
601 for d in dirmove:
602 for d in dirmove:
602 if f.startswith(d):
603 if f.startswith(d):
603 # new file added in a directory that was moved, move it
604 # new file added in a directory that was moved, move it
604 df = dirmove[d] + f[len(d) :]
605 df = dirmove[d] + f[len(d) :]
605 if df not in copy:
606 if df not in copy:
606 movewithdir[f] = df
607 movewithdir[f] = df
607 repo.ui.debug(
608 repo.ui.debug(
608 b" pending file src: '%s' -> dst: '%s'\n"
609 b" pending file src: '%s' -> dst: '%s'\n"
609 % (f, df)
610 % (f, df)
610 )
611 )
611 break
612 break
612
613
613 return copy, movewithdir, diverge, renamedelete, dirmove
614 return copy, movewithdir, diverge, renamedelete, dirmove
614
615
615
616
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working contexts have rev() == None; substitute their first parent so
    # the revset queries below operate on real revisions
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched on the (expected linear) chain from c2 down
    # to base; bail out to the full algorithm as soon as a merge is seen
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only base->c2 copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index the files newly introduced on the c1 side by basename and by
        # dirname; a missing file's rename candidates share one of the two
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            # keep every candidate sharing file history with the missing file
            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    # same 5-tuple shape as _fullcopytracing: the heuristics never produce
    # movewithdir/diverge/renamedelete/dirmove information
    return copies, {}, {}, {}, {}
723
724
724
725
725 def _related(f1, f2):
726 def _related(f1, f2):
726 """return True if f1 and f2 filectx have a common ancestor
727 """return True if f1 and f2 filectx have a common ancestor
727
728
728 Walk back to common ancestor to see if the two files originate
729 Walk back to common ancestor to see if the two files originate
729 from the same file. Since workingfilectx's rev() is None it messes
730 from the same file. Since workingfilectx's rev() is None it messes
730 up the integer comparison logic, hence the pre-step check for
731 up the integer comparison logic, hence the pre-step check for
731 None (f1 and f2 can only be workingfilectx's initially).
732 None (f1 and f2 can only be workingfilectx's initially).
732 """
733 """
733
734
734 if f1 == f2:
735 if f1 == f2:
735 return True # a match
736 return True # a match
736
737
737 g1, g2 = f1.ancestors(), f2.ancestors()
738 g1, g2 = f1.ancestors(), f2.ancestors()
738 try:
739 try:
739 f1r, f2r = f1.linkrev(), f2.linkrev()
740 f1r, f2r = f1.linkrev(), f2.linkrev()
740
741
741 if f1r is None:
742 if f1r is None:
742 f1 = next(g1)
743 f1 = next(g1)
743 if f2r is None:
744 if f2r is None:
744 f2 = next(g2)
745 f2 = next(g2)
745
746
746 while True:
747 while True:
747 f1r, f2r = f1.linkrev(), f2.linkrev()
748 f1r, f2r = f1.linkrev(), f2.linkrev()
748 if f1r > f2r:
749 if f1r > f2r:
749 f1 = next(g1)
750 f1 = next(g1)
750 elif f2r > f1r:
751 elif f2r > f1r:
751 f2 = next(g2)
752 f2 = next(g2)
752 else: # f1 and f2 point to files in the same linkrev
753 else: # f1 and f2 point to files in the same linkrev
753 return f1 == f2 # true if they point to the same file
754 return f1 == f2 # true if they point to the same file
754 except StopIteration:
755 except StopIteration:
755 return False
756 return False
756
757
757
758
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    # destinations whose copy records must NOT be re-applied
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    # compute the exclusion set only when copytracing is enabled
    # ('heuristics', an explicit true value, or unset which defaults on)
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    # replay each surviving copy record onto the working context; only
    # files actually present in wctx can be marked
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            continue
        if dst in wctx:
            wctx[dst].markcopied(src)
782
783
783
784
def computechangesetfilesadded(ctx):
    """Return the list of files added in a changeset.

    A file counts as added when it is listed in ctx.files() but present
    in none of the changeset's parents.
    """
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
792
793
793
794
def computechangesetfilesremoved(ctx):
    """Return the list of files removed in a changeset.

    A file counts as removed when it is listed in ctx.files() but is no
    longer present in the changeset itself.
    """
    return [f for f in ctx.files() if f not in ctx]
802
803
803
804
def computechangesetcopies(ctx):
    """Return the copies data for a changeset.

    The copies data are returned as a pair of dictionaries
    (p1copies, p2copies), each of the form `{newname: oldname}`.
    """
    copiesfromp1 = {}
    copiesfromp2 = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # ignore files outside the narrowspec and files absent from ctx
        if not narrowmatch(dst):
            continue
        if dst not in ctx:
            continue
        rename = ctx[dst].renamed()
        if rename:
            src, srcnode = rename
            # attribute the copy to whichever parent holds the exact
            # source file node; p1 wins when both parents would match
            if src in parent1 and parent1[src].filenode() == srcnode:
                copiesfromp1[dst] = src
            elif src in parent2 and parent2[src].filenode() == srcnode:
                copiesfromp2[dst] = src
    return copiesfromp1, copiesfromp2
828
829
829
830
def encodecopies(files, copies):
    """Encode a copies dict ({dst: src}) against the *files* list.

    Each copy becomes "<index>\\0<source>" where <index> is the position
    of the destination in *files*; entries are newline-joined in file
    order. Raises ProgrammingError when a copy destination is missing
    from *files*.
    """
    encoded = [
        b'%d\0%s' % (idx, copies[dst])
        for idx, dst in enumerate(files)
        if dst in copies
    ]
    if len(encoded) != len(copies):
        # at least one copy target never appeared in the file list
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(encoded)
840
841
841
842
def decodecopies(files, data):
    """Decode copies *data* produced by encodecopies back into a dict.

    Returns {dst: src}, an empty dict for empty *data*, or None when
    the payload does not parse.
    """
    if not data:
        return {}
    copies = {}
    try:
        for line in data.split(b'\n'):
            strindex, src = line.split(b'\0')
            copies[files[int(strindex)]] = src
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
    return copies
857
858
858
859
def encodefileindices(files, subset):
    """Encode the positions of *subset* members within *files*.

    Returns newline-joined decimal indices, in *files* order.
    """
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, f in enumerate(files) if f in wanted
    )
866
867
867
868
def decodefileindices(files, data):
    """Decode index *data* produced by encodefileindices.

    Returns the corresponding subset of *files*, an empty list for
    empty *data*, or None when the payload does not parse or an index
    is out of range.
    """
    if not data:
        return []
    subset = []
    try:
        for chunk in data.split(b'\n'):
            idx = int(chunk)
            # reject out-of-range indices instead of wrapping negatives
            if not 0 <= idx < len(files):
                return None
            subset.append(files[idx])
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
    return subset
883
884
884
885
def _getsidedata(srcrepo, rev):
    """Compute the copies-related sidedata for revision *rev* of *srcrepo*.

    Returns a {category: encoded-bytes} mapping; empty when the changeset
    carries no copy, added-file or removed-file information.
    """
    ctx = srcrepo[rev]
    p1copies, p2copies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # BUG FIX: the previous guard tested the (p1copies, p2copies) tuple
    # itself, which is always truthy, so the encoding work below ran even
    # for changesets with no copy metadata. Test the four collections
    # individually so empty changesets skip it entirely (same result:
    # nothing would have been stored anyway).
    if any([p1copies, p2copies, filesadded, filesremoved]):
        # all encoders index into the same sorted file list
        sortedfiles = sorted(ctx.files())
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        # only store non-empty payloads
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
907
908
908
909
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion callback that adds copies sidedata.

    The companion computes copy-tracing sidedata for changelog revisions;
    other revlogs get empty sidedata. It never requests rewrites or
    removals (first two elements of the returned tuple).
    """

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        # only the changelog exposes a `filteredrevs` attribute; other
        # revlogs carry no copies sidedata
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion
917
918
918
919
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion callback that strips copies sidedata.

    The companion requests removal of the four copy-tracing sidedata
    categories from changelog revisions flagged as carrying sidedata;
    all other revisions and revlogs are left untouched.
    """

    def sidedatacompanion(revlog, rev):
        toremove = ()
        # only the changelog exposes a `filteredrevs` attribute
        ischangelog = util.safehasattr(revlog, 'filteredrevs')
        if ischangelog and revlog.flags(rev) & REVIDX_SIDEDATA:
            toremove = (
                sidedatamod.SD_P1COPIES,
                sidedatamod.SD_P2COPIES,
                sidedatamod.SD_FILESADDED,
                sidedatamod.SD_FILESREMOVED,
            )
        return False, toremove, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now