##// END OF EJS Templates
copies: fix some documentation typos...
Matt Harbison -
r49307:19e6446c default
parent child Browse files
Show More
@@ -1,1305 +1,1305 b''
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import nullrev
15 from .node import nullrev
16
16
17 from . import (
17 from . import (
18 match as matchmod,
18 match as matchmod,
19 pathutil,
19 pathutil,
20 policy,
20 policy,
21 pycompat,
21 pycompat,
22 util,
22 util,
23 )
23 )
24
24
25
25
26 from .utils import stringutil
26 from .utils import stringutil
27
27
28 from .revlogutils import (
28 from .revlogutils import (
29 flagutil,
29 flagutil,
30 sidedata as sidedatamod,
30 sidedata as sidedatamod,
31 )
31 )
32
32
33 rustmod = policy.importrust("copy_tracing")
33 rustmod = policy.importrust("copy_tracing")
34
34
35
35
36 def _filter(src, dst, t):
36 def _filter(src, dst, t):
37 """filters out invalid copies after chaining"""
37 """filters out invalid copies after chaining"""
38
38
39 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
39 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
40 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
40 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
41 # in the following table (not including trivial cases). For example, case 6
41 # in the following table (not including trivial cases). For example, case 6
42 # is where a file existed in 'src' and remained under that name in 'mid' and
42 # is where a file existed in 'src' and remained under that name in 'mid' and
43 # then was renamed between 'mid' and 'dst'.
43 # then was renamed between 'mid' and 'dst'.
44 #
44 #
45 # case src mid dst result
45 # case src mid dst result
46 # 1 x y - -
46 # 1 x y - -
47 # 2 x y y x->y
47 # 2 x y y x->y
48 # 3 x y x -
48 # 3 x y x -
49 # 4 x y z x->z
49 # 4 x y z x->z
50 # 5 - x y -
50 # 5 - x y -
51 # 6 x x y x->y
51 # 6 x x y x->y
52 #
52 #
53 # _chain() takes care of chaining the copies in 'a' and 'b', but it
53 # _chain() takes care of chaining the copies in 'a' and 'b', but it
54 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
54 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
55 # between 5 and 6, so it includes all cases in its result.
55 # between 5 and 6, so it includes all cases in its result.
56 # Cases 1, 3, and 5 are then removed by _filter().
56 # Cases 1, 3, and 5 are then removed by _filter().
57
57
58 for k, v in list(t.items()):
58 for k, v in list(t.items()):
59 if k == v: # case 3
59 if k == v: # case 3
60 del t[k]
60 del t[k]
61 elif v not in src: # case 5
61 elif v not in src: # case 5
62 # remove copies from files that didn't exist
62 # remove copies from files that didn't exist
63 del t[k]
63 del t[k]
64 elif k not in dst: # case 1
64 elif k not in dst: # case 1
65 # remove copies to files that were then removed
65 # remove copies to files that were then removed
66 del t[k]
66 del t[k]
67
67
68
68
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    chained = prefix.copy()
    for dest, mid in pycompat.iteritems(suffix):
        # when the intermediate source was itself a copy destination in
        # `prefix`, collapse the chain down to the original source
        chained[dest] = prefix.get(mid, mid)
    return chained
75
75
76
76
77 def _tracefile(fctx, am, basemf):
77 def _tracefile(fctx, am, basemf):
78 """return file context that is the ancestor of fctx present in ancestor
78 """return file context that is the ancestor of fctx present in ancestor
79 manifest am
79 manifest am
80
80
81 Note: we used to try and stop after a given limit, however checking if that
81 Note: we used to try and stop after a given limit, however checking if that
82 limit is reached turned out to be very expensive. we are better off
82 limit is reached turned out to be very expensive. we are better off
83 disabling that feature."""
83 disabling that feature."""
84
84
85 for f in fctx.ancestors():
85 for f in fctx.ancestors():
86 path = f.path()
86 path = f.path()
87 if am.get(path, None) == f.filenode():
87 if am.get(path, None) == f.filenode():
88 return path
88 return path
89 if basemf and basemf.get(path, None) == f.filenode():
89 if basemf and basemf.get(path, None) == f.filenode():
90 return path
90 return path
91
91
92
92
93 def _dirstatecopies(repo, match=None):
93 def _dirstatecopies(repo, match=None):
94 ds = repo.dirstate
94 ds = repo.dirstate
95 c = ds.copies().copy()
95 c = ds.copies().copy()
96 for k in list(c):
96 for k in list(c):
97 if not ds.get_entry(k).tracked or (match and not match(k)):
97 if not ds.get_entry(k).tracked or (match and not match(k)):
98 del c[k]
98 del c[k]
99 return c
99 return c
100
100
101
101
102 def _computeforwardmissing(a, b, match=None):
102 def _computeforwardmissing(a, b, match=None):
103 """Computes which files are in b but not a.
103 """Computes which files are in b but not a.
104 This is its own function so extensions can easily wrap this call to see what
104 This is its own function so extensions can easily wrap this call to see what
105 files _forwardcopies is about to process.
105 files _forwardcopies is about to process.
106 """
106 """
107 ma = a.manifest()
107 ma = a.manifest()
108 mb = b.manifest()
108 mb = b.manifest()
109 return mb.filesnotin(ma, match=match)
109 return mb.filesnotin(ma, match=match)
110
110
111
111
def usechangesetcentricalgo(repo):
    """report whether changeset-centric copy algorithms should be used

    True when the repository stores copy metadata in changeset sidedata,
    or when the experimental `copies.read-from` setting selects changeset
    data over filelog data."""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    source = repo.ui.config(b'experimental', b'copies.read-from')
    return source in (b'changeset-only', b'compatibility')
119
119
120
120
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst: src} mapping of copies made between changeset ``a`` and
    changeset ``b``.  ``base`` (possibly None) supplies an extra manifest
    that a file trace is also allowed to stop at (see _tracefile);
    ``match`` restricts the files considered.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    # the changeset-centric algorithms read copy data from the changesets
    # themselves and skip the filelog-based tracing below entirely
    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().rev() == nullrev:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
        # devel mode: trace every file in `b`, not only the missing ones
        missing = list(b.walk(match))
        # _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(b'debug.copies: searching all files: %d\n' % len(missing))
    else:
        missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(
                b'debug.copies: missing files to search: %d\n'
                % len(missing)
            )

    # one ancestry context (::b, inclusive) is shared by every traced file
    # context below
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
            if debug:
                dbg(
                    b'debug.copies: time: %f seconds\n'
                    % (util.timer() - start)
                )
    return cm
186
186
187
187
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object — or, when the rust extension is in
      use, the raw sidedata blob — and None when the revision carries no
      copy information

    ``match`` is currently unused here; it is accepted so callers can pass
    it uniformly.
    """
    # bind the changelog accessors once; the returned closure is called for
    # every relevant revision
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    if rustmod is not None:

        def revinfo(rev):
            # the rust implementation consumes the raw sidedata directly
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            else:
                raw = None
            return (p1, p2, raw)

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            else:
                changes = None
            return (p1, p2, changes)

    return revinfo
224
224
225
225
def cached_is_ancestor(is_ancestor):
    """wrap ``is_ancestor`` in a memoizing layer

    Trivial cases are answered without touching the cache or the wrapped
    callable: a revision is always its own ancestor, and a revision with a
    higher number can never be the ancestor of a lower-numbered one
    (revision numbers are topologically ordered)."""
    memo = {}

    def _is_ancestor(anc, desc):
        if anc == desc:
            return True
        if anc > desc:
            return False
        key = (anc, desc)
        if key not in memo:
            memo[key] = is_ancestor(anc, desc)
        return memo[key]

    return _is_ancestor
242
242
243
243
def _changesetforwardcopies(a, b, match):
    """return a {dst@b: src@a} copy mapping using changeset-centric data

    Copy information is read from the changesets (or their sidedata)
    instead of being traced through filelogs; see usechangesetcentricalgo().
    """
    if a.rev() in (nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track a rename from "A" to "B", we need to gather all parent → child
    # edges that are contained in `::B` but not in `::A`.
    #
    #
    # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
    # ::a`) and also all the "root points", ie the parents of the exclusive
    # set that belong to ::a. These are exactly all the revisions needed to
    # express the parent → child edges we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
    # excluding paths that lead to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without this
    # special case..

    parents = cl._uncheckedparentrevs
    # parent pair of a revision attached to nothing (both parents null)
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False
    multi_thread = repo.ui.configbool(b'devel', b'copy-tracing.multi-thread')

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deals with the special case mentioned in the [1] footnote. We
        # must filter out revisions that lead to non-common graphroots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previously collected data
        # for the parents. To know when the parents data is no longer
        # necessary, we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only contains
        # revisions that will be relevant for an edge of the graph. So if a
        # child has a parent not in `children_count`, that edge should not be
        # processed.
        children_count = dict((r, 0) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        # keep the changelog open for the whole combination: the revinfo
        # callback reads changelog revisions
        with repo.changelog.reading():
            return _combine_changeset_copies(
                revs,
                children_count,
                b.rev(),
                revinfo,
                match,
                isancestor,
                multi_thread,
            )
    else:
        # When not using side-data, we will process the edges "from" the
        # parent, so we need a full mapping of the parent -> children relation.
        children = dict((r, []) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children[r] = []
                if p in children:
                    children[p].append(r)
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
350
350
351
351
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor, multi_thread
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that returns (p1, p2, changes) for a revision
        (see _revinfo_getter)
    match: a matcher
    isancestor(low_rev, high_rev): callable returning True when `low_rev`
        is an ancestor of `high_rev`; used to resolve conflicting copy info
    multi_thread: whether the rust implementation may use several threads

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        # delegate the whole combination to the rust implementation
        final_copies = rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, multi_thread
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        # {rev: {dest: (introducing_rev, source-or-None)}} for revisions
        # whose data is still needed by unprocessed children
        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent → child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    # this edge is not relevant to the target revision
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    copies = all_copies.get(parent_rev, None)
                else:
                    # last child of this parent: the parent's data can be
                    # dropped from `all_copies` (and mutated freely)
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    newcopies = copies = {}
                elif remaining_children:
                    # other children still need `copies`: work on a copy
                    newcopies = copies.copy()
                else:
                    newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in pycompat.iteritems(childcopies):
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                # collapse chained copies to their original
                                # source
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting potential other
                                    # branches. when there are no other branches, this
                                    # could be avoided.
                                    newcopies = copies.copy()
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another parent (for
                # that child). See comment below for details.
                if current_copies is None:
                    current_copies = newcopies
                else:
                    # we are the second parent to work on c, we need to merge our
                    # work with the other.
                    #
                    # In case of conflict, parent 1 takes precedence over parent 2.
                    # This is an arbitrary choice made anew when implementing
                    # changeset based copies. It was made without regards with
                    # potential filelog related behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies,
                        current_copies,
                        isancestor,
                        changes,
                        current_rev,
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source mapping}
        final_copies = {}
        for dest, (tt, source) in all_copies[targetrev].items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        for filename in list(final_copies.keys()):
            if not match(filename):
                del final_copies[filename]
    return final_copies
459
459
460
460
# constant to decide which side to pick with _merge_copies_dict
# (returned by _compare_values as the first element of its result)
PICK_MINOR = 0
PICK_MAJOR = 1
PICK_EITHER = 2
465
465
466
466
def _merge_copies_dict(minor, major, isancestor, changes, current_merge):
    """merge two copies-mapping together, minor and major

    In case of conflict, value from "major" will be picked.

    - `isancestor(low_rev, high_rev)`: callable returning True if `low_rev`
      is an ancestor of `high_rev`,

    - `changes`: the ChangingFiles data for the merge being processed
      (may be None), forwarded to _compare_values,

    - `current_merge`: the revision number of the merge currently being
      processed; conflicting entries that needed resolution are re-stamped
      with it,

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            # no conflict: simply adopt the value from "major"
            minor[dest] = value
        else:
            # both sides know about `dest`: decide which value wins and
            # whether the winner must be re-stamped with the merge revision
            pick, overwrite = _compare_values(
                changes, isancestor, dest, other, value
            )
            if overwrite:
                if pick == PICK_MAJOR:
                    minor[dest] = (current_merge, value[1])
                else:
                    minor[dest] = (current_merge, other[1])
            elif pick == PICK_MAJOR:
                minor[dest] = value
    return minor
496
496
497
497
def _compare_values(changes, isancestor, dest, minor, major):
    """compare two values within a _merge_copies_dict loop iteration

    return (pick, overwrite).

    - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
    - overwrite is True if pick is the result of an ambiguity that needs resolution.
    """
    # each value is a (revision-of-record, copy-source-or-None) pair;
    # a None copy source marks the destination as deleted on that side
    major_tt, major_value = major
    minor_tt, minor_value = minor

    if major_tt == minor_tt:
        # if it comes from the same revision it must be the same value
        assert major_value == minor_value
        return PICK_EITHER, False
    elif (
        changes is not None
        and minor_value is not None
        and major_value is None
        and dest in changes.salvaged
    ):
        # In this case, a deletion was reverted, the "alive" value overwrite
        # the deleted one.
        return PICK_MINOR, True
    elif (
        changes is not None
        and major_value is not None
        and minor_value is None
        and dest in changes.salvaged
    ):
        # In this case, a deletion was reverted, the "alive" value overwrite
        # the deleted one.
        return PICK_MAJOR, True
    elif isancestor(minor_tt, major_tt):
        if changes is not None and dest in changes.merged:
            # change to dest happened on the branch without copy-source change,
            # so both source are valid and "major" wins.
            return PICK_MAJOR, True
        else:
            return PICK_MAJOR, False
    elif isancestor(major_tt, minor_tt):
        if changes is not None and dest in changes.merged:
            # change to dest happened on the branch without copy-source change,
            # so both source are valid and "major" wins.
            return PICK_MAJOR, True
        else:
            return PICK_MINOR, False
    elif minor_value is None:
        # in case of conflict, the "alive" side wins.
        return PICK_MAJOR, True
    elif major_value is None:
        # in case of conflict, the "alive" side wins.
        return PICK_MINOR, True
    else:
        # in case of conflict where both side are alive, major wins.
        return PICK_MAJOR, True
554
554
555
555
556 def _revinfo_getter_extra(repo):
556 def _revinfo_getter_extra(repo):
557 """return a function that return multiple data given a <rev>"i
557 """return a function that return multiple data given a <rev>"i
558
558
559 * p1: revision number of first parent
559 * p1: revision number of first parent
560 * p2: revision number of first parent
560 * p2: revision number of first parent
561 * p1copies: mapping of copies from p1
561 * p1copies: mapping of copies from p1
562 * p2copies: mapping of copies from p2
562 * p2copies: mapping of copies from p2
563 * removed: a list of removed files
563 * removed: a list of removed files
564 * ismerged: a callback to know if file was merged in that revision
564 * ismerged: a callback to know if file was merged in that revision
565 """
565 """
566 cl = repo.changelog
566 cl = repo.changelog
567 parents = cl.parentrevs
567 parents = cl.parentrevs
568
568
569 def get_ismerged(rev):
569 def get_ismerged(rev):
570 ctx = repo[rev]
570 ctx = repo[rev]
571
571
572 def ismerged(path):
572 def ismerged(path):
573 if path not in ctx.files():
573 if path not in ctx.files():
574 return False
574 return False
575 fctx = ctx[path]
575 fctx = ctx[path]
576 parents = fctx._filelog.parents(fctx._filenode)
576 parents = fctx._filelog.parents(fctx._filenode)
577 nb_parents = 0
577 nb_parents = 0
578 for n in parents:
578 for n in parents:
579 if n != repo.nullid:
579 if n != repo.nullid:
580 nb_parents += 1
580 nb_parents += 1
581 return nb_parents >= 2
581 return nb_parents >= 2
582
582
583 return ismerged
583 return ismerged
584
584
585 def revinfo(rev):
585 def revinfo(rev):
586 p1, p2 = parents(rev)
586 p1, p2 = parents(rev)
587 ctx = repo[rev]
587 ctx = repo[rev]
588 p1copies, p2copies = ctx._copies
588 p1copies, p2copies = ctx._copies
589 removed = ctx.filesremoved()
589 removed = ctx.filesremoved()
590 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
590 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
591
591
592 return revinfo
592 return revinfo
593
593
594
594
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information"""
    # all_copies maps revision -> {dest: (introducing-rev, source-or-None)},
    # the copy information accumulated from the roots down to that revision
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            # pick the copies recorded against the parent we are walking from
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain through an earlier rename of the source, if any
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # a None source marks the copy as no longer alive
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    # keep only the copies still alive (source is not None) at the target
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
663
663
664
664
665 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
665 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
666 """version of `_merge_copies_dict` that works with the Google
666 """version of `_merge_copies_dict` that works with the Google
667 specific "extra" based storage for copy information"""
667 specific "extra" based storage for copy information"""
668 for dest, value in major.items():
668 for dest, value in major.items():
669 other = minor.get(dest)
669 other = minor.get(dest)
670 if other is None:
670 if other is None:
671 minor[dest] = value
671 minor[dest] = value
672 else:
672 else:
673 new_tt = value[0]
673 new_tt = value[0]
674 other_tt = other[0]
674 other_tt = other[0]
675 if value[1] == other[1]:
675 if value[1] == other[1]:
676 continue
676 continue
677 # content from "major" wins, unless it is older
677 # content from "major" wins, unless it is older
678 # than the branch point or there is a merge
678 # than the branch point or there is a merge
679 if (
679 if (
680 new_tt == other_tt
680 new_tt == other_tt
681 or not isancestor(new_tt, other_tt)
681 or not isancestor(new_tt, other_tt)
682 or ismerged(dest)
682 or ismerged(dest)
683 ):
683 ):
684 minor[dest] = value
684 minor[dest] = value
685
685
686
686
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # a None revision identifies the working copy
    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)
    # for the working copy: compute copies up to its first parent, then
    # layer the dirstate-recorded copies on top of them
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
701
701
702
702
def _backwardrenames(a, b, match):
    """find renames from a to b"""
    copytrace = a._repo.ui.config(b'experimental', b'copytrace')
    if copytrace == b'off':
        return {}

    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    return _reverse_renames(forward, a, match)
713
713
714
714
def _reverse_renames(copies, dst, match):
    """given copies to context 'dst', finds renames from that context"""
    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    renames = {}
    for dest, source in sorted(pycompat.iteritems(copies)):
        if match and not match(source):
            continue
        if source in dst:
            # the source still exists in 'dst': that was a copy, not a rename
            continue
        renames[source] = dest
    return renames
729
729
730
730
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        # y descends from x: walk copies forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        # x descends from y: walk copies backward (reversed renames)
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # x and y are on separate branches: combine through their ancestor
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != nullrev:
            base = x
        x_copies = _forwardcopies(a, x)
        y_copies = _forwardcopies(a, y, base, match=match)
        # drop copies recorded identically on both sides; they would cancel out
        same_keys = set(x_copies) & set(y_copies)
        for k in same_keys:
            if x_copies.get(k) == y_copies.get(k):
                del x_copies[k]
                del y_copies[k]
        x_backward_renames = _reverse_renames(x_copies, x, match)
        copies = _chain(
            x_backward_renames,
            y_copies,
        )
    _filter(x, y, copies)
    return copies
775
775
776
776
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
845
845
846
846
847 def _isfullcopytraceable(repo, c1, base):
847 def _isfullcopytraceable(repo, c1, base):
848 """Checks that if base, source and destination are all no-public branches,
848 """Checks that if base, source and destination are all no-public branches,
849 if yes let's use the full copytrace algorithm for increased capabilities
849 if yes let's use the full copytrace algorithm for increased capabilities
850 since it will be fast enough.
850 since it will be fast enough.
851
851
852 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
852 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
853 number of changesets from c1 to base such that if number of changesets are
853 number of changesets from c1 to base such that if number of changesets are
854 more than the limit, full copytracing algorithm won't be used.
854 more than the limit, full copytracing algorithm won't be used.
855 """
855 """
856 if c1.rev() is None:
856 if c1.rev() is None:
857 c1 = c1.p1()
857 c1 = c1.p1()
858 if c1.mutable() and base.mutable():
858 if c1.mutable() and base.mutable():
859 sourcecommitlimit = repo.ui.configint(
859 sourcecommitlimit = repo.ui.configint(
860 b'experimental', b'copytrace.sourcecommitlimit'
860 b'experimental', b'copytrace.sourcecommitlimit'
861 )
861 )
862 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
862 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
863 return commits < sourcecommitlimit
863 return commits < sourcecommitlimit
864 return False
864 return False
865
865
866
866
867 def _checksinglesidecopies(
867 def _checksinglesidecopies(
868 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
868 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
869 ):
869 ):
870 if src not in m2:
870 if src not in m2:
871 # deleted on side 2
871 # deleted on side 2
872 if src not in m1:
872 if src not in m1:
873 # renamed on side 1, deleted on side 2
873 # renamed on side 1, deleted on side 2
874 renamedelete[src] = dsts1
874 renamedelete[src] = dsts1
875 elif src not in mb:
875 elif src not in mb:
876 # Work around the "short-circuit to avoid issues with merge states"
876 # Work around the "short-circuit to avoid issues with merge states"
877 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
877 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
878 # destination doesn't exist in y.
878 # destination doesn't exist in y.
879 pass
879 pass
880 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
880 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
881 return
881 return
882 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
882 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
883 # modified on side 2
883 # modified on side 2
884 for dst in dsts1:
884 for dst in dsts1:
885 copy[dst] = src
885 copy[dst] = src
886
886
887
887
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # each mapping defaults to a fresh, independent dict when omitted
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
922
922
923
923
924 def _fullcopytracing(repo, c1, c2, base):
924 def _fullcopytracing(repo, c1, c2, base):
925 """The full copytracing algorithm which finds all the new files that were
925 """The full copytracing algorithm which finds all the new files that were
926 added from merge base up to the top commit and for each file it checks if
926 added from merge base up to the top commit and for each file it checks if
927 this file was copied from another file.
927 this file was copied from another file.
928
928
929 This is pretty slow when a lot of changesets are involved but will track all
929 This is pretty slow when a lot of changesets are involved but will track all
930 the copies.
930 the copies.
931 """
931 """
932 m1 = c1.manifest()
932 m1 = c1.manifest()
933 m2 = c2.manifest()
933 m2 = c2.manifest()
934 mb = base.manifest()
934 mb = base.manifest()
935
935
936 copies1 = pathcopies(base, c1)
936 copies1 = pathcopies(base, c1)
937 copies2 = pathcopies(base, c2)
937 copies2 = pathcopies(base, c2)
938
938
939 if not (copies1 or copies2):
939 if not (copies1 or copies2):
940 return branch_copies(), branch_copies(), {}
940 return branch_copies(), branch_copies(), {}
941
941
942 inversecopies1 = {}
942 inversecopies1 = {}
943 inversecopies2 = {}
943 inversecopies2 = {}
944 for dst, src in copies1.items():
944 for dst, src in copies1.items():
945 inversecopies1.setdefault(src, []).append(dst)
945 inversecopies1.setdefault(src, []).append(dst)
946 for dst, src in copies2.items():
946 for dst, src in copies2.items():
947 inversecopies2.setdefault(src, []).append(dst)
947 inversecopies2.setdefault(src, []).append(dst)
948
948
949 copy1 = {}
949 copy1 = {}
950 copy2 = {}
950 copy2 = {}
951 diverge = {}
951 diverge = {}
952 renamedelete1 = {}
952 renamedelete1 = {}
953 renamedelete2 = {}
953 renamedelete2 = {}
954 allsources = set(inversecopies1) | set(inversecopies2)
954 allsources = set(inversecopies1) | set(inversecopies2)
955 for src in allsources:
955 for src in allsources:
956 dsts1 = inversecopies1.get(src)
956 dsts1 = inversecopies1.get(src)
957 dsts2 = inversecopies2.get(src)
957 dsts2 = inversecopies2.get(src)
958 if dsts1 and dsts2:
958 if dsts1 and dsts2:
959 # copied/renamed on both sides
959 # copied/renamed on both sides
960 if src not in m1 and src not in m2:
960 if src not in m1 and src not in m2:
961 # renamed on both sides
961 # renamed on both sides
962 dsts1 = set(dsts1)
962 dsts1 = set(dsts1)
963 dsts2 = set(dsts2)
963 dsts2 = set(dsts2)
964 # If there's some overlap in the rename destinations, we
964 # If there's some overlap in the rename destinations, we
965 # consider it not divergent. For example, if side 1 copies 'a'
965 # consider it not divergent. For example, if side 1 copies 'a'
966 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
966 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
967 # and 'd' and deletes 'a'.
967 # and 'd' and deletes 'a'.
968 if dsts1 & dsts2:
968 if dsts1 & dsts2:
969 for dst in dsts1 & dsts2:
969 for dst in dsts1 & dsts2:
970 copy1[dst] = src
970 copy1[dst] = src
971 copy2[dst] = src
971 copy2[dst] = src
972 else:
972 else:
973 diverge[src] = sorted(dsts1 | dsts2)
973 diverge[src] = sorted(dsts1 | dsts2)
974 elif src in m1 and src in m2:
974 elif src in m1 and src in m2:
975 # copied on both sides
975 # copied on both sides
976 dsts1 = set(dsts1)
976 dsts1 = set(dsts1)
977 dsts2 = set(dsts2)
977 dsts2 = set(dsts2)
978 for dst in dsts1 & dsts2:
978 for dst in dsts1 & dsts2:
979 copy1[dst] = src
979 copy1[dst] = src
980 copy2[dst] = src
980 copy2[dst] = src
981 # TODO: Handle cases where it was renamed on one side and copied
981 # TODO: Handle cases where it was renamed on one side and copied
982 # on the other side
982 # on the other side
983 elif dsts1:
983 elif dsts1:
984 # copied/renamed only on side 1
984 # copied/renamed only on side 1
985 _checksinglesidecopies(
985 _checksinglesidecopies(
986 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
986 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
987 )
987 )
988 elif dsts2:
988 elif dsts2:
989 # copied/renamed only on side 2
989 # copied/renamed only on side 2
990 _checksinglesidecopies(
990 _checksinglesidecopies(
991 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
991 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
992 )
992 )
993
993
994 # find interesting file sets from manifests
994 # find interesting file sets from manifests
995 cache = []
995 cache = []
996
996
997 def _get_addedfiles(idx):
997 def _get_addedfiles(idx):
998 if not cache:
998 if not cache:
999 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
999 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
1000 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
1000 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
1001 u1 = sorted(addedinm1 - addedinm2)
1001 u1 = sorted(addedinm1 - addedinm2)
1002 u2 = sorted(addedinm2 - addedinm1)
1002 u2 = sorted(addedinm2 - addedinm1)
1003 cache.extend((u1, u2))
1003 cache.extend((u1, u2))
1004 return cache[idx]
1004 return cache[idx]
1005
1005
1006 u1fn = lambda: _get_addedfiles(0)
1006 u1fn = lambda: _get_addedfiles(0)
1007 u2fn = lambda: _get_addedfiles(1)
1007 u2fn = lambda: _get_addedfiles(1)
1008 if repo.ui.debugflag:
1008 if repo.ui.debugflag:
1009 u1 = u1fn()
1009 u1 = u1fn()
1010 u2 = u2fn()
1010 u2 = u2fn()
1011
1011
1012 header = b" unmatched files in %s"
1012 header = b" unmatched files in %s"
1013 if u1:
1013 if u1:
1014 repo.ui.debug(
1014 repo.ui.debug(
1015 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
1015 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
1016 )
1016 )
1017 if u2:
1017 if u2:
1018 repo.ui.debug(
1018 repo.ui.debug(
1019 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1019 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1020 )
1020 )
1021
1021
1022 renamedeleteset = set()
1022 renamedeleteset = set()
1023 divergeset = set()
1023 divergeset = set()
1024 for dsts in diverge.values():
1024 for dsts in diverge.values():
1025 divergeset.update(dsts)
1025 divergeset.update(dsts)
1026 for dsts in renamedelete1.values():
1026 for dsts in renamedelete1.values():
1027 renamedeleteset.update(dsts)
1027 renamedeleteset.update(dsts)
1028 for dsts in renamedelete2.values():
1028 for dsts in renamedelete2.values():
1029 renamedeleteset.update(dsts)
1029 renamedeleteset.update(dsts)
1030
1030
1031 repo.ui.debug(
1031 repo.ui.debug(
1032 b" all copies found (* = to merge, ! = divergent, "
1032 b" all copies found (* = to merge, ! = divergent, "
1033 b"% = renamed and deleted):\n"
1033 b"% = renamed and deleted):\n"
1034 )
1034 )
1035 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1035 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1036 if not copies:
1036 if not copies:
1037 continue
1037 continue
1038 repo.ui.debug(b" on %s side:\n" % side)
1038 repo.ui.debug(b" on %s side:\n" % side)
1039 for f in sorted(copies):
1039 for f in sorted(copies):
1040 note = b""
1040 note = b""
1041 if f in copy1 or f in copy2:
1041 if f in copy1 or f in copy2:
1042 note += b"*"
1042 note += b"*"
1043 if f in divergeset:
1043 if f in divergeset:
1044 note += b"!"
1044 note += b"!"
1045 if f in renamedeleteset:
1045 if f in renamedeleteset:
1046 note += b"%"
1046 note += b"%"
1047 repo.ui.debug(
1047 repo.ui.debug(
1048 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1048 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1049 )
1049 )
1050 del renamedeleteset
1050 del renamedeleteset
1051 del divergeset
1051 del divergeset
1052
1052
1053 repo.ui.debug(b" checking for directory renames\n")
1053 repo.ui.debug(b" checking for directory renames\n")
1054
1054
1055 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1055 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1056 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1056 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1057
1057
1058 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1058 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1059 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1059 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1060
1060
1061 return branch_copies1, branch_copies2, diverge
1061 return branch_copies1, branch_copies2, diverge
1062
1062
1063
1063
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared to
    ctx)

    Returns a (dirmove, movewithdir) pair:

    dirmove: moved source directory -> destination directory, both keys and
    values carrying a trailing "/"
    movewithdir: file added on the other side -> path it should take because
    its containing directory was moved
    """
    # generate a directory move map
    invalid = set()  # source dirs proven NOT to be clean whole-directory moves
    dirmove = {}  # candidate source dir -> destination dir

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    # a directory may have been recorded as a candidate before being
    # invalidated by a later file; drop those candidates now
    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del invalid

    if not dirmove:
        return {}, {}

    # add trailing slashes so the startswith() matching below can't pair a
    # file with a sibling directory that merely shares a name prefix
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    # Sort the directories in reverse order, so we find children first
    # For example, if dir1/ was renamed to dir2/, and dir1/subdir1/
    # was renamed to dir2/subdir2/, we want to move dir1/subdir1/file
    # to dir2/subdir2/file (not dir2/subdir1/file)
    dirmove_children_first = sorted(dirmove, reverse=True)

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfilesfn():
        if f not in fullcopy:
            for d in dirmove_children_first:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    # deepest matching directory wins; stop searching
                    break

    return dirmove, movewithdir
1133
1133
1134
1134
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If a merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same (branch_copies, branch_copies, diverge) triple as the
    full algorithm.
    """

    # working-copy contexts have rev() None; substitute their first parent
    # so the revset queries below can use integer revisions
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base and c2, bailing out to the
    # full algorithm if any commit on that path is a merge
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only the base->c2 copies whose source still exists on the c1 side
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        # index the files added on the c1 side by basename and by dirname,
        # matching the two rename shapes described in the docstring
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidate moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1242
1242
1243
1243
1244 def _related(f1, f2):
1244 def _related(f1, f2):
1245 """return True if f1 and f2 filectx have a common ancestor
1245 """return True if f1 and f2 filectx have a common ancestor
1246
1246
1247 Walk back to common ancestor to see if the two files originate
1247 Walk back to common ancestor to see if the two files originate
1248 from the same file. Since workingfilectx's rev() is None it messes
1248 from the same file. Since workingfilectx's rev() is None it messes
1249 up the integer comparison logic, hence the pre-step check for
1249 up the integer comparison logic, hence the pre-step check for
1250 None (f1 and f2 can only be workingfilectx's initially).
1250 None (f1 and f2 can only be workingfilectx's initially).
1251 """
1251 """
1252
1252
1253 if f1 == f2:
1253 if f1 == f2:
1254 return True # a match
1254 return True # a match
1255
1255
1256 g1, g2 = f1.ancestors(), f2.ancestors()
1256 g1, g2 = f1.ancestors(), f2.ancestors()
1257 try:
1257 try:
1258 f1r, f2r = f1.linkrev(), f2.linkrev()
1258 f1r, f2r = f1.linkrev(), f2.linkrev()
1259
1259
1260 if f1r is None:
1260 if f1r is None:
1261 f1 = next(g1)
1261 f1 = next(g1)
1262 if f2r is None:
1262 if f2r is None:
1263 f2 = next(g2)
1263 f2 = next(g2)
1264
1264
1265 while True:
1265 while True:
1266 f1r, f2r = f1.linkrev(), f2.linkrev()
1266 f1r, f2r = f1.linkrev(), f2.linkrev()
1267 if f1r > f2r:
1267 if f1r > f2r:
1268 f1 = next(g1)
1268 f1 = next(g1)
1269 elif f2r > f1r:
1269 elif f2r > f1r:
1270 f2 = next(g2)
1270 f2 = next(g2)
1271 else: # f1 and f2 point to files in the same linkrev
1271 else: # f1 and f2 point to files in the same linkrev
1272 return f1 == f2 # true if they point to the same file
1272 return f1 == f2 # true if they point to the same file
1273 except StopIteration:
1273 except StopIteration:
1274 return False
1274 return False
1275
1275
1276
1276
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), only the copies made between base and ctx are
    considered here; copies between base and wctx are ignored. Also unlike
    mergecopies(), the copies are applied directly to the working copy
    rather than merely reported. That keeps the operation cheap (especially
    in the common base==ctx.p1() case) and usable even when
    experimental.copytrace=off.

    merge.update() will already have marked most copies, but it only marks
    a copy when it believes the source files are related (see
    merge._related()) and when the file was modified on the local side;
    this function records the copies that merge.update() "missed".
    """
    copies_to_apply = pathcopies(base, ctx)
    p1ctx = wctx.p1()
    _filter(p1ctx, wctx, copies_to_apply)
    # Also drop copy information for destinations that already existed
    # before the graft.  This handles grafting a rename onto a commit that
    # already contains the rename; keeping the copy metadata would lead to
    # the creation of an empty commit where we would prefer not to create
    # one.
    preexisting = [dest for dest in copies_to_apply if dest in p1ctx]
    for dest in preexisting:
        del copies_to_apply[dest]
    for dest, source in pycompat.iteritems(copies_to_apply):
        wctx[dest].markcopied(source)
General Comments 0
You need to be logged in to leave comments. Login now