##// END OF EJS Templates
copies: rearrange all value comparison conditional...
marmoute -
r47309:c692384b default
parent child Browse files
Show More
@@ -1,1241 +1,1269
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import (
15 from .node import (
16 nullid,
16 nullid,
17 nullrev,
17 nullrev,
18 )
18 )
19
19
20 from . import (
20 from . import (
21 match as matchmod,
21 match as matchmod,
22 pathutil,
22 pathutil,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31 from .revlogutils import (
31 from .revlogutils import (
32 flagutil,
32 flagutil,
33 sidedata as sidedatamod,
33 sidedata as sidedatamod,
34 )
34 )
35
35
36 rustmod = policy.importrust("copy_tracing")
36 rustmod = policy.importrust("copy_tracing")
37
37
38
38
39 def _filter(src, dst, t):
39 def _filter(src, dst, t):
40 """filters out invalid copies after chaining"""
40 """filters out invalid copies after chaining"""
41
41
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 # in the following table (not including trivial cases). For example, case 6
44 # in the following table (not including trivial cases). For example, case 6
45 # is where a file existed in 'src' and remained under that name in 'mid' and
45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 # then was renamed between 'mid' and 'dst'.
46 # then was renamed between 'mid' and 'dst'.
47 #
47 #
48 # case src mid dst result
48 # case src mid dst result
49 # 1 x y - -
49 # 1 x y - -
50 # 2 x y y x->y
50 # 2 x y y x->y
51 # 3 x y x -
51 # 3 x y x -
52 # 4 x y z x->z
52 # 4 x y z x->z
53 # 5 - x y -
53 # 5 - x y -
54 # 6 x x y x->y
54 # 6 x x y x->y
55 #
55 #
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 # between 5 and 6, so it includes all cases in its result.
58 # between 5 and 6, so it includes all cases in its result.
59 # Cases 1, 3, and 5 are then removed by _filter().
59 # Cases 1, 3, and 5 are then removed by _filter().
60
60
61 for k, v in list(t.items()):
61 for k, v in list(t.items()):
62 if k == v: # case 3
62 if k == v: # case 3
63 del t[k]
63 del t[k]
64 elif v not in src: # case 5
64 elif v not in src: # case 5
65 # remove copies from files that didn't exist
65 # remove copies from files that didn't exist
66 del t[k]
66 del t[k]
67 elif k not in dst: # case 1
67 elif k not in dst: # case 1
68 # remove copies to files that were then removed
68 # remove copies to files that were then removed
69 del t[k]
69 del t[k]
70
70
71
71
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    chained = dict(prefix)
    for dest, source in pycompat.iteritems(suffix):
        # when the source of a 'suffix' copy is itself the destination of a
        # 'prefix' copy, follow the chain back one step to the original source
        chained[dest] = prefix.get(source, source)
    return chained
78
78
79
79
80 def _tracefile(fctx, am, basemf):
80 def _tracefile(fctx, am, basemf):
81 """return file context that is the ancestor of fctx present in ancestor
81 """return file context that is the ancestor of fctx present in ancestor
82 manifest am
82 manifest am
83
83
84 Note: we used to try and stop after a given limit, however checking if that
84 Note: we used to try and stop after a given limit, however checking if that
85 limit is reached turned out to be very expensive. we are better off
85 limit is reached turned out to be very expensive. we are better off
86 disabling that feature."""
86 disabling that feature."""
87
87
88 for f in fctx.ancestors():
88 for f in fctx.ancestors():
89 path = f.path()
89 path = f.path()
90 if am.get(path, None) == f.filenode():
90 if am.get(path, None) == f.filenode():
91 return path
91 return path
92 if basemf and basemf.get(path, None) == f.filenode():
92 if basemf and basemf.get(path, None) == f.filenode():
93 return path
93 return path
94
94
95
95
96 def _dirstatecopies(repo, match=None):
96 def _dirstatecopies(repo, match=None):
97 ds = repo.dirstate
97 ds = repo.dirstate
98 c = ds.copies().copy()
98 c = ds.copies().copy()
99 for k in list(c):
99 for k in list(c):
100 if ds[k] not in b'anm' or (match and not match(k)):
100 if ds[k] not in b'anm' or (match and not match(k)):
101 del c[k]
101 del c[k]
102 return c
102 return c
103
103
104
104
105 def _computeforwardmissing(a, b, match=None):
105 def _computeforwardmissing(a, b, match=None):
106 """Computes which files are in b but not a.
106 """Computes which files are in b but not a.
107 This is its own function so extensions can easily wrap this call to see what
107 This is its own function so extensions can easily wrap this call to see what
108 files _forwardcopies is about to process.
108 files _forwardcopies is about to process.
109 """
109 """
110 ma = a.manifest()
110 ma = a.manifest()
111 mb = b.manifest()
111 mb = b.manifest()
112 return mb.filesnotin(ma, match=match)
112 return mb.filesnotin(ma, match=match)
113
113
114
114
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage always implies changeset-centric tracing
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
122
122
123
123
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {destination: source} mapping of renames found by tracing the
    files of `b` back to the manifest of `a` (or of `base`, when provided),
    restricted to `match`.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        # copy data is read from changesets: use the changeset-centric path
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
        # devel knob: trace every file of `b`, not just the ones missing in `a`
        missing = list(b.walk(match))
        # _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(b'debug.copies: searching all files: %d\n' % len(missing))
    else:
        missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(
                b'debug.copies: missing files to search: %d\n'
                % len(missing)
            )

    # computed once here and shared by every fctx traced below
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            # `start` is always defined here: same `debug` guard as above
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
189
189
190
190
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object, or the raw sidedata block when the
      rust implementation of copy tracing is in use, or None when the
      revision carries no copy information

    Note: the `match` argument is currently unused by this helper.
    """
    cl = repo.changelog
    # bound to locals to avoid repeated attribute lookups inside revinfo()
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    if rustmod is not None:

        def revinfo(rev):
            # the rust path consumes the raw sidedata bytes directly
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            else:
                raw = None
            return (p1, p2, raw)

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            else:
                changes = None
            return (p1, p2, changes)

    return revinfo
227
227
228
228
def cached_is_ancestor(is_ancestor):
    """return a memoizing wrapper around the `is_ancestor` callable"""
    memo = {}

    def _is_ancestor(anc, desc):
        # cheap shortcuts: a revision numerically greater than `desc` is
        # never treated as its ancestor, and every revision is considered
        # an (inclusive) ancestor of itself
        if anc > desc:
            return False
        if anc == desc:
            return True
        key = (anc, desc)
        cached = memo.get(key)
        if cached is None:
            cached = memo[key] = is_ancestor(anc, desc)
        return cached

    return _is_ancestor
245
245
246
246
def _changesetforwardcopies(a, b, match):
    """changeset-centric copy tracing from `a` to `b`

    Returns a {destination: source} mapping built from copy information
    stored in changesets, restricted to `match`.
    """
    if a.rev() in (nullrev, b.rev()):
        # tracing from the null revision, or from a revision to itself,
        # cannot yield any copy
        return {}

    repo = a.repo().unfiltered()
    children = {}  # NOTE(review): rebound below before use; kept as-is

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track rename from "A" to B, we need to gather all parent β†’ children
    # edges that are contained in `::B` but not in `::A`.
    #
    #
    # To do so, we need to gather all revisions exclusiveΒΉ to "B" (ieΒΉ: `::b -
    # ::a`) and also all the "roots point", ie the parents of the exclusive set
    # that belong to ::a. These are exactly all the revisions needed to express
    # the parent β†’ children we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
    # excluding paths that lead to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without this
    # special case.

    parents = cl._uncheckedparentrevs
    # a revision whose two parents are null is a root of the graph
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deals with the special case mentioned in the [1] footnote: we
        # must filter out revisions that lead to non-common graphroots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previously collected data
        # for the parents. To know when the parents' data is no longer
        # necessary, we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only contains
        # revisions that will be relevant for an edge of the graph. So if a
        # child has a parent not in `children_count`, that edge should not be
        # processed.
        children_count = dict((r, 0) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                # register `r` itself so that later revisions having `r` as a
                # parent find it in `children_count`
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        return _combine_changeset_copies(
            revs, children_count, b.rev(), revinfo, match, isancestor
        )
    else:
        # When not using side-data, we will process the edges "from" the parent.
        # so we need a full mapping of the parent -> children relation.
        children = dict((r, []) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                # register `r` itself so that later revisions having `r` as a
                # parent can be appended to its list
                children[r] = []
                if p in children:
                    children[p].append(r)
        # drop the final entry (asserted to be b.rev() itself), mix in the
        # roots, and keep the visiting order sorted
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
346
346
347
347
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that returns (p1, p2, changes) for a revision
        (see _revinfo_getter)
    match: a matcher
    isancestor: isancestor(low_rev, high_rev) -> bool callable

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        # delegate the whole combination step to the rust implementation
        final_copies = rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, isancestor
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        # maps still-needed revisions to their {dest: (rev, source-or-None)}
        # copy data; entries are dropped once their last child is processed
        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent β†’ child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    # parent is outside the tracked set: irrelevant edge
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    copies = all_copies.get(parent_rev, None)
                else:
                    # last child of that parent: its data can be released
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    newcopies = copies = {}
                elif remaining_children:
                    # other children still need `copies`: work on a copy
                    newcopies = copies.copy()
                else:
                    newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in pycompat.iteritems(childcopies):
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                # the source was itself copied: follow the
                                # chain back to the earlier recorded source
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting potential other
                                    # branches. when there are no other branches, this
                                    # could be avoided.
                                    newcopies = copies.copy()
                                # a None source marks the copy as dead; it is
                                # filtered out of the final result below
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another parent (for
                # that child). See comment below for details.
                if current_copies is None:
                    current_copies = newcopies
                else:
                    # we are the second parent to work on c, we need to merge our
                    # work with the other.
                    #
                    # In case of conflict, parent 1 take precedence over parent 2.
                    # This is an arbitrary choice made anew when implementing
                    # changeset based copies. It was made without regards with
                    # potential filelog related behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies, current_copies, isancestor, changes
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source mapping}
        final_copies = {}
        for dest, (tt, source) in all_copies[targetrev].items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        for filename in list(final_copies.keys()):
            if not match(filename):
                del final_copies[filename]
    return final_copies
451
451
452
452
# constants returned by _compare_values (and consumed by _merge_copies_dict)
# to report which side of a value comparison should be kept
PICK_MINOR = 0
PICK_MAJOR = 1
PICK_EITHER = 2
457
457
458
458
def _merge_copies_dict(minor, major, isancestor, changes):
    """merge two copies-mapping together, minor and major

    In case of conflict, value from "major" will be picked.

    - `isancestor(low_rev, high_rev)`: callable returning True if `low_rev`
      is an ancestor of `high_rev`

    - `changes`: the file-change information for the current revision (or
      None); consulted when comparing conflicting values

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, major_value in major.items():
        minor_value = minor.get(dest)
        if minor_value is None:
            # no entry on the minor side: simply adopt the major value
            minor[dest] = major_value
            continue
        pick = _compare_values(changes, isancestor, dest, minor_value, major_value)
        if pick == PICK_MAJOR:
            minor[dest] = major_value
    return minor
481
481
482
482
def _compare_values(changes, isancestor, dest, minor, major):
    """compare two value within a _merge_copies_dict loop iteration

    - `changes`: the file-change information for the merge revision (or
      None); its `salvaged` and `merged` sets are consulted
    - `isancestor(low_rev, high_rev)`: callable returning True if `low_rev`
      is an ancestor of `high_rev`
    - `dest`: the copy destination path whose values are being compared
    - `minor`, `major`: (revision, source-or-None) tuples; a None source
      marks a deleted copy

    return pick

    - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
    """
    major_tt, major_value = major
    minor_tt, minor_value = minor

    if major_tt == minor_tt:
        # if it comes from the same revision it must be the same value
        assert major_value == minor_value
        return PICK_EITHER
    elif (
        changes is not None
        and minor_value is not None
        and major_value is None
        and dest in changes.salvaged
    ):
        # In this case, a deletion was reverted, the "alive" value overwrite
        # the deleted one.
        return PICK_MINOR
    elif (
        changes is not None
        and major_value is not None
        and minor_value is None
        and dest in changes.salvaged
    ):
        # In this case, a deletion was reverted, the "alive" value overwrite
        # the deleted one.
        return PICK_MAJOR
    elif isancestor(minor_tt, major_tt):
        # "major" is a descendant of "minor": either it overwrote the copy
        # information on the way, or `dest` was touched by a merge and both
        # sources are valid. Either way "major" wins, so the previous
        # redundant `changes.merged` sub-branching is collapsed here.
        return PICK_MAJOR
    elif isancestor(major_tt, minor_tt):
        if changes is not None and dest in changes.merged:
            # change to dest happened on the branch without copy-source
            # change, so both sources are valid and "major" wins.
            return PICK_MAJOR
        else:
            # "minor" is a descendant of "major": the more recent
            # information on that side wins.
            return PICK_MINOR
    elif minor_value is None:
        # in case of conflict, the "alive" side wins.
        return PICK_MAJOR
    elif major_value is None:
        # in case of conflict, the "alive" side wins.
        return PICK_MINOR
    else:
        # in case of conflict where both side are alive, major wins.
        return PICK_MAJOR
510
538
511
539
def _revinfo_getter_extra(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        # Build an "is this file merged?" predicate bound to one revision.
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            # A filelog entry with two non-null parents is a file-level merge.
            nb_parents = 0
            for n in parents:
                if n != nullid:
                    nb_parents += 1
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
549
577
550
578
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information

    Walks ``revs`` (ancestors first), propagating a ``{dest: (rev, source)}``
    mapping down to each child, then flattens the mapping gathered at
    ``targetrev`` into a plain ``{dest: source}`` dict.
    """
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            # pick the copies recorded against the parent we came from
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain through an earlier copy if one exists
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # record the removal with a None source
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    # drop entries whose source is None (removed files)
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
619
647
620
648
621 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
649 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
622 """version of `_merge_copies_dict` that works with the Google
650 """version of `_merge_copies_dict` that works with the Google
623 specific "extra" based storage for copy information"""
651 specific "extra" based storage for copy information"""
624 for dest, value in major.items():
652 for dest, value in major.items():
625 other = minor.get(dest)
653 other = minor.get(dest)
626 if other is None:
654 if other is None:
627 minor[dest] = value
655 minor[dest] = value
628 else:
656 else:
629 new_tt = value[0]
657 new_tt = value[0]
630 other_tt = other[0]
658 other_tt = other[0]
631 if value[1] == other[1]:
659 if value[1] == other[1]:
632 continue
660 continue
633 # content from "major" wins, unless it is older
661 # content from "major" wins, unless it is older
634 # than the branch point or there is a merge
662 # than the branch point or there is a merge
635 if (
663 if (
636 new_tt == other_tt
664 new_tt == other_tt
637 or not isancestor(new_tt, other_tt)
665 or not isancestor(new_tt, other_tt)
638 or ismerged(dest)
666 or ismerged(dest)
639 ):
667 ):
640 minor[dest] = value
668 minor[dest] = value
641
669
642
670
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # committed revision: a single committed-copies lookup suffices
    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)
    # working copy: chain committed copies (up to b's parent) with the
    # uncommitted copies recorded in the dirstate
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
657
685
658
686
def _backwardrenames(a, b, match):
    """find renames from ``b`` back to ``a`` by reversing forward copies

    Entries whose source still exists in ``a`` (i.e. plain copies, not
    renames) are dropped.
    """
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    reversed_copies = {}
    for dst_in_a, src_in_b in sorted(pycompat.iteritems(forward)):
        if match and not match(src_in_b):
            continue
        # remove copies
        if src_in_b in a:
            continue
        reversed_copies[src_in_b] = dst_in_a
    return reversed_copies
679
707
680
708
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Dispatches to forward, backward or combined copy tracing depending on
    the topological relation between ``x`` and ``y``.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        # identical or empty contexts: no copies by definition
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: trace forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        # y is an ancestor of x: trace backward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated branches: go back to the common ancestor, then forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
717
745
718
746
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    |/
    o          ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
787
815
788
816
def _isfullcopytraceable(repo, c1, base):
    """Checks that if base, source and destination are all no-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        # working directory: reason about its first parent instead
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        # public changesets involved: full tracing could be too slow
        return False
    limit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    nb_commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return nb_commits < limit
807
835
808
836
def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    # Classify a copy/rename of ``src`` seen only on side 1, mutating the
    # ``copy`` and ``renamedelete`` out-parameters.  The elif order matters:
    # each branch assumes the earlier conditions were false.
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        pass
    elif mb[src] != m2[src] and not _related(c2[src], base[src]):
        # content changed on side 2 but is unrelated to the base version:
        # not a meaningful copy for the merge
        return
    elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src
829
857
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # every mapping defaults to a fresh, independent dict
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        fmt = (
            '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n'
            ' movewithdir=%r\n>'
        )
        return fmt % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
864
892
865
893
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    # invert the {dst: src} mappings into {src: [dst, ...]}
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    cache = []

    def _get_addedfiles(idx):
        # lazily compute (and memoize) the per-side lists of added files
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        header = b" unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
            )

        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
1004
1032
1005
1033
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared
    to ctx)
    """
    # Candidate map of source directory -> destination directory.
    dirmove = {}
    # Source directories ruled out as wholesale moves.
    invalid = set()

    # A directory qualifies as "moved" only when every copied file coming
    # out of it agrees on a single destination directory, and the source
    # directory no longer coexists with the destination in ctx.
    for dst, src in pycompat.iteritems(fullcopy):
        srcdir = pathutil.dirname(src)
        dstdir = pathutil.dirname(dst)
        if srcdir in invalid:
            # already seen to be uninteresting
            continue
        if ctx.hasdir(srcdir) and ctx.hasdir(dstdir):
            # directory wasn't entirely moved locally
            invalid.add(srcdir)
        elif srcdir in dirmove and dirmove[srcdir] != dstdir:
            # files from the same directory moved to two different places
            invalid.add(srcdir)
        else:
            # looks good so far
            dirmove[srcdir] = dstdir

    for bad in invalid:
        dirmove.pop(bad, None)
    del invalid

    if not dirmove:
        return {}, {}

    # Normalize to trailing-slash form so the prefix matching below cannot
    # accidentally match a sibling directory sharing the same prefix.
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for d in dirmove:
            if f.startswith(d):
                # new file added in a directory that was moved, move it
                df = dirmove[d] + f[len(d) :]
                if df not in copy:
                    movewithdir[f] = df
                    repo.ui.debug(
                        b" pending file src: '%s' -> dst: '%s'\n" % (f, df)
                    )
                break

    return dirmove, movewithdir
1069
1097
1070
1098
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very
    large in number and that will make the algorithm slow. The number of
    possible candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # Work from the committed parent when either side is the working copy.
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # NOTE: the limit is loop-invariant, so look it up once rather than
        # once per missing file (the original re-read it in every iteration).
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1178
1206
1179
1207
1180 def _related(f1, f2):
1208 def _related(f1, f2):
1181 """return True if f1 and f2 filectx have a common ancestor
1209 """return True if f1 and f2 filectx have a common ancestor
1182
1210
1183 Walk back to common ancestor to see if the two files originate
1211 Walk back to common ancestor to see if the two files originate
1184 from the same file. Since workingfilectx's rev() is None it messes
1212 from the same file. Since workingfilectx's rev() is None it messes
1185 up the integer comparison logic, hence the pre-step check for
1213 up the integer comparison logic, hence the pre-step check for
1186 None (f1 and f2 can only be workingfilectx's initially).
1214 None (f1 and f2 can only be workingfilectx's initially).
1187 """
1215 """
1188
1216
1189 if f1 == f2:
1217 if f1 == f2:
1190 return True # a match
1218 return True # a match
1191
1219
1192 g1, g2 = f1.ancestors(), f2.ancestors()
1220 g1, g2 = f1.ancestors(), f2.ancestors()
1193 try:
1221 try:
1194 f1r, f2r = f1.linkrev(), f2.linkrev()
1222 f1r, f2r = f1.linkrev(), f2.linkrev()
1195
1223
1196 if f1r is None:
1224 if f1r is None:
1197 f1 = next(g1)
1225 f1 = next(g1)
1198 if f2r is None:
1226 if f2r is None:
1199 f2 = next(g2)
1227 f2 = next(g2)
1200
1228
1201 while True:
1229 while True:
1202 f1r, f2r = f1.linkrev(), f2.linkrev()
1230 f1r, f2r = f1.linkrev(), f2.linkrev()
1203 if f1r > f2r:
1231 if f1r > f2r:
1204 f1 = next(g1)
1232 f1 = next(g1)
1205 elif f2r > f1r:
1233 elif f2r > f1r:
1206 f2 = next(g2)
1234 f2 = next(g2)
1207 else: # f1 and f2 point to files in the same linkrev
1235 else: # f1 and f2 point to files in the same linkrev
1208 return f1 == f2 # true if they point to the same file
1236 return f1 == f2 # true if they point to the same file
1209 except StopIteration:
1237 except StopIteration:
1210 return False
1238 return False
1211
1239
1212
1240
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    parent = wctx.p1()
    _filter(parent, wctx, new_copies)
    # Extra filtering to drop copy information for files that existed before
    # the graft. This is to handle the case of grafting a rename onto a commit
    # that already has the rename. Otherwise the presence of copy information
    # would result in the creation of an empty commit where we would prefer to
    # not create one.
    # (iterate over a snapshot of the keys: we delete while filtering)
    for dest in list(new_copies):
        if dest in parent:
            del new_copies[dest]
    # Finally record the surviving copies on the working-copy filectxs.
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)
@@ -1,797 +1,815
1 use crate::utils::hg_path::HgPath;
1 use crate::utils::hg_path::HgPath;
2 use crate::utils::hg_path::HgPathBuf;
2 use crate::utils::hg_path::HgPathBuf;
3 use crate::Revision;
3 use crate::Revision;
4 use crate::NULL_REVISION;
4 use crate::NULL_REVISION;
5
5
6 use im_rc::ordmap::DiffItem;
6 use im_rc::ordmap::DiffItem;
7 use im_rc::ordmap::Entry;
7 use im_rc::ordmap::Entry;
8 use im_rc::ordmap::OrdMap;
8 use im_rc::ordmap::OrdMap;
9
9
10 use std::cmp::Ordering;
10 use std::cmp::Ordering;
11 use std::collections::HashMap;
11 use std::collections::HashMap;
12 use std::convert::TryInto;
12 use std::convert::TryInto;
13
13
14 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
14 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
15
15
16 type PathToken = usize;
16 type PathToken = usize;
17
17
18 #[derive(Clone, Debug, PartialEq, Copy)]
18 #[derive(Clone, Debug, PartialEq, Copy)]
19 struct TimeStampedPathCopy {
19 struct TimeStampedPathCopy {
20 /// revision at which the copy information was added
20 /// revision at which the copy information was added
21 rev: Revision,
21 rev: Revision,
22 /// the copy source, (Set to None in case of deletion of the associated
22 /// the copy source, (Set to None in case of deletion of the associated
23 /// key)
23 /// key)
24 path: Option<PathToken>,
24 path: Option<PathToken>,
25 }
25 }
26
26
27 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
27 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
28 type TimeStampedPathCopies = OrdMap<PathToken, TimeStampedPathCopy>;
28 type TimeStampedPathCopies = OrdMap<PathToken, TimeStampedPathCopy>;
29
29
30 /// hold parent 1, parent 2 and relevant files actions.
30 /// hold parent 1, parent 2 and relevant files actions.
31 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
31 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
32
32
/// represent the files affected by a changesets
///
/// This holds a subset of `mercurial.metadata.ChangingFiles` as we do not
/// need all the data categories tracked by it.
// (the original doc comment repeated the sentence above twice verbatim;
// the duplicate has been removed)
pub struct ChangedFiles<'a> {
    /// number of index entries in `index`
    nb_items: u32,
    /// fixed-size per-file index entries (flags + offsets into `data`)
    index: &'a [u8],
    /// concatenated filename payload referenced by `index`
    data: &'a [u8],
}
44
44
45 /// Represent active changes that affect the copy tracing.
45 /// Represent active changes that affect the copy tracing.
46 enum Action<'a> {
46 enum Action<'a> {
47 /// The parent ? children edge is removing a file
47 /// The parent ? children edge is removing a file
48 ///
48 ///
49 /// (actually, this could be the edge from the other parent, but it does
49 /// (actually, this could be the edge from the other parent, but it does
50 /// not matters)
50 /// not matters)
51 Removed(&'a HgPath),
51 Removed(&'a HgPath),
52 /// The parent ? children edge introduce copy information between (dest,
52 /// The parent ? children edge introduce copy information between (dest,
53 /// source)
53 /// source)
54 Copied(&'a HgPath, &'a HgPath),
54 Copied(&'a HgPath, &'a HgPath),
55 }
55 }
56
56
/// The possible "special" cases a file can be in during a merge.
///
/// See mercurial/metadata.py for details on these values.
#[derive(PartialEq)]
enum MergeCase {
    /// Merged: file had history on both side that needed to be merged
    Merged,
    /// Salvaged: file was candidate for deletion, but survived the merge
    Salvaged,
    /// Normal: Not one of the two cases above
    Normal,
}
69
69
70 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
70 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
71
71
72 const EMPTY: &[u8] = b"";
72 const EMPTY: &[u8] = b"";
73 const COPY_MASK: u8 = 3;
73 const COPY_MASK: u8 = 3;
74 const P1_COPY: u8 = 2;
74 const P1_COPY: u8 = 2;
75 const P2_COPY: u8 = 3;
75 const P2_COPY: u8 = 3;
76 const ACTION_MASK: u8 = 28;
76 const ACTION_MASK: u8 = 28;
77 const REMOVED: u8 = 12;
77 const REMOVED: u8 = 12;
78 const MERGED: u8 = 8;
78 const MERGED: u8 = 8;
79 const SALVAGED: u8 = 16;
79 const SALVAGED: u8 = 16;
80
80
81 impl<'a> ChangedFiles<'a> {
81 impl<'a> ChangedFiles<'a> {
82 const INDEX_START: usize = 4;
82 const INDEX_START: usize = 4;
83 const ENTRY_SIZE: u32 = 9;
83 const ENTRY_SIZE: u32 = 9;
84 const FILENAME_START: u32 = 1;
84 const FILENAME_START: u32 = 1;
85 const COPY_SOURCE_START: u32 = 5;
85 const COPY_SOURCE_START: u32 = 5;
86
86
87 pub fn new(data: &'a [u8]) -> Self {
87 pub fn new(data: &'a [u8]) -> Self {
88 assert!(
88 assert!(
89 data.len() >= 4,
89 data.len() >= 4,
90 "data size ({}) is too small to contain the header (4)",
90 "data size ({}) is too small to contain the header (4)",
91 data.len()
91 data.len()
92 );
92 );
93 let nb_items_raw: [u8; 4] = (&data[0..=3])
93 let nb_items_raw: [u8; 4] = (&data[0..=3])
94 .try_into()
94 .try_into()
95 .expect("failed to turn 4 bytes into 4 bytes");
95 .expect("failed to turn 4 bytes into 4 bytes");
96 let nb_items = u32::from_be_bytes(nb_items_raw);
96 let nb_items = u32::from_be_bytes(nb_items_raw);
97
97
98 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
98 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
99 let index_end = Self::INDEX_START + index_size;
99 let index_end = Self::INDEX_START + index_size;
100
100
101 assert!(
101 assert!(
102 data.len() >= index_end,
102 data.len() >= index_end,
103 "data size ({}) is too small to fit the index_data ({})",
103 "data size ({}) is too small to fit the index_data ({})",
104 data.len(),
104 data.len(),
105 index_end
105 index_end
106 );
106 );
107
107
108 let ret = ChangedFiles {
108 let ret = ChangedFiles {
109 nb_items,
109 nb_items,
110 index: &data[Self::INDEX_START..index_end],
110 index: &data[Self::INDEX_START..index_end],
111 data: &data[index_end..],
111 data: &data[index_end..],
112 };
112 };
113 let max_data = ret.filename_end(nb_items - 1) as usize;
113 let max_data = ret.filename_end(nb_items - 1) as usize;
114 assert!(
114 assert!(
115 ret.data.len() >= max_data,
115 ret.data.len() >= max_data,
116 "data size ({}) is too small to fit all data ({})",
116 "data size ({}) is too small to fit all data ({})",
117 data.len(),
117 data.len(),
118 index_end + max_data
118 index_end + max_data
119 );
119 );
120 ret
120 ret
121 }
121 }
122
122
123 pub fn new_empty() -> Self {
123 pub fn new_empty() -> Self {
124 ChangedFiles {
124 ChangedFiles {
125 nb_items: 0,
125 nb_items: 0,
126 index: EMPTY,
126 index: EMPTY,
127 data: EMPTY,
127 data: EMPTY,
128 }
128 }
129 }
129 }
130
130
131 /// internal function to return an individual entry at a given index
131 /// internal function to return an individual entry at a given index
132 fn entry(&'a self, idx: u32) -> FileChange<'a> {
132 fn entry(&'a self, idx: u32) -> FileChange<'a> {
133 if idx >= self.nb_items {
133 if idx >= self.nb_items {
134 panic!(
134 panic!(
135 "index for entry is higher that the number of file {} >= {}",
135 "index for entry is higher that the number of file {} >= {}",
136 idx, self.nb_items
136 idx, self.nb_items
137 )
137 )
138 }
138 }
139 let flags = self.flags(idx);
139 let flags = self.flags(idx);
140 let filename = self.filename(idx);
140 let filename = self.filename(idx);
141 let copy_idx = self.copy_idx(idx);
141 let copy_idx = self.copy_idx(idx);
142 let copy_source = self.filename(copy_idx);
142 let copy_source = self.filename(copy_idx);
143 (flags, filename, copy_source)
143 (flags, filename, copy_source)
144 }
144 }
145
145
146 /// internal function to return the filename of the entry at a given index
146 /// internal function to return the filename of the entry at a given index
147 fn filename(&self, idx: u32) -> &HgPath {
147 fn filename(&self, idx: u32) -> &HgPath {
148 let filename_start;
148 let filename_start;
149 if idx == 0 {
149 if idx == 0 {
150 filename_start = 0;
150 filename_start = 0;
151 } else {
151 } else {
152 filename_start = self.filename_end(idx - 1)
152 filename_start = self.filename_end(idx - 1)
153 }
153 }
154 let filename_end = self.filename_end(idx);
154 let filename_end = self.filename_end(idx);
155 let filename_start = filename_start as usize;
155 let filename_start = filename_start as usize;
156 let filename_end = filename_end as usize;
156 let filename_end = filename_end as usize;
157 HgPath::new(&self.data[filename_start..filename_end])
157 HgPath::new(&self.data[filename_start..filename_end])
158 }
158 }
159
159
160 /// internal function to return the flag field of the entry at a given
160 /// internal function to return the flag field of the entry at a given
161 /// index
161 /// index
162 fn flags(&self, idx: u32) -> u8 {
162 fn flags(&self, idx: u32) -> u8 {
163 let idx = idx as usize;
163 let idx = idx as usize;
164 self.index[idx * (Self::ENTRY_SIZE as usize)]
164 self.index[idx * (Self::ENTRY_SIZE as usize)]
165 }
165 }
166
166
167 /// internal function to return the end of a filename part at a given index
167 /// internal function to return the end of a filename part at a given index
168 fn filename_end(&self, idx: u32) -> u32 {
168 fn filename_end(&self, idx: u32) -> u32 {
169 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
169 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
170 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
170 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
171 let start = start as usize;
171 let start = start as usize;
172 let end = end as usize;
172 let end = end as usize;
173 let raw = (&self.index[start..end])
173 let raw = (&self.index[start..end])
174 .try_into()
174 .try_into()
175 .expect("failed to turn 4 bytes into 4 bytes");
175 .expect("failed to turn 4 bytes into 4 bytes");
176 u32::from_be_bytes(raw)
176 u32::from_be_bytes(raw)
177 }
177 }
178
178
179 /// internal function to return index of the copy source of the entry at a
179 /// internal function to return index of the copy source of the entry at a
180 /// given index
180 /// given index
181 fn copy_idx(&self, idx: u32) -> u32 {
181 fn copy_idx(&self, idx: u32) -> u32 {
182 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
182 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
183 let end = (idx + 1) * Self::ENTRY_SIZE;
183 let end = (idx + 1) * Self::ENTRY_SIZE;
184 let start = start as usize;
184 let start = start as usize;
185 let end = end as usize;
185 let end = end as usize;
186 let raw = (&self.index[start..end])
186 let raw = (&self.index[start..end])
187 .try_into()
187 .try_into()
188 .expect("failed to turn 4 bytes into 4 bytes");
188 .expect("failed to turn 4 bytes into 4 bytes");
189 u32::from_be_bytes(raw)
189 u32::from_be_bytes(raw)
190 }
190 }
191
191
192 /// Return an iterator over all the `Action` in this instance.
192 /// Return an iterator over all the `Action` in this instance.
193 fn iter_actions(&self, parent: Parent) -> ActionsIterator {
193 fn iter_actions(&self, parent: Parent) -> ActionsIterator {
194 ActionsIterator {
194 ActionsIterator {
195 changes: &self,
195 changes: &self,
196 parent: parent,
196 parent: parent,
197 current: 0,
197 current: 0,
198 }
198 }
199 }
199 }
200
200
201 /// return the MergeCase value associated with a filename
201 /// return the MergeCase value associated with a filename
202 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
202 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
203 if self.nb_items == 0 {
203 if self.nb_items == 0 {
204 return MergeCase::Normal;
204 return MergeCase::Normal;
205 }
205 }
206 let mut low_part = 0;
206 let mut low_part = 0;
207 let mut high_part = self.nb_items;
207 let mut high_part = self.nb_items;
208
208
209 while low_part < high_part {
209 while low_part < high_part {
210 let cursor = (low_part + high_part - 1) / 2;
210 let cursor = (low_part + high_part - 1) / 2;
211 let (flags, filename, _source) = self.entry(cursor);
211 let (flags, filename, _source) = self.entry(cursor);
212 match path.cmp(filename) {
212 match path.cmp(filename) {
213 Ordering::Less => low_part = cursor + 1,
213 Ordering::Less => low_part = cursor + 1,
214 Ordering::Greater => high_part = cursor,
214 Ordering::Greater => high_part = cursor,
215 Ordering::Equal => {
215 Ordering::Equal => {
216 return match flags & ACTION_MASK {
216 return match flags & ACTION_MASK {
217 MERGED => MergeCase::Merged,
217 MERGED => MergeCase::Merged,
218 SALVAGED => MergeCase::Salvaged,
218 SALVAGED => MergeCase::Salvaged,
219 _ => MergeCase::Normal,
219 _ => MergeCase::Normal,
220 };
220 };
221 }
221 }
222 }
222 }
223 }
223 }
224 MergeCase::Normal
224 MergeCase::Normal
225 }
225 }
226 }
226 }
227
227
228 /// A struct responsible for answering "is X ancestors of Y" quickly
228 /// A struct responsible for answering "is X ancestors of Y" quickly
229 ///
229 ///
230 /// The structure will delegate ancestors call to a callback, and cache the
230 /// The structure will delegate ancestors call to a callback, and cache the
231 /// result.
231 /// result.
232 #[derive(Debug)]
232 #[derive(Debug)]
233 struct AncestorOracle<'a, A: Fn(Revision, Revision) -> bool> {
233 struct AncestorOracle<'a, A: Fn(Revision, Revision) -> bool> {
234 inner: &'a A,
234 inner: &'a A,
235 pairs: HashMap<(Revision, Revision), bool>,
235 pairs: HashMap<(Revision, Revision), bool>,
236 }
236 }
237
237
238 impl<'a, A: Fn(Revision, Revision) -> bool> AncestorOracle<'a, A> {
238 impl<'a, A: Fn(Revision, Revision) -> bool> AncestorOracle<'a, A> {
239 fn new(func: &'a A) -> Self {
239 fn new(func: &'a A) -> Self {
240 Self {
240 Self {
241 inner: func,
241 inner: func,
242 pairs: HashMap::default(),
242 pairs: HashMap::default(),
243 }
243 }
244 }
244 }
245
245
246 fn record_overwrite(&mut self, anc: Revision, desc: Revision) {
246 fn record_overwrite(&mut self, anc: Revision, desc: Revision) {
247 self.pairs.insert((anc, desc), true);
247 self.pairs.insert((anc, desc), true);
248 }
248 }
249
249
250 /// returns `true` if `anc` is an ancestors of `desc`, `false` otherwise
250 /// returns `true` if `anc` is an ancestors of `desc`, `false` otherwise
251 fn is_overwrite(&mut self, anc: Revision, desc: Revision) -> bool {
251 fn is_overwrite(&mut self, anc: Revision, desc: Revision) -> bool {
252 if anc > desc {
252 if anc > desc {
253 false
253 false
254 } else if anc == desc {
254 } else if anc == desc {
255 true
255 true
256 } else {
256 } else {
257 if let Some(b) = self.pairs.get(&(anc, desc)) {
257 if let Some(b) = self.pairs.get(&(anc, desc)) {
258 *b
258 *b
259 } else {
259 } else {
260 let b = (self.inner)(anc, desc);
260 let b = (self.inner)(anc, desc);
261 self.pairs.insert((anc, desc), b);
261 self.pairs.insert((anc, desc), b);
262 b
262 b
263 }
263 }
264 }
264 }
265 }
265 }
266 }
266 }
267
267
268 struct ActionsIterator<'a> {
268 struct ActionsIterator<'a> {
269 changes: &'a ChangedFiles<'a>,
269 changes: &'a ChangedFiles<'a>,
270 parent: Parent,
270 parent: Parent,
271 current: u32,
271 current: u32,
272 }
272 }
273
273
274 impl<'a> Iterator for ActionsIterator<'a> {
274 impl<'a> Iterator for ActionsIterator<'a> {
275 type Item = Action<'a>;
275 type Item = Action<'a>;
276
276
277 fn next(&mut self) -> Option<Action<'a>> {
277 fn next(&mut self) -> Option<Action<'a>> {
278 let copy_flag = match self.parent {
278 let copy_flag = match self.parent {
279 Parent::FirstParent => P1_COPY,
279 Parent::FirstParent => P1_COPY,
280 Parent::SecondParent => P2_COPY,
280 Parent::SecondParent => P2_COPY,
281 };
281 };
282 while self.current < self.changes.nb_items {
282 while self.current < self.changes.nb_items {
283 let (flags, file, source) = self.changes.entry(self.current);
283 let (flags, file, source) = self.changes.entry(self.current);
284 self.current += 1;
284 self.current += 1;
285 if (flags & ACTION_MASK) == REMOVED {
285 if (flags & ACTION_MASK) == REMOVED {
286 return Some(Action::Removed(file));
286 return Some(Action::Removed(file));
287 }
287 }
288 let copy = flags & COPY_MASK;
288 let copy = flags & COPY_MASK;
289 if copy == copy_flag {
289 if copy == copy_flag {
290 return Some(Action::Copied(file, source));
290 return Some(Action::Copied(file, source));
291 }
291 }
292 }
292 }
293 return None;
293 return None;
294 }
294 }
295 }
295 }
296
296
297 /// A small struct whose purpose is to ensure lifetime of bytes referenced in
297 /// A small struct whose purpose is to ensure lifetime of bytes referenced in
298 /// ChangedFiles
298 /// ChangedFiles
299 ///
299 ///
300 /// It is passed to the RevInfoMaker callback who can assign any necessary
300 /// It is passed to the RevInfoMaker callback who can assign any necessary
301 /// content to the `data` attribute. The copy tracing code is responsible for
301 /// content to the `data` attribute. The copy tracing code is responsible for
302 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
302 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
303 pub struct DataHolder<D> {
303 pub struct DataHolder<D> {
304 /// RevInfoMaker callback should assign data referenced by the
304 /// RevInfoMaker callback should assign data referenced by the
305 /// ChangedFiles struct it return to this attribute. The DataHolder
305 /// ChangedFiles struct it return to this attribute. The DataHolder
306 /// lifetime will be at least as long as the ChangedFiles one.
306 /// lifetime will be at least as long as the ChangedFiles one.
307 pub data: Option<D>,
307 pub data: Option<D>,
308 }
308 }
309
309
310 pub type RevInfoMaker<'a, D> =
310 pub type RevInfoMaker<'a, D> =
311 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
311 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
312
312
313 /// enum used to carry information about the parent β†’ child currently processed
313 /// enum used to carry information about the parent β†’ child currently processed
314 #[derive(Copy, Clone, Debug)]
314 #[derive(Copy, Clone, Debug)]
315 enum Parent {
315 enum Parent {
316 /// The `p1(x) β†’ x` edge
316 /// The `p1(x) β†’ x` edge
317 FirstParent,
317 FirstParent,
318 /// The `p2(x) β†’ x` edge
318 /// The `p2(x) β†’ x` edge
319 SecondParent,
319 SecondParent,
320 }
320 }
321
321
322 /// A small "tokenizer" responsible of turning full HgPath into lighter
322 /// A small "tokenizer" responsible of turning full HgPath into lighter
323 /// PathToken
323 /// PathToken
324 ///
324 ///
325 /// Dealing with small object, like integer is much faster, so HgPath input are
325 /// Dealing with small object, like integer is much faster, so HgPath input are
326 /// turned into integer "PathToken" and converted back in the end.
326 /// turned into integer "PathToken" and converted back in the end.
327 #[derive(Clone, Debug, Default)]
327 #[derive(Clone, Debug, Default)]
328 struct TwoWayPathMap {
328 struct TwoWayPathMap {
329 token: HashMap<HgPathBuf, PathToken>,
329 token: HashMap<HgPathBuf, PathToken>,
330 path: Vec<HgPathBuf>,
330 path: Vec<HgPathBuf>,
331 }
331 }
332
332
333 impl TwoWayPathMap {
333 impl TwoWayPathMap {
334 fn tokenize(&mut self, path: &HgPath) -> PathToken {
334 fn tokenize(&mut self, path: &HgPath) -> PathToken {
335 match self.token.get(path) {
335 match self.token.get(path) {
336 Some(a) => *a,
336 Some(a) => *a,
337 None => {
337 None => {
338 let a = self.token.len();
338 let a = self.token.len();
339 let buf = path.to_owned();
339 let buf = path.to_owned();
340 self.path.push(buf.clone());
340 self.path.push(buf.clone());
341 self.token.insert(buf, a);
341 self.token.insert(buf, a);
342 a
342 a
343 }
343 }
344 }
344 }
345 }
345 }
346
346
347 fn untokenize(&self, token: PathToken) -> &HgPathBuf {
347 fn untokenize(&self, token: PathToken) -> &HgPathBuf {
348 assert!(token < self.path.len(), format!("Unknown token: {}", token));
348 assert!(token < self.path.len(), format!("Unknown token: {}", token));
349 &self.path[token]
349 &self.path[token]
350 }
350 }
351 }
351 }
352
352
353 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
353 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
354 ///
354 ///
355 /// Arguments are:
355 /// Arguments are:
356 ///
356 ///
357 /// revs: all revisions to be considered
357 /// revs: all revisions to be considered
358 /// children: a {parent ? [childrens]} mapping
358 /// children: a {parent ? [childrens]} mapping
359 /// target_rev: the final revision we are combining copies to
359 /// target_rev: the final revision we are combining copies to
360 /// rev_info(rev): callback to get revision information:
360 /// rev_info(rev): callback to get revision information:
361 /// * first parent
361 /// * first parent
362 /// * second parent
362 /// * second parent
363 /// * ChangedFiles
363 /// * ChangedFiles
364 /// isancestors(low_rev, high_rev): callback to check if a revision is an
364 /// isancestors(low_rev, high_rev): callback to check if a revision is an
365 /// ancestor of another
365 /// ancestor of another
366 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool, D>(
366 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool, D>(
367 revs: Vec<Revision>,
367 revs: Vec<Revision>,
368 mut children_count: HashMap<Revision, usize>,
368 mut children_count: HashMap<Revision, usize>,
369 target_rev: Revision,
369 target_rev: Revision,
370 rev_info: RevInfoMaker<D>,
370 rev_info: RevInfoMaker<D>,
371 is_ancestor: &A,
371 is_ancestor: &A,
372 ) -> PathCopies {
372 ) -> PathCopies {
373 let mut all_copies = HashMap::new();
373 let mut all_copies = HashMap::new();
374 let mut oracle = AncestorOracle::new(is_ancestor);
374 let mut oracle = AncestorOracle::new(is_ancestor);
375
375
376 let mut path_map = TwoWayPathMap::default();
376 let mut path_map = TwoWayPathMap::default();
377
377
378 for rev in revs {
378 for rev in revs {
379 let mut d: DataHolder<D> = DataHolder { data: None };
379 let mut d: DataHolder<D> = DataHolder { data: None };
380 let (p1, p2, changes) = rev_info(rev, &mut d);
380 let (p1, p2, changes) = rev_info(rev, &mut d);
381
381
382 // We will chain the copies information accumulated for the parent with
382 // We will chain the copies information accumulated for the parent with
383 // the individual copies information the curent revision. Creating a
383 // the individual copies information the curent revision. Creating a
384 // new TimeStampedPath for each `rev` β†’ `children` vertex.
384 // new TimeStampedPath for each `rev` β†’ `children` vertex.
385 let mut copies: Option<TimeStampedPathCopies> = None;
385 let mut copies: Option<TimeStampedPathCopies> = None;
386 if p1 != NULL_REVISION {
386 if p1 != NULL_REVISION {
387 // Retrieve data computed in a previous iteration
387 // Retrieve data computed in a previous iteration
388 let parent_copies = get_and_clean_parent_copies(
388 let parent_copies = get_and_clean_parent_copies(
389 &mut all_copies,
389 &mut all_copies,
390 &mut children_count,
390 &mut children_count,
391 p1,
391 p1,
392 );
392 );
393 if let Some(parent_copies) = parent_copies {
393 if let Some(parent_copies) = parent_copies {
394 // combine it with data for that revision
394 // combine it with data for that revision
395 let vertex_copies = add_from_changes(
395 let vertex_copies = add_from_changes(
396 &mut path_map,
396 &mut path_map,
397 &mut oracle,
397 &mut oracle,
398 &parent_copies,
398 &parent_copies,
399 &changes,
399 &changes,
400 Parent::FirstParent,
400 Parent::FirstParent,
401 rev,
401 rev,
402 );
402 );
403 // keep that data around for potential later combination
403 // keep that data around for potential later combination
404 copies = Some(vertex_copies);
404 copies = Some(vertex_copies);
405 }
405 }
406 }
406 }
407 if p2 != NULL_REVISION {
407 if p2 != NULL_REVISION {
408 // Retrieve data computed in a previous iteration
408 // Retrieve data computed in a previous iteration
409 let parent_copies = get_and_clean_parent_copies(
409 let parent_copies = get_and_clean_parent_copies(
410 &mut all_copies,
410 &mut all_copies,
411 &mut children_count,
411 &mut children_count,
412 p2,
412 p2,
413 );
413 );
414 if let Some(parent_copies) = parent_copies {
414 if let Some(parent_copies) = parent_copies {
415 // combine it with data for that revision
415 // combine it with data for that revision
416 let vertex_copies = add_from_changes(
416 let vertex_copies = add_from_changes(
417 &mut path_map,
417 &mut path_map,
418 &mut oracle,
418 &mut oracle,
419 &parent_copies,
419 &parent_copies,
420 &changes,
420 &changes,
421 Parent::SecondParent,
421 Parent::SecondParent,
422 rev,
422 rev,
423 );
423 );
424
424
425 copies = match copies {
425 copies = match copies {
426 None => Some(vertex_copies),
426 None => Some(vertex_copies),
427 // Merge has two parents needs to combines their copy
427 // Merge has two parents needs to combines their copy
428 // information.
428 // information.
429 //
429 //
430 // If we got data from both parents, We need to combine
430 // If we got data from both parents, We need to combine
431 // them.
431 // them.
432 Some(copies) => Some(merge_copies_dict(
432 Some(copies) => Some(merge_copies_dict(
433 &path_map,
433 &path_map,
434 rev,
434 rev,
435 vertex_copies,
435 vertex_copies,
436 copies,
436 copies,
437 &changes,
437 &changes,
438 &mut oracle,
438 &mut oracle,
439 )),
439 )),
440 };
440 };
441 }
441 }
442 }
442 }
443 match copies {
443 match copies {
444 Some(copies) => {
444 Some(copies) => {
445 all_copies.insert(rev, copies);
445 all_copies.insert(rev, copies);
446 }
446 }
447 _ => {}
447 _ => {}
448 }
448 }
449 }
449 }
450
450
451 // Drop internal information (like the timestamp) and return the final
451 // Drop internal information (like the timestamp) and return the final
452 // mapping.
452 // mapping.
453 let tt_result = all_copies
453 let tt_result = all_copies
454 .remove(&target_rev)
454 .remove(&target_rev)
455 .expect("target revision was not processed");
455 .expect("target revision was not processed");
456 let mut result = PathCopies::default();
456 let mut result = PathCopies::default();
457 for (dest, tt_source) in tt_result {
457 for (dest, tt_source) in tt_result {
458 if let Some(path) = tt_source.path {
458 if let Some(path) = tt_source.path {
459 let path_dest = path_map.untokenize(dest).to_owned();
459 let path_dest = path_map.untokenize(dest).to_owned();
460 let path_path = path_map.untokenize(path).to_owned();
460 let path_path = path_map.untokenize(path).to_owned();
461 result.insert(path_dest, path_path);
461 result.insert(path_dest, path_path);
462 }
462 }
463 }
463 }
464 result
464 result
465 }
465 }
466
466
467 /// fetch previous computed information
467 /// fetch previous computed information
468 ///
468 ///
469 /// If no other children are expected to need this information, we drop it from
469 /// If no other children are expected to need this information, we drop it from
470 /// the cache.
470 /// the cache.
471 ///
471 ///
472 /// If parent is not part of the set we are expected to walk, return None.
472 /// If parent is not part of the set we are expected to walk, return None.
473 fn get_and_clean_parent_copies(
473 fn get_and_clean_parent_copies(
474 all_copies: &mut HashMap<Revision, TimeStampedPathCopies>,
474 all_copies: &mut HashMap<Revision, TimeStampedPathCopies>,
475 children_count: &mut HashMap<Revision, usize>,
475 children_count: &mut HashMap<Revision, usize>,
476 parent_rev: Revision,
476 parent_rev: Revision,
477 ) -> Option<TimeStampedPathCopies> {
477 ) -> Option<TimeStampedPathCopies> {
478 let count = children_count.get_mut(&parent_rev)?;
478 let count = children_count.get_mut(&parent_rev)?;
479 *count -= 1;
479 *count -= 1;
480 if *count == 0 {
480 if *count == 0 {
481 match all_copies.remove(&parent_rev) {
481 match all_copies.remove(&parent_rev) {
482 Some(c) => Some(c),
482 Some(c) => Some(c),
483 None => Some(TimeStampedPathCopies::default()),
483 None => Some(TimeStampedPathCopies::default()),
484 }
484 }
485 } else {
485 } else {
486 match all_copies.get(&parent_rev) {
486 match all_copies.get(&parent_rev) {
487 Some(c) => Some(c.clone()),
487 Some(c) => Some(c.clone()),
488 None => Some(TimeStampedPathCopies::default()),
488 None => Some(TimeStampedPathCopies::default()),
489 }
489 }
490 }
490 }
491 }
491 }
492
492
493 /// Combine ChangedFiles with some existing PathCopies information and return
493 /// Combine ChangedFiles with some existing PathCopies information and return
494 /// the result
494 /// the result
495 fn add_from_changes<A: Fn(Revision, Revision) -> bool>(
495 fn add_from_changes<A: Fn(Revision, Revision) -> bool>(
496 path_map: &mut TwoWayPathMap,
496 path_map: &mut TwoWayPathMap,
497 oracle: &mut AncestorOracle<A>,
497 oracle: &mut AncestorOracle<A>,
498 base_copies: &TimeStampedPathCopies,
498 base_copies: &TimeStampedPathCopies,
499 changes: &ChangedFiles,
499 changes: &ChangedFiles,
500 parent: Parent,
500 parent: Parent,
501 current_rev: Revision,
501 current_rev: Revision,
502 ) -> TimeStampedPathCopies {
502 ) -> TimeStampedPathCopies {
503 let mut copies = base_copies.clone();
503 let mut copies = base_copies.clone();
504 for action in changes.iter_actions(parent) {
504 for action in changes.iter_actions(parent) {
505 match action {
505 match action {
506 Action::Copied(path_dest, path_source) => {
506 Action::Copied(path_dest, path_source) => {
507 let dest = path_map.tokenize(path_dest);
507 let dest = path_map.tokenize(path_dest);
508 let source = path_map.tokenize(path_source);
508 let source = path_map.tokenize(path_source);
509 let entry;
509 let entry;
510 if let Some(v) = base_copies.get(&source) {
510 if let Some(v) = base_copies.get(&source) {
511 entry = match &v.path {
511 entry = match &v.path {
512 Some(path) => Some((*(path)).to_owned()),
512 Some(path) => Some((*(path)).to_owned()),
513 None => Some(source.to_owned()),
513 None => Some(source.to_owned()),
514 }
514 }
515 } else {
515 } else {
516 entry = Some(source.to_owned());
516 entry = Some(source.to_owned());
517 }
517 }
518 // Each new entry is introduced by the children, we
518 // Each new entry is introduced by the children, we
519 // record this information as we will need it to take
519 // record this information as we will need it to take
520 // the right decision when merging conflicting copy
520 // the right decision when merging conflicting copy
521 // information. See merge_copies_dict for details.
521 // information. See merge_copies_dict for details.
522 match copies.entry(dest) {
522 match copies.entry(dest) {
523 Entry::Vacant(slot) => {
523 Entry::Vacant(slot) => {
524 let ttpc = TimeStampedPathCopy {
524 let ttpc = TimeStampedPathCopy {
525 rev: current_rev,
525 rev: current_rev,
526 path: entry,
526 path: entry,
527 };
527 };
528 slot.insert(ttpc);
528 slot.insert(ttpc);
529 }
529 }
530 Entry::Occupied(mut slot) => {
530 Entry::Occupied(mut slot) => {
531 let mut ttpc = slot.get_mut();
531 let mut ttpc = slot.get_mut();
532 oracle.record_overwrite(ttpc.rev, current_rev);
532 oracle.record_overwrite(ttpc.rev, current_rev);
533 ttpc.rev = current_rev;
533 ttpc.rev = current_rev;
534 ttpc.path = entry;
534 ttpc.path = entry;
535 }
535 }
536 }
536 }
537 }
537 }
538 Action::Removed(deleted_path) => {
538 Action::Removed(deleted_path) => {
539 // We must drop copy information for removed file.
539 // We must drop copy information for removed file.
540 //
540 //
541 // We need to explicitly record them as dropped to
541 // We need to explicitly record them as dropped to
542 // propagate this information when merging two
542 // propagate this information when merging two
543 // TimeStampedPathCopies object.
543 // TimeStampedPathCopies object.
544 let deleted = path_map.tokenize(deleted_path);
544 let deleted = path_map.tokenize(deleted_path);
545 copies.entry(deleted).and_modify(|old| {
545 copies.entry(deleted).and_modify(|old| {
546 oracle.record_overwrite(old.rev, current_rev);
546 oracle.record_overwrite(old.rev, current_rev);
547 old.rev = current_rev;
547 old.rev = current_rev;
548 old.path = None;
548 old.path = None;
549 });
549 });
550 }
550 }
551 }
551 }
552 }
552 }
553 copies
553 copies
554 }
554 }
555
555
556 /// merge two copies-mapping together, minor and major
556 /// merge two copies-mapping together, minor and major
557 ///
557 ///
558 /// In case of conflict, value from "major" will be picked, unless in some
558 /// In case of conflict, value from "major" will be picked, unless in some
559 /// cases. See inline documentation for details.
559 /// cases. See inline documentation for details.
560 fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
560 fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
561 path_map: &TwoWayPathMap,
561 path_map: &TwoWayPathMap,
562 current_merge: Revision,
562 current_merge: Revision,
563 mut minor: TimeStampedPathCopies,
563 mut minor: TimeStampedPathCopies,
564 mut major: TimeStampedPathCopies,
564 mut major: TimeStampedPathCopies,
565 changes: &ChangedFiles,
565 changes: &ChangedFiles,
566 oracle: &mut AncestorOracle<A>,
566 oracle: &mut AncestorOracle<A>,
567 ) -> TimeStampedPathCopies {
567 ) -> TimeStampedPathCopies {
568 // This closure exist as temporary help while multiple developper are
568 // This closure exist as temporary help while multiple developper are
569 // actively working on this code. Feel free to re-inline it once this
569 // actively working on this code. Feel free to re-inline it once this
570 // code is more settled.
570 // code is more settled.
571 let mut cmp_value =
571 let mut cmp_value =
572 |dest: &PathToken,
572 |dest: &PathToken,
573 src_minor: &TimeStampedPathCopy,
573 src_minor: &TimeStampedPathCopy,
574 src_major: &TimeStampedPathCopy| {
574 src_major: &TimeStampedPathCopy| {
575 compare_value(
575 compare_value(
576 path_map,
576 path_map,
577 current_merge,
577 current_merge,
578 changes,
578 changes,
579 oracle,
579 oracle,
580 dest,
580 dest,
581 src_minor,
581 src_minor,
582 src_major,
582 src_major,
583 )
583 )
584 };
584 };
585 if minor.is_empty() {
585 if minor.is_empty() {
586 major
586 major
587 } else if major.is_empty() {
587 } else if major.is_empty() {
588 minor
588 minor
589 } else if minor.len() * 2 < major.len() {
589 } else if minor.len() * 2 < major.len() {
590 // Lets says we are merging two TimeStampedPathCopies instance A and B.
590 // Lets says we are merging two TimeStampedPathCopies instance A and B.
591 //
591 //
592 // If A contains N items, the merge result will never contains more
592 // If A contains N items, the merge result will never contains more
593 // than N values differents than the one in A
593 // than N values differents than the one in A
594 //
594 //
595 // If B contains M items, with M > N, the merge result will always
595 // If B contains M items, with M > N, the merge result will always
596 // result in a minimum of M - N value differents than the on in
596 // result in a minimum of M - N value differents than the on in
597 // A
597 // A
598 //
598 //
599 // As a result, if N < (M-N), we know that simply iterating over A will
599 // As a result, if N < (M-N), we know that simply iterating over A will
600 // yield less difference than iterating over the difference
600 // yield less difference than iterating over the difference
601 // between A and B.
601 // between A and B.
602 //
602 //
603 // This help performance a lot in case were a tiny
603 // This help performance a lot in case were a tiny
604 // TimeStampedPathCopies is merged with a much larger one.
604 // TimeStampedPathCopies is merged with a much larger one.
605 for (dest, src_minor) in minor {
605 for (dest, src_minor) in minor {
606 let src_major = major.get(&dest);
606 let src_major = major.get(&dest);
607 match src_major {
607 match src_major {
608 None => major.insert(dest, src_minor),
608 None => major.insert(dest, src_minor),
609 Some(src_major) => {
609 Some(src_major) => {
610 match cmp_value(&dest, &src_minor, src_major) {
610 match cmp_value(&dest, &src_minor, src_major) {
611 MergePick::Any | MergePick::Major => None,
611 MergePick::Any | MergePick::Major => None,
612 MergePick::Minor => major.insert(dest, src_minor),
612 MergePick::Minor => major.insert(dest, src_minor),
613 }
613 }
614 }
614 }
615 };
615 };
616 }
616 }
617 major
617 major
618 } else if major.len() * 2 < minor.len() {
618 } else if major.len() * 2 < minor.len() {
619 // This use the same rational than the previous block.
619 // This use the same rational than the previous block.
620 // (Check previous block documentation for details.)
620 // (Check previous block documentation for details.)
621 for (dest, src_major) in major {
621 for (dest, src_major) in major {
622 let src_minor = minor.get(&dest);
622 let src_minor = minor.get(&dest);
623 match src_minor {
623 match src_minor {
624 None => minor.insert(dest, src_major),
624 None => minor.insert(dest, src_major),
625 Some(src_minor) => {
625 Some(src_minor) => {
626 match cmp_value(&dest, src_minor, &src_major) {
626 match cmp_value(&dest, src_minor, &src_major) {
627 MergePick::Any | MergePick::Minor => None,
627 MergePick::Any | MergePick::Minor => None,
628 MergePick::Major => minor.insert(dest, src_major),
628 MergePick::Major => minor.insert(dest, src_major),
629 }
629 }
630 }
630 }
631 };
631 };
632 }
632 }
633 minor
633 minor
634 } else {
634 } else {
635 let mut override_minor = Vec::new();
635 let mut override_minor = Vec::new();
636 let mut override_major = Vec::new();
636 let mut override_major = Vec::new();
637
637
638 let mut to_major = |k: &PathToken, v: &TimeStampedPathCopy| {
638 let mut to_major = |k: &PathToken, v: &TimeStampedPathCopy| {
639 override_major.push((k.clone(), v.clone()))
639 override_major.push((k.clone(), v.clone()))
640 };
640 };
641 let mut to_minor = |k: &PathToken, v: &TimeStampedPathCopy| {
641 let mut to_minor = |k: &PathToken, v: &TimeStampedPathCopy| {
642 override_minor.push((k.clone(), v.clone()))
642 override_minor.push((k.clone(), v.clone()))
643 };
643 };
644
644
645 // The diff function leverage detection of the identical subpart if
645 // The diff function leverage detection of the identical subpart if
646 // minor and major has some common ancestors. This make it very
646 // minor and major has some common ancestors. This make it very
647 // fast is most case.
647 // fast is most case.
648 //
648 //
649 // In case where the two map are vastly different in size, the current
649 // In case where the two map are vastly different in size, the current
650 // approach is still slowish because the iteration will iterate over
650 // approach is still slowish because the iteration will iterate over
651 // all the "exclusive" content of the larger on. This situation can be
651 // all the "exclusive" content of the larger on. This situation can be
652 // frequent when the subgraph of revision we are processing has a lot
652 // frequent when the subgraph of revision we are processing has a lot
653 // of roots. Each roots adding they own fully new map to the mix (and
653 // of roots. Each roots adding they own fully new map to the mix (and
654 // likely a small map, if the path from the root to the "main path" is
654 // likely a small map, if the path from the root to the "main path" is
655 // small.
655 // small.
656 //
656 //
657 // We could do better by detecting such situation and processing them
657 // We could do better by detecting such situation and processing them
658 // differently.
658 // differently.
659 for d in minor.diff(&major) {
659 for d in minor.diff(&major) {
660 match d {
660 match d {
661 DiffItem::Add(k, v) => to_minor(k, v),
661 DiffItem::Add(k, v) => to_minor(k, v),
662 DiffItem::Remove(k, v) => to_major(k, v),
662 DiffItem::Remove(k, v) => to_major(k, v),
663 DiffItem::Update { old, new } => {
663 DiffItem::Update { old, new } => {
664 let (dest, src_major) = new;
664 let (dest, src_major) = new;
665 let (_, src_minor) = old;
665 let (_, src_minor) = old;
666 match cmp_value(dest, src_minor, src_major) {
666 match cmp_value(dest, src_minor, src_major) {
667 MergePick::Major => to_minor(dest, src_major),
667 MergePick::Major => to_minor(dest, src_major),
668 MergePick::Minor => to_major(dest, src_minor),
668 MergePick::Minor => to_major(dest, src_minor),
669 // If the two entry are identical, no need to do
669 // If the two entry are identical, no need to do
670 // anything (but diff should not have yield them)
670 // anything (but diff should not have yield them)
671 MergePick::Any => unreachable!(),
671 MergePick::Any => unreachable!(),
672 }
672 }
673 }
673 }
674 };
674 };
675 }
675 }
676
676
677 let updates;
677 let updates;
678 let mut result;
678 let mut result;
679 if override_major.is_empty() {
679 if override_major.is_empty() {
680 result = major
680 result = major
681 } else if override_minor.is_empty() {
681 } else if override_minor.is_empty() {
682 result = minor
682 result = minor
683 } else {
683 } else {
684 if override_minor.len() < override_major.len() {
684 if override_minor.len() < override_major.len() {
685 updates = override_minor;
685 updates = override_minor;
686 result = minor;
686 result = minor;
687 } else {
687 } else {
688 updates = override_major;
688 updates = override_major;
689 result = major;
689 result = major;
690 }
690 }
691 for (k, v) in updates {
691 for (k, v) in updates {
692 result.insert(k, v);
692 result.insert(k, v);
693 }
693 }
694 }
694 }
695 result
695 result
696 }
696 }
697 }
697 }
698
698
699 /// represent the side that should prevail when merging two
699 /// represent the side that should prevail when merging two
700 /// TimeStampedPathCopies
700 /// TimeStampedPathCopies
701 enum MergePick {
701 enum MergePick {
702 /// The "major" (p1) side prevails
702 /// The "major" (p1) side prevails
703 Major,
703 Major,
704 /// The "minor" (p2) side prevails
704 /// The "minor" (p2) side prevails
705 Minor,
705 Minor,
706 /// Any side could be used (because they are the same)
706 /// Any side could be used (because they are the same)
707 Any,
707 Any,
708 }
708 }
709
709
710 /// decide which side prevails in case of conflicting values
710 /// decide which side prevails in case of conflicting values
711 #[allow(clippy::if_same_then_else)]
711 #[allow(clippy::if_same_then_else)]
712 fn compare_value<A: Fn(Revision, Revision) -> bool>(
712 fn compare_value<A: Fn(Revision, Revision) -> bool>(
713 path_map: &TwoWayPathMap,
713 path_map: &TwoWayPathMap,
714 current_merge: Revision,
714 current_merge: Revision,
715 changes: &ChangedFiles,
715 changes: &ChangedFiles,
716 oracle: &mut AncestorOracle<A>,
716 oracle: &mut AncestorOracle<A>,
717 dest: &PathToken,
717 dest: &PathToken,
718 src_minor: &TimeStampedPathCopy,
718 src_minor: &TimeStampedPathCopy,
719 src_major: &TimeStampedPathCopy,
719 src_major: &TimeStampedPathCopy,
720 ) -> MergePick {
720 ) -> MergePick {
721 if src_major.rev == current_merge {
721 if src_major.rev == current_merge {
722 if src_minor.rev == current_merge {
722 if src_minor.rev == current_merge {
723 if src_major.path.is_none() {
723 if src_major.path.is_none() {
724 // We cannot get different copy information for both p1 and p2
724 // We cannot get different copy information for both p1 and p2
725 // from the same revision. Unless this was a
725 // from the same revision. Unless this was a
726 // deletion
726 // deletion
727 MergePick::Any
727 MergePick::Any
728 } else {
728 } else {
729 unreachable!();
729 unreachable!();
730 }
730 }
731 } else {
731 } else {
732 // The last value comes the current merge, this value -will- win
732 // The last value comes the current merge, this value -will- win
733 // eventually.
733 // eventually.
734 oracle.record_overwrite(src_minor.rev, src_major.rev);
734 oracle.record_overwrite(src_minor.rev, src_major.rev);
735 MergePick::Major
735 MergePick::Major
736 }
736 }
737 } else if src_minor.rev == current_merge {
737 } else if src_minor.rev == current_merge {
738 // The last value comes the current merge, this value -will- win
738 // The last value comes the current merge, this value -will- win
739 // eventually.
739 // eventually.
740 oracle.record_overwrite(src_major.rev, src_minor.rev);
740 oracle.record_overwrite(src_major.rev, src_minor.rev);
741 MergePick::Minor
741 MergePick::Minor
742 } else if src_major.path == src_minor.path {
742 } else if src_major.path == src_minor.path {
743 // we have the same value, but from other source;
743 // we have the same value, but from other source;
744 if src_major.rev == src_minor.rev {
744 if src_major.rev == src_minor.rev {
745 // If the two entry are identical, they are both valid
745 // If the two entry are identical, they are both valid
746 MergePick::Any
746 MergePick::Any
747 } else if oracle.is_overwrite(src_major.rev, src_minor.rev) {
747 } else if oracle.is_overwrite(src_major.rev, src_minor.rev) {
748 MergePick::Minor
748 MergePick::Minor
749 } else if oracle.is_overwrite(src_minor.rev, src_major.rev) {
750 MergePick::Major
749 } else {
751 } else {
750 MergePick::Major
752 MergePick::Major
751 }
753 }
752 } else if src_major.rev == src_minor.rev {
754 } else if src_major.rev == src_minor.rev {
753 // We cannot get copy information for both p1 and p2 in the
755 // We cannot get copy information for both p1 and p2 in the
754 // same rev. So this is the same value.
756 // same rev. So this is the same value.
755 unreachable!(
757 unreachable!(
756 "conflict information from p1 and p2 in the same revision"
758 "conflicting information from p1 and p2 in the same revision"
757 );
759 );
758 } else {
760 } else {
759 let dest_path = path_map.untokenize(*dest);
761 let dest_path = path_map.untokenize(*dest);
760 let action = changes.get_merge_case(dest_path);
762 let action = changes.get_merge_case(dest_path);
761 if src_major.path.is_none() && action == MergeCase::Salvaged {
763 if src_minor.path.is_some()
764 && src_major.path.is_none()
765 && action == MergeCase::Salvaged
766 {
762 // If the file is "deleted" in the major side but was
767 // If the file is "deleted" in the major side but was
763 // salvaged by the merge, we keep the minor side alive
768 // salvaged by the merge, we keep the minor side alive
764 MergePick::Minor
769 MergePick::Minor
765 } else if src_minor.path.is_none() && action == MergeCase::Salvaged {
770 } else if src_major.path.is_some()
771 && src_minor.path.is_none()
772 && action == MergeCase::Salvaged
773 {
766 // If the file is "deleted" in the minor side but was
774 // If the file is "deleted" in the minor side but was
767 // salvaged by the merge, unconditionnaly preserve the
775 // salvaged by the merge, unconditionnaly preserve the
768 // major side.
776 // major side.
769 MergePick::Major
777 MergePick::Major
770 } else if action == MergeCase::Merged {
778 } else if oracle.is_overwrite(src_minor.rev, src_major.rev) {
771 // If the file was actively merged, copy information
779 // The information from the minor version are strictly older than
772 // from each side might conflict. The major side will
780 // the major version
773 // win such conflict.
781 if action == MergeCase::Merged {
774 MergePick::Major
782 // If the file was actively merged, its means some non-copy
783 // activity happened on the other branch. It
784 // mean the older copy information are still relevant.
785 //
786 // The major side wins such conflict.
787 MergePick::Major
788 } else {
789 // No activity on the minor branch, pick the newer one.
790 MergePick::Major
791 }
775 } else if oracle.is_overwrite(src_major.rev, src_minor.rev) {
792 } else if oracle.is_overwrite(src_major.rev, src_minor.rev) {
776 // If the minor side is strictly newer than the major
793 if action == MergeCase::Merged {
777 // side, it should be kept.
794 // If the file was actively merged, its means some non-copy
778 MergePick::Minor
795 // activity happened on the other branch. It
779 } else if src_major.path.is_some() {
796 // mean the older copy information are still relevant.
780 // without any special case, the "major" value win
797 //
781 // other the "minor" one.
798 // The major side wins such conflict.
799 MergePick::Major
800 } else {
801 // No activity on the minor branch, pick the newer one.
802 MergePick::Minor
803 }
804 } else if src_minor.path.is_none() {
805 // the minor side has no relevant information, pick the alive one
782 MergePick::Major
806 MergePick::Major
783 } else if oracle.is_overwrite(src_minor.rev, src_major.rev) {
807 } else if src_major.path.is_none() {
784 // the "major" rev is a direct ancestors of "minor",
808 // the major side has no relevant information, pick the alive one
785 // any different value should
809 MergePick::Minor
786 // overwrite
787 MergePick::Major
788 } else {
810 } else {
789 // major version is None (so the file was deleted on
811 // by default the major side wins
790 // that branch) and that branch is independant (neither
812 MergePick::Major
791 // minor nor major is an ancestors of the other one.)
792 // We preserve the new
793 // information about the new file.
794 MergePick::Minor
795 }
813 }
796 }
814 }
797 }
815 }
General Comments 0
You need to be logged in to leave comments. Login now