copies: iterate over children directly (instead of parents)...
marmoute
r46764:294d5aca default
@@ -1,1210 +1,1263 @@
 # coding: utf8
 # copies.py - copy detection for Mercurial
 #
 # Copyright 2008 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 from __future__ import absolute_import

 import collections
 import os

 from .i18n import _
 from .node import (
     nullid,
     nullrev,
 )

 from . import (
     match as matchmod,
     pathutil,
     policy,
     pycompat,
     util,
 )


 from .utils import stringutil

 from .revlogutils import (
     flagutil,
     sidedata as sidedatamod,
 )

 rustmod = policy.importrust("copy_tracing")

 def _filter(src, dst, t):
     """filters out invalid copies after chaining"""

     # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
     # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
     # in the following table (not including trivial cases). For example, case 2
     # is where a file existed in 'src' and remained under that name in 'mid'
     # and then was renamed between 'mid' and 'dst'.
     #
     # case  src  mid  dst  result
     #   1    x    y    -    -
     #   2    x    y    y    x->y
     #   3    x    y    x    -
     #   4    x    y    z    x->z
     #   5    -    x    y    -
     #   6    x    x    y    x->y
     #
     # _chain() takes care of chaining the copies in 'a' and 'b', but it
     # cannot tell the difference between cases 1 and 2, between 3 and 4, or
     # between 5 and 6, so it includes all cases in its result.
     # Cases 1, 3, and 5 are then removed by _filter().

     for k, v in list(t.items()):
         # remove copies from files that didn't exist
         if v not in src:
             del t[k]
         # remove criss-crossed copies
         elif k in src and v in dst:
             del t[k]
         # remove copies to files that were then removed
         elif k not in dst:
             del t[k]


 def _chain(prefix, suffix):
     """chain two sets of copies 'prefix' and 'suffix'"""
     result = prefix.copy()
     for key, value in pycompat.iteritems(suffix):
         result[key] = prefix.get(value, value)
     return result

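To make the case table above concrete, here is a minimal sketch of how `_chain()` and `_filter()` compose; the byte-string sets and dicts standing in for the real contexts are hypothetical:

```python
# Stand-ins: 'src' and 'dst' are the sets of files present in those commits;
# copy mappings are {destination: source} dicts, as in copies.py.

def _chain(prefix, suffix):
    result = prefix.copy()
    for key, value in suffix.items():
        result[key] = prefix.get(value, value)
    return result

src = {b'x'}              # file 'x' exists in 'src'
a = {b'y': b'x'}          # src -> mid: 'x' renamed to 'y'
b = {b'z': b'y'}          # mid -> dst: 'y' renamed to 'z'
t = _chain(a, b)          # {b'y': b'x', b'z': b'x'}
dst = {b'z'}              # only 'z' survives in 'dst'

# _filter(src, dst, t) would now drop b'y' (case 1: gone from 'dst')
# and keep b'z': b'x' (case 4: x->z).
```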
 def _tracefile(fctx, am, basemf):
     """return file context that is the ancestor of fctx present in ancestor
     manifest am

     Note: we used to try and stop after a given limit, however checking if
     that limit was reached turned out to be very expensive; we are better
     off disabling that feature."""

     for f in fctx.ancestors():
         path = f.path()
         if am.get(path, None) == f.filenode():
             return path
         if basemf and basemf.get(path, None) == f.filenode():
             return path


 def _dirstatecopies(repo, match=None):
     ds = repo.dirstate
     c = ds.copies().copy()
     for k in list(c):
         if ds[k] not in b'anm' or (match and not match(k)):
             del c[k]
     return c


 def _computeforwardmissing(a, b, match=None):
     """Computes which files are in b but not a.
     This is its own function so extensions can easily wrap this call to see
     what files _forwardcopies is about to process.
     """
     ma = a.manifest()
     mb = b.manifest()
     return mb.filesnotin(ma, match=match)


 def usechangesetcentricalgo(repo):
     """Checks if we should use changeset-centric copy algorithms"""
     if repo.filecopiesmode == b'changeset-sidedata':
         return True
     readfrom = repo.ui.config(b'experimental', b'copies.read-from')
     changesetsource = (b'changeset-only', b'compatibility')
     return readfrom in changesetsource

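As a quick illustration of the dispatch above, a minimal sketch of which `experimental.copies.read-from` values select the changeset-centric algorithms (the loop just mirrors the membership test; no repo object is needed):

```python
# Truth table for the three recognized values of copies.read-from:
changesetsource = (b'changeset-only', b'compatibility')
for readfrom in (b'changeset-only', b'compatibility', b'filelog-only'):
    print(readfrom, readfrom in changesetsource)
# b'changeset-only'  True
# b'compatibility'   True
# b'filelog-only'    False
```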
 def _committedforwardcopies(a, b, base, match):
     """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
     # files might have to be traced back to the fctx parent of the last
     # one-side-only changeset, but not further back than that
     repo = a._repo

     if usechangesetcentricalgo(repo):
         return _changesetforwardcopies(a, b, match)

     debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
     dbg = repo.ui.debug
     if debug:
         dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
     am = a.manifest()
     basemf = None if base is None else base.manifest()

     # find where new files came from
     # we currently don't try to find where old files went, too expensive
     # this means we can miss a case like 'hg rm b; hg cp a b'
     cm = {}

     # Computing the forward missing is quite expensive on large manifests,
     # since it compares the entire manifests. We can optimize it in the common
     # use case of computing what copies are in a commit versus its parent
     # (like during a rebase or histedit). Note, we exclude merge commits from
     # this optimization, since the ctx.files() for a merge commit is not
     # correct for this comparison.
     forwardmissingmatch = match
     if b.p1() == a and b.p2().node() == nullid:
         filesmatcher = matchmod.exact(b.files())
         forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
     missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

     ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

     if debug:
         dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

     for f in sorted(missing):
         if debug:
             dbg(b'debug.copies: tracing file: %s\n' % f)
         fctx = b[f]
         fctx._ancestrycontext = ancestrycontext

         if debug:
             start = util.timer()
         opath = _tracefile(fctx, am, basemf)
         if opath:
             if debug:
                 dbg(b'debug.copies: rename of: %s\n' % opath)
             cm[f] = opath
             if debug:
                 dbg(
                     b'debug.copies: time: %f seconds\n'
                     % (util.timer() - start)
                 )
     return cm

 def _revinfo_getter(repo, match):
     """returns a function that returns the following data given a <rev>:

     * p1: revision number of first parent
     * p2: revision number of second parent
     * changes: a ChangingFiles object
     """
     cl = repo.changelog
     parents = cl.parentrevs
     flags = cl.flags

     HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

     changelogrevision = cl.changelogrevision

     # A small cache to avoid doing the work twice for merges
     #
     # In the vast majority of cases, if we ask information for a revision
     # about 1 parent, we'll later ask it for the other. So it makes sense to
     # keep the information around when reaching the first parent of a merge
     # and to drop it after it has been provided for the second parent.
     #
     # There are cases where only one parent of a merge will be walked. It
     # happens when the "destination" of the copy tracing is a descendant of
     # a new root, not common with the "source". In that case, we will only
     # walk through merge parents that are descendants of changesets common
     # between "source" and "destination".
     #
     # With the current implementation, if such changesets have copy
     # information, we'll keep them in memory until the end of
     # _changesetforwardcopies. We don't expect the case to be frequent
     # enough to matter.
     #
     # In addition, it would be possible to reach a pathological case, where
     # many first parents are met before any second parent is reached. In
     # that case the cache could grow. If this ever becomes an issue one can
     # safely introduce a maximum cache size. This would trade extra CPU/IO
     # time to save memory.
     merge_caches = {}

     alwaysmatch = match.always()

     if rustmod is not None and alwaysmatch:

         def revinfo(rev):
             p1, p2 = parents(rev)
             value = None
             e = merge_caches.pop(rev, None)
             if e is not None:
                 return e
             if flags(rev) & HASCOPIESINFO:
                 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
             else:
                 raw = None
             value = (p1, p2, raw)
             if p1 != nullrev and p2 != nullrev:
                 # XXX in some cases we over-cache, IGNORE
                 merge_caches[rev] = value
             return value

     else:

         def revinfo(rev):
             p1, p2 = parents(rev)
             value = None
             e = merge_caches.pop(rev, None)
             if e is not None:
                 return e
             changes = None
             if flags(rev) & HASCOPIESINFO:
                 changes = changelogrevision(rev).changes
             value = (p1, p2, changes)
             if p1 != nullrev and p2 != nullrev:
                 # XXX in some cases we over-cache, IGNORE
                 merge_caches[rev] = value
             return value

     return revinfo

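The `merge_caches` trick above — keep a merge's data after the first parent asks for it, drop it when the second does — can be sketched in isolation; `compute` is a hypothetical stand-in for the sidedata lookup:

```python
# Minimal sketch of the pop-based merge cache used by revinfo(): the first
# request computes and stores the value (for merges only); the second request
# pops it, so memory is released once both parents have been served.
merge_caches = {}

def cached_value(rev, is_merge, compute):
    e = merge_caches.pop(rev, None)  # second request: consume the cache
    if e is not None:
        return e
    value = compute(rev)
    if is_merge:                     # only merges are asked about twice
        merge_caches[rev] = value
    return value
```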
 def cached_is_ancestor(is_ancestor):
     """return a cached version of is_ancestor"""
     cache = {}

     def _is_ancestor(anc, desc):
         if anc > desc:
             return False
         elif anc == desc:
             return True
         key = (anc, desc)
         ret = cache.get(key)
         if ret is None:
             ret = cache[key] = is_ancestor(anc, desc)
         return ret

     return _is_ancestor

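`cached_is_ancestor` relies on revision numbers being topologically ordered (a parent always has a smaller number than its descendants), so `anc > desc` can short-circuit to False without touching the cache. A usage sketch of the function above, with a hypothetical expensive backend:

```python
calls = []

def slow_is_ancestor(anc, desc):   # hypothetical expensive backend
    calls.append((anc, desc))
    return True

fast = cached_is_ancestor(slow_is_ancestor)
fast(2, 5)   # computed, cached under key (2, 5)
fast(2, 5)   # served from the cache; no second backend call
fast(7, 5)   # anc > desc: False immediately, backend never called
assert calls == [(2, 5)]
```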
 def _changesetforwardcopies(a, b, match):
     if a.rev() in (nullrev, b.rev()):
         return {}

     repo = a.repo().unfiltered()
     children = {}

     cl = repo.changelog
     isancestor = cl.isancestorrev
-    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
-    mrset = set(missingrevs)
-    roots = set()
-    for r in missingrevs:
-        for p in cl.parentrevs(r):
-            if p == nullrev:
-                continue
-            if p not in children:
-                children[p] = [r]
-            else:
-                children[p].append(r)
-            if p not in mrset:
-                roots.add(p)
+
+    # To track a rename from "A" to "B", we need to gather all parent →
+    # children edges that are contained in `::B` but not in `::A`.
+    #
+    # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
+    # ::a`) and also all the "root points", ie the parents of the exclusive
+    # set that belong to ::a. These are exactly all the revisions needed to
+    # express the parent → children edges we need to combine.
+    #
+    # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
+    # excluding paths that lead to roots that are not ancestors of `a`. We
+    # keep this out of the explanation because it is hard enough without this
+    # special case.
+
+    parents = cl._uncheckedparentrevs
+    graph_roots = (nullrev, nullrev)
+
+    ancestors = cl.ancestors([a.rev()], inclusive=True)
+    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
+    roots = set()
+    has_graph_roots = False
+
+    # iterate over `only(B, A)`
+    for r in revs:
+        ps = parents(r)
+        if ps == graph_roots:
+            has_graph_roots = True
+        else:
+            p1, p2 = ps
+
+            # find all the "root points" (see larger comment above)
+            if p1 != nullrev and p1 in ancestors:
+                roots.add(p1)
+            if p2 != nullrev and p2 in ancestors:
+                roots.add(p2)
     if not roots:
         # no common revision to track copies from
         return {}
-    min_root = min(roots)
-
-    from_head = set(
-        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
-    )
-
-    iterrevs = set(from_head)
-    iterrevs &= mrset
-    iterrevs.update(roots)
-    iterrevs.remove(b.rev())
-    revs = sorted(iterrevs)
+    if has_graph_roots:
+        # this deals with the special case mentioned in the [1] footnote
+        # above. We must filter out revisions that lead to non-common graph
+        # roots.
+        roots = list(roots)
+        m = min(roots)
+        h = [b.rev()]
+        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
+        roots_to_head = set(roots_to_head)
+        revs = [r for r in revs if r in roots_to_head]

     if repo.filecopiesmode == b'changeset-sidedata':
+        # When using side-data, we will process the edges "from" the
+        # children. We iterate over the children, gathering previously
+        # collected data for the parents. To know when the parents' data is
+        # no longer necessary, we keep a counter of how many children each
+        # revision has.
+        #
+        # An interesting property of `children_count` is that it only
+        # contains revisions that will be relevant for an edge of the graph.
+        # So if a child has a parent not in `children_count`, that edge
+        # should not be processed.
+        children_count = dict((r, 0) for r in roots)
+        for r in revs:
+            for p in cl.parentrevs(r):
+                if p == nullrev:
+                    continue
+                children_count[r] = 0
+                if p in children_count:
+                    children_count[p] += 1
         revinfo = _revinfo_getter(repo, match)
         return _combine_changeset_copies(
-            revs, children, b.rev(), revinfo, match, isancestor
+            revs, children_count, b.rev(), revinfo, match, isancestor
         )
     else:
+        # When not using side-data, we will process the edges "from" the
+        # parents, so we need a full mapping of the parent -> children
+        # relation.
+        children = dict((r, []) for r in roots)
+        for r in revs:
+            for p in cl.parentrevs(r):
+                if p == nullrev:
+                    continue
+                children[r] = []
+                if p in children:
+                    children[p].append(r)
+        x = revs.pop()
+        assert x == b.rev()
+        revs.extend(roots)
+        revs.sort()
+
         revinfo = _revinfo_getter_extra(repo)
         return _combine_changeset_copies_extra(
             revs, children, b.rev(), revinfo, match, isancestor
         )

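A worked example of the setup above on a hypothetical five-revision graph (0 <- 1 <- 2, 1 <- 3, and 4 merging 2 and 3), tracing copies from a = rev 1 to b = rev 4; `parentrevs` mimics `cl.parentrevs` for the revisions exclusive to `::4`:

```python
# only(4, 1) = {2, 3, 4}; the sole "root point" inside ::1 is rev 1 itself.
roots = {1}
parentrevs = {2: (1, -1), 3: (1, -1), 4: (2, 3)}  # -1 stands for nullrev

children_count = {r: 0 for r in roots}
for r in sorted(parentrevs):
    for p in parentrevs[r]:
        if p == -1:  # nullrev
            continue
        children_count[r] = 0
        if p in children_count:
            children_count[p] += 1

# rev 1 feeds two edges (to 2 and 3); 2 and 3 each feed the merge; b itself
# is consumed by nobody, so its copy data survives for the final lookup.
assert children_count == {1: 2, 2: 1, 3: 1, 4: 0}
```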
 def _combine_changeset_copies(
-    revs, children, targetrev, revinfo, match, isancestor
+    revs, children_count, targetrev, revinfo, match, isancestor
 ):
     """combine the copies information for each item of iterrevs

     revs: sorted iterable of revisions to visit
-    children: a {parent: [children]} mapping.
+    children_count: a {parent: <number-of-relevant-children>} mapping.
     targetrev: the final copies destination revision (not in iterrevs)
     revinfo(rev): a function that returns (p1, p2, p1copies, p2copies, removed)
     match: a matcher

     It returns the aggregated copies information for `targetrev`.
     """

     alwaysmatch = match.always()

     if rustmod is not None and alwaysmatch:
         return rustmod.combine_changeset_copies(
-            list(revs), children, targetrev, revinfo, isancestor
+            list(revs), children_count, targetrev, revinfo, isancestor
         )

     isancestor = cached_is_ancestor(isancestor)

     all_copies = {}
-    # iterate over all the "parent" side of copy tracing "edge"
-    for r in revs:
-        # fetch potential previously computed data for that parent
-        copies = all_copies.pop(r, None)
-        if copies is None:
-            # this is a root
-            copies = {}
+    # iterate over all the "children" side of copy tracing "edge"
+    for current_rev in revs:
+        p1, p2, changes = revinfo(current_rev)
+        current_copies = None

-        # iterate over all known children to chain the existing data with the
+        # iterate over all parents to chain the existing data with the
         # data from the parent → child edge.
-        for i, c in enumerate(children[r]):
-            p1, p2, changes = revinfo(c)
-            childcopies = {}
+        for parent, parent_rev in ((1, p1), (2, p2)):
+            if parent_rev == nullrev:
+                continue
+            remaining_children = children_count.get(parent_rev)
+            if remaining_children is None:
+                continue
+            remaining_children -= 1
+            children_count[parent_rev] = remaining_children
+            if remaining_children:
+                copies = all_copies.get(parent_rev, None)
+            else:
+                copies = all_copies.pop(parent_rev, None)

-            # select the right parent → child edge
-            if r == p1:
-                parent = 1
-                if changes is not None:
+            if copies is None:
+                # this is a root
+                copies = {}
+
+            newcopies = copies
+            # chain the data in the edge with the existing data
+            if changes is not None:
+                childcopies = {}
+                if parent == 1:
                     childcopies = changes.copied_from_p1
-            else:
-                assert r == p2
-                parent = 2
-                if changes is not None:
+                elif parent == 2:
                     childcopies = changes.copied_from_p2
-            if not alwaysmatch:
-                childcopies = {
-                    dst: src for dst, src in childcopies.items() if match(dst)
-                }

-            # chain the data in the edge with the existing data
-            newcopies = copies
-            if childcopies:
-                newcopies = copies.copy()
-                for dest, source in pycompat.iteritems(childcopies):
-                    prev = copies.get(source)
-                    if prev is not None and prev[1] is not None:
-                        source = prev[1]
-                    newcopies[dest] = (c, source)
-                assert newcopies is not copies
-            if changes is not None and changes.removed:
-                if newcopies is copies:
-                    newcopies = copies.copy()
-                for f in changes.removed:
-                    if f in newcopies:
-                        if newcopies is copies:
-                            # copy on write to avoid affecting potential other
-                            # branches. when there are no other branches, this
-                            # could be avoided.
-                            newcopies = copies.copy()
-                        newcopies[f] = (c, None)
+                if not alwaysmatch:
+                    childcopies = {
+                        dst: src
+                        for dst, src in childcopies.items()
+                        if match(dst)
+                    }
+                if childcopies:
+                    newcopies = copies.copy()
+                    for dest, source in pycompat.iteritems(childcopies):
+                        prev = copies.get(source)
+                        if prev is not None and prev[1] is not None:
+                            source = prev[1]
+                        newcopies[dest] = (current_rev, source)
+                    assert newcopies is not copies
+                if changes.removed:
+                    if newcopies is copies:
+                        newcopies = copies.copy()
+                    for f in changes.removed:
+                        if f in newcopies:
+                            if newcopies is copies:
+                                # copy on write to avoid affecting potential
+                                # other branches. when there are no other
+                                # branches, this could be avoided.
+                                newcopies = copies.copy()
+                            newcopies[f] = (current_rev, None)

             # check potential need to combine the data from another parent
             # (for that child). See comment below for details.
-            othercopies = all_copies.get(c)
-            if othercopies is None:
-                all_copies[c] = newcopies
-            elif newcopies is othercopies:
+            if current_copies is None:
+                current_copies = newcopies
+            elif current_copies is newcopies:
                 # nothing to merge:
                 pass
             else:
                 # we are the second parent to work on c, we need to merge our
                 # work with the other.
                 #
                 # In case of conflict, parent 1 takes precedence over parent
                 # 2. This is an arbitrary choice made anew when implementing
                 # changeset based copies. It was made without regard to
                 # potential filelog related behavior.
-                if parent == 1:
-                    if newcopies is copies:
-                        newcopies = copies.copy()
-                    minor, major = othercopies, newcopies
-                else:
-                    # we do not know if the other dict is a copy or not, so we
-                    # need to blindly copy it. Future change should make this
-                    # unnecessary.
-                    minor, major = newcopies, othercopies.copy()
-                copies = _merge_copies_dict(minor, major, isancestor, changes)
-                all_copies[c] = copies
+                assert parent == 2
+                current_copies = _merge_copies_dict(
+                    newcopies, current_copies, isancestor, changes
+                )
+        all_copies[current_rev] = current_copies

     # filter out internal details and return a {dest: source} mapping
     final_copies = {}
     for dest, (tt, source) in all_copies[targetrev].items():
         if source is not None:
             final_copies[dest] = source
     return final_copies

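The values flowing through `all_copies` are `{dest: (rev, source)}` pairs, where `source is None` marks a copy cancelled by a later removal; the loop at the end of the function keeps only live copies. A standalone sketch of that final filtering step (the paths are hypothetical):

```python
all_copies_at_target = {
    b'renamed.txt': (4, b'original.txt'),  # live rename: reported
    b'deleted.txt': (3, None),             # copy overwritten by a removal
}
final_copies = {}
for dest, (tt, source) in all_copies_at_target.items():
    if source is not None:
        final_copies[dest] = source
assert final_copies == {b'renamed.txt': b'original.txt'}
```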
 def _merge_copies_dict(minor, major, isancestor, changes):
     """merge two copies-mappings together, minor and major

     In case of conflict, the value from "major" will be picked.

     - `isancestor(low_rev, high_rev)`: callable returning True if `low_rev`
       is an ancestor of `high_rev`,

     - `ismerged(path)`: callable returning True if `path` has been merged in
       the current revision,

     returns the resulting dict (in practice, the "minor" object, updated)
     """
     for dest, value in major.items():
         other = minor.get(dest)
         if other is None:
             minor[dest] = value
         else:
             new_tt = value[0]
             other_tt = other[0]
             if value[1] == other[1]:
                 continue
             # content from "major" wins, unless it is older
             # than the branch point or there is a merge
             if new_tt == other_tt:
                 minor[dest] = value
             elif (
                 changes is not None
                 and value[1] is None
                 and dest in changes.salvaged
             ):
                 pass
             elif (
                 changes is not None
                 and other[1] is None
                 and dest in changes.salvaged
             ):
                 minor[dest] = value
             elif changes is not None and dest in changes.merged:
                 minor[dest] = value
             elif not isancestor(new_tt, other_tt):
                 if value[1] is not None:
                     minor[dest] = value
             elif isancestor(other_tt, new_tt):
                 minor[dest] = value
     return minor

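A simplified, standalone illustration of the precedence rule above, reduced to the ancestor check (salvaged/merged handling omitted): on a clean conflict, `major` overwrites `minor` unless `major`'s revision is an ancestor of `minor`'s, i.e. older than the branch point. The revision numbers and ancestry below are hypothetical:

```python
def is_ancestor(anc, desc):        # hypothetical: 2 is an ancestor of 5
    return (anc, desc) == (2, 5)

minor = {b'f': (5, b'old_source')}
major = {b'f': (2, b'new_source')}
for dest, value in major.items():
    other = minor.get(dest)
    if other is not None and value[1] != other[1]:
        new_tt, other_tt = value[0], other[0]
        if not is_ancestor(new_tt, other_tt):
            if value[1] is not None:
                minor[dest] = value

# major's entry is older (rev 2 predates the branch point), so minor keeps
# its own, newer value.
assert minor[b'f'] == (5, b'old_source')
```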
 def _revinfo_getter_extra(repo):
     """return a function that returns multiple data items given a <rev>:

     * p1: revision number of first parent
     * p2: revision number of second parent
     * p1copies: mapping of copies from p1
     * p2copies: mapping of copies from p2
     * removed: a list of removed files
     * ismerged: a callback to know if a file was merged in that revision
     """
     cl = repo.changelog
     parents = cl.parentrevs

     def get_ismerged(rev):
         ctx = repo[rev]

         def ismerged(path):
             if path not in ctx.files():
                 return False
             fctx = ctx[path]
             parents = fctx._filelog.parents(fctx._filenode)
             nb_parents = 0
             for n in parents:
                 if n != nullid:
                     nb_parents += 1
             return nb_parents >= 2

         return ismerged

     def revinfo(rev):
         p1, p2 = parents(rev)
         ctx = repo[rev]
         p1copies, p2copies = ctx._copies
         removed = ctx.filesremoved()
         return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

     return revinfo

 def _combine_changeset_copies_extra(
     revs, children, targetrev, revinfo, match, isancestor
 ):
     """version of `_combine_changeset_copies` that works with the Google
     specific "extra" based storage for copy information"""
     all_copies = {}
     alwaysmatch = match.always()
     for r in revs:
         copies = all_copies.pop(r, None)
         if copies is None:
             # this is a root
             copies = {}
         for i, c in enumerate(children[r]):
             p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
             if r == p1:
                 parent = 1
                 childcopies = p1copies
             else:
                 assert r == p2
                 parent = 2
                 childcopies = p2copies
             if not alwaysmatch:
                 childcopies = {
                     dst: src for dst, src in childcopies.items() if match(dst)
                 }
             newcopies = copies
             if childcopies:
                 newcopies = copies.copy()
                 for dest, source in pycompat.iteritems(childcopies):
                     prev = copies.get(source)
                     if prev is not None and prev[1] is not None:
                         source = prev[1]
                     newcopies[dest] = (c, source)
                 assert newcopies is not copies
             for f in removed:
                 if f in newcopies:
                     if newcopies is copies:
                         # copy on write to avoid affecting potential other
                         # branches. when there are no other branches, this
                         # could be avoided.
                         newcopies = copies.copy()
                     newcopies[f] = (c, None)
             othercopies = all_copies.get(c)
             if othercopies is None:
                 all_copies[c] = newcopies
             else:
                 # we are the second parent to work on c, we need to merge our
                 # work with the other.
                 #
                 # In case of conflict, parent 1 takes precedence over parent
                 # 2. This is an arbitrary choice made anew when implementing
                 # changeset based copies. It was made without regard to
                 # potential filelog related behavior.
                 if parent == 1:
                     _merge_copies_dict_extra(
                         othercopies, newcopies, isancestor, ismerged
                     )
                 else:
                     _merge_copies_dict_extra(
                         newcopies, othercopies, isancestor, ismerged
                     )
                 all_copies[c] = newcopies

     final_copies = {}
     for dest, (tt, source) in all_copies[targetrev].items():
         if source is not None:
             final_copies[dest] = source
     return final_copies

 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
     """version of `_merge_copies_dict` that works with the Google
     specific "extra" based storage for copy information"""
     for dest, value in major.items():
         other = minor.get(dest)
         if other is None:
             minor[dest] = value
         else:
             new_tt = value[0]
             other_tt = other[0]
             if value[1] == other[1]:
                 continue
             # content from "major" wins, unless it is older
             # than the branch point or there is a merge
             if (
                 new_tt == other_tt
                 or not isancestor(new_tt, other_tt)
                 or ismerged(dest)
             ):
                 minor[dest] = value

 def _forwardcopies(a, b, base=None, match=None):
     """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

     if base is None:
         base = a
     match = a.repo().narrowmatch(match)
     # check for working copy
     if b.rev() is None:
         cm = _committedforwardcopies(a, b.p1(), base, match)
         # combine copies from dirstate if necessary
         copies = _chain(cm, _dirstatecopies(b._repo, match))
     else:
         copies = _committedforwardcopies(a, b, base, match)
     return copies


 def _backwardrenames(a, b, match):
     if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
         return {}

     # Even though we're not taking copies into account, 1:n rename situations
     # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
     # arbitrarily pick one of the renames.
     # We don't want to pass in "match" here, since that would filter
     # the destination by it. Since we're reversing the copies, we want
     # to filter the source instead.
     f = _forwardcopies(b, a)
     r = {}
     for k, v in sorted(pycompat.iteritems(f)):
         if match and not match(v):
             continue
         # remove copies
         if v in a:
             continue
         r[v] = k
     return r

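`_backwardrenames` inverts a forward mapping, and the 1:n note above means the inversion must arbitrarily collapse duplicates. A minimal sketch with plain dicts (the paths are hypothetical):

```python
forward = {b'b': b'a', b'c': b'a'}  # both b and c trace back to a
still_present = set()               # 'a' itself is gone: these are renames

reverse = {}
for dst, src in sorted(forward.items()):
    if src in still_present:
        continue                    # a copy, not a rename: skipped
    reverse[src] = dst              # later entries win: the arbitrary pick
assert reverse == {b'a': b'c'}
```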
 def pathcopies(x, y, match=None):
     """find {dst@y: src@x} copy mapping for directed compare"""
     repo = x._repo
     debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
     if debug:
         repo.ui.debug(
             b'debug.copies: searching copies from %s to %s\n' % (x, y)
         )
     if x == y or not x or not y:
         return {}
     if y.rev() is None and x == y.p1():
         if debug:
             repo.ui.debug(b'debug.copies: search mode: dirstate\n')
         # short-circuit to avoid issues with merge states
         return _dirstatecopies(repo, match)
     a = y.ancestor(x)
     if a == x:
         if debug:
             repo.ui.debug(b'debug.copies: search mode: forward\n')
         copies = _forwardcopies(x, y, match=match)
     elif a == y:
         if debug:
             repo.ui.debug(b'debug.copies: search mode: backward\n')
         copies = _backwardrenames(x, y, match=match)
     else:
         if debug:
             repo.ui.debug(b'debug.copies: search mode: combined\n')
         base = None
         if a.rev() != nullrev:
             base = x
         copies = _chain(
             _backwardrenames(x, a, match=match),
             _forwardcopies(a, y, base, match=match),
         )
     _filter(x, y, copies)
     return copies

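The three search modes reduce to where the greatest common ancestor `a` of x and y sits; a small sketch of the dispatch, with hypothetical revision labels standing in for contexts:

```python
def pick_search_mode(x, y, a):
    """a is the common ancestor of x and y (sketch of pathcopies dispatch)."""
    if a == x:
        return 'forward'    # x..y is a straight line: follow copies forward
    elif a == y:
        return 'backward'   # y is the ancestor: invert the forward copies
    else:
        return 'combined'   # back from x to a, then forward from a to y

assert pick_search_mode('r1', 'r5', 'r1') == 'forward'
assert pick_search_mode('r5', 'r1', 'r1') == 'backward'
assert pick_search_mode('r4', 'r5', 'r2') == 'combined'
```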
 def mergecopies(repo, c1, c2, base):
     """
     Finds moves and copies between context c1 and c2 that are relevant for
     merging. 'base' will be used as the merge base.

     Copytracing is used in commands like rebase, merge, unshelve, etc to
     merge files that were moved/copied in one merge parent and modified in
     another. For example:

     o          ---> 4 another commit
     |
     |   o      ---> 3 commit that modifies a.txt
     |  /
     o /        ---> 2 commit that moves a.txt to b.txt
     |/
     o          ---> 1 merge base

     If we try to rebase revision 3 onto revision 4, since there is no a.txt
     in revision 4, and if the user has copytrace disabled, we print the
     following message:

     ```other changed <file> which local deleted```

     Returns a tuple where:

     "branch_copies" an instance of branch_copies.

     "diverge" is a mapping of source name -> list of destination names
     for divergent renames.

     This function calls different copytracing algorithms based on config.
     """
     # avoid silly behavior for update from empty dir
     if not c1 or not c2 or c1 == c2:
         return branch_copies(), branch_copies(), {}

     narrowmatch = c1.repo().narrowmatch()

     # avoid silly behavior for parent -> working dir
     if c2.node() is None and c1.node() == repo.dirstate.p1():
         return (
             branch_copies(_dirstatecopies(repo, narrowmatch)),
             branch_copies(),
             {},
         )

     copytracing = repo.ui.config(b'experimental', b'copytrace')
     if stringutil.parsebool(copytracing) is False:
         # stringutil.parsebool() returns None when it is unable to parse the
         # value, so we keep copytracing on in such cases
         return branch_copies(), branch_copies(), {}

     if usechangesetcentricalgo(repo):
         # The heuristics don't make sense when we need changeset-centric algos
         return _fullcopytracing(repo, c1, c2, base)

     # Copy trace disabling is explicitly below the node == p1 logic above
     # because the logic above is required for a simple copy to be kept across
     # a rebase.
     if copytracing == b'heuristics':
         # Do full copytracing if only non-public revisions are involved, as
         # that will be fast enough and will also cover the copies which could
         # be missed by heuristics
         if _isfullcopytraceable(repo, c1, base):
             return _fullcopytracing(repo, c1, c2, base)
         return _heuristicscopytracing(repo, c1, c2, base)
     else:
         return _fullcopytracing(repo, c1, c2, base)


 def _isfullcopytraceable(repo, c1, base):
     """Checks whether base, source and destination are all non-public; if
     so, use the full copytrace algorithm for increased capabilities, since
     it will be fast enough.

     `experimental.copytrace.sourcecommitlimit` can be used to set a limit on
     the number of changesets from c1 to base; if there are more changesets
     than the limit, the full copytracing algorithm won't be used.
     """
     if c1.rev() is None:
         c1 = c1.p1()
     if c1.mutable() and base.mutable():
         sourcecommitlimit = repo.ui.configint(
             b'experimental', b'copytrace.sourcecommitlimit'
         )
         commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
         return commits < sourcecommitlimit
     return False

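The heuristics gate above is just a commit-count comparison; a sketch with hypothetical numbers (the default of 100 for `experimental.copytrace.sourcecommitlimit` is the value configured in current Mercurial, but treat it as an assumption here):

```python
def is_full_copytrace_fast_enough(commits_between, sourcecommitlimit=100):
    # mirrors: len(repo.revs(b'%d::%d', base.rev(), c1.rev())) < limit
    return commits_between < sourcecommitlimit

assert is_full_copytrace_fast_enough(12) is True    # full copytracing
assert is_full_copytrace_fast_enough(250) is False  # fall back to heuristics
```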
 def _checksinglesidecopies(
     src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
 ):
     if src not in m2:
         # deleted on side 2
         if src not in m1:
             # renamed on side 1, deleted on side 2
             renamedelete[src] = dsts1
     elif src not in mb:
         # Work around the "short-circuit to avoid issues with merge states"
         # thing in pathcopies(): pathcopies(x, y) can return a copy where the
         # destination doesn't exist in y.
         pass
     elif mb[src] != m2[src] and not _related(c2[src], base[src]):
         return
     elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
         # modified on side 2
         for dst in dsts1:
             copy[dst] = src


 class branch_copies(object):
     """Information about copies made on one side of a merge/graft.

     "copy" is a mapping from destination name -> source name,
     where source is in c1 and destination is in c2 or vice-versa.

     "movewithdir" is a mapping from source name -> destination name,
     where the file at source, present in one context but not the other,
     needs to be moved to destination by the merge process, because the
     other context moved the directory it is in.

     "renamedelete" is a mapping of source name -> list of destination
     names for files deleted in c1 that were renamed in c2 or vice-versa.

     "dirmove" is a mapping of detected source dir -> destination dir renames.
     This is needed for handling changes to new files previously grafted into
     renamed directories.
     """

     def __init__(
         self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
     ):
         self.copy = {} if copy is None else copy
         self.renamedelete = {} if renamedelete is None else renamedelete
         self.dirmove = {} if dirmove is None else dirmove
         self.movewithdir = {} if movewithdir is None else movewithdir

     def __repr__(self):
         return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
             self.copy,
             self.renamedelete,
             self.dirmove,
             self.movewithdir,
         )

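A brief usage sketch of `branch_copies` as a plain value object; the file names are hypothetical:

```python
copies_side1 = branch_copies(
    copy={b'new.txt': b'old.txt'},             # destination -> source
    renamedelete={b'gone.txt': [b'kept.txt']}  # deleted here, renamed there
)
print(copies_side1.copy[b'new.txt'])  # b'old.txt'
print(copies_side1.dirmove)           # {} (unset mappings default to empty)
```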
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm, which finds all the new files that were
    added from the merge base up to the top commit; for each such file it
    checks whether it was copied from another file.

    This is pretty slow when a lot of changesets are involved, but it will
    track all the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    cache = []

    def _get_addedfiles(idx):
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        header = b" unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
            )

        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge

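# Editor's sketch of the third return value (illustrative history, not from
# the original file): if base has 'a', side 1 renames it to 'b', side 2
# renames it to 'c', and the destination sets do not overlap, then neither
# copy1 nor copy2 records 'a', and
#
#     diverge == {b'a': [b'b', b'c']}
#
# i.e. 'a' diverged into two names and is flagged for the merge machinery.
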
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared to
    ctx)
    """
    # generate a directory move map
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del invalid

    if not dirmove:
        return {}, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfilesfn():
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                        break

    return dirmove, movewithdir

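# Editor's sketch (illustrative values, not from the original file): given
#
#     fullcopy == {b'b/f': b'a/f', b'b/g': b'a/g'}
#
# and no file left under a/ in ctx, the scan above infers
#
#     dirmove == {b'a/': b'b/'}
#
# and a file b'a/new' added on the other side is then slated to move as
#
#     movewithdir == {b'a/new': b'b/new'}
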
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of the following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    The source branch is the commits from base up to c2, not including base.

    If a merge is involved, it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by the heuristics can be very
    numerous, which makes the algorithm slow. The number of possible
    candidates to check can be limited via the config
    `experimental.copytrace.movecandidateslimit`, which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates, which can slow down the
            # heuristics; config value to limit the number of candidate moves
            # to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}

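# Editor's sketch (hypothetical paths, not from the original file): if
# 'dir/file.py' exists in base and c2 but is missing from c1, its move
# candidates are the files added in c1 that share its basename or its
# directory, e.g. 'newdir/file.py'. _related() then confirms the two
# filelogs share an ancestor before the move is recorded in copies1.
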
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        return False

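# Editor's illustration of the lockstep walk above (made-up linkrevs):
#
#     f1 chain linkrevs: 9, 7, 4        f2 chain linkrevs: 8, 4
#     9 > 8 -> advance f1 (now 7);      8 > 7 -> advance f2 (now 4);
#     7 > 4 -> advance f1 (now 4);      linkrevs equal -> return f1 == f2
#
# If either iterator runs out first, the StopIteration handler reports the
# files as unrelated.
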
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    _filter(wctx.p1(), wctx, new_copies)
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)
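# Editor's note (a reading of the docstring above, not original text): this
# is what lets `hg graft` carry renames along. Grafting a changeset that
# renamed a -> b ends up calling wctx[b'b'].markcopied(b'a'), so the rename
# is recorded in the working copy even when merge.update() missed it.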
@@ -1,665 +1,704
use crate::utils::hg_path::HgPath;
use crate::utils::hg_path::HgPathBuf;
use crate::Revision;
+use crate::NULL_REVISION;

use im_rc::ordmap::DiffItem;
use im_rc::ordmap::OrdMap;

use std::cmp::Ordering;
use std::collections::HashMap;
use std::convert::TryInto;

pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;

type PathToken = HgPathBuf;

#[derive(Clone, Debug, PartialEq)]
struct TimeStampedPathCopy {
    /// revision at which the copy information was added
    rev: Revision,
    /// the copy source (set to None in case of deletion of the associated
    /// key)
    path: Option<PathToken>,
}

/// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
type TimeStampedPathCopies = OrdMap<PathToken, TimeStampedPathCopy>;

/// hold parent 1, parent 2 and relevant files actions.
pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);

/// represent the files affected by a changeset
///
/// This holds a subset of mercurial.metadata.ChangingFiles, as we do not
/// need all the data categories it tracks.
pub struct ChangedFiles<'a> {
    nb_items: u32,
    index: &'a [u8],
    data: &'a [u8],
}

/// Represent active changes that affect the copy tracing.
enum Action<'a> {
    /// The parent → children edge is removing a file
    ///
    /// (actually, this could be the edge from the other parent, but it does
    /// not matter)
    Removed(&'a HgPath),
    /// The parent → children edge introduces copy information between (dest,
    /// source)
    Copied(&'a HgPath, &'a HgPath),
}

/// This expresses the possible "special" cases we can get in a merge
///
/// See mercurial/metadata.py for details on these values.
#[derive(PartialEq)]
enum MergeCase {
    /// Merged: file had history on both sides that needed to be merged
    Merged,
    /// Salvaged: file was candidate for deletion, but survived the merge
    Salvaged,
    /// Normal: Not one of the two cases above
    Normal,
}

type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);

const EMPTY: &[u8] = b"";
const COPY_MASK: u8 = 3;
const P1_COPY: u8 = 2;
const P2_COPY: u8 = 3;
const ACTION_MASK: u8 = 28;
const REMOVED: u8 = 12;
const MERGED: u8 = 8;
const SALVAGED: u8 = 16;

impl<'a> ChangedFiles<'a> {
    const INDEX_START: usize = 4;
    const ENTRY_SIZE: u32 = 9;
    const FILENAME_START: u32 = 1;
    const COPY_SOURCE_START: u32 = 5;

    pub fn new(data: &'a [u8]) -> Self {
        assert!(
            data.len() >= 4,
            "data size ({}) is too small to contain the header (4)",
            data.len()
        );
        let nb_items_raw: [u8; 4] = (&data[0..=3])
            .try_into()
            .expect("failed to turn 4 bytes into 4 bytes");
        let nb_items = u32::from_be_bytes(nb_items_raw);

        let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
        let index_end = Self::INDEX_START + index_size;

        assert!(
            data.len() >= index_end,
            "data size ({}) is too small to fit the index_data ({})",
            data.len(),
            index_end
        );

        let ret = ChangedFiles {
            nb_items,
            index: &data[Self::INDEX_START..index_end],
            data: &data[index_end..],
        };
        let max_data = ret.filename_end(nb_items - 1) as usize;
        assert!(
            ret.data.len() >= max_data,
            "data size ({}) is too small to fit all data ({})",
            data.len(),
            index_end + max_data
        );
        ret
    }

    pub fn new_empty() -> Self {
        ChangedFiles {
            nb_items: 0,
            index: EMPTY,
            data: EMPTY,
        }
    }

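    // Editor's sketch of the byte layout assumed by `new()` above,
    // reconstructed from the constants (an informal reading, not an
    // authoritative format spec):
    //
    //   bytes [0..4]       number of entries `n`, as a big-endian u32
    //   bytes [4..4+9n]    index, 9 bytes per entry:
    //                        1 flag byte (copy and action bits),
    //                        big-endian u32: end offset of the filename,
    //                        big-endian u32: index of the copy-source entry
    //   bytes [4+9n..]     all filenames concatenated; entry `i` spans
    //                      filename_end(i - 1)..filename_end(i)
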
    /// internal function to return an individual entry at a given index
    fn entry(&'a self, idx: u32) -> FileChange<'a> {
        if idx >= self.nb_items {
            panic!(
                "index for entry is higher than the number of files {} >= {}",
                idx, self.nb_items
            )
        }
        let flags = self.flags(idx);
        let filename = self.filename(idx);
        let copy_idx = self.copy_idx(idx);
        let copy_source = self.filename(copy_idx);
        (flags, filename, copy_source)
    }

    /// internal function to return the filename of the entry at a given index
    fn filename(&self, idx: u32) -> &HgPath {
        let filename_start;
        if idx == 0 {
            filename_start = 0;
        } else {
            filename_start = self.filename_end(idx - 1)
        }
        let filename_end = self.filename_end(idx);
        let filename_start = filename_start as usize;
        let filename_end = filename_end as usize;
        HgPath::new(&self.data[filename_start..filename_end])
    }

    /// internal function to return the flag field of the entry at a given
    /// index
    fn flags(&self, idx: u32) -> u8 {
        let idx = idx as usize;
        self.index[idx * (Self::ENTRY_SIZE as usize)]
    }

    /// internal function to return the end of a filename part at a given index
    fn filename_end(&self, idx: u32) -> u32 {
        let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
        let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
        let start = start as usize;
        let end = end as usize;
        let raw = (&self.index[start..end])
            .try_into()
            .expect("failed to turn 4 bytes into 4 bytes");
        u32::from_be_bytes(raw)
    }

    /// internal function to return the index of the copy source of the entry
    /// at a given index
    fn copy_idx(&self, idx: u32) -> u32 {
        let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
        let end = (idx + 1) * Self::ENTRY_SIZE;
        let start = start as usize;
        let end = end as usize;
        let raw = (&self.index[start..end])
            .try_into()
            .expect("failed to turn 4 bytes into 4 bytes");
        u32::from_be_bytes(raw)
    }

    /// Return an iterator over all the `Action` in this instance.
    fn iter_actions(&self, parent: Parent) -> ActionsIterator {
        ActionsIterator {
            changes: &self,
            parent: parent,
            current: 0,
        }
    }

    /// return the MergeCase value associated with a filename
    fn get_merge_case(&self, path: &HgPath) -> MergeCase {
        if self.nb_items == 0 {
            return MergeCase::Normal;
        }
        let mut low_part = 0;
        let mut high_part = self.nb_items;

        while low_part < high_part {
            let cursor = (low_part + high_part - 1) / 2;
            let (flags, filename, _source) = self.entry(cursor);
            match path.cmp(filename) {
                Ordering::Less => low_part = cursor + 1,
                Ordering::Greater => high_part = cursor,
                Ordering::Equal => {
                    return match flags & ACTION_MASK {
                        MERGED => MergeCase::Merged,
                        SALVAGED => MergeCase::Salvaged,
                        _ => MergeCase::Normal,
                    };
                }
            }
        }
        MergeCase::Normal
    }
}

/// A struct responsible for answering "is X an ancestor of Y" quickly
///
/// The structure will delegate ancestor calls to a callback, and cache the
/// results.
#[derive(Debug)]
struct AncestorOracle<'a, A: Fn(Revision, Revision) -> bool> {
    inner: &'a A,
    pairs: HashMap<(Revision, Revision), bool>,
}

impl<'a, A: Fn(Revision, Revision) -> bool> AncestorOracle<'a, A> {
    fn new(func: &'a A) -> Self {
        Self {
            inner: func,
            pairs: HashMap::default(),
        }
    }

    /// returns `true` if `anc` is an ancestor of `desc`, `false` otherwise
    fn is_ancestor(&mut self, anc: Revision, desc: Revision) -> bool {
        if anc > desc {
            false
        } else if anc == desc {
            true
        } else {
            if let Some(b) = self.pairs.get(&(anc, desc)) {
                *b
            } else {
                let b = (self.inner)(anc, desc);
                self.pairs.insert((anc, desc), b);
                b
            }
        }
    }
}

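// Editor's usage sketch (hypothetical callback, not from the original file):
//
//     let is_anc = |anc: Revision, desc: Revision| anc <= desc; // toy rule
//     let mut oracle = AncestorOracle::new(&is_anc);
//     assert!(oracle.is_ancestor(1, 2)); // consults the callback and caches
//     assert!(oracle.is_ancestor(1, 2)); // answered from the cache
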
struct ActionsIterator<'a> {
    changes: &'a ChangedFiles<'a>,
    parent: Parent,
    current: u32,
}

impl<'a> Iterator for ActionsIterator<'a> {
    type Item = Action<'a>;

    fn next(&mut self) -> Option<Action<'a>> {
        let copy_flag = match self.parent {
            Parent::FirstParent => P1_COPY,
            Parent::SecondParent => P2_COPY,
        };
        while self.current < self.changes.nb_items {
            let (flags, file, source) = self.changes.entry(self.current);
            self.current += 1;
            if (flags & ACTION_MASK) == REMOVED {
                return Some(Action::Removed(file));
            }
            let copy = flags & COPY_MASK;
            if copy == copy_flag {
                return Some(Action::Copied(file, source));
            }
        }
        return None;
    }
}

/// A small struct whose purpose is to ensure the lifetime of bytes
/// referenced in ChangedFiles
///
/// It is passed to the RevInfoMaker callback, which can assign any necessary
/// content to the `data` attribute. The copy tracing code is responsible for
/// keeping the DataHolder alive at least as long as the ChangedFiles object.
pub struct DataHolder<D> {
    /// The RevInfoMaker callback should assign data referenced by the
    /// ChangedFiles struct it returns to this attribute. The DataHolder
    /// lifetime will be at least as long as the ChangedFiles one.
    pub data: Option<D>,
}

pub type RevInfoMaker<'a, D> =
    Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;

/// enum used to carry information about the parent → child edge currently
/// being processed
#[derive(Copy, Clone, Debug)]
enum Parent {
    /// The `p1(x) → x` edge
    FirstParent,
    /// The `p2(x) → x` edge
    SecondParent,
}

/// Same as mercurial.copies._combine_changeset_copies, but in Rust.
///
/// Arguments are:
///
/// revs: all revisions to be considered
/// children: a {parent → [children]} mapping
/// target_rev: the final revision we are combining copies to
/// rev_info(rev): callback to get revision information:
///   * first parent
///   * second parent
///   * ChangedFiles
/// isancestors(low_rev, high_rev): callback to check if a revision is an
///   ancestor of another
pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool, D>(
    revs: Vec<Revision>,
-   children: HashMap<Revision, Vec<Revision>>,
+   mut children_count: HashMap<Revision, usize>,
    target_rev: Revision,
    rev_info: RevInfoMaker<D>,
    is_ancestor: &A,
) -> PathCopies {
    let mut all_copies = HashMap::new();
    let mut oracle = AncestorOracle::new(is_ancestor);

    for rev in revs {
-        // Retrieve data computed in a previous iteration
-        let copies = all_copies.remove(&rev);
-        let copies = match copies {
-            Some(c) => c,
-            None => TimeStampedPathCopies::default(), // root of the walked set
-        };
-
-        let current_children = match children.get(&rev) {
-            Some(c) => c,
-            None => panic!("inconsistent `revs` and `children`"),
-        };
-
-        for child in current_children {
-            // We will chain the copies information accumulated for `rev` with
-            // the individual copies information for each of its children,
-            // creating a new PathCopies for each `rev` → `children` vertex.
-            let mut d: DataHolder<D> = DataHolder { data: None };
-            let (p1, p2, changes) = rev_info(*child, &mut d);
-
-            let parent = if rev == p1 {
-                Parent::FirstParent
-            } else {
-                assert_eq!(rev, p2);
-                Parent::SecondParent
-            };
-            let new_copies =
-                add_from_changes(&copies, &changes, parent, *child);
-
-            // A merge has two parents and needs to combine their copy
-            // information.
-            //
-            // If the vertex from the other parent was already processed, we
-            // will have a value for the child ready to be used. We need to
-            // grab it and combine it with the one we already computed. If
-            // not, we can simply store the newly computed data. The
-            // processing happening at the time of the second parent will
-            // take care of combining the two TimeStampedPathCopies
-            // instances.
-            match all_copies.remove(child) {
-                None => {
-                    all_copies.insert(child, new_copies);
-                }
-                Some(other_copies) => {
-                    let (minor, major) = match parent {
-                        Parent::FirstParent => (other_copies, new_copies),
-                        Parent::SecondParent => (new_copies, other_copies),
-                    };
-                    let merged_copies =
-                        merge_copies_dict(minor, major, &changes, &mut oracle);
-                    all_copies.insert(child, merged_copies);
-                }
-            };
-        }
+        let mut d: DataHolder<D> = DataHolder { data: None };
+        let (p1, p2, changes) = rev_info(rev, &mut d);
+
+        // We will chain the copies information accumulated for the parent
+        // with the individual copies information of the current revision,
+        // creating a new TimeStampedPath for each `rev` → `children` vertex.
+        let mut copies: Option<TimeStampedPathCopies> = None;
+        if p1 != NULL_REVISION {
+            // Retrieve data computed in a previous iteration
+            let parent_copies = get_and_clean_parent_copies(
+                &mut all_copies,
+                &mut children_count,
+                p1,
+            );
+            if let Some(parent_copies) = parent_copies {
+                // combine it with data for that revision
+                let vertex_copies = add_from_changes(
+                    &parent_copies,
+                    &changes,
+                    Parent::FirstParent,
+                    rev,
+                );
+                // keep that data around for potential later combination
+                copies = Some(vertex_copies);
+            }
+        }
+        if p2 != NULL_REVISION {
+            // Retrieve data computed in a previous iteration
+            let parent_copies = get_and_clean_parent_copies(
+                &mut all_copies,
+                &mut children_count,
+                p2,
+            );
+            if let Some(parent_copies) = parent_copies {
+                // combine it with data for that revision
+                let vertex_copies = add_from_changes(
+                    &parent_copies,
+                    &changes,
+                    Parent::SecondParent,
+                    rev,
+                );
+
+                copies = match copies {
+                    None => Some(vertex_copies),
+                    // A merge has two parents and needs to combine their
+                    // copy information.
+                    //
+                    // If we got data from both parents, we need to combine
+                    // them.
+                    Some(copies) => Some(merge_copies_dict(
+                        vertex_copies,
+                        copies,
+                        &changes,
+                        &mut oracle,
+                    )),
+                };
+            }
+        }
+        match copies {
+            Some(copies) => {
+                all_copies.insert(rev, copies);
+            }
+            _ => {}
+        }
    }

    // Drop internal information (like the timestamp) and return the final
    // mapping.
    let tt_result = all_copies
        .remove(&target_rev)
        .expect("target revision was not processed");
    let mut result = PathCopies::default();
    for (dest, tt_source) in tt_result {
        if let Some(path) = tt_source.path {
            result.insert(dest, path);
        }
    }
    result
}

+/// fetch previously computed information
+///
+/// If no other children are expected to need this information, we drop it
+/// from the cache.
+///
+/// If parent is not part of the set we are expected to walk, return None.
+fn get_and_clean_parent_copies(
+    all_copies: &mut HashMap<Revision, TimeStampedPathCopies>,
+    children_count: &mut HashMap<Revision, usize>,
+    parent_rev: Revision,
+) -> Option<TimeStampedPathCopies> {
+    let count = children_count.get_mut(&parent_rev)?;
+    *count -= 1;
+    if *count == 0 {
+        match all_copies.remove(&parent_rev) {
+            Some(c) => Some(c),
+            None => Some(TimeStampedPathCopies::default()),
+        }
+    } else {
+        match all_copies.get(&parent_rev) {
+            Some(c) => Some(c.clone()),
+            None => Some(TimeStampedPathCopies::default()),
+        }
+    }
+}

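// Editor's note on the contract assumed by the new code above (a reading of
// the diff, not an authoritative spec): `children_count` maps each revision
// to the number of its children in the walk; get_and_clean_parent_copies
// decrements that counter each time a child consumes the parent's data and
// evicts the cached TimeStampedPathCopies once the last child has been
// served, so memory stays proportional to the "open" frontier of the graph
// rather than to the whole walked set.
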
/// Combine ChangedFiles with some existing PathCopies information and return
/// the result
fn add_from_changes(
    base_copies: &TimeStampedPathCopies,
    changes: &ChangedFiles,
    parent: Parent,
    current_rev: Revision,
) -> TimeStampedPathCopies {
    let mut copies = base_copies.clone();
    for action in changes.iter_actions(parent) {
        match action {
            Action::Copied(dest, source) => {
                let entry;
                if let Some(v) = base_copies.get(source) {
                    entry = match &v.path {
                        Some(path) => Some((*(path)).to_owned()),
                        None => Some(source.to_owned()),
                    }
                } else {
                    entry = Some(source.to_owned());
                }
                // Each new entry is introduced by the children; we
                // record this information as we will need it to take
                // the right decision when merging conflicting copy
                // information. See merge_copies_dict for details.
                let ttpc = TimeStampedPathCopy {
                    rev: current_rev,
                    path: entry,
                };
                copies.insert(dest.to_owned(), ttpc);
            }
            Action::Removed(f) => {
                // We must drop copy information for removed files.
                //
                // We need to explicitly record them as dropped to
                // propagate this information when merging two
                // TimeStampedPathCopies objects.
                if copies.contains_key(f.as_ref()) {
                    let ttpc = TimeStampedPathCopy {
                        rev: current_rev,
                        path: None,
                    };
                    copies.insert(f.to_owned(), ttpc);
                }
            }
        }
    }
    copies
}

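// Editor's sketch of the chaining rule above: if the parent map already
// records "b" as copied from "a", and this changeset copies "b" to "c", the
// inserted entry maps "c" straight back to "a" (the original source). A
// removal, by contrast, is kept as an explicit `path: None` tombstone rather
// than a plain deletion, so the drop survives merge_copies_dict.
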
458 /// merge two copies-mapping together, minor and major
497 /// merge two copies-mapping together, minor and major
459 ///
498 ///
460 /// In case of conflict, value from "major" will be picked, unless in some
499 /// In case of conflict, value from "major" will be picked, unless in some
461 /// cases. See inline documentation for details.
500 /// cases. See inline documentation for details.
462 fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
501 fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
463 mut minor: TimeStampedPathCopies,
502 mut minor: TimeStampedPathCopies,
464 mut major: TimeStampedPathCopies,
503 mut major: TimeStampedPathCopies,
465 changes: &ChangedFiles,
504 changes: &ChangedFiles,
466 oracle: &mut AncestorOracle<A>,
505 oracle: &mut AncestorOracle<A>,
467 ) -> TimeStampedPathCopies {
506 ) -> TimeStampedPathCopies {
468 // This closure exist as temporary help while multiple developper are
507 // This closure exist as temporary help while multiple developper are
469 // actively working on this code. Feel free to re-inline it once this
508 // actively working on this code. Feel free to re-inline it once this
470 // code is more settled.
509 // code is more settled.
471 let mut cmp_value =
510 let mut cmp_value =
472 |dest: &PathToken,
511 |dest: &PathToken,
473 src_minor: &TimeStampedPathCopy,
512 src_minor: &TimeStampedPathCopy,
474 src_major: &TimeStampedPathCopy| {
513 src_major: &TimeStampedPathCopy| {
475 compare_value(changes, oracle, dest, src_minor, src_major)
514 compare_value(changes, oracle, dest, src_minor, src_major)
476 };
515 };
477 if minor.is_empty() {
516 if minor.is_empty() {
478 major
517 major
479 } else if major.is_empty() {
518 } else if major.is_empty() {
480 minor
519 minor
481 } else if minor.len() * 2 < major.len() {
520 } else if minor.len() * 2 < major.len() {
482 // Lets says we are merging two TimeStampedPathCopies instance A and B.
521 // Lets says we are merging two TimeStampedPathCopies instance A and B.
483 //
522 //
484 // If A contains N items, the merge result will never contains more
523 // If A contains N items, the merge result will never contains more
485 // than N values differents than the one in A
524 // than N values differents than the one in A
486 //
525 //
487 // If B contains M items, with M > N, the merge result will always
526 // If B contains M items, with M > N, the merge result will always
488 // result in a minimum of M - N value differents than the on in
527 // result in a minimum of M - N value differents than the on in
489 // A
528 // A
490 //
529 //
491 // As a result, if N < (M-N), we know that simply iterating over A will
        // As a result, if N < (M-N), we know that simply iterating over A
        // will yield less difference than iterating over the difference
        // between A and B.
        //
        // This helps performance a lot when a tiny TimeStampedPathCopies
        // is merged with a much larger one.
        for (dest, src_minor) in minor {
            let src_major = major.get(&dest);
            match src_major {
                None => major.insert(dest, src_minor),
                Some(src_major) => {
                    match cmp_value(&dest, &src_minor, src_major) {
                        MergePick::Any | MergePick::Major => None,
                        MergePick::Minor => major.insert(dest, src_minor),
                    }
                }
            };
        }
        major
    } else if major.len() * 2 < minor.len() {
        // This uses the same rationale as the previous block.
        // (Check the previous block's documentation for details.)
        for (dest, src_major) in major {
            let src_minor = minor.get(&dest);
            match src_minor {
                None => minor.insert(dest, src_major),
                Some(src_minor) => {
                    match cmp_value(&dest, src_minor, &src_major) {
                        MergePick::Any | MergePick::Minor => None,
                        MergePick::Major => minor.insert(dest, src_major),
                    }
                }
            };
        }
        minor
    } else {
        let mut override_minor = Vec::new();
        let mut override_major = Vec::new();

        let mut to_major = |k: &PathToken, v: &TimeStampedPathCopy| {
            override_major.push((k.clone(), v.clone()))
        };
        let mut to_minor = |k: &PathToken, v: &TimeStampedPathCopy| {
            override_minor.push((k.clone(), v.clone()))
        };

        // The diff function leverages detection of identical subparts when
        // minor and major have some common ancestors. This makes it very
        // fast in most cases.
        //
        // When the two maps are vastly different in size, the current
        // approach is still slowish because it will iterate over all the
        // "exclusive" content of the larger one. This situation can be
        // frequent when the subgraph of revisions we are processing has a
        // lot of roots, each root adding its own fully new map to the mix
        // (and likely a small one, if the path from the root to the "main
        // path" is short).
        //
        // We could do better by detecting such situations and processing
        // them differently.
        for d in minor.diff(&major) {
            match d {
                DiffItem::Add(k, v) => to_minor(k, v),
                DiffItem::Remove(k, v) => to_major(k, v),
                DiffItem::Update { old, new } => {
                    let (dest, src_major) = new;
                    let (_, src_minor) = old;
                    match cmp_value(dest, src_minor, src_major) {
                        MergePick::Major => to_minor(dest, src_major),
                        MergePick::Minor => to_major(dest, src_minor),
                        // If the two entries are identical, there is no
                        // need to do anything (but diff should not have
                        // yielded them).
                        MergePick::Any => unreachable!(),
                    }
                }
            };
        }

        let updates;
        let mut result;
        if override_major.is_empty() {
            result = major
        } else if override_minor.is_empty() {
            result = minor
        } else {
            if override_minor.len() < override_major.len() {
                updates = override_minor;
                result = minor;
            } else {
                updates = override_major;
                result = major;
            }
            for (k, v) in updates {
                result.insert(k, v);
            }
        }
        result
    }
}
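Editorial note: the size guard above generalizes beyond this code. Whenever one map is much smaller than the other, iterating over the small side and probing the large one bounds the work by the small side's size. Below is a minimal, self-contained sketch of that pattern over plain std HashMaps; `merge_small_into_large` and `pick_major` are hypothetical stand-ins for illustration, not the hg-core API, and `pick_major` abbreviates the conflict resolution that `compare_value` performs in the real code.

use std::collections::HashMap;

/// Illustrative only: merge `minor` into `major` by iterating over the
/// (presumed) smaller map, so the cost is bounded by the small side plus
/// its lookups into the large side.
fn merge_small_into_large<K, V>(
    mut major: HashMap<K, V>,
    minor: HashMap<K, V>,
    pick_major: impl Fn(&V, &V) -> bool,
) -> HashMap<K, V>
where
    K: std::hash::Hash + Eq,
{
    for (dest, src_minor) in minor {
        match major.get(&dest) {
            // Entry only exists on the minor side: adopt it as-is.
            None => {
                major.insert(dest, src_minor);
            }
            // Entry exists on both sides: keep the winning value.
            Some(src_major) => {
                if !pick_major(src_major, &src_minor) {
                    major.insert(dest, src_minor);
                }
            }
        }
    }
    major
}

A caller would compare the two lengths first (as the `len() * 2 <` guards do above) and pass the maps in the right order, so the loop always runs over the smaller side; when the sizes are comparable, the real code falls back to the structural diff instead.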

/// represent the side that should prevail when merging two
/// TimeStampedPathCopies
enum MergePick {
    /// The "major" (p1) side prevails
    Major,
    /// The "minor" (p2) side prevails
    Minor,
    /// Any side could be used (because they are the same)
    Any,
}

/// decide which side prevails in case of conflicting values
#[allow(clippy::if_same_then_else)]
fn compare_value<A: Fn(Revision, Revision) -> bool>(
    changes: &ChangedFiles,
    oracle: &mut AncestorOracle<A>,
    dest: &PathToken,
    src_minor: &TimeStampedPathCopy,
    src_major: &TimeStampedPathCopy,
) -> MergePick {
    if src_major.path == src_minor.path {
        // we have the same value, but from another source;
        if src_major.rev == src_minor.rev {
            // If the two entries are identical, they are both valid
            MergePick::Any
        } else if oracle.is_ancestor(src_major.rev, src_minor.rev) {
            MergePick::Minor
        } else {
            MergePick::Major
        }
    } else if src_major.rev == src_minor.rev {
        // We cannot get copy information for both p1 and p2 in the
        // same rev. So this is the same value.
        unreachable!(
            "conflicting information from p1 and p2 in the same revision"
        );
    } else {
        let action = changes.get_merge_case(&dest);
        if src_major.path.is_none() && action == MergeCase::Salvaged {
            // If the file is "deleted" on the major side but was
            // salvaged by the merge, we keep the minor side alive
            MergePick::Minor
        } else if src_minor.path.is_none() && action == MergeCase::Salvaged {
            // If the file is "deleted" on the minor side but was
            // salvaged by the merge, unconditionally preserve the
            // major side.
            MergePick::Major
        } else if action == MergeCase::Merged {
            // If the file was actively merged, copy information
            // from each side might conflict. The major side wins
            // such conflicts.
            MergePick::Major
        } else if oracle.is_ancestor(src_major.rev, src_minor.rev) {
            // If the minor side is strictly newer than the major
            // side, it should be kept.
            MergePick::Minor
        } else if src_major.path.is_some() {
            // Without any special case, the "major" value wins
            // over the "minor" one.
            MergePick::Major
        } else if oracle.is_ancestor(src_minor.rev, src_major.rev) {
            // The "major" rev is a direct ancestor of "minor";
            // any differing value should overwrite it.
            MergePick::Major
        } else {
            // The major version is None (so the file was deleted on
            // that branch) and that branch is independent (neither
            // minor nor major is an ancestor of the other one).
            // We preserve the new information about the new file.
            MergePick::Minor
        }
    }
}
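Editorial note: for readability, the precedence this ladder encodes can be condensed into a few lines. The sketch below is a restatement with simplified stand-in types (integer revisions, `Option<u32>` path tokens), not the real `TimeStampedPathCopy`/`AncestorOracle` API, and it omits the unreachable same-revision conflict case.

#[derive(PartialEq)]
enum Case {
    Merged,
    Salvaged,
    Normal,
}

#[derive(Clone, Copy)]
struct Entry {
    rev: u32,          // revision that set this copy information
    path: Option<u32>, // copy-source token; None means "deleted"
}

enum Pick {
    Major,
    Minor,
    Any,
}

// Same precedence as compare_value above; `is_ancestor(a, b)` reads
// "a is an ancestor of b".
fn pick(
    case: Case,
    minor: Entry,
    major: Entry,
    is_ancestor: impl Fn(u32, u32) -> bool,
) -> Pick {
    if major.path == minor.path {
        if major.rev == minor.rev {
            Pick::Any // identical entries
        } else if is_ancestor(major.rev, minor.rev) {
            Pick::Minor // same value, minor is strictly newer
        } else {
            Pick::Major
        }
    } else if major.path.is_none() && case == Case::Salvaged {
        Pick::Minor // deleted on major side but salvaged by the merge
    } else if minor.path.is_none() && case == Case::Salvaged {
        Pick::Major // deleted on minor side but salvaged by the merge
    } else if case == Case::Merged {
        Pick::Major // actively merged: major wins conflicts
    } else if is_ancestor(major.rev, minor.rev) {
        Pick::Minor // minor is strictly newer
    } else if major.path.is_some() {
        Pick::Major // default: major wins
    } else if is_ancestor(minor.rev, major.rev) {
        Pick::Major // a newer deletion on major overwrites
    } else {
        Pick::Minor // independent deletion: keep the file information
    }
}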
@@ -1,153 +1,148 @@
 use cpython::ObjectProtocol;
 use cpython::PyBool;
 use cpython::PyBytes;
 use cpython::PyDict;
 use cpython::PyList;
 use cpython::PyModule;
 use cpython::PyObject;
 use cpython::PyResult;
 use cpython::PyTuple;
 use cpython::Python;
 
 use hg::copy_tracing::combine_changeset_copies;
 use hg::copy_tracing::ChangedFiles;
 use hg::copy_tracing::DataHolder;
 use hg::copy_tracing::RevInfo;
 use hg::copy_tracing::RevInfoMaker;
 use hg::Revision;
 
 /// Combines the copy information contained in revisions `revs` to build
 /// a copy map.
 ///
 /// See mercurial/copies.py for details
 pub fn combine_changeset_copies_wrapper(
     py: Python,
     revs: PyList,
-    children: PyDict,
+    children_count: PyDict,
     target_rev: Revision,
     rev_info: PyObject,
     is_ancestor: PyObject,
 ) -> PyResult<PyDict> {
     let revs: PyResult<_> =
         revs.iter(py).map(|r| Ok(r.extract(py)?)).collect();
 
     // Wrap the `is_ancestor` python callback as a Rust closure
     //
     // No errors are expected from the Python side; they should only
     // happen in case of a programming error or severe data corruption.
     // Such errors will raise a panic and the rust-cpython harness will
     // turn it into a Python exception.
     let is_ancestor_wrap = |anc: Revision, desc: Revision| -> bool {
         is_ancestor
             .call(py, (anc, desc), None)
             .expect(
                 "rust-copy-tracing: python call to `is_ancestor` \
                  failed",
             )
             .cast_into::<PyBool>(py)
             .expect(
                 "rust-copy-tracing: python call to `is_ancestor` \
                  returned unexpected non-Bool value",
             )
             .is_true()
     };
 
     // Wrap the `rev_info_maker` python callback as a Rust closure
     //
     // No errors are expected from the Python side; they should only
     // happen in case of a programming error or severe data corruption.
     // Such errors will raise a panic and the rust-cpython harness will
     // turn it into a Python exception.
     let rev_info_maker: RevInfoMaker<PyBytes> =
         Box::new(|rev: Revision, d: &mut DataHolder<PyBytes>| -> RevInfo {
             let res: PyTuple = rev_info
                 .call(py, (rev,), None)
                 .expect("rust-copy-tracing: python call to `rev_info` failed")
                 .cast_into(py)
                 .expect(
                     "rust-copy_tracing: python call to `rev_info` returned \
                      unexpected non-Tuple value",
                 );
             let p1 = res.get_item(py, 0).extract(py).expect(
                 "rust-copy-tracing: rev_info return is invalid, first item \
                  is not a revision",
             );
             let p2 = res.get_item(py, 1).extract(py).expect(
                 "rust-copy-tracing: rev_info return is invalid, second item \
                  is not a revision",
             );
 
             let files = match res.get_item(py, 2).extract::<PyBytes>(py) {
                 Ok(raw) => {
                     // Give responsibility for the raw bytes' lifetime to
                     // hg-core
                     d.data = Some(raw);
                     let addrs = d.data.as_ref().expect(
                         "rust-copy-tracing: failed to get a reference to the \
                          raw bytes for copy data").data(py);
                     ChangedFiles::new(addrs)
                 }
                 // The value was presumably None, meaning there was no
                 // copy data.
                 Err(_) => ChangedFiles::new_empty(),
             };
 
             (p1, p2, files)
         });
-    let children: PyResult<_> = children
+    let children_count: PyResult<_> = children_count
         .items(py)
         .iter()
-        .map(|(k, v)| {
-            let v: &PyList = v.cast_as(py)?;
-            let v: PyResult<_> =
-                v.iter(py).map(|child| Ok(child.extract(py)?)).collect();
-            Ok((k.extract(py)?, v?))
-        })
+        .map(|(k, v)| Ok((k.extract(py)?, v.extract(py)?)))
         .collect();
 
     let res = combine_changeset_copies(
         revs?,
-        children?,
+        children_count?,
         target_rev,
         rev_info_maker,
         &is_ancestor_wrap,
     );
     let out = PyDict::new(py);
     for (dest, source) in res.into_iter() {
         out.set_item(
             py,
             PyBytes::new(py, &dest.into_vec()),
             PyBytes::new(py, &source.into_vec()),
         )?;
     }
     Ok(out)
 }
 
 /// Create the module, with `__package__` given from parent
 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
     let dotted_name = &format!("{}.copy_tracing", package);
     let m = PyModule::new(py, dotted_name)?;
 
     m.add(py, "__package__", package)?;
     m.add(py, "__doc__", "Copy tracing - Rust implementation")?;
 
     m.add(
         py,
         "combine_changeset_copies",
         py_fn!(
             py,
             combine_changeset_copies_wrapper(
                 revs: PyList,
                 children: PyDict,
                 target_rev: Revision,
                 rev_info: PyObject,
                 is_ancestor: PyObject
             )
         ),
     )?;
 
     let sys = PyModule::import(py, "sys")?;
     let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
     sys_modules.set_item(py, dotted_name, &m)?;
 
     Ok(m)
 }
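Editorial note on the `children` to `children_count` change in this wrapper: per the commit summary, the algorithm now iterates over revisions directly, so for each revision it only needs the number of children that will still consume its intermediate result, not the full child list. The sketch below illustrates the kind of bookkeeping such a count enables; `walk`, `parents`, and `compute` are hypothetical stand-ins for illustration, not the hg-core API.

use std::collections::HashMap;

// Illustrative only: walk revisions in topological order, keeping each
// intermediate result alive only while some child still needs it.
fn walk(
    revs: &[u32],
    parents: impl Fn(u32) -> Vec<u32>,
    mut children_count: HashMap<u32, usize>,
    compute: impl Fn(u32, Vec<&String>) -> String,
) -> HashMap<u32, String> {
    let mut live: HashMap<u32, String> = HashMap::new();
    for &rev in revs {
        // Combine the parents' intermediate results for this revision.
        let ps = parents(rev);
        let inputs: Vec<&String> =
            ps.iter().filter_map(|p| live.get(p)).collect();
        let value = compute(rev, inputs);
        // A parent's result can be freed as soon as its last child has
        // been processed; a per-revision child count is all that is
        // needed for this, which is why a full child list is unnecessary.
        for p in ps {
            if let Some(n) = children_count.get_mut(&p) {
                *n -= 1;
                if *n == 0 {
                    live.remove(&p);
                }
            }
        }
        live.insert(rev, value);
    }
    live
}

This is also why the Python-facing conversion above now extracts a plain integer per revision (`v.extract(py)?`) instead of casting each value to a `PyList` and converting every child.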