copies-rust: remove the ancestor Oracle logic...
marmoute
r47316:34827c95 default
@@ -1,1281 +1,1281 @@
# coding: utf8
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import os

from .i18n import _
from .node import (
    nullid,
    nullrev,
)

from . import (
    match as matchmod,
    pathutil,
    policy,
    pycompat,
    util,
)


from .utils import stringutil

from .revlogutils import (
    flagutil,
    sidedata as sidedatamod,
)

rustmod = policy.importrust("copy_tracing")


def _filter(src, dst, t):
    """filters out invalid copies after chaining"""

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 6
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    # case  src  mid  dst  result
    #   1    x    y    -    -
    #   2    x    y    y    x->y
    #   3    x    y    x    -
    #   4    x    y    z    x->z
    #   5    -    x    y    -
    #   6    x    x    y    x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    for k, v in list(t.items()):
        if k == v:  # case 3
            del t[k]
        elif v not in src:  # case 5
            # remove copies from files that didn't exist
            del t[k]
        elif k not in dst:  # case 1
            # remove copies to files that were then removed
            del t[k]


def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    result = prefix.copy()
    for key, value in pycompat.iteritems(suffix):
        result[key] = prefix.get(value, value)
    return result


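# A minimal sketch (hypothetical file names; plain sets stand in for the
# manifests) of how _chain() and _filter() compose: chaining {b: a} with
# {c: b} yields {b: a, c: a}, and _filter() then drops the case-1 entry
# because 'b' no longer exists in 'dst'.
def _sketch_chain_then_filter():
    src = {b'a'}  # files present in 'src'
    dst = {b'c'}  # files present in 'dst'
    copies = _chain({b'b': b'a'}, {b'c': b'b'})
    assert copies == {b'b': b'a', b'c': b'a'}
    _filter(src, dst, copies)
    assert copies == {b'c': b'a'}  # only the still-alive copy survives
    return copies

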
def _tracefile(fctx, am, basemf):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am

    Note: we used to try and stop after a given limit, however checking if that
    limit is reached turned out to be very expensive. We are better off
    disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path


def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c


def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)


def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource


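# A minimal sketch with stand-in duck-typed objects (hypothetical, not the
# real repo/ui classes) showing when the changeset-centric algorithm is
# selected: either the repository stores copies in changeset sidedata, or the
# experimental read-from setting names a changeset source.
def _sketch_usechangesetcentricalgo():
    class fakeui(object):
        def config(self, section, name):
            return b'compatibility'

    class fakerepo(object):
        filecopiesmode = None  # not b'changeset-sidedata'
        ui = fakeui()

    assert usechangesetcentricalgo(fakerepo())

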
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
        missing = list(b.walk(match))
        # _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(b'debug.copies: searching all files: %d\n' % len(missing))
    else:
        missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(
                b'debug.copies: missing files to search: %d\n'
                % len(missing)
            )

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm


def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    if rustmod is not None:

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            else:
                raw = None
            return (p1, p2, raw)

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            else:
                changes = None
            return (p1, p2, changes)

    return revinfo


def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    cache = {}

    def _is_ancestor(anc, desc):
        if anc > desc:
            return False
        elif anc == desc:
            return True
        key = (anc, desc)
        ret = cache.get(key)
        if ret is None:
            ret = cache[key] = is_ancestor(anc, desc)
        return ret

    return _is_ancestor


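# A minimal sketch of the memoization behaviour (toy linear history where rev
# X is an ancestor of rev Y iff X < Y): the wrapped callable is invoked only
# once per (anc, desc) pair, and the trivial orderings short-circuit without
# touching the cache.
def _sketch_cached_is_ancestor():
    calls = []

    def slow_is_ancestor(anc, desc):
        calls.append((anc, desc))
        return anc < desc  # toy linear history

    fast = cached_is_ancestor(slow_is_ancestor)
    assert fast(1, 5) and fast(1, 5)  # second lookup is served from the cache
    assert fast(5, 1) is False  # anc > desc never reaches the slow path
    assert calls == [(1, 5)]

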
def _changesetforwardcopies(a, b, match):
    if a.rev() in (nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track rename from "A" to "B", we need to gather all parent → children
    # edges that are contained in `::B` but not in `::A`.
    #
    #
    # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
    # ::a`) and also all the "root points", ie the parents of the exclusive set
    # that belong to ::a. These are exactly all the revisions needed to express
    # the parent → children we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
    # excluding paths that lead to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without this
    # special case.

    parents = cl._uncheckedparentrevs
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deals with the special case mentioned in the [1] footnote. We
        # must filter out revisions that lead to non-common graph roots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previously collected data
        # for the parents. To know when the parents' data is no longer
        # necessary, we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only contains
        # revisions that will be relevant for an edge of the graph. So if a
        # child has a parent not in `children_count`, that edge should not be
        # processed.
        children_count = dict((r, 0) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        return _combine_changeset_copies(
            revs, children_count, b.rev(), revinfo, match, isancestor
        )
    else:
        # When not using side-data, we will process the edges "from" the
        # parent. So we need a full mapping of the parent -> children relation.
        children = dict((r, []) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children[r] = []
                if p in children:
                    children[p].append(r)
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )


def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of revs

    revs: sorted iterable of revisions to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in revs)
    revinfo(rev): a function that returns a (p1, p2, changes) tuple for a
                  revision
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        final_copies = rustmod.combine_changeset_copies(
-            list(revs), children_count, targetrev, revinfo, isancestor
+            list(revs), children_count, targetrev, revinfo
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent → child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    copies = all_copies.get(parent_rev, None)
                else:
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    newcopies = copies = {}
                elif remaining_children:
                    newcopies = copies.copy()
                else:
                    newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in pycompat.iteritems(childcopies):
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting potential other
                                    # branches. when there are no other branches, this
                                    # could be avoided.
                                    newcopies = copies.copy()
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another parent
                # (for that child). See comment below for details.
                if current_copies is None:
                    current_copies = newcopies
                else:
                    # we are the second parent to work on c, we need to merge
                    # our work with the other.
                    #
                    # In case of conflict, parent 1 takes precedence over
                    # parent 2. This is an arbitrary choice made anew when
                    # implementing changeset based copies. It was made without
                    # regard to potential filelog-related behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies,
                        current_copies,
                        isancestor,
                        changes,
                        current_rev,
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source} mapping
        final_copies = {}
        for dest, (tt, source) in all_copies[targetrev].items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        for filename in list(final_copies.keys()):
            if not match(filename):
                del final_copies[filename]
    return final_copies


# constant to decide which side to pick with _merge_copies_dict
PICK_MINOR = 0
PICK_MAJOR = 1
PICK_EITHER = 2


def _merge_copies_dict(minor, major, isancestor, changes, current_merge):
    """merge two copies-mappings together, minor and major

    In case of conflict, value from "major" will be picked.

    - `isancestor(low_rev, high_rev)`: callable returning True if `low_rev`
      is an ancestor of `high_rev`,

    - `ismerged(path)`: callable returning True if `path` has been merged in
      the current revision,

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
        else:
            pick, overwrite = _compare_values(
                changes, isancestor, dest, other, value
            )
            if overwrite:
                if pick == PICK_MAJOR:
                    minor[dest] = (current_merge, value[1])
                else:
                    minor[dest] = (current_merge, other[1])
            elif pick == PICK_MAJOR:
                minor[dest] = value
    return minor


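# A minimal sketch (hypothetical revisions; toy linear history where rev X is
# an ancestor of rev Y iff X < Y; no ChangingFiles data) of the conflict rule:
# when both sides know a copy for the same destination and one side descends
# from the other, the descendant's value wins and "minor" is updated in place.
def _sketch_merge_copies_dict():
    def toy_isancestor(anc, desc):
        return anc < desc

    minor = {b'dest': (2, b'old-source')}
    major = {b'dest': (3, b'new-source')}
    merged = _merge_copies_dict(minor, major, toy_isancestor, None, 4)
    assert merged is minor
    assert merged == {b'dest': (3, b'new-source')}

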
def _compare_values(changes, isancestor, dest, minor, major):
    """compare two values within a _merge_copies_dict loop iteration

    return (pick, overwrite).

    - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
    - overwrite is True if `pick` resolves an ambiguity and the result needs
      to be recorded against the current merge revision.
    """
    major_tt, major_value = major
    minor_tt, minor_value = minor

    if major_tt == minor_tt:
        # if it comes from the same revision it must be the same value
        assert major_value == minor_value
        return PICK_EITHER, False
    elif (
        changes is not None
        and minor_value is not None
        and major_value is None
        and dest in changes.salvaged
    ):
        # In this case, a deletion was reverted, the "alive" value overwrites
        # the deleted one.
        return PICK_MINOR, True
    elif (
        changes is not None
        and major_value is not None
        and minor_value is None
        and dest in changes.salvaged
    ):
        # In this case, a deletion was reverted, the "alive" value overwrites
        # the deleted one.
        return PICK_MAJOR, True
    elif isancestor(minor_tt, major_tt):
        if changes is not None and dest in changes.merged:
            # change to dest happened on the branch without copy-source change,
            # so both sources are valid and "major" wins.
            return PICK_MAJOR, True
        else:
            return PICK_MAJOR, False
    elif isancestor(major_tt, minor_tt):
        if changes is not None and dest in changes.merged:
            # change to dest happened on the branch without copy-source change,
            # so both sources are valid and "major" wins.
            return PICK_MAJOR, True
        else:
            return PICK_MINOR, False
    elif minor_value is None:
        # in case of conflict, the "alive" side wins.
        return PICK_MAJOR, True
    elif major_value is None:
        # in case of conflict, the "alive" side wins.
        return PICK_MINOR, True
    else:
        # in case of conflict where both sides are alive, major wins.
        return PICK_MAJOR, True


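# Decision summary for _compare_values(), derived from the branches above:
#
#   same revision on both sides           -> PICK_EITHER, no overwrite
#   only one side salvaged a deleted file -> the "alive" side, overwrite
#   one side is an ancestor of the other,
#     and dest was merged in that child   -> PICK_MAJOR, overwrite
#   one side is an ancestor of the other,
#     and dest was not merged             -> the descendant side, no overwrite
#   unrelated revisions (true conflict)   -> the "alive" side if exactly one
#                                            value is None, else PICK_MAJOR;
#                                            always overwrite

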
def _revinfo_getter_extra(repo):
    """return a function that returns multiple data given a <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            nb_parents = 0
            for n in parents:
                if n != nullid:
                    nb_parents += 1
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo


def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information"""
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog-related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                    all_copies[c] = newcopies

    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies


def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
    """version of `_merge_copies_dict` that works with the Google
    specific "extra" based storage for copy information"""
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
        else:
            new_tt = value[0]
            other_tt = other[0]
            if value[1] == other[1]:
                continue
            # content from "major" wins, unless it is older
            # than the branch point or there is a merge
            if (
                new_tt == other_tt
                or not isancestor(new_tt, other_tt)
                or ismerged(dest)
            ):
                minor[dest] = value


def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies


def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r


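# Example (hypothetical history): if, walking forward from ``b`` to ``a``, the
# file ``old`` was renamed to ``new`` (forward mapping {b'new': b'old'}), then
# _backwardrenames(a, b, match) reverses it to {b'old': b'new'}. Entries whose
# source still exists in ``a`` are skipped, since those are copies rather than
# renames.

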
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies


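# Usage sketch (hypothetical revision numbers): ``pathcopies(repo[2], repo[5])``
# returns a {dst@5: src@2} mapping. The relationship between the two contexts
# picks the mode above: forward when x is the ancestor, backward when y is,
# and otherwise a backward walk down to their common ancestor chained with a
# forward walk up to y.

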
759 def mergecopies(repo, c1, c2, base):
759 def mergecopies(repo, c1, c2, base):
760 """
760 """
761 Finds moves and copies between context c1 and c2 that are relevant for
761 Finds moves and copies between context c1 and c2 that are relevant for
762 merging. 'base' will be used as the merge base.
762 merging. 'base' will be used as the merge base.
763
763
764 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
764 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
765 files that were moved/ copied in one merge parent and modified in another.
765 files that were moved/ copied in one merge parent and modified in another.
766 For example:
766 For example:
767
767
768 o ---> 4 another commit
768 o ---> 4 another commit
769 |
769 |
770 | o ---> 3 commit that modifies a.txt
770 | o ---> 3 commit that modifies a.txt
771 | /
771 | /
772 o / ---> 2 commit that moves a.txt to b.txt
772 o / ---> 2 commit that moves a.txt to b.txt
773 |/
773 |/
774 o ---> 1 merge base
774 o ---> 1 merge base
775
775
776 If we try to rebase revision 3 on revision 4, since there is no a.txt in
776 If we try to rebase revision 3 on revision 4, since there is no a.txt in
777 revision 4, and if user have copytrace disabled, we prints the following
777 revision 4, and if user have copytrace disabled, we prints the following
778 message:
778 message:
779
779
780 ```other changed <file> which local deleted```
780 ```other changed <file> which local deleted```
781
781
782 Returns a tuple where:
782 Returns a tuple where:
783
783
784 "branch_copies" an instance of branch_copies.
784 "branch_copies" an instance of branch_copies.
785
785
786 "diverge" is a mapping of source name -> list of destination names
786 "diverge" is a mapping of source name -> list of destination names
787 for divergent renames.
787 for divergent renames.
788
788
789 This function calls different copytracing algorithms based on config.
789 This function calls different copytracing algorithms based on config.
790 """
790 """
791 # avoid silly behavior for update from empty dir
791 # avoid silly behavior for update from empty dir
792 if not c1 or not c2 or c1 == c2:
792 if not c1 or not c2 or c1 == c2:
793 return branch_copies(), branch_copies(), {}
793 return branch_copies(), branch_copies(), {}
794
794
795 narrowmatch = c1.repo().narrowmatch()
795 narrowmatch = c1.repo().narrowmatch()
796
796
797 # avoid silly behavior for parent -> working dir
797 # avoid silly behavior for parent -> working dir
798 if c2.node() is None and c1.node() == repo.dirstate.p1():
798 if c2.node() is None and c1.node() == repo.dirstate.p1():
799 return (
799 return (
800 branch_copies(_dirstatecopies(repo, narrowmatch)),
800 branch_copies(_dirstatecopies(repo, narrowmatch)),
801 branch_copies(),
801 branch_copies(),
802 {},
802 {},
803 )
803 )
804
804
805 copytracing = repo.ui.config(b'experimental', b'copytrace')
805 copytracing = repo.ui.config(b'experimental', b'copytrace')
806 if stringutil.parsebool(copytracing) is False:
806 if stringutil.parsebool(copytracing) is False:
807 # stringutil.parsebool() returns None when it is unable to parse the
807 # stringutil.parsebool() returns None when it is unable to parse the
808 # value, so we should rely on making sure copytracing is on such cases
808 # value, so we should rely on making sure copytracing is on such cases
809 return branch_copies(), branch_copies(), {}
809 return branch_copies(), branch_copies(), {}
810
810
811 if usechangesetcentricalgo(repo):
811 if usechangesetcentricalgo(repo):
812 # The heuristics don't make sense when we need changeset-centric algos
812 # The heuristics don't make sense when we need changeset-centric algos
813 return _fullcopytracing(repo, c1, c2, base)
813 return _fullcopytracing(repo, c1, c2, base)
814
814
815 # Copy trace disabling is explicitly below the node == p1 logic above
815 # Copy trace disabling is explicitly below the node == p1 logic above
816 # because the logic above is required for a simple copy to be kept across a
816 # because the logic above is required for a simple copy to be kept across a
817 # rebase.
817 # rebase.
818 if copytracing == b'heuristics':
818 if copytracing == b'heuristics':
819 # Do full copytracing if only non-public revisions are involved as
819 # Do full copytracing if only non-public revisions are involved as
820 # that will be fast enough and will also cover the copies which could
820 # that will be fast enough and will also cover the copies which could
821 # be missed by heuristics
821 # be missed by heuristics
822 if _isfullcopytraceable(repo, c1, base):
822 if _isfullcopytraceable(repo, c1, base):
823 return _fullcopytracing(repo, c1, c2, base)
823 return _fullcopytracing(repo, c1, c2, base)
824 return _heuristicscopytracing(repo, c1, c2, base)
824 return _heuristicscopytracing(repo, c1, c2, base)
825 else:
825 else:
826 return _fullcopytracing(repo, c1, c2, base)
826 return _fullcopytracing(repo, c1, c2, base)
827
827
828
828
829 def _isfullcopytraceable(repo, c1, base):
829 def _isfullcopytraceable(repo, c1, base):
830 """Checks that if base, source and destination are all no-public branches,
830 """Checks that if base, source and destination are all no-public branches,
831 if yes let's use the full copytrace algorithm for increased capabilities
831 if yes let's use the full copytrace algorithm for increased capabilities
832 since it will be fast enough.
832 since it will be fast enough.
833
833
834 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
834 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
835 number of changesets from c1 to base such that if number of changesets are
835 number of changesets from c1 to base such that if number of changesets are
836 more than the limit, full copytracing algorithm won't be used.
836 more than the limit, full copytracing algorithm won't be used.
837 """
837 """
838 if c1.rev() is None:
838 if c1.rev() is None:
839 c1 = c1.p1()
839 c1 = c1.p1()
840 if c1.mutable() and base.mutable():
840 if c1.mutable() and base.mutable():
841 sourcecommitlimit = repo.ui.configint(
841 sourcecommitlimit = repo.ui.configint(
842 b'experimental', b'copytrace.sourcecommitlimit'
842 b'experimental', b'copytrace.sourcecommitlimit'
843 )
843 )
844 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
844 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
845 return commits < sourcecommitlimit
845 return commits < sourcecommitlimit
846 return False
846 return False
847
847
848
848
849 def _checksinglesidecopies(
849 def _checksinglesidecopies(
850 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
850 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
851 ):
851 ):
852 if src not in m2:
852 if src not in m2:
853 # deleted on side 2
853 # deleted on side 2
854 if src not in m1:
854 if src not in m1:
855 # renamed on side 1, deleted on side 2
855 # renamed on side 1, deleted on side 2
856 renamedelete[src] = dsts1
856 renamedelete[src] = dsts1
857 elif src not in mb:
857 elif src not in mb:
858 # Work around the "short-circuit to avoid issues with merge states"
858 # Work around the "short-circuit to avoid issues with merge states"
859 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
859 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
860 # destination doesn't exist in y.
860 # destination doesn't exist in y.
861 pass
861 pass
862 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
862 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
863 return
863 return
864 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
864 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
865 # modified on side 2
865 # modified on side 2
866 for dst in dsts1:
866 for dst in dsts1:
867 copy[dst] = src
867 copy[dst] = src
868
868
869
869
870 class branch_copies(object):
870 class branch_copies(object):
871 """Information about copies made on one side of a merge/graft.
871 """Information about copies made on one side of a merge/graft.
872
872
873 "copy" is a mapping from destination name -> source name,
873 "copy" is a mapping from destination name -> source name,
874 where source is in c1 and destination is in c2 or vice-versa.
874 where source is in c1 and destination is in c2 or vice-versa.
875
875
876 "movewithdir" is a mapping from source name -> destination name,
876 "movewithdir" is a mapping from source name -> destination name,
877 where the file at source present in one context but not the other
877 where the file at source present in one context but not the other
878 needs to be moved to destination by the merge process, because the
878 needs to be moved to destination by the merge process, because the
879 other context moved the directory it is in.
879 other context moved the directory it is in.
880
880
881 "renamedelete" is a mapping of source name -> list of destination
881 "renamedelete" is a mapping of source name -> list of destination
882 names for files deleted in c1 that were renamed in c2 or vice-versa.
882 names for files deleted in c1 that were renamed in c2 or vice-versa.
883
883
884 "dirmove" is a mapping of detected source dir -> destination dir renames.
884 "dirmove" is a mapping of detected source dir -> destination dir renames.
885 This is needed for handling changes to new files previously grafted into
885 This is needed for handling changes to new files previously grafted into
886 renamed directories.
886 renamed directories.
887 """
887 """
888
888
889 def __init__(
889 def __init__(
890 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
890 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
891 ):
891 ):
892 self.copy = {} if copy is None else copy
892 self.copy = {} if copy is None else copy
893 self.renamedelete = {} if renamedelete is None else renamedelete
893 self.renamedelete = {} if renamedelete is None else renamedelete
894 self.dirmove = {} if dirmove is None else dirmove
894 self.dirmove = {} if dirmove is None else dirmove
895 self.movewithdir = {} if movewithdir is None else movewithdir
895 self.movewithdir = {} if movewithdir is None else movewithdir
896
896
897 def __repr__(self):
897 def __repr__(self):
898 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
898 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
899 self.copy,
899 self.copy,
900 self.renamedelete,
900 self.renamedelete,
901 self.dirmove,
901 self.dirmove,
902 self.movewithdir,
902 self.movewithdir,
903 )
903 )
904
904
905
905
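A minimal sketch of the four mappings, using made-up paths (not from any real repository), makes the docstring above concrete:

    # hypothetical toy data; real merges fill these via the copytracing
    # functions below
    example = branch_copies(
        copy={b'moved/name.c': b'name.c'},             # dst -> src, to merge
        renamedelete={b'gone.c': [b'kept.c']},         # deleted here, renamed there
        dirmove={b'lib/': b'src/lib/'},                # whole directory moved
        movewithdir={b'lib/new.c': b'src/lib/new.c'},  # new file follows the move
    )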
906 def _fullcopytracing(repo, c1, c2, base):
906 def _fullcopytracing(repo, c1, c2, base):
907 """The full copytracing algorithm which finds all the new files that were
907 """The full copytracing algorithm which finds all the new files that were
908 added from the merge base up to the top commit; for each such file it checks
908 added from the merge base up to the top commit; for each such file it checks
909 whether the file was copied from another file.
909 whether the file was copied from another file.
910
910
911 This is pretty slow when a lot of changesets are involved but will track all
911 This is pretty slow when a lot of changesets are involved but will track all
912 the copies.
912 the copies.
913 """
913 """
914 m1 = c1.manifest()
914 m1 = c1.manifest()
915 m2 = c2.manifest()
915 m2 = c2.manifest()
916 mb = base.manifest()
916 mb = base.manifest()
917
917
918 copies1 = pathcopies(base, c1)
918 copies1 = pathcopies(base, c1)
919 copies2 = pathcopies(base, c2)
919 copies2 = pathcopies(base, c2)
920
920
921 if not (copies1 or copies2):
921 if not (copies1 or copies2):
922 return branch_copies(), branch_copies(), {}
922 return branch_copies(), branch_copies(), {}
923
923
924 inversecopies1 = {}
924 inversecopies1 = {}
925 inversecopies2 = {}
925 inversecopies2 = {}
926 for dst, src in copies1.items():
926 for dst, src in copies1.items():
927 inversecopies1.setdefault(src, []).append(dst)
927 inversecopies1.setdefault(src, []).append(dst)
928 for dst, src in copies2.items():
928 for dst, src in copies2.items():
929 inversecopies2.setdefault(src, []).append(dst)
929 inversecopies2.setdefault(src, []).append(dst)
930
930
931 copy1 = {}
931 copy1 = {}
932 copy2 = {}
932 copy2 = {}
933 diverge = {}
933 diverge = {}
934 renamedelete1 = {}
934 renamedelete1 = {}
935 renamedelete2 = {}
935 renamedelete2 = {}
936 allsources = set(inversecopies1) | set(inversecopies2)
936 allsources = set(inversecopies1) | set(inversecopies2)
937 for src in allsources:
937 for src in allsources:
938 dsts1 = inversecopies1.get(src)
938 dsts1 = inversecopies1.get(src)
939 dsts2 = inversecopies2.get(src)
939 dsts2 = inversecopies2.get(src)
940 if dsts1 and dsts2:
940 if dsts1 and dsts2:
941 # copied/renamed on both sides
941 # copied/renamed on both sides
942 if src not in m1 and src not in m2:
942 if src not in m1 and src not in m2:
943 # renamed on both sides
943 # renamed on both sides
944 dsts1 = set(dsts1)
944 dsts1 = set(dsts1)
945 dsts2 = set(dsts2)
945 dsts2 = set(dsts2)
946 # If there's some overlap in the rename destinations, we
946 # If there's some overlap in the rename destinations, we
947 # consider it not divergent. For example, if side 1 copies 'a'
947 # consider it not divergent. For example, if side 1 copies 'a'
948 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
948 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
949 # and 'd' and deletes 'a'.
949 # and 'd' and deletes 'a'.
950 if dsts1 & dsts2:
950 if dsts1 & dsts2:
951 for dst in dsts1 & dsts2:
951 for dst in dsts1 & dsts2:
952 copy1[dst] = src
952 copy1[dst] = src
953 copy2[dst] = src
953 copy2[dst] = src
954 else:
954 else:
955 diverge[src] = sorted(dsts1 | dsts2)
955 diverge[src] = sorted(dsts1 | dsts2)
956 elif src in m1 and src in m2:
956 elif src in m1 and src in m2:
957 # copied on both sides
957 # copied on both sides
958 dsts1 = set(dsts1)
958 dsts1 = set(dsts1)
959 dsts2 = set(dsts2)
959 dsts2 = set(dsts2)
960 for dst in dsts1 & dsts2:
960 for dst in dsts1 & dsts2:
961 copy1[dst] = src
961 copy1[dst] = src
962 copy2[dst] = src
962 copy2[dst] = src
963 # TODO: Handle cases where it was renamed on one side and copied
963 # TODO: Handle cases where it was renamed on one side and copied
964 # on the other side
964 # on the other side
965 elif dsts1:
965 elif dsts1:
966 # copied/renamed only on side 1
966 # copied/renamed only on side 1
967 _checksinglesidecopies(
967 _checksinglesidecopies(
968 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
968 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
969 )
969 )
970 elif dsts2:
970 elif dsts2:
971 # copied/renamed only on side 2
971 # copied/renamed only on side 2
972 _checksinglesidecopies(
972 _checksinglesidecopies(
973 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
973 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
974 )
974 )
975
975
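A toy run of the overlap rule used in the loop above (made-up destination sets, independent of any manifest):

    dsts1, dsts2 = {b'b', b'c'}, {b'c', b'd'}   # both sides renamed 'a'
    if dsts1 & dsts2:
        agreed = dsts1 & dsts2              # {b'c'}: recorded as a copy on both sides
    else:
        divergence = sorted(dsts1 | dsts2)  # disjoint sets: 'a' is divergent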
976 # find interesting file sets from manifests
976 # find interesting file sets from manifests
977 cache = []
977 cache = []
978
978
979 def _get_addedfiles(idx):
979 def _get_addedfiles(idx):
980 if not cache:
980 if not cache:
981 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
981 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
982 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
982 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
983 u1 = sorted(addedinm1 - addedinm2)
983 u1 = sorted(addedinm1 - addedinm2)
984 u2 = sorted(addedinm2 - addedinm1)
984 u2 = sorted(addedinm2 - addedinm1)
985 cache.extend((u1, u2))
985 cache.extend((u1, u2))
986 return cache[idx]
986 return cache[idx]
987
987
988 u1fn = lambda: _get_addedfiles(0)
988 u1fn = lambda: _get_addedfiles(0)
989 u2fn = lambda: _get_addedfiles(1)
989 u2fn = lambda: _get_addedfiles(1)
990 if repo.ui.debugflag:
990 if repo.ui.debugflag:
991 u1 = u1fn()
991 u1 = u1fn()
992 u2 = u2fn()
992 u2 = u2fn()
993
993
994 header = b" unmatched files in %s"
994 header = b" unmatched files in %s"
995 if u1:
995 if u1:
996 repo.ui.debug(
996 repo.ui.debug(
997 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
997 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
998 )
998 )
999 if u2:
999 if u2:
1000 repo.ui.debug(
1000 repo.ui.debug(
1001 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1001 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1002 )
1002 )
1003
1003
1004 renamedeleteset = set()
1004 renamedeleteset = set()
1005 divergeset = set()
1005 divergeset = set()
1006 for dsts in diverge.values():
1006 for dsts in diverge.values():
1007 divergeset.update(dsts)
1007 divergeset.update(dsts)
1008 for dsts in renamedelete1.values():
1008 for dsts in renamedelete1.values():
1009 renamedeleteset.update(dsts)
1009 renamedeleteset.update(dsts)
1010 for dsts in renamedelete2.values():
1010 for dsts in renamedelete2.values():
1011 renamedeleteset.update(dsts)
1011 renamedeleteset.update(dsts)
1012
1012
1013 repo.ui.debug(
1013 repo.ui.debug(
1014 b" all copies found (* = to merge, ! = divergent, "
1014 b" all copies found (* = to merge, ! = divergent, "
1015 b"% = renamed and deleted):\n"
1015 b"% = renamed and deleted):\n"
1016 )
1016 )
1017 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1017 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1018 if not copies:
1018 if not copies:
1019 continue
1019 continue
1020 repo.ui.debug(b" on %s side:\n" % side)
1020 repo.ui.debug(b" on %s side:\n" % side)
1021 for f in sorted(copies):
1021 for f in sorted(copies):
1022 note = b""
1022 note = b""
1023 if f in copy1 or f in copy2:
1023 if f in copy1 or f in copy2:
1024 note += b"*"
1024 note += b"*"
1025 if f in divergeset:
1025 if f in divergeset:
1026 note += b"!"
1026 note += b"!"
1027 if f in renamedeleteset:
1027 if f in renamedeleteset:
1028 note += b"%"
1028 note += b"%"
1029 repo.ui.debug(
1029 repo.ui.debug(
1030 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1030 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1031 )
1031 )
1032 del renamedeleteset
1032 del renamedeleteset
1033 del divergeset
1033 del divergeset
1034
1034
1035 repo.ui.debug(b" checking for directory renames\n")
1035 repo.ui.debug(b" checking for directory renames\n")
1036
1036
1037 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1037 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1038 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1038 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1039
1039
1040 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1040 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1041 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1041 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1042
1042
1043 return branch_copies1, branch_copies2, diverge
1043 return branch_copies1, branch_copies2, diverge
1044
1044
1045
1045
1046 def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
1046 def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
1047 """Finds moved directories and files that should move with them.
1047 """Finds moved directories and files that should move with them.
1048
1048
1049 ctx: the context for one of the sides
1049 ctx: the context for one of the sides
1050 copy: files copied on the same side (as ctx)
1050 copy: files copied on the same side (as ctx)
1051 fullcopy: files copied on the same side (as ctx), including those that
1051 fullcopy: files copied on the same side (as ctx), including those that
1052 merge.manifestmerge() won't care about
1052 merge.manifestmerge() won't care about
1053 addedfilesfn: function returning added files on the other side (compared to
1053 addedfilesfn: function returning added files on the other side (compared to
1054 ctx)
1054 ctx)
1055 """
1055 """
1056 # generate a directory move map
1056 # generate a directory move map
1057 invalid = set()
1057 invalid = set()
1058 dirmove = {}
1058 dirmove = {}
1059
1059
1060 # examine each file copy for a potential directory move, which is
1060 # examine each file copy for a potential directory move, which is
1061 # when all the files in a directory are moved to a new directory
1061 # when all the files in a directory are moved to a new directory
1062 for dst, src in pycompat.iteritems(fullcopy):
1062 for dst, src in pycompat.iteritems(fullcopy):
1063 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
1063 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
1064 if dsrc in invalid:
1064 if dsrc in invalid:
1065 # already seen to be uninteresting
1065 # already seen to be uninteresting
1066 continue
1066 continue
1067 elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
1067 elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
1068 # directory wasn't entirely moved locally
1068 # directory wasn't entirely moved locally
1069 invalid.add(dsrc)
1069 invalid.add(dsrc)
1070 elif dsrc in dirmove and dirmove[dsrc] != ddst:
1070 elif dsrc in dirmove and dirmove[dsrc] != ddst:
1071 # files from the same directory moved to two different places
1071 # files from the same directory moved to two different places
1072 invalid.add(dsrc)
1072 invalid.add(dsrc)
1073 else:
1073 else:
1074 # looks good so far
1074 # looks good so far
1075 dirmove[dsrc] = ddst
1075 dirmove[dsrc] = ddst
1076
1076
1077 for i in invalid:
1077 for i in invalid:
1078 if i in dirmove:
1078 if i in dirmove:
1079 del dirmove[i]
1079 del dirmove[i]
1080 del invalid
1080 del invalid
1081
1081
1082 if not dirmove:
1082 if not dirmove:
1083 return {}, {}
1083 return {}, {}
1084
1084
1085 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
1085 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
1086
1086
1087 for d in dirmove:
1087 for d in dirmove:
1088 repo.ui.debug(
1088 repo.ui.debug(
1089 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
1089 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
1090 )
1090 )
1091
1091
1092 movewithdir = {}
1092 movewithdir = {}
1093 # check unaccounted nonoverlapping files against directory moves
1093 # check unaccounted nonoverlapping files against directory moves
1094 for f in addedfilesfn():
1094 for f in addedfilesfn():
1095 if f not in fullcopy:
1095 if f not in fullcopy:
1096 for d in dirmove:
1096 for d in dirmove:
1097 if f.startswith(d):
1097 if f.startswith(d):
1098 # new file added in a directory that was moved, move it
1098 # new file added in a directory that was moved, move it
1099 df = dirmove[d] + f[len(d) :]
1099 df = dirmove[d] + f[len(d) :]
1100 if df not in copy:
1100 if df not in copy:
1101 movewithdir[f] = df
1101 movewithdir[f] = df
1102 repo.ui.debug(
1102 repo.ui.debug(
1103 b" pending file src: '%s' -> dst: '%s'\n"
1103 b" pending file src: '%s' -> dst: '%s'\n"
1104 % (f, df)
1104 % (f, df)
1105 )
1105 )
1106 break
1106 break
1107
1107
1108 return dirmove, movewithdir
1108 return dirmove, movewithdir
1109
1109
1110
1110
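The directory-move detection above reduces to a small voting scheme over the dirname of each copy. A standalone sketch under simplified assumptions (plain strings, a set standing in for ctx.hasdir(), no pathutil):

    import posixpath

    def sketch_dir_renames(fullcopy, dirs_still_present):
        invalid = set()
        dirmove = {}
        for dst, src in fullcopy.items():
            dsrc, ddst = posixpath.dirname(src), posixpath.dirname(dst)
            if dsrc in invalid:
                continue  # already known to be uninteresting
            elif dsrc in dirs_still_present and ddst in dirs_still_present:
                invalid.add(dsrc)  # source dir wasn't entirely moved
            elif dsrc in dirmove and dirmove[dsrc] != ddst:
                invalid.add(dsrc)  # one dir moved to two different places
            else:
                dirmove[dsrc] = ddst
        return {k: v for k, v in dirmove.items() if k not in invalid}

    # every file under 'old/' went to 'new/', so the move is detected
    print(sketch_dir_renames({'new/a.c': 'old/a.c', 'new/b.c': 'old/b.c'},
                             dirs_still_present={'new'}))
    # -> {'old': 'new'}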
1111 def _heuristicscopytracing(repo, c1, c2, base):
1111 def _heuristicscopytracing(repo, c1, c2, base):
1112 """Fast copytracing using filename heuristics
1112 """Fast copytracing using filename heuristics
1113
1113
1114 Assumes that moves or renames are of the following two types:
1114 Assumes that moves or renames are of the following two types:
1115
1115
1116 1) Inside a directory only (same directory name but different filenames)
1116 1) Inside a directory only (same directory name but different filenames)
1117 2) Move from one directory to another
1117 2) Move from one directory to another
1118 (same filenames but different directory names)
1118 (same filenames but different directory names)
1119
1119
1120 Works only when there are no merge commits in the "source branch".
1120 Works only when there are no merge commits in the "source branch".
1121 The source branch is the commits from base up to c2, not including base.
1121 The source branch is the commits from base up to c2, not including base.
1122
1122
1123 If a merge is involved it falls back to _fullcopytracing().
1123 If a merge is involved it falls back to _fullcopytracing().
1124
1124
1125 Can be used by setting the following config:
1125 Can be used by setting the following config:
1126
1126
1127 [experimental]
1127 [experimental]
1128 copytrace = heuristics
1128 copytrace = heuristics
1129
1129
1130 In some cases the number of copy/move candidates found by the heuristics can be
1130 In some cases the number of copy/move candidates found by the heuristics can be
1131 very large, which will make the algorithm slow. The number of possible
1131 very large, which will make the algorithm slow. The number of possible
1132 candidates to check can be limited by using the config
1132 candidates to check can be limited by using the config
1133 `experimental.copytrace.movecandidateslimit` which defaults to 100.
1133 `experimental.copytrace.movecandidateslimit` which defaults to 100.
1134 """
1134 """
1135
1135
1136 if c1.rev() is None:
1136 if c1.rev() is None:
1137 c1 = c1.p1()
1137 c1 = c1.p1()
1138 if c2.rev() is None:
1138 if c2.rev() is None:
1139 c2 = c2.p1()
1139 c2 = c2.p1()
1140
1140
1141 changedfiles = set()
1141 changedfiles = set()
1142 m1 = c1.manifest()
1142 m1 = c1.manifest()
1143 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
1143 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
1144 # If base is not in c2 branch, we switch to fullcopytracing
1144 # If base is not in c2 branch, we switch to fullcopytracing
1145 repo.ui.debug(
1145 repo.ui.debug(
1146 b"switching to full copytracing as base is not "
1146 b"switching to full copytracing as base is not "
1147 b"an ancestor of c2\n"
1147 b"an ancestor of c2\n"
1148 )
1148 )
1149 return _fullcopytracing(repo, c1, c2, base)
1149 return _fullcopytracing(repo, c1, c2, base)
1150
1150
1151 ctx = c2
1151 ctx = c2
1152 while ctx != base:
1152 while ctx != base:
1153 if len(ctx.parents()) == 2:
1153 if len(ctx.parents()) == 2:
1154 # To keep things simple let's not handle merges
1154 # To keep things simple let's not handle merges
1155 repo.ui.debug(b"switching to full copytracing because of merges\n")
1155 repo.ui.debug(b"switching to full copytracing because of merges\n")
1156 return _fullcopytracing(repo, c1, c2, base)
1156 return _fullcopytracing(repo, c1, c2, base)
1157 changedfiles.update(ctx.files())
1157 changedfiles.update(ctx.files())
1158 ctx = ctx.p1()
1158 ctx = ctx.p1()
1159
1159
1160 copies2 = {}
1160 copies2 = {}
1161 cp = _forwardcopies(base, c2)
1161 cp = _forwardcopies(base, c2)
1162 for dst, src in pycompat.iteritems(cp):
1162 for dst, src in pycompat.iteritems(cp):
1163 if src in m1:
1163 if src in m1:
1164 copies2[dst] = src
1164 copies2[dst] = src
1165
1165
1166 # file is missing if it isn't present in the destination, but is present in
1166 # file is missing if it isn't present in the destination, but is present in
1167 # the base and present in the source.
1167 # the base and present in the source.
1168 # Presence in the base is important to exclude added files, presence in the
1168 # Presence in the base is important to exclude added files, presence in the
1169 # source is important to exclude removed files.
1169 # source is important to exclude removed files.
1170 filt = lambda f: f not in m1 and f in base and f in c2
1170 filt = lambda f: f not in m1 and f in base and f in c2
1171 missingfiles = [f for f in changedfiles if filt(f)]
1171 missingfiles = [f for f in changedfiles if filt(f)]
1172
1172
1173 copies1 = {}
1173 copies1 = {}
1174 if missingfiles:
1174 if missingfiles:
1175 basenametofilename = collections.defaultdict(list)
1175 basenametofilename = collections.defaultdict(list)
1176 dirnametofilename = collections.defaultdict(list)
1176 dirnametofilename = collections.defaultdict(list)
1177
1177
1178 for f in m1.filesnotin(base.manifest()):
1178 for f in m1.filesnotin(base.manifest()):
1179 basename = os.path.basename(f)
1179 basename = os.path.basename(f)
1180 dirname = os.path.dirname(f)
1180 dirname = os.path.dirname(f)
1181 basenametofilename[basename].append(f)
1181 basenametofilename[basename].append(f)
1182 dirnametofilename[dirname].append(f)
1182 dirnametofilename[dirname].append(f)
1183
1183
1184 for f in missingfiles:
1184 for f in missingfiles:
1185 basename = os.path.basename(f)
1185 basename = os.path.basename(f)
1186 dirname = os.path.dirname(f)
1186 dirname = os.path.dirname(f)
1187 samebasename = basenametofilename[basename]
1187 samebasename = basenametofilename[basename]
1188 samedirname = dirnametofilename[dirname]
1188 samedirname = dirnametofilename[dirname]
1189 movecandidates = samebasename + samedirname
1189 movecandidates = samebasename + samedirname
1190 # f is guaranteed to be present in c2, that's why
1190 # f is guaranteed to be present in c2, that's why
1191 # c2.filectx(f) won't fail
1191 # c2.filectx(f) won't fail
1192 f2 = c2.filectx(f)
1192 f2 = c2.filectx(f)
1193 # we can have a lot of candidates, which can slow down the heuristics;
1193 # we can have a lot of candidates, which can slow down the heuristics;
1194 # a config value limits the number of candidate moves to check
1194 # a config value limits the number of candidate moves to check
1195 maxcandidates = repo.ui.configint(
1195 maxcandidates = repo.ui.configint(
1196 b'experimental', b'copytrace.movecandidateslimit'
1196 b'experimental', b'copytrace.movecandidateslimit'
1197 )
1197 )
1198
1198
1199 if len(movecandidates) > maxcandidates:
1199 if len(movecandidates) > maxcandidates:
1200 repo.ui.status(
1200 repo.ui.status(
1201 _(
1201 _(
1202 b"skipping copytracing for '%s', more "
1202 b"skipping copytracing for '%s', more "
1203 b"candidates than the limit: %d\n"
1203 b"candidates than the limit: %d\n"
1204 )
1204 )
1205 % (f, len(movecandidates))
1205 % (f, len(movecandidates))
1206 )
1206 )
1207 continue
1207 continue
1208
1208
1209 for candidate in movecandidates:
1209 for candidate in movecandidates:
1210 f1 = c1.filectx(candidate)
1210 f1 = c1.filectx(candidate)
1211 if _related(f1, f2):
1211 if _related(f1, f2):
1212 # if there are a few related copies then we'll merge
1212 # if there are a few related copies then we'll merge
1213 # changes into all of them. This matches the behaviour
1213 # changes into all of them. This matches the behaviour
1214 # of upstream copytracing
1214 # of upstream copytracing
1215 copies1[candidate] = f
1215 copies1[candidate] = f
1216
1216
1217 return branch_copies(copies1), branch_copies(copies2), {}
1217 return branch_copies(copies1), branch_copies(copies2), {}
1218
1218
1219
1219
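The bucketing step of the heuristics can be pictured in isolation: files added locally are indexed by basename and by dirname, so a missing file is only compared against its two buckets. A sketch with toy paths (the real code then applies _related() and the movecandidateslimit cap):

    import collections
    import os

    def sketch_candidates(added_locally, missing_file):
        by_basename = collections.defaultdict(list)
        by_dirname = collections.defaultdict(list)
        for f in added_locally:
            by_basename[os.path.basename(f)].append(f)
            by_dirname[os.path.dirname(f)].append(f)
        # same name elsewhere (a move) or new name in the same dir (a rename)
        return (by_basename[os.path.basename(missing_file)]
                + by_dirname[os.path.dirname(missing_file)])

    print(sketch_candidates(['lib/util.c', 'src/helpers.c'], 'src/util.c'))
    # -> ['lib/util.c', 'src/helpers.c']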
1220 def _related(f1, f2):
1220 def _related(f1, f2):
1221 """return True if f1 and f2 filectx have a common ancestor
1221 """return True if f1 and f2 filectx have a common ancestor
1222
1222
1223 Walk back to common ancestor to see if the two files originate
1223 Walk back to common ancestor to see if the two files originate
1224 from the same file. Since workingfilectx's rev() is None it messes
1224 from the same file. Since workingfilectx's rev() is None it messes
1225 up the integer comparison logic, hence the pre-step check for
1225 up the integer comparison logic, hence the pre-step check for
1226 None (f1 and f2 can only be workingfilectx's initially).
1226 None (f1 and f2 can only be workingfilectx's initially).
1227 """
1227 """
1228
1228
1229 if f1 == f2:
1229 if f1 == f2:
1230 return True # a match
1230 return True # a match
1231
1231
1232 g1, g2 = f1.ancestors(), f2.ancestors()
1232 g1, g2 = f1.ancestors(), f2.ancestors()
1233 try:
1233 try:
1234 f1r, f2r = f1.linkrev(), f2.linkrev()
1234 f1r, f2r = f1.linkrev(), f2.linkrev()
1235
1235
1236 if f1r is None:
1236 if f1r is None:
1237 f1 = next(g1)
1237 f1 = next(g1)
1238 if f2r is None:
1238 if f2r is None:
1239 f2 = next(g2)
1239 f2 = next(g2)
1240
1240
1241 while True:
1241 while True:
1242 f1r, f2r = f1.linkrev(), f2.linkrev()
1242 f1r, f2r = f1.linkrev(), f2.linkrev()
1243 if f1r > f2r:
1243 if f1r > f2r:
1244 f1 = next(g1)
1244 f1 = next(g1)
1245 elif f2r > f1r:
1245 elif f2r > f1r:
1246 f2 = next(g2)
1246 f2 = next(g2)
1247 else: # f1 and f2 point to files in the same linkrev
1247 else: # f1 and f2 point to files in the same linkrev
1248 return f1 == f2 # true if they point to the same file
1248 return f1 == f2 # true if they point to the same file
1249 except StopIteration:
1249 except StopIteration:
1250 return False
1250 return False
1251
1251
1252
1252
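The walk above is the classic "advance the chain with the higher revision until both meet" pattern. A self-contained sketch over plain (linkrev, file_id) pairs, which stand in for filectx ancestors (hypothetical data):

    def sketch_related(chain1, chain2):
        # chains are iterated highest linkrev first, like filectx.ancestors()
        g1, g2 = iter(chain1), iter(chain2)
        try:
            f1, f2 = next(g1), next(g2)
            while True:
                if f1[0] > f2[0]:
                    f1 = next(g1)
                elif f2[0] > f1[0]:
                    f2 = next(g2)
                else:
                    return f1 == f2  # same linkrev: related iff same entry
        except StopIteration:
            return False  # one chain ran out before they met

    # both histories descend from the entry introduced at rev 3
    print(sketch_related([(7, 'a'), (3, 'root')], [(5, 'b'), (3, 'root')]))  # True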
1253 def graftcopies(wctx, ctx, base):
1253 def graftcopies(wctx, ctx, base):
1254 """reproduce copies between base and ctx in the wctx
1254 """reproduce copies between base and ctx in the wctx
1255
1255
1256 Unlike mergecopies(), this function will only consider copies between base
1256 Unlike mergecopies(), this function will only consider copies between base
1257 and ctx; it will ignore copies between base and wctx. Also unlike
1257 and ctx; it will ignore copies between base and wctx. Also unlike
1258 mergecopies(), this function will apply copies to the working copy (instead
1258 mergecopies(), this function will apply copies to the working copy (instead
1259 of just returning information about the copies). That makes it cheaper
1259 of just returning information about the copies). That makes it cheaper
1260 (especially in the common case of base==ctx.p1()) and useful also when
1260 (especially in the common case of base==ctx.p1()) and useful also when
1261 experimental.copytrace=off.
1261 experimental.copytrace=off.
1262
1262
1263 merge.update() will have already marked most copies, but it will only
1263 merge.update() will have already marked most copies, but it will only
1264 mark copies if it thinks the source files are related (see
1264 mark copies if it thinks the source files are related (see
1265 merge._related()). It will also not mark copies if the file wasn't modified
1265 merge._related()). It will also not mark copies if the file wasn't modified
1266 on the local side. This function adds the copies that were "missed"
1266 on the local side. This function adds the copies that were "missed"
1267 by merge.update().
1267 by merge.update().
1268 """
1268 """
1269 new_copies = pathcopies(base, ctx)
1269 new_copies = pathcopies(base, ctx)
1270 parent = wctx.p1()
1270 parent = wctx.p1()
1271 _filter(parent, wctx, new_copies)
1271 _filter(parent, wctx, new_copies)
1272 # Extra filtering to drop copy information for files that existed before
1272 # Extra filtering to drop copy information for files that existed before
1273 # the graft. This is to handle the case of grafting a rename onto a commit
1273 # the graft. This is to handle the case of grafting a rename onto a commit
1274 # that already has the rename. Otherwise the presence of copy information
1274 # that already has the rename. Otherwise the presence of copy information
1275 # would result in the creation of an empty commit where we would prefer to
1275 # would result in the creation of an empty commit where we would prefer to
1276 # not create one.
1276 # not create one.
1277 for dest, __ in list(new_copies.items()):
1277 for dest, __ in list(new_copies.items()):
1278 if dest in parent:
1278 if dest in parent:
1279 del new_copies[dest]
1279 del new_copies[dest]
1280 for dst, src in pycompat.iteritems(new_copies):
1280 for dst, src in pycompat.iteritems(new_copies):
1281 wctx[dst].markcopied(src)
1281 wctx[dst].markcopied(src)
@@ -1,949 +1,890 b''
1 use crate::utils::hg_path::HgPath;
1 use crate::utils::hg_path::HgPath;
2 use crate::utils::hg_path::HgPathBuf;
2 use crate::utils::hg_path::HgPathBuf;
3 use crate::Revision;
3 use crate::Revision;
4 use crate::NULL_REVISION;
4 use crate::NULL_REVISION;
5
5
6 use im_rc::ordmap::DiffItem;
6 use im_rc::ordmap::DiffItem;
7 use im_rc::ordmap::Entry;
7 use im_rc::ordmap::Entry;
8 use im_rc::ordmap::OrdMap;
8 use im_rc::ordmap::OrdMap;
9
9
10 use std::cmp::Ordering;
10 use std::cmp::Ordering;
11 use std::collections::HashMap;
11 use std::collections::HashMap;
12 use std::collections::HashSet;
12 use std::collections::HashSet;
13 use std::convert::TryInto;
13 use std::convert::TryInto;
14
14
15 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
15 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
16
16
17 type PathToken = usize;
17 type PathToken = usize;
18
18
19 #[derive(Clone, Debug, PartialEq)]
19 #[derive(Clone, Debug, PartialEq)]
20 struct CopySource {
20 struct CopySource {
21 /// revision at which the copy information was added
21 /// revision at which the copy information was added
22 rev: Revision,
22 rev: Revision,
23 /// the copy source (set to None in case of deletion of the associated
23 /// the copy source (set to None in case of deletion of the associated
24 /// key)
24 /// key)
25 path: Option<PathToken>,
25 path: Option<PathToken>,
26 /// a set of previous `CopySource.rev` values directly or indirectly
26 /// a set of previous `CopySource.rev` values directly or indirectly
27 /// overwritten by this one.
27 /// overwritten by this one.
28 overwritten: HashSet<Revision>,
28 overwritten: HashSet<Revision>,
29 }
29 }
30
30
31 impl CopySource {
31 impl CopySource {
32 /// create a new CopySource
32 /// create a new CopySource
33 ///
33 ///
34 /// Use this when no previous copy source existed.
34 /// Use this when no previous copy source existed.
35 fn new(rev: Revision, path: Option<PathToken>) -> Self {
35 fn new(rev: Revision, path: Option<PathToken>) -> Self {
36 Self {
36 Self {
37 rev,
37 rev,
38 path,
38 path,
39 overwritten: HashSet::new(),
39 overwritten: HashSet::new(),
40 }
40 }
41 }
41 }
42
42
43 /// create a new CopySource from merging two others
43 /// create a new CopySource from merging two others
44 ///
44 ///
45 /// Use this when merging two InternalPathCopies requires active merging of
45 /// Use this when merging two InternalPathCopies requires active merging of
46 /// some entries.
46 /// some entries.
47 fn new_from_merge(rev: Revision, winner: &Self, loser: &Self) -> Self {
47 fn new_from_merge(rev: Revision, winner: &Self, loser: &Self) -> Self {
48 let mut overwritten = HashSet::new();
48 let mut overwritten = HashSet::new();
49 overwritten.extend(winner.overwritten.iter().copied());
49 overwritten.extend(winner.overwritten.iter().copied());
50 overwritten.extend(loser.overwritten.iter().copied());
50 overwritten.extend(loser.overwritten.iter().copied());
51 overwritten.insert(winner.rev);
51 overwritten.insert(winner.rev);
52 overwritten.insert(loser.rev);
52 overwritten.insert(loser.rev);
53 Self {
53 Self {
54 rev,
54 rev,
55 path: winner.path,
55 path: winner.path,
56 overwritten: overwritten,
56 overwritten: overwritten,
57 }
57 }
58 }
58 }
59
59
60 /// Update the value of a pre-existing CopySource
60 /// Update the value of a pre-existing CopySource
61 ///
61 ///
62 /// Use this when recording copy information from parent → child edges
62 /// Use this when recording copy information from parent → child edges
63 fn overwrite(&mut self, rev: Revision, path: Option<PathToken>) {
63 fn overwrite(&mut self, rev: Revision, path: Option<PathToken>) {
64 self.overwritten.insert(self.rev);
64 self.overwritten.insert(self.rev);
65 self.rev = rev;
65 self.rev = rev;
66 self.path = path;
66 self.path = path;
67 }
67 }
68
68
69 /// Mark pre-existing copy information as "dropped" by a file deletion
69 /// Mark pre-existing copy information as "dropped" by a file deletion
70 ///
70 ///
71 /// Use this when recording copy information from parent → child edges
71 /// Use this when recording copy information from parent → child edges
72 fn mark_delete(&mut self, rev: Revision) {
72 fn mark_delete(&mut self, rev: Revision) {
73 self.overwritten.insert(self.rev);
73 self.overwritten.insert(self.rev);
74 self.rev = rev;
74 self.rev = rev;
75 self.path = None;
75 self.path = None;
76 }
76 }
77
77
78 fn is_overwritten_by(&self, other: &Self) -> bool {
78 fn is_overwritten_by(&self, other: &Self) -> bool {
79 other.overwritten.contains(&self.rev)
79 other.overwritten.contains(&self.rev)
80 }
80 }
81 }
81 }
82
82
83 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
83 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
84 type InternalPathCopies = OrdMap<PathToken, CopySource>;
84 type InternalPathCopies = OrdMap<PathToken, CopySource>;
85
85
86 /// holds parent 1, parent 2 and the relevant file actions.
86 /// holds parent 1, parent 2 and the relevant file actions.
87 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
87 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
88
88
89 /// represents the files affected by a changeset
89 /// represents the files affected by a changeset
90 ///
90 ///
91 /// This holds a subset of mercurial.metadata.ChangingFiles, as we do not need
91 /// This holds a subset of mercurial.metadata.ChangingFiles, as we do not need
92 /// all the data categories tracked by it.
92 /// all the data categories tracked by it.
95 pub struct ChangedFiles<'a> {
95 pub struct ChangedFiles<'a> {
96 nb_items: u32,
96 nb_items: u32,
97 index: &'a [u8],
97 index: &'a [u8],
98 data: &'a [u8],
98 data: &'a [u8],
99 }
99 }
100
100
101 /// Represent active changes that affect the copy tracing.
101 /// Represent active changes that affect the copy tracing.
102 enum Action<'a> {
102 enum Action<'a> {
103 /// The parent → child edge is removing a file
103 /// The parent → child edge is removing a file
104 ///
104 ///
105 /// (actually, this could be the edge from the other parent, but it does
105 /// (actually, this could be the edge from the other parent, but it does
106 /// not matter)
106 /// not matter)
107 Removed(&'a HgPath),
107 Removed(&'a HgPath),
108 /// The parent → child edge introduces copy information between (dest,
108 /// The parent → child edge introduces copy information between (dest,
109 /// source)
109 /// source)
110 Copied(&'a HgPath, &'a HgPath),
110 Copied(&'a HgPath, &'a HgPath),
111 }
111 }
112
112
113 /// This expresses the possible "special" cases we can get in a merge
113 /// This expresses the possible "special" cases we can get in a merge
114 ///
114 ///
115 /// See mercurial/metadata.py for details on these values.
115 /// See mercurial/metadata.py for details on these values.
116 #[derive(PartialEq)]
116 #[derive(PartialEq)]
117 enum MergeCase {
117 enum MergeCase {
118 /// Merged: file had history on both side that needed to be merged
118 /// Merged: file had history on both side that needed to be merged
119 Merged,
119 Merged,
120 /// Salvaged: file was candidate for deletion, but survived the merge
120 /// Salvaged: file was candidate for deletion, but survived the merge
121 Salvaged,
121 Salvaged,
122 /// Normal: Not one of the two cases above
122 /// Normal: Not one of the two cases above
123 Normal,
123 Normal,
124 }
124 }
125
125
126 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
126 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
127
127
128 const EMPTY: &[u8] = b"";
128 const EMPTY: &[u8] = b"";
129 const COPY_MASK: u8 = 3;
129 const COPY_MASK: u8 = 3;
130 const P1_COPY: u8 = 2;
130 const P1_COPY: u8 = 2;
131 const P2_COPY: u8 = 3;
131 const P2_COPY: u8 = 3;
132 const ACTION_MASK: u8 = 28;
132 const ACTION_MASK: u8 = 28;
133 const REMOVED: u8 = 12;
133 const REMOVED: u8 = 12;
134 const MERGED: u8 = 8;
134 const MERGED: u8 = 8;
135 const SALVAGED: u8 = 16;
135 const SALVAGED: u8 = 16;
136
136
137 impl<'a> ChangedFiles<'a> {
137 impl<'a> ChangedFiles<'a> {
138 const INDEX_START: usize = 4;
138 const INDEX_START: usize = 4;
139 const ENTRY_SIZE: u32 = 9;
139 const ENTRY_SIZE: u32 = 9;
140 const FILENAME_START: u32 = 1;
140 const FILENAME_START: u32 = 1;
141 const COPY_SOURCE_START: u32 = 5;
141 const COPY_SOURCE_START: u32 = 5;
142
142
143 pub fn new(data: &'a [u8]) -> Self {
143 pub fn new(data: &'a [u8]) -> Self {
144 assert!(
144 assert!(
145 data.len() >= 4,
145 data.len() >= 4,
146 "data size ({}) is too small to contain the header (4)",
146 "data size ({}) is too small to contain the header (4)",
147 data.len()
147 data.len()
148 );
148 );
149 let nb_items_raw: [u8; 4] = (&data[0..=3])
149 let nb_items_raw: [u8; 4] = (&data[0..=3])
150 .try_into()
150 .try_into()
151 .expect("failed to turn 4 bytes into 4 bytes");
151 .expect("failed to turn 4 bytes into 4 bytes");
152 let nb_items = u32::from_be_bytes(nb_items_raw);
152 let nb_items = u32::from_be_bytes(nb_items_raw);
153
153
154 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
154 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
155 let index_end = Self::INDEX_START + index_size;
155 let index_end = Self::INDEX_START + index_size;
156
156
157 assert!(
157 assert!(
158 data.len() >= index_end,
158 data.len() >= index_end,
159 "data size ({}) is too small to fit the index_data ({})",
159 "data size ({}) is too small to fit the index_data ({})",
160 data.len(),
160 data.len(),
161 index_end
161 index_end
162 );
162 );
163
163
164 let ret = ChangedFiles {
164 let ret = ChangedFiles {
165 nb_items,
165 nb_items,
166 index: &data[Self::INDEX_START..index_end],
166 index: &data[Self::INDEX_START..index_end],
167 data: &data[index_end..],
167 data: &data[index_end..],
168 };
168 };
169 let max_data = ret.filename_end(nb_items - 1) as usize;
169 let max_data = ret.filename_end(nb_items - 1) as usize;
170 assert!(
170 assert!(
171 ret.data.len() >= max_data,
171 ret.data.len() >= max_data,
172 "data size ({}) is too small to fit all data ({})",
172 "data size ({}) is too small to fit all data ({})",
173 data.len(),
173 data.len(),
174 index_end + max_data
174 index_end + max_data
175 );
175 );
176 ret
176 ret
177 }
177 }
178
178
179 pub fn new_empty() -> Self {
179 pub fn new_empty() -> Self {
180 ChangedFiles {
180 ChangedFiles {
181 nb_items: 0,
181 nb_items: 0,
182 index: EMPTY,
182 index: EMPTY,
183 data: EMPTY,
183 data: EMPTY,
184 }
184 }
185 }
185 }
186
186
187 /// internal function to return an individual entry at a given index
187 /// internal function to return an individual entry at a given index
188 fn entry(&'a self, idx: u32) -> FileChange<'a> {
188 fn entry(&'a self, idx: u32) -> FileChange<'a> {
189 if idx >= self.nb_items {
189 if idx >= self.nb_items {
190 panic!(
190 panic!(
191 "index for entry is higher that the number of file {} >= {}",
191 "index for entry is higher that the number of file {} >= {}",
192 idx, self.nb_items
192 idx, self.nb_items
193 )
193 )
194 }
194 }
195 let flags = self.flags(idx);
195 let flags = self.flags(idx);
196 let filename = self.filename(idx);
196 let filename = self.filename(idx);
197 let copy_idx = self.copy_idx(idx);
197 let copy_idx = self.copy_idx(idx);
198 let copy_source = self.filename(copy_idx);
198 let copy_source = self.filename(copy_idx);
199 (flags, filename, copy_source)
199 (flags, filename, copy_source)
200 }
200 }
201
201
202 /// internal function to return the filename of the entry at a given index
202 /// internal function to return the filename of the entry at a given index
203 fn filename(&self, idx: u32) -> &HgPath {
203 fn filename(&self, idx: u32) -> &HgPath {
204 let filename_start;
204 let filename_start;
205 if idx == 0 {
205 if idx == 0 {
206 filename_start = 0;
206 filename_start = 0;
207 } else {
207 } else {
208 filename_start = self.filename_end(idx - 1)
208 filename_start = self.filename_end(idx - 1)
209 }
209 }
210 let filename_end = self.filename_end(idx);
210 let filename_end = self.filename_end(idx);
211 let filename_start = filename_start as usize;
211 let filename_start = filename_start as usize;
212 let filename_end = filename_end as usize;
212 let filename_end = filename_end as usize;
213 HgPath::new(&self.data[filename_start..filename_end])
213 HgPath::new(&self.data[filename_start..filename_end])
214 }
214 }
215
215
216 /// internal function to return the flag field of the entry at a given
216 /// internal function to return the flag field of the entry at a given
217 /// index
217 /// index
218 fn flags(&self, idx: u32) -> u8 {
218 fn flags(&self, idx: u32) -> u8 {
219 let idx = idx as usize;
219 let idx = idx as usize;
220 self.index[idx * (Self::ENTRY_SIZE as usize)]
220 self.index[idx * (Self::ENTRY_SIZE as usize)]
221 }
221 }
222
222
223 /// internal function to return the end of a filename part at a given index
223 /// internal function to return the end of a filename part at a given index
224 fn filename_end(&self, idx: u32) -> u32 {
224 fn filename_end(&self, idx: u32) -> u32 {
225 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
225 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
226 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
226 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
227 let start = start as usize;
227 let start = start as usize;
228 let end = end as usize;
228 let end = end as usize;
229 let raw = (&self.index[start..end])
229 let raw = (&self.index[start..end])
230 .try_into()
230 .try_into()
231 .expect("failed to turn 4 bytes into 4 bytes");
231 .expect("failed to turn 4 bytes into 4 bytes");
232 u32::from_be_bytes(raw)
232 u32::from_be_bytes(raw)
233 }
233 }
234
234
235 /// internal function to return index of the copy source of the entry at a
235 /// internal function to return index of the copy source of the entry at a
236 /// given index
236 /// given index
237 fn copy_idx(&self, idx: u32) -> u32 {
237 fn copy_idx(&self, idx: u32) -> u32 {
238 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
238 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
239 let end = (idx + 1) * Self::ENTRY_SIZE;
239 let end = (idx + 1) * Self::ENTRY_SIZE;
240 let start = start as usize;
240 let start = start as usize;
241 let end = end as usize;
241 let end = end as usize;
242 let raw = (&self.index[start..end])
242 let raw = (&self.index[start..end])
243 .try_into()
243 .try_into()
244 .expect("failed to turn 4 bytes into 4 bytes");
244 .expect("failed to turn 4 bytes into 4 bytes");
245 u32::from_be_bytes(raw)
245 u32::from_be_bytes(raw)
246 }
246 }
247
247
248 /// Return an iterator over all the `Action` in this instance.
248 /// Return an iterator over all the `Action` in this instance.
249 fn iter_actions(&self, parent: Parent) -> ActionsIterator {
249 fn iter_actions(&self, parent: Parent) -> ActionsIterator {
250 ActionsIterator {
250 ActionsIterator {
251 changes: &self,
251 changes: &self,
252 parent: parent,
252 parent: parent,
253 current: 0,
253 current: 0,
254 }
254 }
255 }
255 }
256
256
257 /// return the MergeCase value associated with a filename
257 /// return the MergeCase value associated with a filename
258 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
258 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
259 if self.nb_items == 0 {
259 if self.nb_items == 0 {
260 return MergeCase::Normal;
260 return MergeCase::Normal;
261 }
261 }
262 let mut low_part = 0;
262 let mut low_part = 0;
263 let mut high_part = self.nb_items;
263 let mut high_part = self.nb_items;
264
264
265 while low_part < high_part {
265 while low_part < high_part {
266 let cursor = (low_part + high_part - 1) / 2;
266 let cursor = (low_part + high_part - 1) / 2;
267 let (flags, filename, _source) = self.entry(cursor);
267 let (flags, filename, _source) = self.entry(cursor);
268 match path.cmp(filename) {
268 match path.cmp(filename) {
269 Ordering::Less => low_part = cursor + 1,
269 Ordering::Less => low_part = cursor + 1,
270 Ordering::Greater => high_part = cursor,
270 Ordering::Greater => high_part = cursor,
271 Ordering::Equal => {
271 Ordering::Equal => {
272 return match flags & ACTION_MASK {
272 return match flags & ACTION_MASK {
273 MERGED => MergeCase::Merged,
273 MERGED => MergeCase::Merged,
274 SALVAGED => MergeCase::Salvaged,
274 SALVAGED => MergeCase::Salvaged,
275 _ => MergeCase::Normal,
275 _ => MergeCase::Normal,
276 };
276 };
277 }
277 }
278 }
278 }
279 }
279 }
280 MergeCase::Normal
280 MergeCase::Normal
281 }
281 }
282 }
282 }
283
283
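The byte layout parsed by ChangedFiles::new() above can be decoded in a few lines; this sketch is in Python purely for illustration and assumes exactly the layout the constants describe (a 4-byte big-endian entry count, then one 9-byte index entry per file: 1 flag byte, a 4-byte big-endian filename end offset, a 4-byte big-endian copy-source entry index, followed by the concatenated filenames):

    import struct

    def decode_changed_files(blob):
        (nb_items,) = struct.unpack_from('>I', blob, 0)
        index_end = 4 + 9 * nb_items
        index, data = blob[4:index_end], blob[index_end:]

        def filename_end(idx):
            (end,) = struct.unpack_from('>I', index, idx * 9 + 1)
            return end

        def filename(idx):
            start = filename_end(idx - 1) if idx else 0
            return data[start:filename_end(idx)]

        for idx in range(nb_items):
            flags = index[idx * 9]
            (copy_idx,) = struct.unpack_from('>I', index, idx * 9 + 5)
            yield flags, filename(idx), filename(copy_idx)

    # made-up blob: entry 0 is 'a'; entry 1 is 'b', flagged as a P1 copy of 'a'
    blob = (struct.pack('>I', 2)
            + bytes([0]) + struct.pack('>II', 1, 0)
            + bytes([2]) + struct.pack('>II', 2, 0)
            + b'ab')
    print(list(decode_changed_files(blob)))
    # -> [(0, b'a', b'a'), (2, b'b', b'a')]

As in the Rust code, the copy-source column is resolved for every entry; the COPY_MASK bits of the flag byte decide whether it is meaningful.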
284 /// A struct responsible for answering "is X ancestors of Y" quickly
285 ///
286 /// The structure will delegate ancestor calls to a callback, and cache the
287 /// result.
288 #[derive(Debug)]
289 struct AncestorOracle<'a, A: Fn(Revision, Revision) -> bool> {
290 inner: &'a A,
291 pairs: HashMap<(Revision, Revision), bool>,
292 }
293
294 impl<'a, A: Fn(Revision, Revision) -> bool> AncestorOracle<'a, A> {
295 fn new(func: &'a A) -> Self {
296 Self {
297 inner: func,
298 pairs: HashMap::default(),
299 }
300 }
301
302 fn record_overwrite(&mut self, anc: Revision, desc: Revision) {
303 self.pairs.insert((anc, desc), true);
304 }
305
306 /// returns `true` if `anc` is an ancestor of `desc`, `false` otherwise
307 fn is_overwrite(&mut self, anc: Revision, desc: Revision) -> bool {
308 if anc > desc {
309 false
310 } else if anc == desc {
311 true
312 } else {
313 if let Some(b) = self.pairs.get(&(anc, desc)) {
314 *b
315 } else {
316 let b = (self.inner)(anc, desc);
317 self.pairs.insert((anc, desc), b);
318 b
319 }
320 }
321 }
322 }
323
324 struct ActionsIterator<'a> {
284 struct ActionsIterator<'a> {
325 changes: &'a ChangedFiles<'a>,
285 changes: &'a ChangedFiles<'a>,
326 parent: Parent,
286 parent: Parent,
327 current: u32,
287 current: u32,
328 }
288 }
329
289
330 impl<'a> Iterator for ActionsIterator<'a> {
290 impl<'a> Iterator for ActionsIterator<'a> {
331 type Item = Action<'a>;
291 type Item = Action<'a>;
332
292
333 fn next(&mut self) -> Option<Action<'a>> {
293 fn next(&mut self) -> Option<Action<'a>> {
334 let copy_flag = match self.parent {
294 let copy_flag = match self.parent {
335 Parent::FirstParent => P1_COPY,
295 Parent::FirstParent => P1_COPY,
336 Parent::SecondParent => P2_COPY,
296 Parent::SecondParent => P2_COPY,
337 };
297 };
338 while self.current < self.changes.nb_items {
298 while self.current < self.changes.nb_items {
339 let (flags, file, source) = self.changes.entry(self.current);
299 let (flags, file, source) = self.changes.entry(self.current);
340 self.current += 1;
300 self.current += 1;
341 if (flags & ACTION_MASK) == REMOVED {
301 if (flags & ACTION_MASK) == REMOVED {
342 return Some(Action::Removed(file));
302 return Some(Action::Removed(file));
343 }
303 }
344 let copy = flags & COPY_MASK;
304 let copy = flags & COPY_MASK;
345 if copy == copy_flag {
305 if copy == copy_flag {
346 return Some(Action::Copied(file, source));
306 return Some(Action::Copied(file, source));
347 }
307 }
348 }
308 }
349 return None;
309 return None;
350 }
310 }
351 }
311 }
352
312
353 /// A small struct whose purpose is to ensure lifetime of bytes referenced in
313 /// A small struct whose purpose is to ensure lifetime of bytes referenced in
354 /// ChangedFiles
314 /// ChangedFiles
355 ///
315 ///
356 /// It is passed to the RevInfoMaker callback, which can assign any necessary
316 /// It is passed to the RevInfoMaker callback, which can assign any necessary
357 /// content to the `data` attribute. The copy tracing code is responsible for
317 /// content to the `data` attribute. The copy tracing code is responsible for
358 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
318 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
359 pub struct DataHolder<D> {
319 pub struct DataHolder<D> {
360 /// RevInfoMaker callback should assign data referenced by the
320 /// RevInfoMaker callback should assign data referenced by the
361 /// ChangedFiles struct it returns to this attribute. The DataHolder
321 /// ChangedFiles struct it returns to this attribute. The DataHolder
362 /// lifetime will be at least as long as the ChangedFiles one.
322 /// lifetime will be at least as long as the ChangedFiles one.
363 pub data: Option<D>,
323 pub data: Option<D>,
364 }
324 }
365
325
366 pub type RevInfoMaker<'a, D> =
326 pub type RevInfoMaker<'a, D> =
367 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
327 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
368
328
369 /// enum used to carry information about the parent → child currently processed
329 /// enum used to carry information about the parent → child currently processed
370 #[derive(Copy, Clone, Debug)]
330 #[derive(Copy, Clone, Debug)]
371 enum Parent {
331 enum Parent {
372 /// The `p1(x) → x` edge
332 /// The `p1(x) → x` edge
373 FirstParent,
333 FirstParent,
374 /// The `p2(x) → x` edge
334 /// The `p2(x) → x` edge
375 SecondParent,
335 SecondParent,
376 }
336 }
377
337
378 /// A small "tokenizer" responsible for turning a full HgPath into a lighter
338 /// A small "tokenizer" responsible for turning a full HgPath into a lighter
379 /// PathToken
339 /// PathToken
380 ///
340 ///
381 /// Dealing with small objects like integers is much faster, so HgPath inputs
341 /// Dealing with small objects like integers is much faster, so HgPath inputs
382 /// are turned into integer "PathToken"s and converted back at the end.
342 /// are turned into integer "PathToken"s and converted back at the end.
383 #[derive(Clone, Debug, Default)]
343 #[derive(Clone, Debug, Default)]
384 struct TwoWayPathMap {
344 struct TwoWayPathMap {
385 token: HashMap<HgPathBuf, PathToken>,
345 token: HashMap<HgPathBuf, PathToken>,
386 path: Vec<HgPathBuf>,
346 path: Vec<HgPathBuf>,
387 }
347 }
388
348
389 impl TwoWayPathMap {
349 impl TwoWayPathMap {
390 fn tokenize(&mut self, path: &HgPath) -> PathToken {
350 fn tokenize(&mut self, path: &HgPath) -> PathToken {
391 match self.token.get(path) {
351 match self.token.get(path) {
392 Some(a) => *a,
352 Some(a) => *a,
393 None => {
353 None => {
394 let a = self.token.len();
354 let a = self.token.len();
395 let buf = path.to_owned();
355 let buf = path.to_owned();
396 self.path.push(buf.clone());
356 self.path.push(buf.clone());
397 self.token.insert(buf, a);
357 self.token.insert(buf, a);
398 a
358 a
399 }
359 }
400 }
360 }
401 }
361 }
402
362
403 fn untokenize(&self, token: PathToken) -> &HgPathBuf {
363 fn untokenize(&self, token: PathToken) -> &HgPathBuf {
404 assert!(token < self.path.len(), format!("Unknown token: {}", token));
364 assert!(token < self.path.len(), format!("Unknown token: {}", token));
405 &self.path[token]
365 &self.path[token]
406 }
366 }
407 }
367 }
408
368
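The tokenizer is a plain interning table: a dict for path → token and a vector for token → path. An equivalent sketch in Python (illustrative only):

    class SketchPathMap(object):
        def __init__(self):
            self._token = {}  # path -> token
            self._path = []   # token -> path

        def tokenize(self, path):
            token = self._token.get(path)
            if token is None:
                token = len(self._path)  # next unused small integer
                self._path.append(path)
                self._token[path] = token
            return token

        def untokenize(self, token):
            return self._path[token]

    m = SketchPathMap()
    assert m.tokenize(b'dir/a') == m.tokenize(b'dir/a') == 0
    assert m.untokenize(0) == b'dir/a'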
409 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
369 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
410 ///
370 ///
411 /// Arguments are:
371 /// Arguments are:
412 ///
372 ///
413 /// revs: all revisions to be considered
373 /// revs: all revisions to be considered
414 /// children: a {parent ? [childrens]} mapping
374 /// children: a {parent ? [childrens]} mapping
415 /// target_rev: the final revision we are combining copies to
375 /// target_rev: the final revision we are combining copies to
416 /// rev_info(rev): callback to get revision information:
376 /// rev_info(rev): callback to get revision information:
417 /// * first parent
377 /// * first parent
418 /// * second parent
378 /// * second parent
419 /// * ChangedFiles
379 /// * ChangedFiles
420 /// isancestors(low_rev, high_rev): callback to check if a revision is an
380 /// isancestors(low_rev, high_rev): callback to check if a revision is an
421 /// ancestor of another
381 /// ancestor of another
422 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool, D>(
382 pub fn combine_changeset_copies<D>(
423 revs: Vec<Revision>,
383 revs: Vec<Revision>,
424 mut children_count: HashMap<Revision, usize>,
384 mut children_count: HashMap<Revision, usize>,
425 target_rev: Revision,
385 target_rev: Revision,
426 rev_info: RevInfoMaker<D>,
386 rev_info: RevInfoMaker<D>,
427 is_ancestor: &A,
428 ) -> PathCopies {
387 ) -> PathCopies {
429 let mut all_copies = HashMap::new();
388 let mut all_copies = HashMap::new();
430 let mut oracle = AncestorOracle::new(is_ancestor);
431
389
432 let mut path_map = TwoWayPathMap::default();
390 let mut path_map = TwoWayPathMap::default();
433
391
434 for rev in revs {
392 for rev in revs {
435 let mut d: DataHolder<D> = DataHolder { data: None };
393 let mut d: DataHolder<D> = DataHolder { data: None };
436 let (p1, p2, changes) = rev_info(rev, &mut d);
394 let (p1, p2, changes) = rev_info(rev, &mut d);
437
395
438 // We will chain the copies information accumulated for the parent with
396 // We will chain the copies information accumulated for the parent with
439 // the individual copies information of the current revision, creating a
397 // the individual copies information of the current revision, creating a
440 // new TimeStampedPath for each `rev` → `children` vertex.
398 // new TimeStampedPath for each `rev` → `children` vertex.
441 let mut copies: Option<InternalPathCopies> = None;
399 let mut copies: Option<InternalPathCopies> = None;
442 if p1 != NULL_REVISION {
400 if p1 != NULL_REVISION {
443 // Retrieve data computed in a previous iteration
401 // Retrieve data computed in a previous iteration
444 let parent_copies = get_and_clean_parent_copies(
402 let parent_copies = get_and_clean_parent_copies(
445 &mut all_copies,
403 &mut all_copies,
446 &mut children_count,
404 &mut children_count,
447 p1,
405 p1,
448 );
406 );
449 if let Some(parent_copies) = parent_copies {
407 if let Some(parent_copies) = parent_copies {
450 // combine it with data for that revision
408 // combine it with data for that revision
451 let vertex_copies = add_from_changes(
409 let vertex_copies = add_from_changes(
452 &mut path_map,
410 &mut path_map,
453 &mut oracle,
454 &parent_copies,
411 &parent_copies,
455 &changes,
412 &changes,
456 Parent::FirstParent,
413 Parent::FirstParent,
457 rev,
414 rev,
458 );
415 );
459 // keep that data around for potential later combination
416 // keep that data around for potential later combination
460 copies = Some(vertex_copies);
417 copies = Some(vertex_copies);
461 }
418 }
462 }
419 }
463 if p2 != NULL_REVISION {
420 if p2 != NULL_REVISION {
464 // Retrieve data computed in a previous iteration
421 // Retrieve data computed in a previous iteration
465 let parent_copies = get_and_clean_parent_copies(
422 let parent_copies = get_and_clean_parent_copies(
466 &mut all_copies,
423 &mut all_copies,
467 &mut children_count,
424 &mut children_count,
468 p2,
425 p2,
469 );
426 );
470 if let Some(parent_copies) = parent_copies {
427 if let Some(parent_copies) = parent_copies {
471 // combine it with data for that revision
428 // combine it with data for that revision
472 let vertex_copies = add_from_changes(
429 let vertex_copies = add_from_changes(
473 &mut path_map,
430 &mut path_map,
474 &mut oracle,
475 &parent_copies,
431 &parent_copies,
476 &changes,
432 &changes,
477 Parent::SecondParent,
433 Parent::SecondParent,
478 rev,
434 rev,
479 );
435 );
480
436
481 copies = match copies {
437 copies = match copies {
482 None => Some(vertex_copies),
438 None => Some(vertex_copies),
483 // A merge has two parents and needs to combine their copy
439 // A merge has two parents and needs to combine their copy
484 // information.
440 // information.
485 //
441 //
486 // If we got data from both parents, we need to combine
442 // If we got data from both parents, we need to combine
487 // them.
443 // them.
488 Some(copies) => Some(merge_copies_dict(
444 Some(copies) => Some(merge_copies_dict(
489 &path_map,
445 &path_map,
490 rev,
446 rev,
491 vertex_copies,
447 vertex_copies,
492 copies,
448 copies,
493 &changes,
449 &changes,
494 &mut oracle,
495 )),
450 )),
496 };
451 };
497 }
452 }
498 }
453 }
499 match copies {
454 match copies {
500 Some(copies) => {
455 Some(copies) => {
501 all_copies.insert(rev, copies);
456 all_copies.insert(rev, copies);
502 }
457 }
503 _ => {}
458 _ => {}
504 }
459 }
505 }
460 }
506
461
507 // Drop internal information (like the timestamp) and return the final
462 // Drop internal information (like the timestamp) and return the final
508 // mapping.
463 // mapping.
509 let tt_result = all_copies
464 let tt_result = all_copies
510 .remove(&target_rev)
465 .remove(&target_rev)
511 .expect("target revision was not processed");
466 .expect("target revision was not processed");
512 let mut result = PathCopies::default();
467 let mut result = PathCopies::default();
513 for (dest, tt_source) in tt_result {
468 for (dest, tt_source) in tt_result {
514 if let Some(path) = tt_source.path {
469 if let Some(path) = tt_source.path {
515 let path_dest = path_map.untokenize(dest).to_owned();
470 let path_dest = path_map.untokenize(dest).to_owned();
516 let path_path = path_map.untokenize(path).to_owned();
471 let path_path = path_map.untokenize(path).to_owned();
517 result.insert(path_dest, path_path);
472 result.insert(path_dest, path_path);
518 }
473 }
519 }
474 }
520 result
475 result
521 }
476 }
522
477
523 /// fetch previous computed information
478 /// fetch previous computed information
524 ///
479 ///
525 /// If no other children are expected to need this information, we drop it from
480 /// If no other children are expected to need this information, we drop it from
526 /// the cache.
481 /// the cache.
527 ///
482 ///
528 /// If parent is not part of the set we are expected to walk, return None.
483 /// If parent is not part of the set we are expected to walk, return None.
529 fn get_and_clean_parent_copies(
484 fn get_and_clean_parent_copies(
530 all_copies: &mut HashMap<Revision, InternalPathCopies>,
485 all_copies: &mut HashMap<Revision, InternalPathCopies>,
531 children_count: &mut HashMap<Revision, usize>,
486 children_count: &mut HashMap<Revision, usize>,
532 parent_rev: Revision,
487 parent_rev: Revision,
533 ) -> Option<InternalPathCopies> {
488 ) -> Option<InternalPathCopies> {
534 let count = children_count.get_mut(&parent_rev)?;
489 let count = children_count.get_mut(&parent_rev)?;
535 *count -= 1;
490 *count -= 1;
536 if *count == 0 {
491 if *count == 0 {
537 match all_copies.remove(&parent_rev) {
492 match all_copies.remove(&parent_rev) {
538 Some(c) => Some(c),
493 Some(c) => Some(c),
539 None => Some(InternalPathCopies::default()),
494 None => Some(InternalPathCopies::default()),
540 }
495 }
541 } else {
496 } else {
542 match all_copies.get(&parent_rev) {
497 match all_copies.get(&parent_rev) {
543 Some(c) => Some(c.clone()),
498 Some(c) => Some(c.clone()),
544 None => Some(InternalPathCopies::default()),
499 None => Some(InternalPathCopies::default()),
545 }
500 }
546 }
501 }
547 }
502 }
548
503
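The cache management above is a reference-count trick: a parent's copies map is stored once, each child that asks decrements the pending-children count, and the last child takes (rather than clones) the stored value. In Python terms (hypothetical helper, mirroring the control flow only):

    def sketch_get_parent_copies(all_copies, children_count, parent_rev):
        if parent_rev not in children_count:
            return None  # parent is outside the set being walked
        children_count[parent_rev] -= 1
        if children_count[parent_rev] == 0:
            # last user: take ownership, freeing the cache slot
            return all_copies.pop(parent_rev, {})
        # more children still pending: hand out a copy
        return dict(all_copies.get(parent_rev, {}))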
549 /// Combine ChangedFiles with some existing PathCopies information and return
504 /// Combine ChangedFiles with some existing PathCopies information and return
550 /// the result
505 /// the result
551 fn add_from_changes<A: Fn(Revision, Revision) -> bool>(
506 fn add_from_changes(
552 path_map: &mut TwoWayPathMap,
507 path_map: &mut TwoWayPathMap,
553 oracle: &mut AncestorOracle<A>,
554 base_copies: &InternalPathCopies,
508 base_copies: &InternalPathCopies,
555 changes: &ChangedFiles,
509 changes: &ChangedFiles,
556 parent: Parent,
510 parent: Parent,
557 current_rev: Revision,
511 current_rev: Revision,
558 ) -> InternalPathCopies {
512 ) -> InternalPathCopies {
559 let mut copies = base_copies.clone();
513 let mut copies = base_copies.clone();
560 for action in changes.iter_actions(parent) {
514 for action in changes.iter_actions(parent) {
561 match action {
515 match action {
562 Action::Copied(path_dest, path_source) => {
516 Action::Copied(path_dest, path_source) => {
563 let dest = path_map.tokenize(path_dest);
517 let dest = path_map.tokenize(path_dest);
564 let source = path_map.tokenize(path_source);
518 let source = path_map.tokenize(path_source);
565 let entry;
519 let entry;
566 if let Some(v) = base_copies.get(&source) {
520 if let Some(v) = base_copies.get(&source) {
567 entry = match &v.path {
521 entry = match &v.path {
568 Some(path) => Some((*(path)).to_owned()),
522 Some(path) => Some((*(path)).to_owned()),
569 None => Some(source.to_owned()),
523 None => Some(source.to_owned()),
570 }
524 }
571 } else {
525 } else {
572 entry = Some(source.to_owned());
526 entry = Some(source.to_owned());
573 }
527 }
574 // Each new entry is introduced by the children, we
528 // Each new entry is introduced by the children, we
575 // record this information as we will need it to take
529 // record this information as we will need it to take
576 // the right decision when merging conflicting copy
530 // the right decision when merging conflicting copy
577 // information. See merge_copies_dict for details.
531 // information. See merge_copies_dict for details.
578 match copies.entry(dest) {
532 match copies.entry(dest) {
579 Entry::Vacant(slot) => {
533 Entry::Vacant(slot) => {
580 let ttpc = CopySource::new(current_rev, entry);
534 let ttpc = CopySource::new(current_rev, entry);
581 slot.insert(ttpc);
535 slot.insert(ttpc);
582 }
536 }
583 Entry::Occupied(mut slot) => {
537 Entry::Occupied(mut slot) => {
584 let ttpc = slot.get_mut();
538 let ttpc = slot.get_mut();
585 oracle.record_overwrite(ttpc.rev, current_rev);
586 ttpc.overwrite(current_rev, entry);
539 ttpc.overwrite(current_rev, entry);
587 }
540 }
588 }
541 }
589 }
542 }
590 Action::Removed(deleted_path) => {
543 Action::Removed(deleted_path) => {
591 // We must drop copy information for removed file.
544 // We must drop copy information for removed file.
592 //
545 //
593 // We need to explicitly record them as dropped to
546 // We need to explicitly record them as dropped to
594 // propagate this information when merging two
547 // propagate this information when merging two
595 // InternalPathCopies object.
548 // InternalPathCopies object.
596 let deleted = path_map.tokenize(deleted_path);
549 let deleted = path_map.tokenize(deleted_path);
597 copies.entry(deleted).and_modify(|old| {
550 copies.entry(deleted).and_modify(|old| {
598 oracle.record_overwrite(old.rev, current_rev);
599 old.mark_delete(current_rev);
551 old.mark_delete(current_rev);
600 });
552 });
601 }
553 }
602 }
554 }
603 }
555 }
604 copies
556 copies
605 }
557 }
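
Reviewer note, not part of the patch: the `base_copies.get(&source)` lookup above is what chains renames across revisions; if the parent already records that `b` came from `a`, a new copy `b -> c` is stored as `c <- a`, not `c <- b`. A toy sketch of that chaining rule on plain strings (the paths are made up):

use std::collections::HashMap;

/// Toy chaining rule: record `dest` as copied from the *original* source
/// when the parent mapping already tracks `source` as a copy.
fn chain_copy(
    parent_copies: &HashMap<String, String>, // dest -> original source
    source: &str,
    dest: &str,
) -> (String, String) {
    let original = parent_copies
        .get(source)
        .cloned()
        .unwrap_or_else(|| source.to_string());
    (dest.to_string(), original)
}

fn main() {
    let mut parent = HashMap::new();
    parent.insert("b".to_string(), "a".to_string()); // parent: b came from a
    // The child copies b to c: the recorded source is a, not b.
    assert_eq!(
        chain_copy(&parent, "b", "c"),
        ("c".to_string(), "a".to_string())
    );
}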

/// Merge two copies mappings together, minor and major
///
/// In case of conflict, the value from "major" will be picked, except in
/// some cases. See the inline documentation for details.
-fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
+fn merge_copies_dict(
    path_map: &TwoWayPathMap,
    current_merge: Revision,
    mut minor: InternalPathCopies,
    mut major: InternalPathCopies,
    changes: &ChangedFiles,
-   oracle: &mut AncestorOracle<A>,
) -> InternalPathCopies {
    // This closure exists as a temporary help while multiple developers are
    // actively working on this code. Feel free to re-inline it once this
    // code is more settled.
-   let cmp_value = |oracle: &mut AncestorOracle<A>,
-                    dest: &PathToken,
-                    src_minor: &CopySource,
-                    src_major: &CopySource| {
-       compare_value(
-           path_map,
-           current_merge,
-           changes,
-           oracle,
-           dest,
-           src_minor,
-           src_major,
-       )
-   };
+   let cmp_value =
+       |dest: &PathToken, src_minor: &CopySource, src_major: &CopySource| {
+           compare_value(
+               path_map,
+               current_merge,
+               changes,
+               dest,
+               src_minor,
+               src_major,
+           )
+       };
    if minor.is_empty() {
        major
    } else if major.is_empty() {
        minor
    } else if minor.len() * 2 < major.len() {
        // Let's say we are merging two InternalPathCopies instances A and B.
        //
        // If A contains N items, the merge result will never contain more
        // than N values different from the ones in A.
        //
        // If B contains M items, with M > N, the merge result will always
        // contain at least M - N values different from the ones in A.
        //
        // As a result, if N < (M - N), we know that simply iterating over A
        // will yield fewer differences than iterating over the difference
        // between A and B.
        //
        // This helps performance a lot in cases where a tiny
        // InternalPathCopies is merged with a much larger one.
        for (dest, src_minor) in minor {
            let src_major = major.get(&dest);
            match src_major {
                None => {
                    major.insert(dest, src_minor);
                }
                Some(src_major) => {
                    let (pick, overwrite) =
-                       cmp_value(oracle, &dest, &src_minor, src_major);
+                       cmp_value(&dest, &src_minor, src_major);
                    if overwrite {
-                       oracle.record_overwrite(src_minor.rev, current_merge);
-                       oracle.record_overwrite(src_major.rev, current_merge);
                        let src = match pick {
                            MergePick::Major => CopySource::new_from_merge(
                                current_merge,
                                src_major,
                                &src_minor,
                            ),
                            MergePick::Minor => CopySource::new_from_merge(
                                current_merge,
                                &src_minor,
                                src_major,
                            ),
                            MergePick::Any => CopySource::new_from_merge(
                                current_merge,
                                src_major,
                                &src_minor,
                            ),
                        };
                        major.insert(dest, src);
                    } else {
                        match pick {
                            MergePick::Any | MergePick::Major => None,
                            MergePick::Minor => major.insert(dest, src_minor),
                        };
                    }
                }
            };
        }
        major
    } else if major.len() * 2 < minor.len() {
        // This uses the same rationale as the previous block.
        // (Check the previous block's documentation for details.)
        for (dest, src_major) in major {
            let src_minor = minor.get(&dest);
            match src_minor {
                None => {
                    minor.insert(dest, src_major);
                }
                Some(src_minor) => {
                    let (pick, overwrite) =
-                       cmp_value(oracle, &dest, src_minor, &src_major);
+                       cmp_value(&dest, src_minor, &src_major);
                    if overwrite {
-                       oracle.record_overwrite(src_minor.rev, current_merge);
-                       oracle.record_overwrite(src_major.rev, current_merge);
                        let src = match pick {
                            MergePick::Major => CopySource::new_from_merge(
                                current_merge,
                                &src_major,
                                src_minor,
                            ),
                            MergePick::Minor => CopySource::new_from_merge(
                                current_merge,
                                src_minor,
                                &src_major,
                            ),
                            MergePick::Any => CopySource::new_from_merge(
                                current_merge,
                                &src_major,
                                src_minor,
                            ),
                        };
                        minor.insert(dest, src);
                    } else {
                        match pick {
                            MergePick::Any | MergePick::Minor => None,
                            MergePick::Major => minor.insert(dest, src_major),
                        };
                    }
                }
            };
        }
        minor
    } else {
        let mut override_minor = Vec::new();
        let mut override_major = Vec::new();

        let mut to_major = |k: &PathToken, v: &CopySource| {
            override_major.push((k.clone(), v.clone()))
        };
        let mut to_minor = |k: &PathToken, v: &CopySource| {
            override_minor.push((k.clone(), v.clone()))
        };

        // The diff function leverages detection of identical subparts when
        // minor and major have some common ancestors. This makes it very
        // fast in most cases.
        //
        // In cases where the two maps are vastly different in size, the
        // current approach is still slowish because the iteration will go
        // over all the "exclusive" content of the larger one. This situation
        // can be frequent when the subgraph of revisions we are processing
        // has a lot of roots, each root adding its own entirely new map to
        // the mix (and likely a small map, if the path from the root to the
        // "main path" is short).
        //
        // We could do better by detecting such situations and processing
        // them differently.
        for d in minor.diff(&major) {
            match d {
                DiffItem::Add(k, v) => to_minor(k, v),
                DiffItem::Remove(k, v) => to_major(k, v),
                DiffItem::Update { old, new } => {
                    let (dest, src_major) = new;
                    let (_, src_minor) = old;
                    let (pick, overwrite) =
-                       cmp_value(oracle, dest, src_minor, src_major);
+                       cmp_value(dest, src_minor, src_major);
                    if overwrite {
-                       oracle.record_overwrite(src_minor.rev, current_merge);
-                       oracle.record_overwrite(src_major.rev, current_merge);
                        let src = match pick {
                            MergePick::Major => CopySource::new_from_merge(
                                current_merge,
                                src_major,
                                src_minor,
                            ),
                            MergePick::Minor => CopySource::new_from_merge(
                                current_merge,
                                src_minor,
                                src_major,
                            ),
                            MergePick::Any => CopySource::new_from_merge(
                                current_merge,
                                src_major,
                                src_minor,
                            ),
                        };
                        to_minor(dest, &src);
                        to_major(dest, &src);
                    } else {
                        match pick {
                            MergePick::Major => to_minor(dest, src_major),
                            MergePick::Minor => to_major(dest, src_minor),
                            // If the two entries are identical, there is no
                            // need to do anything (but diff should not have
                            // yielded them).
                            MergePick::Any => unreachable!(),
                        }
                    }
                }
            };
        }

        let updates;
        let mut result;
        if override_major.is_empty() {
            result = major
        } else if override_minor.is_empty() {
            result = minor
        } else {
            if override_minor.len() < override_major.len() {
                updates = override_minor;
                result = minor;
            } else {
                updates = override_major;
                result = major;
            }
            for (k, v) in updates {
                result.insert(k, v);
            }
        }
        result
    }
}
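
Reviewer note, not part of the patch: the size heuristic above deserves a concrete number. With N = 10 entries on the small side and M = 1000 on the large side, at most 10 merged values can differ from the large map, so iterating the 10 small entries beats diffing and touching up to 990 exclusive ones. A sketch of the same dispatch on toy maps, where `max` stands in for the real `compare_value` resolution:

use std::collections::HashMap;

/// Toy dispatch mirroring the heuristic above: when one side is less than
/// half the size of the other, iterate the small side into the big one.
fn merge_maps(
    small: HashMap<String, u32>,
    mut big: HashMap<String, u32>,
) -> HashMap<String, u32> {
    debug_assert!(small.len() * 2 < big.len());
    for (k, v) in small {
        // `max` stands in for the real conflict resolution.
        big.entry(k).and_modify(|cur| *cur = (*cur).max(v)).or_insert(v);
    }
    big
}

fn main() {
    let small: HashMap<_, _> =
        (0..10u32).map(|i| (format!("f{}", i), 100 + i)).collect();
    let big: HashMap<_, _> =
        (0..1000u32).map(|i| (format!("f{}", i), i)).collect();
    // 10 iterations instead of a diff over up to 990 exclusive entries.
    let merged = merge_maps(small, big);
    assert_eq!(merged.len(), 1000);
    assert_eq!(merged["f0"], 100);
}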

/// Represents the side that should prevail when merging two
/// InternalPathCopies
enum MergePick {
    /// The "major" (p1) side prevails
    Major,
    /// The "minor" (p2) side prevails
    Minor,
    /// Any side could be used (because they are the same)
    Any,
}

/// Decide which side prevails in case of conflicting values
#[allow(clippy::if_same_then_else)]
-fn compare_value<A: Fn(Revision, Revision) -> bool>(
+fn compare_value(
    path_map: &TwoWayPathMap,
    current_merge: Revision,
    changes: &ChangedFiles,
-   oracle: &mut AncestorOracle<A>,
    dest: &PathToken,
    src_minor: &CopySource,
    src_major: &CopySource,
) -> (MergePick, bool) {
    if src_major.rev == current_merge {
        if src_minor.rev == current_merge {
            if src_major.path.is_none() {
                // We cannot get different copy information for both p1 and p2
                // from the same revision, unless this was a deletion.
                //
                // However, the deletion might have overwritten different data
                // on each branch.
                let need_over = src_major.overwritten != src_minor.overwritten;
                (MergePick::Any, need_over)
            } else {
                unreachable!();
            }
        } else {
            // The last value comes from the current merge; this value -will-
            // win eventually.
            (MergePick::Major, true)
        }
    } else if src_minor.rev == current_merge {
        // The last value comes from the current merge; this value -will- win
        // eventually.
        (MergePick::Minor, true)
    } else if src_major.path == src_minor.path {
        // We have the same value, but from another source.
        if src_major.rev == src_minor.rev {
            // If the two entries are identical, they are both valid
            debug_assert!(src_major.overwritten == src_minor.overwritten);
            (MergePick::Any, false)
        } else if src_major.is_overwritten_by(src_minor) {
            (MergePick::Minor, false)
        } else if src_minor.is_overwritten_by(src_major) {
            (MergePick::Major, false)
        } else {
            (MergePick::Any, true)
        }
    } else if src_major.rev == src_minor.rev {
        // We cannot get copy information for both p1 and p2 in the
        // same rev. So this is the same value.
        unreachable!(
            "conflicting information from p1 and p2 in the same revision"
        );
    } else {
        let dest_path = path_map.untokenize(*dest);
        let action = changes.get_merge_case(dest_path);
        if src_minor.path.is_some()
            && src_major.path.is_none()
            && action == MergeCase::Salvaged
        {
            // If the file is "deleted" on the major side but was
            // salvaged by the merge, we keep the minor side alive.
            (MergePick::Minor, true)
        } else if src_major.path.is_some()
            && src_minor.path.is_none()
            && action == MergeCase::Salvaged
        {
            // If the file is "deleted" on the minor side but was
            // salvaged by the merge, unconditionally preserve the
            // major side.
            (MergePick::Major, true)
        } else if src_minor.is_overwritten_by(src_major) {
            // The information from the minor version is strictly older than
            // the major version.
            if action == MergeCase::Merged {
                // If the file was actively merged, it means some non-copy
                // activity happened on the other branch, so the older copy
                // information is still relevant.
                //
                // The major side wins such conflicts.
                (MergePick::Major, true)
            } else {
                // No activity on the minor branch, pick the newer one.
                (MergePick::Major, false)
            }
        } else if src_major.is_overwritten_by(src_minor) {
            if action == MergeCase::Merged {
                // If the file was actively merged, it means some non-copy
                // activity happened on the other branch, so the older copy
                // information is still relevant.
                //
                // The major side wins such conflicts.
                (MergePick::Major, true)
            } else {
                // No activity on the minor branch, pick the newer one.
                (MergePick::Minor, false)
            }
        } else if src_minor.path.is_none() {
            // The minor side has no relevant information, pick the alive one.
            (MergePick::Major, true)
        } else if src_major.path.is_none() {
            // The major side has no relevant information, pick the alive one.
            (MergePick::Minor, true)
        } else {
            // By default the major side wins.
            (MergePick::Major, true)
        }
    }
}
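
Reviewer note, not part of the patch: the `(MergePick, bool)` pair returned above encodes two independent decisions: which side's value survives, and whether the merging revision must record a fresh overwrite. A simplified mirror of the liveness/salvage branches only (toy booleans standing in for the real CopySource and MergeCase inputs):

/// Simplified mirror of the tail of the decision above: `*_alive` says
/// whether the side still has a path, `salvaged` mirrors MergeCase::Salvaged.
fn pick_side(
    major_alive: bool,
    minor_alive: bool,
    salvaged: bool,
) -> (&'static str, bool) {
    if minor_alive && !major_alive && salvaged {
        ("minor", true) // deleted on major but salvaged: keep minor alive
    } else if major_alive && !minor_alive && salvaged {
        ("major", true) // deleted on minor but salvaged: keep major alive
    } else if !minor_alive {
        ("major", true) // minor has nothing relevant, pick the alive one
    } else if !major_alive {
        ("minor", true) // major has nothing relevant, pick the alive one
    } else {
        ("major", true) // by default the major side wins
    }
}

fn main() {
    assert_eq!(pick_side(false, true, true), ("minor", true));
    assert_eq!(pick_side(true, true, false), ("major", true));
}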
@@ -1,148 +1,123 @@
use cpython::ObjectProtocol;
-use cpython::PyBool;
use cpython::PyBytes;
use cpython::PyDict;
use cpython::PyList;
use cpython::PyModule;
use cpython::PyObject;
use cpython::PyResult;
use cpython::PyTuple;
use cpython::Python;

use hg::copy_tracing::combine_changeset_copies;
use hg::copy_tracing::ChangedFiles;
use hg::copy_tracing::DataHolder;
use hg::copy_tracing::RevInfo;
use hg::copy_tracing::RevInfoMaker;
use hg::Revision;

/// Combines the copy information contained in the `revs` revisions to build
/// a copy map.
///
/// See mercurial/copies.py for details
pub fn combine_changeset_copies_wrapper(
    py: Python,
    revs: PyList,
    children_count: PyDict,
    target_rev: Revision,
    rev_info: PyObject,
-   is_ancestor: PyObject,
) -> PyResult<PyDict> {
    let revs: PyResult<_> =
        revs.iter(py).map(|r| Ok(r.extract(py)?)).collect();

-   // Wrap the `is_ancestor` python callback as a Rust closure
-   //
-   // No errors are expected from the Python side; they should only happen
-   // in case of a programming error or severe data corruption. Such errors
-   // raise a panic, and the rust-cpython harness turns it into a Python
-   // exception.
-   let is_ancestor_wrap = |anc: Revision, desc: Revision| -> bool {
-       is_ancestor
-           .call(py, (anc, desc), None)
-           .expect(
-               "rust-copy-tracing: python call to `is_ancestor` \
-               failed",
-           )
-           .cast_into::<PyBool>(py)
-           .expect(
-               "rust-copy-tracing: python call to `is_ancestor` \
-               returned unexpected non-Bool value",
-           )
-           .is_true()
-   };
-
    // Wrap the `rev_info_maker` python callback as a Rust closure
    //
    // No errors are expected from the Python side; they should only happen
    // in case of a programming error or severe data corruption. Such errors
    // raise a panic, and the rust-cpython harness turns it into a Python
    // exception.
    let rev_info_maker: RevInfoMaker<PyBytes> =
        Box::new(|rev: Revision, d: &mut DataHolder<PyBytes>| -> RevInfo {
            let res: PyTuple = rev_info
                .call(py, (rev,), None)
                .expect("rust-copy-tracing: python call to `rev_info` failed")
                .cast_into(py)
                .expect(
                    "rust-copy_tracing: python call to `rev_info` returned \
                    unexpected non-Tuple value",
                );
            let p1 = res.get_item(py, 0).extract(py).expect(
                "rust-copy-tracing: rev_info return is invalid, first item \
                is not a revision",
            );
            let p2 = res.get_item(py, 1).extract(py).expect(
                "rust-copy-tracing: rev_info return is invalid, second item \
                is not a revision",
            );

            let files = match res.get_item(py, 2).extract::<PyBytes>(py) {
                Ok(raw) => {
                    // Give responsibility for the raw bytes' lifetime to
                    // hg-core
                    d.data = Some(raw);
                    let addrs = d.data.as_ref().expect(
                        "rust-copy-tracing: failed to get a reference to the \
                        raw bytes for copy data").data(py);
                    ChangedFiles::new(addrs)
                }
                // The value was presumably None, meaning there was no copy
                // data.
                Err(_) => ChangedFiles::new_empty(),
            };

            (p1, p2, files)
        });
    let children_count: PyResult<_> = children_count
        .items(py)
        .iter()
        .map(|(k, v)| Ok((k.extract(py)?, v.extract(py)?)))
        .collect();

    let res = combine_changeset_copies(
        revs?,
        children_count?,
        target_rev,
        rev_info_maker,
-       &is_ancestor_wrap,
    );
    let out = PyDict::new(py);
    for (dest, source) in res.into_iter() {
        out.set_item(
            py,
            PyBytes::new(py, &dest.into_vec()),
            PyBytes::new(py, &source.into_vec()),
        )?;
    }
    Ok(out)
}

/// Create the module, with `__package__` given from parent
pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
    let dotted_name = &format!("{}.copy_tracing", package);
    let m = PyModule::new(py, dotted_name)?;

    m.add(py, "__package__", package)?;
    m.add(py, "__doc__", "Copy tracing - Rust implementation")?;

    m.add(
        py,
        "combine_changeset_copies",
        py_fn!(
            py,
            combine_changeset_copies_wrapper(
                revs: PyList,
                children: PyDict,
                target_rev: Revision,
-               rev_info: PyObject,
-               is_ancestor: PyObject
+               rev_info: PyObject
            )
        ),
    )?;

    let sys = PyModule::import(py, "sys")?;
    let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
    sys_modules.set_item(py, dotted_name, &m)?;

    Ok(m)
}
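
Reviewer note, not part of the patch: the removed `is_ancestor_wrap` followed the same pattern the surviving `rev_info_maker` closure still uses: call into Python, panic on any error, and let the rust-cpython harness surface the panic as an exception. A minimal standalone version of that wrapping pattern; the function name and predicate signature below are illustrative only:

use cpython::{ObjectProtocol, PyObject, Python};

/// Wrap an arbitrary Python predicate as a Rust closure, panicking on any
/// Python-side failure (the rust-cpython harness turns the panic into a
/// Python exception). `callback` is assumed to take one int and return a
/// bool.
fn wrap_predicate<'a>(
    py: Python<'a>,
    callback: &'a PyObject,
) -> impl Fn(i32) -> bool + 'a {
    move |arg: i32| {
        callback
            .call(py, (arg,), None)
            .expect("python callback failed")
            .extract::<bool>(py)
            .expect("python callback returned a non-bool value")
    }
}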