copies: iterate over children directly (instead of parents)...
marmoute
r46764:294d5aca default
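The heart of the change: instead of walking each parent and chaining into its list of children, the new code walks the children directly and keeps, per parent, a count of how many children still need its data. A rough sketch of the new iteration order (toy inputs and simplified names, not the actual implementation):

```
# Hedged sketch: visit every child once; for each parent, decrement a
# remaining-children counter and either peek at or release the parent's
# accumulated copy data. Toy inputs stand in for the real changelog.
revs = [2, 3]                          # children to visit, in sorted order
revinfo = {2: (0, -1), 3: (1, 2)}.get  # rev -> (p1, p2), made up
children_count = {0: 1, 1: 1, 2: 1, 3: 0}
all_copies = {0: {}, 1: {}}
for current_rev in revs:
    p1, p2 = revinfo(current_rev)
    for parent_rev in (p1, p2):
        remaining = children_count.get(parent_rev)
        if remaining is None:
            continue  # this edge is outside the tracked graph
        children_count[parent_rev] = remaining - 1
        if remaining - 1:
            copies = all_copies.get(parent_rev)        # others still need it
        else:
            copies = all_copies.pop(parent_rev, None)  # last consumer
```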
mercurial/copies.py
@@ -1,1210 +1,1263 @@
1 1 # coding: utf8
2 2 # copies.py - copy detection for Mercurial
3 3 #
4 4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import collections
12 12 import os
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 nullid,
17 17 nullrev,
18 18 )
19 19
20 20 from . import (
21 21 match as matchmod,
22 22 pathutil,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 )
27 27
28 28
29 29 from .utils import stringutil
30 30
31 31 from .revlogutils import (
32 32 flagutil,
33 33 sidedata as sidedatamod,
34 34 )
35 35
36 36 rustmod = policy.importrust("copy_tracing")
37 37
38 38
39 39 def _filter(src, dst, t):
40 40 """filters out invalid copies after chaining"""
41 41
42 42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 44 # in the following table (not including trivial cases). For example, case 2
45 45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 46 # then was renamed between 'mid' and 'dst'.
47 47 #
48 48 # case src mid dst result
49 49 # 1 x y - -
50 50 # 2 x y y x->y
51 51 # 3 x y x -
52 52 # 4 x y z x->z
53 53 # 5 - x y -
54 54 # 6 x x y x->y
55 55 #
56 56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 58 # between 5 and 6, so it includes all cases in its result.
59 59 # Cases 1, 3, and 5 are then removed by _filter().
60 60
61 61 for k, v in list(t.items()):
62 62 # remove copies from files that didn't exist
63 63 if v not in src:
64 64 del t[k]
65 65 # remove criss-crossed copies
66 66 elif k in src and v in dst:
67 67 del t[k]
68 68 # remove copies to files that were then removed
69 69 elif k not in dst:
70 70 del t[k]
71 71
72 72
73 73 def _chain(prefix, suffix):
74 74 """chain two sets of copies 'prefix' and 'suffix'"""
75 75 result = prefix.copy()
76 76 for key, value in pycompat.iteritems(suffix):
77 77 result[key] = prefix.get(value, value)
78 78 return result
79 79
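A quick plain-dict illustration of what `_chain()` computes (example data, not from the source); `_filter()` then prunes the invalid cases listed in the table above:

```
# 'a' was renamed to 'b' (src -> mid), then 'b' to 'c' (mid -> dst).
prefix = {b'b': b'a'}
suffix = {b'c': b'b'}
result = prefix.copy()
for key, value in suffix.items():
    # resolve each new destination through the first mapping when possible
    result[key] = prefix.get(value, value)
assert result == {b'b': b'a', b'c': b'a'}
```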
80 80
81 81 def _tracefile(fctx, am, basemf):
82 82 """return file context that is the ancestor of fctx present in ancestor
83 83 manifest am
84 84
85 85 Note: we used to try and stop after a given limit, however checking if that
86 86 limit is reached turned out to be very expensive. We are better off
87 87 disabling that feature."""
88 88
89 89 for f in fctx.ancestors():
90 90 path = f.path()
91 91 if am.get(path, None) == f.filenode():
92 92 return path
93 93 if basemf and basemf.get(path, None) == f.filenode():
94 94 return path
95 95
96 96
97 97 def _dirstatecopies(repo, match=None):
98 98 ds = repo.dirstate
99 99 c = ds.copies().copy()
100 100 for k in list(c):
101 101 if ds[k] not in b'anm' or (match and not match(k)):
102 102 del c[k]
103 103 return c
104 104
105 105
106 106 def _computeforwardmissing(a, b, match=None):
107 107 """Computes which files are in b but not a.
108 108 This is its own function so extensions can easily wrap this call to see what
109 109 files _forwardcopies is about to process.
110 110 """
111 111 ma = a.manifest()
112 112 mb = b.manifest()
113 113 return mb.filesnotin(ma, match=match)
114 114
115 115
116 116 def usechangesetcentricalgo(repo):
117 117 """Checks if we should use changeset-centric copy algorithms"""
118 118 if repo.filecopiesmode == b'changeset-sidedata':
119 119 return True
120 120 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
121 121 changesetsource = (b'changeset-only', b'compatibility')
122 122 return readfrom in changesetsource
123 123
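For reference, the two changeset-centric sources checked above correspond to this hgrc setting (values taken from the tuple in the code):

```
[experimental]
# either value selects the changeset-centric algorithms:
copies.read-from = changeset-only
# copies.read-from = compatibility
```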
124 124
125 125 def _committedforwardcopies(a, b, base, match):
126 126 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
127 127 # files might have to be traced back to the fctx parent of the last
128 128 # one-side-only changeset, but not further back than that
129 129 repo = a._repo
130 130
131 131 if usechangesetcentricalgo(repo):
132 132 return _changesetforwardcopies(a, b, match)
133 133
134 134 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
135 135 dbg = repo.ui.debug
136 136 if debug:
137 137 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
138 138 am = a.manifest()
139 139 basemf = None if base is None else base.manifest()
140 140
141 141 # find where new files came from
142 142 # we currently don't try to find where old files went, too expensive
143 143 # this means we can miss a case like 'hg rm b; hg cp a b'
144 144 cm = {}
145 145
146 146 # Computing the forward missing is quite expensive on large manifests, since
147 147 # it compares the entire manifests. We can optimize it in the common use
148 148 # case of computing what copies are in a commit versus its parent (like
149 149 # during a rebase or histedit). Note, we exclude merge commits from this
150 150 # optimization, since the ctx.files() for a merge commit is not correct for
151 151 # this comparison.
152 152 forwardmissingmatch = match
153 153 if b.p1() == a and b.p2().node() == nullid:
154 154 filesmatcher = matchmod.exact(b.files())
155 155 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
156 156 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
157 157
158 158 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
159 159
160 160 if debug:
161 161 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
162 162
163 163 for f in sorted(missing):
164 164 if debug:
165 165 dbg(b'debug.copies: tracing file: %s\n' % f)
166 166 fctx = b[f]
167 167 fctx._ancestrycontext = ancestrycontext
168 168
169 169 if debug:
170 170 start = util.timer()
171 171 opath = _tracefile(fctx, am, basemf)
172 172 if opath:
173 173 if debug:
174 174 dbg(b'debug.copies: rename of: %s\n' % opath)
175 175 cm[f] = opath
176 176 if debug:
177 177 dbg(
178 178 b'debug.copies: time: %f seconds\n'
179 179 % (util.timer() - start)
180 180 )
181 181 return cm
182 182
183 183
184 184 def _revinfo_getter(repo, match):
185 185 """returns a function that returns the following data given a <rev>"
186 186
187 187 * p1: revision number of first parent
188 188 * p2: revision number of second parent
189 189 * changes: a ChangingFiles object
190 190 """
191 191 cl = repo.changelog
192 192 parents = cl.parentrevs
193 193 flags = cl.flags
194 194
195 195 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
196 196
197 197 changelogrevision = cl.changelogrevision
198 198
199 199 # A small cache to avoid doing the work twice for merges
200 200 #
201 201 # In the vast majority of cases, if we ask for information about a
202 202 # revision from one parent, we'll later ask for it from the other. So it
203 203 # makes sense to keep the information around when reaching the first
204 204 # parent of a merge and drop it after the second parent has used it.
205 205 #
206 206 # There are cases where only one parent of a merge will be walked. This
207 207 # happens when the "destination" of the copy tracing descends from a
208 208 # new root, not common with the "source". In that case, we will only walk
209 209 # through merge parents that are descendants of changesets common
210 210 # between "source" and "destination".
211 211 #
212 212 # With the current implementation, if such changesets carry copy
213 213 # information, we'll keep them in memory until the end of
214 214 # _changesetforwardcopies. We don't expect this case to be frequent
215 215 # enough to matter.
216 216 #
217 217 # In addition, it would be possible to reach a pathological case where
218 218 # many first parents are met before any second parent is reached. In
219 219 # that case the cache could grow. If this ever becomes an issue, one can
220 220 # safely introduce a maximum cache size. This would trade extra CPU/IO
221 221 # time to save memory.
222 222 merge_caches = {}
223 223
224 224 alwaysmatch = match.always()
225 225
226 226 if rustmod is not None and alwaysmatch:
227 227
228 228 def revinfo(rev):
229 229 p1, p2 = parents(rev)
230 230 value = None
231 231 e = merge_caches.pop(rev, None)
232 232 if e is not None:
233 233 return e
234 234 if flags(rev) & HASCOPIESINFO:
235 235 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
236 236 else:
237 237 raw = None
238 238 value = (p1, p2, raw)
239 239 if p1 != nullrev and p2 != nullrev:
240 240 # XXX some case we over cache, IGNORE
241 241 merge_caches[rev] = value
242 242 return value
243 243
244 244 else:
245 245
246 246 def revinfo(rev):
247 247 p1, p2 = parents(rev)
248 248 value = None
249 249 e = merge_caches.pop(rev, None)
250 250 if e is not None:
251 251 return e
252 252 changes = None
253 253 if flags(rev) & HASCOPIESINFO:
254 254 changes = changelogrevision(rev).changes
255 255 value = (p1, p2, changes)
256 256 if p1 != nullrev and p2 != nullrev:
257 257 # XXX some case we over cache, IGNORE
258 258 merge_caches[rev] = value
259 259 return value
260 260
261 261 return revinfo
262 262
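The pop-based cache described above boils down to this pattern (illustrative helper, not part of the module): an entry is written when a merge is first computed and consumed by the second lookup, so it lives exactly between the two parent visits.

```
cache = {}

def cached_revinfo(rev, compute, is_merge):
    value = cache.pop(rev, None)  # the second lookup consumes the entry
    if value is None:
        value = compute(rev)
        if is_merge(rev):
            cache[rev] = value    # keep it around for the other parent
    return value
```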
263 263
264 264 def cached_is_ancestor(is_ancestor):
265 265 """return a cached version of is_ancestor"""
266 266 cache = {}
267 267
268 268 def _is_ancestor(anc, desc):
269 269 if anc > desc:
270 270 return False
271 271 elif anc == desc:
272 272 return True
273 273 key = (anc, desc)
274 274 ret = cache.get(key)
275 275 if ret is None:
276 276 ret = cache[key] = is_ancestor(anc, desc)
277 277 return ret
278 278
279 279 return _is_ancestor
280 280
281 281
282 282 def _changesetforwardcopies(a, b, match):
283 283 if a.rev() in (nullrev, b.rev()):
284 284 return {}
285 285
286 286 repo = a.repo().unfiltered()
287 287 children = {}
288 288
289 289 cl = repo.changelog
290 290 isancestor = cl.isancestorrev
291 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
292 mrset = set(missingrevs)
291
292 # To track a rename from "A" to "B", we need to gather all parent → children
293 # edges that are contained in `::B` but not in `::A`.
294 #
295 #
296 # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
297 # ::a`) and also all the "root points", ie the parents of the exclusive set
298 # that belong to `::a`. These are exactly all the revisions needed to express
299 # the parent → children edges we need to combine.
300 #
301 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
302 # excluding paths that lead to roots that are not ancestors of `a`. We keep
303 # this out of the explanation because it is hard enough without this
304 # special case.
305 parents = cl._uncheckedparentrevs
306 graph_roots = (nullrev, nullrev)
307
308 ancestors = cl.ancestors([a.rev()], inclusive=True)
309 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
293 310 roots = set()
294 for r in missingrevs:
295 for p in cl.parentrevs(r):
296 if p == nullrev:
297 continue
298 if p not in children:
299 children[p] = [r]
300 else:
301 children[p].append(r)
302 if p not in mrset:
303 roots.add(p)
311 has_graph_roots = False
312
313 # iterate over `only(B, A)`
314 for r in revs:
315 ps = parents(r)
316 if ps == graph_roots:
317 has_graph_roots = True
318 else:
319 p1, p2 = ps
320
321 # find all the "root points" (see larger comment above)
322 if p1 != nullrev and p1 in ancestors:
323 roots.add(p1)
324 if p2 != nullrev and p2 in ancestors:
325 roots.add(p2)
304 326 if not roots:
305 327 # no common revision to track copies from
306 328 return {}
307 min_root = min(roots)
308
309 from_head = set(
310 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
311 )
312
313 iterrevs = set(from_head)
314 iterrevs &= mrset
315 iterrevs.update(roots)
316 iterrevs.remove(b.rev())
317 revs = sorted(iterrevs)
329 if has_graph_roots:
330 # this deals with the special case mentioned in the [1] footnote. We
331 # must filter out revisions that lead to non-common graph roots.
332 roots = list(roots)
333 m = min(roots)
334 h = [b.rev()]
335 roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
336 roots_to_head = set(roots_to_head)
337 revs = [r for r in revs if r in roots_to_head]
318 338
319 339 if repo.filecopiesmode == b'changeset-sidedata':
340 # When using side-data, we will process the edges "from" the children.
341 # We iterate over the children, gathering previously collected data for
342 # the parents. To know when a parent's data is no longer necessary, we
343 # keep a counter of how many children each revision has.
344 #
345 # An interesting property of `children_count` is that it only contains
346 # revisions that will be relevant for an edge of the graph. So if a
347 # child has a parent not in `children_count`, that edge should not be
348 # processed.
349 children_count = dict((r, 0) for r in roots)
350 for r in revs:
351 for p in cl.parentrevs(r):
352 if p == nullrev:
353 continue
354 children_count[r] = 0
355 if p in children_count:
356 children_count[p] += 1
320 357 revinfo = _revinfo_getter(repo, match)
321 358 return _combine_changeset_copies(
322 revs, children, b.rev(), revinfo, match, isancestor
359 revs, children_count, b.rev(), revinfo, match, isancestor
323 360 )
324 361 else:
362 # When not using side-data, we will process the edges "from" the parents,
363 # so we need a full mapping of the parent -> children relation.
364 children = dict((r, []) for r in roots)
365 for r in revs:
366 for p in cl.parentrevs(r):
367 if p == nullrev:
368 continue
369 children[r] = []
370 if p in children:
371 children[p].append(r)
372 x = revs.pop()
373 assert x == b.rev()
374 revs.extend(roots)
375 revs.sort()
376
325 377 revinfo = _revinfo_getter_extra(repo)
326 378 return _combine_changeset_copies_extra(
327 379 revs, children, b.rev(), revinfo, match, isancestor
328 380 )
329 381
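A toy run of the `children_count` bookkeeping from the side-data branch above, on a hypothetical five-revision graph (nullrev is -1; only parents that are themselves tracked get counted):

```
nullrev = -1
parents = {2: (0, nullrev), 3: (1, 2), 4: (3, nullrev)}  # made-up graph
roots, revs = {0, 1}, [2, 3, 4]
children_count = {r: 0 for r in roots}
for r in revs:
    children_count[r] = 0
    for p in parents[r]:
        if p != nullrev and p in children_count:
            children_count[p] += 1
# every revision except the head (4) has exactly one relevant child here
assert children_count == {0: 1, 1: 1, 2: 1, 3: 1, 4: 0}
```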
330 382
331 383 def _combine_changeset_copies(
332 revs, children, targetrev, revinfo, match, isancestor
384 revs, children_count, targetrev, revinfo, match, isancestor
333 385 ):
334 386 """combine the copies information for each item of iterrevs
335 387
336 388 revs: sorted iterable of revisions to visit
337 children: a {parent: [children]} mapping.
389 children_count: a {parent: <number-of-relevant-children>} mapping.
338 390 targetrev: the final copies destination revision (not in iterrevs)
339 391 revinfo(rev): a function that returns (p1, p2, changes) for a revision
340 392 match: a matcher
341 393
342 394 It returns the aggregated copies information for `targetrev`.
343 395 """
344 396
345 397 alwaysmatch = match.always()
346 398
347 399 if rustmod is not None and alwaysmatch:
348 400 return rustmod.combine_changeset_copies(
349 list(revs), children, targetrev, revinfo, isancestor
401 list(revs), children_count, targetrev, revinfo, isancestor
350 402 )
351 403
352 404 isancestor = cached_is_ancestor(isancestor)
353 405
354 406 all_copies = {}
355 # iterate over all the "parent" side of copy tracing "edge"
356 for r in revs:
357 # fetch potential previously computed data for that parent
358 copies = all_copies.pop(r, None)
359 if copies is None:
360 # this is a root
361 copies = {}
407 # iterate over the "children" side of each copy tracing "edge"
408 for current_rev in revs:
409 p1, p2, changes = revinfo(current_rev)
410 current_copies = None
362 411
363 # iterate over all known children to chain the existing data with the
412 # iterate over all parents to chain the existing data with the
364 413 # data from the parent → child edge.
365 for i, c in enumerate(children[r]):
366 p1, p2, changes = revinfo(c)
367 childcopies = {}
414 for parent, parent_rev in ((1, p1), (2, p2)):
415 if parent_rev == nullrev:
416 continue
417 remaining_children = children_count.get(parent_rev)
418 if remaining_children is None:
419 continue
420 remaining_children -= 1
421 children_count[parent_rev] = remaining_children
422 if remaining_children:
423 copies = all_copies.get(parent_rev, None)
424 else:
425 copies = all_copies.pop(parent_rev, None)
368 426
369 # select the right parent → child edge
370 if r == p1:
371 parent = 1
372 if changes is not None:
427 if copies is None:
428 # this is a root
429 copies = {}
430
431 newcopies = copies
432 # chain the data in the edge with the existing data
433 if changes is not None:
434 childcopies = {}
435 if parent == 1:
373 436 childcopies = changes.copied_from_p1
374 else:
375 assert r == p2
376 parent = 2
377 if changes is not None:
437 elif parent == 2:
378 438 childcopies = changes.copied_from_p2
379 if not alwaysmatch:
380 childcopies = {
381 dst: src for dst, src in childcopies.items() if match(dst)
382 }
383 439
384 # chain the data in the edge with the existing data
385 newcopies = copies
386 if childcopies:
387 newcopies = copies.copy()
388 for dest, source in pycompat.iteritems(childcopies):
389 prev = copies.get(source)
390 if prev is not None and prev[1] is not None:
391 source = prev[1]
392 newcopies[dest] = (c, source)
393 assert newcopies is not copies
394 if changes is not None and changes.removed:
395 if newcopies is copies:
440 if not alwaysmatch:
441 childcopies = {
442 dst: src
443 for dst, src in childcopies.items()
444 if match(dst)
445 }
446 if childcopies:
396 447 newcopies = copies.copy()
397 for f in changes.removed:
398 if f in newcopies:
399 if newcopies is copies:
400 # copy on write to avoid affecting potential other
401 # branches. when there are no other branches, this
402 # could be avoided.
403 newcopies = copies.copy()
404 newcopies[f] = (c, None)
448 for dest, source in pycompat.iteritems(childcopies):
449 prev = copies.get(source)
450 if prev is not None and prev[1] is not None:
451 source = prev[1]
452 newcopies[dest] = (current_rev, source)
453 assert newcopies is not copies
454 if changes.removed:
455 if newcopies is copies:
456 newcopies = copies.copy()
457 for f in changes.removed:
458 if f in newcopies:
459 if newcopies is copies:
460 # copy on write to avoid affecting potential other
461 # branches. when there are no other branches, this
462 # could be avoided.
463 newcopies = copies.copy()
464 newcopies[f] = (current_rev, None)
405 465
406 466 # check potential need to combine the data from another parent (for
407 467 # that child). See comment below for details.
408 othercopies = all_copies.get(c)
409 if othercopies is None:
410 all_copies[c] = newcopies
411 elif newcopies is othercopies:
468 if current_copies is None:
469 current_copies = newcopies
470 elif current_copies is newcopies:
412 471 # nothing to merge:
413 472 pass
414 473 else:
415 474 # we are the second parent to work on c, we need to merge our
416 475 # work with the other.
417 476 #
418 477 # In case of conflict, parent 1 takes precedence over parent 2.
419 478 # This is an arbitrary choice made anew when implementing
420 479 # changeset based copies. It was made without regard to
421 480 # potential filelog related behavior.
422 if parent == 1:
423 if newcopies is copies:
424 newcopies = copies.copy()
425 minor, major = othercopies, newcopies
426 else:
427 # we do not know if the other dict is a copy or not, so we
428 # need to blindly copy it. Future change should make this
429 # unnecessary.
430 minor, major = newcopies, othercopies.copy()
431 copies = _merge_copies_dict(minor, major, isancestor, changes)
432 all_copies[c] = copies
481 assert parent == 2
482 current_copies = _merge_copies_dict(
483 newcopies, current_copies, isancestor, changes
484 )
485 all_copies[current_rev] = current_copies
433 486
434 487 # filter out internal details and return a {dest: source mapping}
435 488 final_copies = {}
436 489 for dest, (tt, source) in all_copies[targetrev].items():
437 490 if source is not None:
438 491 final_copies[dest] = source
439 492 return final_copies
440 493
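The copy-on-write pattern used throughout the loop above, in isolation (illustrative values): a child first aliases its parent's mapping and only pays for a real copy on its first mutation, so sibling branches never observe each other's edits.

```
parent_copies = {b'dst': (5, b'src')}  # made-up content
newcopies = parent_copies              # plain alias, zero cost so far
# the first mutation triggers the actual copy
newcopies = parent_copies.copy()
newcopies[b'dst'] = (6, None)          # e.g. the file was removed in rev 6
assert parent_copies[b'dst'] == (5, b'src')  # the parent dict is untouched
```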
441 494
442 495 def _merge_copies_dict(minor, major, isancestor, changes):
443 496 """merge two copies-mapping together, minor and major
444 497
445 498 In case of conflict, value from "major" will be picked.
446 499
447 500 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
448 501 ancestors of `high_rev`,
449 502
450 503 - `ismerged(path)`: callable return True if `path` have been merged in the
451 504 current revision,
452 505
453 506 return the resulting dict (in practice, the "minor" object, updated)
454 507 """
455 508 for dest, value in major.items():
456 509 other = minor.get(dest)
457 510 if other is None:
458 511 minor[dest] = value
459 512 else:
460 513 new_tt = value[0]
461 514 other_tt = other[0]
462 515 if value[1] == other[1]:
463 516 continue
464 517 # content from "major" wins, unless it is older
465 518 # than the branch point or there is a merge
466 519 if new_tt == other_tt:
467 520 minor[dest] = value
468 521 elif (
469 522 changes is not None
470 523 and value[1] is None
471 524 and dest in changes.salvaged
472 525 ):
473 526 pass
474 527 elif (
475 528 changes is not None
476 529 and other[1] is None
477 530 and dest in changes.salvaged
478 531 ):
479 532 minor[dest] = value
480 533 elif changes is not None and dest in changes.merged:
481 534 minor[dest] = value
482 535 elif not isancestor(new_tt, other_tt):
483 536 if value[1] is not None:
484 537 minor[dest] = value
485 538 elif isancestor(other_tt, new_tt):
486 539 minor[dest] = value
487 540 return minor
488 541
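A hypothetical call showing the precedence rule, assuming integer revisions and a linear history where smaller revisions are ancestors of larger ones:

```
minor = {b'f': (3, b'old'), b'g': (2, b'x')}
major = {b'f': (5, b'new')}  # rev 3 is an ancestor of rev 5
merged = _merge_copies_dict(minor, major, lambda a, b: a <= b, None)
assert merged[b'f'] == (5, b'new')  # major wins: it is not the older side
assert merged[b'g'] == (2, b'x')    # entries only present in minor survive
```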
489 542
490 543 def _revinfo_getter_extra(repo):
491 544 """return a function that return multiple data given a <rev>"i
492 545
493 546 * p1: revision number of first parent
494 547 * p2: revision number of second parent
495 548 * p1copies: mapping of copies from p1
496 549 * p2copies: mapping of copies from p2
497 550 * removed: a list of removed files
498 551 * ismerged: a callback to know if file was merged in that revision
499 552 """
500 553 cl = repo.changelog
501 554 parents = cl.parentrevs
502 555
503 556 def get_ismerged(rev):
504 557 ctx = repo[rev]
505 558
506 559 def ismerged(path):
507 560 if path not in ctx.files():
508 561 return False
509 562 fctx = ctx[path]
510 563 parents = fctx._filelog.parents(fctx._filenode)
511 564 nb_parents = 0
512 565 for n in parents:
513 566 if n != nullid:
514 567 nb_parents += 1
515 568 return nb_parents >= 2
516 569
517 570 return ismerged
518 571
519 572 def revinfo(rev):
520 573 p1, p2 = parents(rev)
521 574 ctx = repo[rev]
522 575 p1copies, p2copies = ctx._copies
523 576 removed = ctx.filesremoved()
524 577 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
525 578
526 579 return revinfo
527 580
528 581
529 582 def _combine_changeset_copies_extra(
530 583 revs, children, targetrev, revinfo, match, isancestor
531 584 ):
532 585 """version of `_combine_changeset_copies` that works with the Google
533 586 specific "extra" based storage for copy information"""
534 587 all_copies = {}
535 588 alwaysmatch = match.always()
536 589 for r in revs:
537 590 copies = all_copies.pop(r, None)
538 591 if copies is None:
539 592 # this is a root
540 593 copies = {}
541 594 for i, c in enumerate(children[r]):
542 595 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
543 596 if r == p1:
544 597 parent = 1
545 598 childcopies = p1copies
546 599 else:
547 600 assert r == p2
548 601 parent = 2
549 602 childcopies = p2copies
550 603 if not alwaysmatch:
551 604 childcopies = {
552 605 dst: src for dst, src in childcopies.items() if match(dst)
553 606 }
554 607 newcopies = copies
555 608 if childcopies:
556 609 newcopies = copies.copy()
557 610 for dest, source in pycompat.iteritems(childcopies):
558 611 prev = copies.get(source)
559 612 if prev is not None and prev[1] is not None:
560 613 source = prev[1]
561 614 newcopies[dest] = (c, source)
562 615 assert newcopies is not copies
563 616 for f in removed:
564 617 if f in newcopies:
565 618 if newcopies is copies:
566 619 # copy on write to avoid affecting potential other
567 620 # branches. when there are no other branches, this
568 621 # could be avoided.
569 622 newcopies = copies.copy()
570 623 newcopies[f] = (c, None)
571 624 othercopies = all_copies.get(c)
572 625 if othercopies is None:
573 626 all_copies[c] = newcopies
574 627 else:
575 628 # we are the second parent to work on c, we need to merge our
576 629 # work with the other.
577 630 #
578 631 # In case of conflict, parent 1 takes precedence over parent 2.
579 632 # This is an arbitrary choice made anew when implementing
580 633 # changeset based copies. It was made without regard to
581 634 # potential filelog related behavior.
582 635 if parent == 1:
583 636 _merge_copies_dict_extra(
584 637 othercopies, newcopies, isancestor, ismerged
585 638 )
586 639 else:
587 640 _merge_copies_dict_extra(
588 641 newcopies, othercopies, isancestor, ismerged
589 642 )
590 643 all_copies[c] = newcopies
591 644
592 645 final_copies = {}
593 646 for dest, (tt, source) in all_copies[targetrev].items():
594 647 if source is not None:
595 648 final_copies[dest] = source
596 649 return final_copies
597 650
598 651
599 652 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
600 653 """version of `_merge_copies_dict` that works with the Google
601 654 specific "extra" based storage for copy information"""
602 655 for dest, value in major.items():
603 656 other = minor.get(dest)
604 657 if other is None:
605 658 minor[dest] = value
606 659 else:
607 660 new_tt = value[0]
608 661 other_tt = other[0]
609 662 if value[1] == other[1]:
610 663 continue
611 664 # content from "major" wins, unless it is older
612 665 # than the branch point or there is a merge
613 666 if (
614 667 new_tt == other_tt
615 668 or not isancestor(new_tt, other_tt)
616 669 or ismerged(dest)
617 670 ):
618 671 minor[dest] = value
619 672
620 673
621 674 def _forwardcopies(a, b, base=None, match=None):
622 675 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
623 676
624 677 if base is None:
625 678 base = a
626 679 match = a.repo().narrowmatch(match)
627 680 # check for working copy
628 681 if b.rev() is None:
629 682 cm = _committedforwardcopies(a, b.p1(), base, match)
630 683 # combine copies from dirstate if necessary
631 684 copies = _chain(cm, _dirstatecopies(b._repo, match))
632 685 else:
633 686 copies = _committedforwardcopies(a, b, base, match)
634 687 return copies
635 688
636 689
637 690 def _backwardrenames(a, b, match):
638 691 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
639 692 return {}
640 693
641 694 # Even though we're not taking copies into account, 1:n rename situations
642 695 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
643 696 # arbitrarily pick one of the renames.
644 697 # We don't want to pass in "match" here, since that would filter
645 698 # the destination by it. Since we're reversing the copies, we want
646 699 # to filter the source instead.
647 700 f = _forwardcopies(b, a)
648 701 r = {}
649 702 for k, v in sorted(pycompat.iteritems(f)):
650 703 if match and not match(v):
651 704 continue
652 705 # remove copies
653 706 if v in a:
654 707 continue
655 708 r[v] = k
656 709 return r
657 710
658 711
659 712 def pathcopies(x, y, match=None):
660 713 """find {dst@y: src@x} copy mapping for directed compare"""
661 714 repo = x._repo
662 715 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
663 716 if debug:
664 717 repo.ui.debug(
665 718 b'debug.copies: searching copies from %s to %s\n' % (x, y)
666 719 )
667 720 if x == y or not x or not y:
668 721 return {}
669 722 if y.rev() is None and x == y.p1():
670 723 if debug:
671 724 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
672 725 # short-circuit to avoid issues with merge states
673 726 return _dirstatecopies(repo, match)
674 727 a = y.ancestor(x)
675 728 if a == x:
676 729 if debug:
677 730 repo.ui.debug(b'debug.copies: search mode: forward\n')
678 731 copies = _forwardcopies(x, y, match=match)
679 732 elif a == y:
680 733 if debug:
681 734 repo.ui.debug(b'debug.copies: search mode: backward\n')
682 735 copies = _backwardrenames(x, y, match=match)
683 736 else:
684 737 if debug:
685 738 repo.ui.debug(b'debug.copies: search mode: combined\n')
686 739 base = None
687 740 if a.rev() != nullrev:
688 741 base = x
689 742 copies = _chain(
690 743 _backwardrenames(x, a, match=match),
691 744 _forwardcopies(a, y, base, match=match),
692 745 )
693 746 _filter(x, y, copies)
694 747 return copies
695 748
696 749
697 750 def mergecopies(repo, c1, c2, base):
698 751 """
699 752 Finds moves and copies between contexts c1 and c2 that are relevant for
700 753 merging. 'base' will be used as the merge base.
701 754
702 755 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
703 756 files that were moved/copied in one merge parent and modified in another.
704 757 For example:
705 758
706 759 o ---> 4 another commit
707 760 |
708 761 | o ---> 3 commit that modifies a.txt
709 762 | /
710 763 o / ---> 2 commit that moves a.txt to b.txt
711 764 |/
712 765 o ---> 1 merge base
713 766
714 767 If we try to rebase revision 3 on revision 4, since there is no a.txt in
715 768 revision 4, and the user has copytrace disabled, we print the following
716 769 message:
717 770
718 771 ```other changed <file> which local deleted```
719 772
720 773 Returns a tuple where:
721 774
722 775 "branch_copies" an instance of branch_copies.
723 776
724 777 "diverge" is a mapping of source name -> list of destination names
725 778 for divergent renames.
726 779
727 780 This function calls different copytracing algorithms based on config.
728 781 """
729 782 # avoid silly behavior for update from empty dir
730 783 if not c1 or not c2 or c1 == c2:
731 784 return branch_copies(), branch_copies(), {}
732 785
733 786 narrowmatch = c1.repo().narrowmatch()
734 787
735 788 # avoid silly behavior for parent -> working dir
736 789 if c2.node() is None and c1.node() == repo.dirstate.p1():
737 790 return (
738 791 branch_copies(_dirstatecopies(repo, narrowmatch)),
739 792 branch_copies(),
740 793 {},
741 794 )
742 795
743 796 copytracing = repo.ui.config(b'experimental', b'copytrace')
744 797 if stringutil.parsebool(copytracing) is False:
745 798 # stringutil.parsebool() returns None when it is unable to parse the
746 799 # value; in that case the check fails and copytracing stays enabled
747 800 return branch_copies(), branch_copies(), {}
748 801
749 802 if usechangesetcentricalgo(repo):
750 803 # The heuristics don't make sense when we need changeset-centric algos
751 804 return _fullcopytracing(repo, c1, c2, base)
752 805
753 806 # Copy trace disabling is explicitly below the node == p1 logic above
754 807 # because the logic above is required for a simple copy to be kept across a
755 808 # rebase.
756 809 if copytracing == b'heuristics':
757 810 # Do full copytracing if only non-public revisions are involved as
758 811 # that will be fast enough and will also cover the copies which could
759 812 # be missed by heuristics
760 813 if _isfullcopytraceable(repo, c1, base):
761 814 return _fullcopytracing(repo, c1, c2, base)
762 815 return _heuristicscopytracing(repo, c1, c2, base)
763 816 else:
764 817 return _fullcopytracing(repo, c1, c2, base)
765 818
766 819
767 820 def _isfullcopytraceable(repo, c1, base):
768 821 """Checks that if base, source and destination are all no-public branches,
769 822 if yes let's use the full copytrace algorithm for increased capabilities
770 823 since it will be fast enough.
771 824
772 825 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
773 826 number of changesets from c1 to base such that if number of changesets are
774 827 more than the limit, full copytracing algorithm won't be used.
775 828 """
776 829 if c1.rev() is None:
777 830 c1 = c1.p1()
778 831 if c1.mutable() and base.mutable():
779 832 sourcecommitlimit = repo.ui.configint(
780 833 b'experimental', b'copytrace.sourcecommitlimit'
781 834 )
782 835 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
783 836 return commits < sourcecommitlimit
784 837 return False
785 838
786 839
787 840 def _checksinglesidecopies(
788 841 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
789 842 ):
790 843 if src not in m2:
791 844 # deleted on side 2
792 845 if src not in m1:
793 846 # renamed on side 1, deleted on side 2
794 847 renamedelete[src] = dsts1
795 848 elif src not in mb:
796 849 # Work around the "short-circuit to avoid issues with merge states"
797 850 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
798 851 # destination doesn't exist in y.
799 852 pass
800 853 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
801 854 return
802 855 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
803 856 # modified on side 2
804 857 for dst in dsts1:
805 858 copy[dst] = src
806 859
807 860
808 861 class branch_copies(object):
809 862 """Information about copies made on one side of a merge/graft.
810 863
811 864 "copy" is a mapping from destination name -> source name,
812 865 where source is in c1 and destination is in c2 or vice-versa.
813 866
814 867 "movewithdir" is a mapping from source name -> destination name,
815 868 where the file at source, present in one context but not the other,
816 869 needs to be moved to destination by the merge process, because the
817 870 other context moved the directory it is in.
818 871
819 872 "renamedelete" is a mapping of source name -> list of destination
820 873 names for files deleted in c1 that were renamed in c2 or vice-versa.
821 874
822 875 "dirmove" is a mapping of detected source dir -> destination dir renames.
823 876 This is needed for handling changes to new files previously grafted into
824 877 renamed directories.
825 878 """
826 879
827 880 def __init__(
828 881 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
829 882 ):
830 883 self.copy = {} if copy is None else copy
831 884 self.renamedelete = {} if renamedelete is None else renamedelete
832 885 self.dirmove = {} if dirmove is None else dirmove
833 886 self.movewithdir = {} if movewithdir is None else movewithdir
834 887
835 888 def __repr__(self):
836 889 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
837 890 self.copy,
838 891 self.renamedelete,
839 892 self.dirmove,
840 893 self.movewithdir,
841 894 )
842 895
843 896
844 897 def _fullcopytracing(repo, c1, c2, base):
845 898 """The full copytracing algorithm which finds all the new files that were
846 899 added from merge base up to the top commit; for each file, it checks
847 900 whether this file was copied from another file.
848 901
849 902 This is pretty slow when a lot of changesets are involved but will track all
850 903 the copies.
851 904 """
852 905 m1 = c1.manifest()
853 906 m2 = c2.manifest()
854 907 mb = base.manifest()
855 908
856 909 copies1 = pathcopies(base, c1)
857 910 copies2 = pathcopies(base, c2)
858 911
859 912 if not (copies1 or copies2):
860 913 return branch_copies(), branch_copies(), {}
861 914
862 915 inversecopies1 = {}
863 916 inversecopies2 = {}
864 917 for dst, src in copies1.items():
865 918 inversecopies1.setdefault(src, []).append(dst)
866 919 for dst, src in copies2.items():
867 920 inversecopies2.setdefault(src, []).append(dst)
868 921
869 922 copy1 = {}
870 923 copy2 = {}
871 924 diverge = {}
872 925 renamedelete1 = {}
873 926 renamedelete2 = {}
874 927 allsources = set(inversecopies1) | set(inversecopies2)
875 928 for src in allsources:
876 929 dsts1 = inversecopies1.get(src)
877 930 dsts2 = inversecopies2.get(src)
878 931 if dsts1 and dsts2:
879 932 # copied/renamed on both sides
880 933 if src not in m1 and src not in m2:
881 934 # renamed on both sides
882 935 dsts1 = set(dsts1)
883 936 dsts2 = set(dsts2)
884 937 # If there's some overlap in the rename destinations, we
885 938 # consider it not divergent. For example, if side 1 copies 'a'
886 939 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
887 940 # and 'd' and deletes 'a', then 'c' overlaps and is not divergent.
888 941 if dsts1 & dsts2:
889 942 for dst in dsts1 & dsts2:
890 943 copy1[dst] = src
891 944 copy2[dst] = src
892 945 else:
893 946 diverge[src] = sorted(dsts1 | dsts2)
894 947 elif src in m1 and src in m2:
895 948 # copied on both sides
896 949 dsts1 = set(dsts1)
897 950 dsts2 = set(dsts2)
898 951 for dst in dsts1 & dsts2:
899 952 copy1[dst] = src
900 953 copy2[dst] = src
901 954 # TODO: Handle cases where it was renamed on one side and copied
902 955 # on the other side
903 956 elif dsts1:
904 957 # copied/renamed only on side 1
905 958 _checksinglesidecopies(
906 959 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
907 960 )
908 961 elif dsts2:
909 962 # copied/renamed only on side 2
910 963 _checksinglesidecopies(
911 964 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
912 965 )
913 966
914 967 # find interesting file sets from manifests
915 968 cache = []
916 969
917 970 def _get_addedfiles(idx):
918 971 if not cache:
919 972 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
920 973 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
921 974 u1 = sorted(addedinm1 - addedinm2)
922 975 u2 = sorted(addedinm2 - addedinm1)
923 976 cache.extend((u1, u2))
924 977 return cache[idx]
925 978
926 979 u1fn = lambda: _get_addedfiles(0)
927 980 u2fn = lambda: _get_addedfiles(1)
928 981 if repo.ui.debugflag:
929 982 u1 = u1fn()
930 983 u2 = u2fn()
931 984
932 985 header = b" unmatched files in %s"
933 986 if u1:
934 987 repo.ui.debug(
935 988 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
936 989 )
937 990 if u2:
938 991 repo.ui.debug(
939 992 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
940 993 )
941 994
942 995 renamedeleteset = set()
943 996 divergeset = set()
944 997 for dsts in diverge.values():
945 998 divergeset.update(dsts)
946 999 for dsts in renamedelete1.values():
947 1000 renamedeleteset.update(dsts)
948 1001 for dsts in renamedelete2.values():
949 1002 renamedeleteset.update(dsts)
950 1003
951 1004 repo.ui.debug(
952 1005 b" all copies found (* = to merge, ! = divergent, "
953 1006 b"% = renamed and deleted):\n"
954 1007 )
955 1008 for side, copies in ((b"local", copies1), (b"remote", copies2)):
956 1009 if not copies:
957 1010 continue
958 1011 repo.ui.debug(b" on %s side:\n" % side)
959 1012 for f in sorted(copies):
960 1013 note = b""
961 1014 if f in copy1 or f in copy2:
962 1015 note += b"*"
963 1016 if f in divergeset:
964 1017 note += b"!"
965 1018 if f in renamedeleteset:
966 1019 note += b"%"
967 1020 repo.ui.debug(
968 1021 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
969 1022 )
970 1023 del renamedeleteset
971 1024 del divergeset
972 1025
973 1026 repo.ui.debug(b" checking for directory renames\n")
974 1027
975 1028 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
976 1029 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
977 1030
978 1031 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
979 1032 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
980 1033
981 1034 return branch_copies1, branch_copies2, diverge
982 1035
983 1036
984 1037 def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
985 1038 """Finds moved directories and files that should move with them.
986 1039
987 1040 ctx: the context for one of the sides
988 1041 copy: files copied on the same side (as ctx)
989 1042 fullcopy: files copied on the same side (as ctx), including those that
990 1043 merge.manifestmerge() won't care about
991 1044 addedfilesfn: function returning added files on the other side (compared to
992 1045 ctx)
993 1046 """
994 1047 # generate a directory move map
995 1048 invalid = set()
996 1049 dirmove = {}
997 1050
998 1051 # examine each file copy for a potential directory move, which is
999 1052 # when all the files in a directory are moved to a new directory
1000 1053 for dst, src in pycompat.iteritems(fullcopy):
1001 1054 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
1002 1055 if dsrc in invalid:
1003 1056 # already seen to be uninteresting
1004 1057 continue
1005 1058 elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
1006 1059 # directory wasn't entirely moved locally
1007 1060 invalid.add(dsrc)
1008 1061 elif dsrc in dirmove and dirmove[dsrc] != ddst:
1009 1062 # files from the same directory moved to two different places
1010 1063 invalid.add(dsrc)
1011 1064 else:
1012 1065 # looks good so far
1013 1066 dirmove[dsrc] = ddst
1014 1067
1015 1068 for i in invalid:
1016 1069 if i in dirmove:
1017 1070 del dirmove[i]
1018 1071 del invalid
1019 1072
1020 1073 if not dirmove:
1021 1074 return {}, {}
1022 1075
1023 1076 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
1024 1077
1025 1078 for d in dirmove:
1026 1079 repo.ui.debug(
1027 1080 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
1028 1081 )
1029 1082
1030 1083 movewithdir = {}
1031 1084 # check unaccounted nonoverlapping files against directory moves
1032 1085 for f in addedfilesfn():
1033 1086 if f not in fullcopy:
1034 1087 for d in dirmove:
1035 1088 if f.startswith(d):
1036 1089 # new file added in a directory that was moved, move it
1037 1090 df = dirmove[d] + f[len(d) :]
1038 1091 if df not in copy:
1039 1092 movewithdir[f] = df
1040 1093 repo.ui.debug(
1041 1094 b" pending file src: '%s' -> dst: '%s'\n"
1042 1095 % (f, df)
1043 1096 )
1044 1097 break
1045 1098
1046 1099 return dirmove, movewithdir
1047 1100
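To illustrate the returned mapping on made-up paths: once `dirmove` records that `dir/` moved wholesale to `new/`, a file the other side added under the old directory is relocated by simple prefix replacement.

```
dirmove = {b'dir/': b'new/'}  # inferred: every file of dir/ was moved
f = b'dir/c'                  # file added under the old directory name
for d in dirmove:
    if f.startswith(d):
        df = dirmove[d] + f[len(d):]
        break
assert df == b'new/c'
```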
1048 1101
1049 1102 def _heuristicscopytracing(repo, c1, c2, base):
1050 1103 """Fast copytracing using filename heuristics
1051 1104
1052 1105 Assumes that moves or renames are of following two types:
1053 1106
1054 1107 1) Inside a directory only (same directory name but different filenames)
1055 1108 2) Move from one directory to another
1056 1109 (same filenames but different directory names)
1057 1110
1058 1111 Works only when there are no merge commits in the "source branch".
1059 1112 The source branch is the commits from base up to c2, not including base.
1060 1113
1061 1114 If a merge is involved, it falls back to _fullcopytracing().
1062 1115
1063 1116 Can be used by setting the following config:
1064 1117
1065 1118 [experimental]
1066 1119 copytrace = heuristics
1067 1120
1068 1121 In some cases the copy/move candidates found by heuristics can be very large
1069 1122 in number and that will make the algorithm slow. The number of possible
1070 1123 candidates to check can be limited by using the config
1071 1124 `experimental.copytrace.movecandidateslimit` which defaults to 100.
1072 1125 """
1073 1126
1074 1127 if c1.rev() is None:
1075 1128 c1 = c1.p1()
1076 1129 if c2.rev() is None:
1077 1130 c2 = c2.p1()
1078 1131
1079 1132 changedfiles = set()
1080 1133 m1 = c1.manifest()
1081 1134 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
1082 1135 # If base is not in c2 branch, we switch to fullcopytracing
1083 1136 repo.ui.debug(
1084 1137 b"switching to full copytracing as base is not "
1085 1138 b"an ancestor of c2\n"
1086 1139 )
1087 1140 return _fullcopytracing(repo, c1, c2, base)
1088 1141
1089 1142 ctx = c2
1090 1143 while ctx != base:
1091 1144 if len(ctx.parents()) == 2:
1092 1145 # To keep things simple let's not handle merges
1093 1146 repo.ui.debug(b"switching to full copytracing because of merges\n")
1094 1147 return _fullcopytracing(repo, c1, c2, base)
1095 1148 changedfiles.update(ctx.files())
1096 1149 ctx = ctx.p1()
1097 1150
1098 1151 copies2 = {}
1099 1152 cp = _forwardcopies(base, c2)
1100 1153 for dst, src in pycompat.iteritems(cp):
1101 1154 if src in m1:
1102 1155 copies2[dst] = src
1103 1156
1104 1157 # file is missing if it isn't present in the destination, but is present in
1105 1158 # the base and present in the source.
1106 1159 # Presence in the base is important to exclude added files, presence in the
1107 1160 # source is important to exclude removed files.
1108 1161 filt = lambda f: f not in m1 and f in base and f in c2
1109 1162 missingfiles = [f for f in changedfiles if filt(f)]
1110 1163
1111 1164 copies1 = {}
1112 1165 if missingfiles:
1113 1166 basenametofilename = collections.defaultdict(list)
1114 1167 dirnametofilename = collections.defaultdict(list)
1115 1168
1116 1169 for f in m1.filesnotin(base.manifest()):
1117 1170 basename = os.path.basename(f)
1118 1171 dirname = os.path.dirname(f)
1119 1172 basenametofilename[basename].append(f)
1120 1173 dirnametofilename[dirname].append(f)
1121 1174
1122 1175 for f in missingfiles:
1123 1176 basename = os.path.basename(f)
1124 1177 dirname = os.path.dirname(f)
1125 1178 samebasename = basenametofilename[basename]
1126 1179 samedirname = dirnametofilename[dirname]
1127 1180 movecandidates = samebasename + samedirname
1128 1181 # f is guaranteed to be present in c2, that's why
1129 1182 # c2.filectx(f) won't fail
1130 1183 f2 = c2.filectx(f)
1131 1184 # we can have a lot of candidates which can slow down the heuristics;
1132 1185 # a config value limits the number of candidate moves to check
1133 1186 maxcandidates = repo.ui.configint(
1134 1187 b'experimental', b'copytrace.movecandidateslimit'
1135 1188 )
1136 1189
1137 1190 if len(movecandidates) > maxcandidates:
1138 1191 repo.ui.status(
1139 1192 _(
1140 1193 b"skipping copytracing for '%s', more "
1141 1194 b"candidates than the limit: %d\n"
1142 1195 )
1143 1196 % (f, len(movecandidates))
1144 1197 )
1145 1198 continue
1146 1199
1147 1200 for candidate in movecandidates:
1148 1201 f1 = c1.filectx(candidate)
1149 1202 if _related(f1, f2):
1150 1203 # if there are a few related copies then we'll merge
1151 1204 # changes into all of them. This matches the behaviour
1152 1205 # of upstream copytracing
1153 1206 copies1[candidate] = f
1154 1207
1155 1208 return branch_copies(copies1), branch_copies(copies2), {}
1156 1209
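The two rename shapes the heuristic accepts, spelled out on made-up paths:

```
import os

missing = b'old/a.txt'  # present in base and c2, gone from c1
for candidate in (b'old/b.txt', b'new/a.txt'):
    same_name = os.path.basename(candidate) == os.path.basename(missing)
    same_dir = os.path.dirname(candidate) == os.path.dirname(missing)
    # a candidate shares either the basename (moved across directories)
    # or the directory (renamed in place)
    assert same_name or same_dir
```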
1157 1210
1158 1211 def _related(f1, f2):
1159 1212 """return True if f1 and f2 filectx have a common ancestor
1160 1213
1161 1214 Walk back to common ancestor to see if the two files originate
1162 1215 from the same file. Since workingfilectx's rev() is None it messes
1163 1216 up the integer comparison logic, hence the pre-step check for
1164 1217 None (f1 and f2 can only be workingfilectx's initially).
1165 1218 """
1166 1219
1167 1220 if f1 == f2:
1168 1221 return True # a match
1169 1222
1170 1223 g1, g2 = f1.ancestors(), f2.ancestors()
1171 1224 try:
1172 1225 f1r, f2r = f1.linkrev(), f2.linkrev()
1173 1226
1174 1227 if f1r is None:
1175 1228 f1 = next(g1)
1176 1229 if f2r is None:
1177 1230 f2 = next(g2)
1178 1231
1179 1232 while True:
1180 1233 f1r, f2r = f1.linkrev(), f2.linkrev()
1181 1234 if f1r > f2r:
1182 1235 f1 = next(g1)
1183 1236 elif f2r > f1r:
1184 1237 f2 = next(g2)
1185 1238 else: # f1 and f2 point to files in the same linkrev
1186 1239 return f1 == f2 # true if they point to the same file
1187 1240 except StopIteration:
1188 1241 return False
1189 1242
1190 1243
1191 1244 def graftcopies(wctx, ctx, base):
1192 1245 """reproduce copies between base and ctx in the wctx
1193 1246
1194 1247 Unlike mergecopies(), this function will only consider copies between base
1195 1248 and ctx; it will ignore copies between base and wctx. Also unlike
1196 1249 mergecopies(), this function will apply copies to the working copy (instead
1197 1250 of just returning information about the copies). That makes it cheaper
1198 1251 (especially in the common case of base==ctx.p1()) and useful also when
1199 1252 experimental.copytrace=off.
1200 1253
1201 1254 merge.update() will have already marked most copies, but it will only
1202 1255 mark copies if it thinks the source files are related (see
1203 1256 merge._related()). It will also not mark copies if the file wasn't modified
1204 1257 on the local side. This function adds the copies that were "missed"
1205 1258 by merge.update().
1206 1259 """
1207 1260 new_copies = pathcopies(base, ctx)
1208 1261 _filter(wctx.p1(), wctx, new_copies)
1209 1262 for dst, src in pycompat.iteritems(new_copies):
1210 1263 wctx[dst].markcopied(src)
rust/hg-core/src/copy_tracing.rs
@@ -1,665 +1,704 @@
1 1 use crate::utils::hg_path::HgPath;
2 2 use crate::utils::hg_path::HgPathBuf;
3 3 use crate::Revision;
4 use crate::NULL_REVISION;
4 5
5 6 use im_rc::ordmap::DiffItem;
6 7 use im_rc::ordmap::OrdMap;
7 8
8 9 use std::cmp::Ordering;
9 10 use std::collections::HashMap;
10 11 use std::convert::TryInto;
11 12
12 13 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
13 14
14 15 type PathToken = HgPathBuf;
15 16
16 17 #[derive(Clone, Debug, PartialEq)]
17 18 struct TimeStampedPathCopy {
18 19 /// revision at which the copy information was added
19 20 rev: Revision,
20 21 /// the copy source, (Set to None in case of deletion of the associated
21 22 /// key)
22 23 path: Option<PathToken>,
23 24 }
24 25
25 26 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
26 27 type TimeStampedPathCopies = OrdMap<PathToken, TimeStampedPathCopy>;
27 28
28 29 /// holds parent 1, parent 2 and the relevant files actions.
29 30 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
30 31
31 32 /// represent the files affected by a changeset
32 33 ///
33 34 /// This holds a subset of mercurial.metadata.ChangingFiles as we do not
34 35 /// need all the data categories tracked by it.
37 38 pub struct ChangedFiles<'a> {
38 39 nb_items: u32,
39 40 index: &'a [u8],
40 41 data: &'a [u8],
41 42 }
42 43
43 44 /// Represents the active changes that affect copy tracing.
44 45 enum Action<'a> {
45 46 /// The parent → children edge is removing a file
46 47 ///
47 48 /// (actually, this could be the edge from the other parent, but it does
48 49 /// not matter)
49 50 Removed(&'a HgPath),
50 51 /// The parent → children edge introduces copy information between (dest,
51 52 /// source)
52 53 Copied(&'a HgPath, &'a HgPath),
53 54 }
54 55
55 56 /// This expresses the possible "special" cases we can get in a merge
56 57 ///
57 58 /// See mercurial/metadata.py for details on these values.
58 59 #[derive(PartialEq)]
59 60 enum MergeCase {
60 61 /// Merged: file had history on both side that needed to be merged
61 62 Merged,
62 63 /// Salvaged: file was candidate for deletion, but survived the merge
63 64 Salvaged,
64 65 /// Normal: Not one of the two cases above
65 66 Normal,
66 67 }
67 68
68 69 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
69 70
70 71 const EMPTY: &[u8] = b"";
71 72 const COPY_MASK: u8 = 3;
72 73 const P1_COPY: u8 = 2;
73 74 const P2_COPY: u8 = 3;
74 75 const ACTION_MASK: u8 = 28;
75 76 const REMOVED: u8 = 12;
76 77 const MERGED: u8 = 8;
77 78 const SALVAGED: u8 = 16;
78 79
79 80 impl<'a> ChangedFiles<'a> {
80 81 const INDEX_START: usize = 4;
81 82 const ENTRY_SIZE: u32 = 9;
82 83 const FILENAME_START: u32 = 1;
83 84 const COPY_SOURCE_START: u32 = 5;
84 85
85 86 pub fn new(data: &'a [u8]) -> Self {
86 87 assert!(
87 88 data.len() >= 4,
88 89 "data size ({}) is too small to contain the header (4)",
89 90 data.len()
90 91 );
91 92 let nb_items_raw: [u8; 4] = (&data[0..=3])
92 93 .try_into()
93 94 .expect("failed to turn 4 bytes into 4 bytes");
94 95 let nb_items = u32::from_be_bytes(nb_items_raw);
95 96
96 97 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
97 98 let index_end = Self::INDEX_START + index_size;
98 99
99 100 assert!(
100 101 data.len() >= index_end,
101 102 "data size ({}) is too small to fit the index_data ({})",
102 103 data.len(),
103 104 index_end
104 105 );
105 106
106 107 let ret = ChangedFiles {
107 108 nb_items,
108 109 index: &data[Self::INDEX_START..index_end],
109 110 data: &data[index_end..],
110 111 };
111 112 let max_data = ret.filename_end(nb_items - 1) as usize;
112 113 assert!(
113 114 ret.data.len() >= max_data,
114 115 "data size ({}) is too small to fit all data ({})",
115 116 data.len(),
116 117 index_end + max_data
117 118 );
118 119 ret
119 120 }
120 121
121 122 pub fn new_empty() -> Self {
122 123 ChangedFiles {
123 124 nb_items: 0,
124 125 index: EMPTY,
125 126 data: EMPTY,
126 127 }
127 128 }
128 129
129 130 /// internal function to return an individual entry at a given index
130 131 fn entry(&'a self, idx: u32) -> FileChange<'a> {
131 132 if idx >= self.nb_items {
132 133 panic!(
133 134 "index for entry is higher that the number of file {} >= {}",
134 135 idx, self.nb_items
135 136 )
136 137 }
137 138 let flags = self.flags(idx);
138 139 let filename = self.filename(idx);
139 140 let copy_idx = self.copy_idx(idx);
140 141 let copy_source = self.filename(copy_idx);
141 142 (flags, filename, copy_source)
142 143 }
143 144
144 145 /// internal function to return the filename of the entry at a given index
145 146 fn filename(&self, idx: u32) -> &HgPath {
146 147 let filename_start;
147 148 if idx == 0 {
148 149 filename_start = 0;
149 150 } else {
150 151 filename_start = self.filename_end(idx - 1)
151 152 }
152 153 let filename_end = self.filename_end(idx);
153 154 let filename_start = filename_start as usize;
154 155 let filename_end = filename_end as usize;
155 156 HgPath::new(&self.data[filename_start..filename_end])
156 157 }
157 158
158 159 /// internal function to return the flag field of the entry at a given
159 160 /// index
160 161 fn flags(&self, idx: u32) -> u8 {
161 162 let idx = idx as usize;
162 163 self.index[idx * (Self::ENTRY_SIZE as usize)]
163 164 }
164 165
165 166 /// internal function to return the end of a filename part at a given index
166 167 fn filename_end(&self, idx: u32) -> u32 {
167 168 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
168 169 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
169 170 let start = start as usize;
170 171 let end = end as usize;
171 172 let raw = (&self.index[start..end])
172 173 .try_into()
173 174 .expect("failed to turn 4 bytes into 4 bytes");
174 175 u32::from_be_bytes(raw)
175 176 }
176 177
177 178 /// internal function to return index of the copy source of the entry at a
178 179 /// given index
179 180 fn copy_idx(&self, idx: u32) -> u32 {
180 181 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
181 182 let end = (idx + 1) * Self::ENTRY_SIZE;
182 183 let start = start as usize;
183 184 let end = end as usize;
184 185 let raw = (&self.index[start..end])
185 186 .try_into()
186 187 .expect("failed to turn 4 bytes into 4 bytes");
187 188 u32::from_be_bytes(raw)
188 189 }
189 190
190 191 /// Return an iterator over all the `Action` in this instance.
191 192 fn iter_actions(&self, parent: Parent) -> ActionsIterator {
192 193 ActionsIterator {
193 194 changes: &self,
194 195 parent: parent,
195 196 current: 0,
196 197 }
197 198 }
198 199
199 200 /// return the MergeCase value associated with a filename
200 201 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
201 202 if self.nb_items == 0 {
202 203 return MergeCase::Normal;
203 204 }
204 205 let mut low_part = 0;
205 206 let mut high_part = self.nb_items;
206 207
207 208 while low_part < high_part {
208 209 let cursor = (low_part + high_part - 1) / 2;
209 210 let (flags, filename, _source) = self.entry(cursor);
210 211 match path.cmp(filename) {
211 212 Ordering::Less => low_part = cursor + 1,
212 213 Ordering::Greater => high_part = cursor,
213 214 Ordering::Equal => {
214 215 return match flags & ACTION_MASK {
215 216 MERGED => MergeCase::Merged,
216 217 SALVAGED => MergeCase::Salvaged,
217 218 _ => MergeCase::Normal,
218 219 };
219 220 }
220 221 }
221 222 }
222 223 MergeCase::Normal
223 224 }
224 225 }
225 226
226 227 /// A struct responsible for answering "is X ancestors of Y" quickly
227 228 ///
228 229 /// The structure will delegate ancestors call to a callback, and cache the
229 230 /// result.
230 231 #[derive(Debug)]
231 232 struct AncestorOracle<'a, A: Fn(Revision, Revision) -> bool> {
232 233 inner: &'a A,
233 234 pairs: HashMap<(Revision, Revision), bool>,
234 235 }
235 236
236 237 impl<'a, A: Fn(Revision, Revision) -> bool> AncestorOracle<'a, A> {
237 238 fn new(func: &'a A) -> Self {
238 239 Self {
239 240 inner: func,
240 241 pairs: HashMap::default(),
241 242 }
242 243 }
243 244
244 245 /// returns `true` if `anc` is an ancestor of `desc`, `false` otherwise
245 246 fn is_ancestor(&mut self, anc: Revision, desc: Revision) -> bool {
246 247 if anc > desc {
247 248 false
248 249 } else if anc == desc {
249 250 true
250 251 } else {
251 252 if let Some(b) = self.pairs.get(&(anc, desc)) {
252 253 *b
253 254 } else {
254 255 let b = (self.inner)(anc, desc);
255 256 self.pairs.insert((anc, desc), b);
256 257 b
257 258 }
258 259 }
259 260 }
260 261 }
261 262
262 263 struct ActionsIterator<'a> {
263 264 changes: &'a ChangedFiles<'a>,
264 265 parent: Parent,
265 266 current: u32,
266 267 }
267 268
268 269 impl<'a> Iterator for ActionsIterator<'a> {
269 270 type Item = Action<'a>;
270 271
271 272 fn next(&mut self) -> Option<Action<'a>> {
272 273 let copy_flag = match self.parent {
273 274 Parent::FirstParent => P1_COPY,
274 275 Parent::SecondParent => P2_COPY,
275 276 };
276 277 while self.current < self.changes.nb_items {
277 278 let (flags, file, source) = self.changes.entry(self.current);
278 279 self.current += 1;
279 280 if (flags & ACTION_MASK) == REMOVED {
280 281 return Some(Action::Removed(file));
281 282 }
282 283 let copy = flags & COPY_MASK;
283 284 if copy == copy_flag {
284 285 return Some(Action::Copied(file, source));
285 286 }
286 287 }
287 288 return None;
288 289 }
289 290 }
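
For illustration, a hedged sketch of consuming this iterator from inside the module; `count_p1_actions` is a hypothetical helper, not part of the file above.

    /// Hypothetical helper: tally the copies and removals recorded
    /// against the first parent.
    fn count_p1_actions(changes: &ChangedFiles) -> (usize, usize) {
        let (mut copied, mut removed) = (0, 0);
        for action in changes.iter_actions(Parent::FirstParent) {
            match action {
                Action::Copied(_dest, _source) => copied += 1,
                Action::Removed(_path) => removed += 1,
            }
        }
        (copied, removed)
    }
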
290 291
291 292 /// A small struct whose purpose is to ensure the lifetime of bytes
292 293 /// referenced in ChangedFiles
293 294 ///
294 295 /// It is passed to the RevInfoMaker callback, which can assign any necessary
295 296 /// content to the `data` attribute. The copy tracing code is responsible for
296 297 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
297 298 pub struct DataHolder<D> {
298 299 /// RevInfoMaker callback should assign data referenced by the
299 300 /// ChangedFiles struct it returns to this attribute. The DataHolder
300 301 /// lifetime will be at least as long as the ChangedFiles one.
301 302 pub data: Option<D>,
302 303 }
303 304
304 305 pub type RevInfoMaker<'a, D> =
305 306 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
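
A hedged sketch of the DataHolder contract described above: the callback parks the owned buffer in the holder, then hands `ChangedFiles` a borrow of it. `read_sidedata_somehow` is a hypothetical helper, not a real hg-core API.

    /// Hypothetical fetcher for the raw changed-files data of a
    /// revision; not a real hg-core API.
    fn read_sidedata_somehow(_rev: Revision) -> Vec<u8> {
        unimplemented!()
    }

    fn make_maker<'a>() -> RevInfoMaker<'a, Vec<u8>> {
        Box::new(|rev: Revision, d: &mut DataHolder<Vec<u8>>| {
            // park the owned buffer in the holder so it outlives the
            // ChangedFiles borrowing from it...
            d.data = Some(read_sidedata_somehow(rev));
            let raw = d.data.as_ref().expect("data was just assigned");
            // ...then hand ChangedFiles a borrow of the held bytes
            (NULL_REVISION, NULL_REVISION, ChangedFiles::new(raw))
        })
    }
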
306 307
307 308 /// enum used to carry information about the parent → child edge currently processed
308 309 #[derive(Copy, Clone, Debug)]
309 310 enum Parent {
310 311 /// The `p1(x) → x` edge
311 312 FirstParent,
312 313 /// The `p2(x) → x` edge
313 314 SecondParent,
314 315 }
315 316
316 317 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
317 318 ///
318 319 /// Arguments are:
319 320 ///
320 321 /// revs: all revisions to be considered
321 322 /// children_count: a {parent: <number of children in the walked set>} mapping
322 323 /// target_rev: the final revision we are combining copies to
323 324 /// rev_info(rev): callback to get revision information:
324 325 /// * first parent
325 326 /// * second parent
326 327 /// * ChangedFiles
327 328 /// is_ancestor(low_rev, high_rev): callback to check if a revision is an
328 329 /// ancestor of another
329 330 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool, D>(
330 331 revs: Vec<Revision>,
331 children: HashMap<Revision, Vec<Revision>>,
332 mut children_count: HashMap<Revision, usize>,
332 333 target_rev: Revision,
333 334 rev_info: RevInfoMaker<D>,
334 335 is_ancestor: &A,
335 336 ) -> PathCopies {
336 337 let mut all_copies = HashMap::new();
337 338 let mut oracle = AncestorOracle::new(is_ancestor);
338 339
339 340 for rev in revs {
340 // Retrieve data computed in a previous iteration
341 let copies = all_copies.remove(&rev);
342 let copies = match copies {
343 Some(c) => c,
344 None => TimeStampedPathCopies::default(), // root of the walked set
345 };
346
347 let current_children = match children.get(&rev) {
348 Some(c) => c,
349 None => panic!("inconsistent `revs` and `children`"),
350 };
351
352 for child in current_children {
353 // We will chain the copies information accumulated for `rev` with
354 // the individual copies information for each of its children.
355 // Creating a new PathCopies for each `rev` → `children` vertex.
356 let mut d: DataHolder<D> = DataHolder { data: None };
357 let (p1, p2, changes) = rev_info(*child, &mut d);
341 let mut d: DataHolder<D> = DataHolder { data: None };
342 let (p1, p2, changes) = rev_info(rev, &mut d);
358 343
359 let parent = if rev == p1 {
360 Parent::FirstParent
361 } else {
362 assert_eq!(rev, p2);
363 Parent::SecondParent
364 };
365 let new_copies =
366 add_from_changes(&copies, &changes, parent, *child);
344 // We will chain the copies information accumulated for the parent with
345 // the individual copies information of the current revision, creating a
346 // new TimeStampedPathCopies for each `parent` → `rev` edge.
347 let mut copies: Option<TimeStampedPathCopies> = None;
348 if p1 != NULL_REVISION {
349 // Retrieve data computed in a previous iteration
350 let parent_copies = get_and_clean_parent_copies(
351 &mut all_copies,
352 &mut children_count,
353 p1,
354 );
355 if let Some(parent_copies) = parent_copies {
356 // combine it with data for that revision
357 let vertex_copies = add_from_changes(
358 &parent_copies,
359 &changes,
360 Parent::FirstParent,
361 rev,
362 );
363 // keep that data around for potential later combination
364 copies = Some(vertex_copies);
365 }
366 }
367 if p2 != NULL_REVISION {
368 // Retrieve data computed in a previous iteration
369 let parent_copies = get_and_clean_parent_copies(
370 &mut all_copies,
371 &mut children_count,
372 p2,
373 );
374 if let Some(parent_copies) = parent_copies {
375 // combine it with data for that revision
376 let vertex_copies = add_from_changes(
377 &parent_copies,
378 &changes,
379 Parent::SecondParent,
380 rev,
381 );
367 382
368 // A merge has two parents and needs to combine their copy information.
369 //
370 // If the vertex from the other parent was already processed, we
371 // will have a value for the child ready to be used. We need to
372 // grab it and combine it with the one we already
373 // computed. If not, we can simply store the newly
374 // computed data. The processing happening at
375 // the time of the second parent will take care of combining the
376 // two TimeStampedPathCopies instances.
377 match all_copies.remove(child) {
378 None => {
379 all_copies.insert(child, new_copies);
380 }
381 Some(other_copies) => {
382 let (minor, major) = match parent {
383 Parent::FirstParent => (other_copies, new_copies),
384 Parent::SecondParent => (new_copies, other_copies),
385 };
386 let merged_copies =
387 merge_copies_dict(minor, major, &changes, &mut oracle);
388 all_copies.insert(child, merged_copies);
389 }
390 };
383 copies = match copies {
384 None => Some(vertex_copies),
385 // A merge has two parents and needs to combine
386 // their copy information.
387 //
388 // If we got data from both parents, we need to
389 // combine them.
390 Some(copies) => Some(merge_copies_dict(
391 vertex_copies,
392 copies,
393 &changes,
394 &mut oracle,
395 )),
396 };
397 }
398 }
399 // `copies` stays None when no parent is inside the walked set
400 // (e.g. for roots of the walked subgraph), in which case there
401 // is nothing to record for `rev`.
402 if let Some(copies) = copies {
403 all_copies.insert(rev, copies);
391 404 }
392 405 }
393 406
394 407 // Drop internal information (like the timestamp) and return the final
395 408 // mapping.
396 409 let tt_result = all_copies
397 410 .remove(&target_rev)
398 411 .expect("target revision was not processed");
399 412 let mut result = PathCopies::default();
400 413 for (dest, tt_source) in tt_result {
401 414 if let Some(path) = tt_source.path {
402 415 result.insert(dest, path);
403 416 }
404 417 }
405 418 result
406 419 }
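
A rough driver sketch, under the assumption (inferred from the code, not stated upstream) that `children_count` must list every parent that will be read, mapped to its number of children inside the walked set:

    fn demo_combine() {
        use std::collections::HashMap;

        // Hypothetical two-revision graph `0 -> 1` with no copy metadata.
        let rev_info: RevInfoMaker<Vec<u8>> = Box::new(
            |rev: Revision, _d: &mut DataHolder<Vec<u8>>| -> RevInfo {
                // p1 is `rev - 1`, which is NULL_REVISION (-1) for rev 0
                (rev - 1, NULL_REVISION, ChangedFiles::new_empty())
            },
        );
        let is_ancestor = |anc: Revision, desc: Revision| anc <= desc;
        let mut children_count = HashMap::new();
        children_count.insert(0, 1); // rev 0 is read once, by its child 1
        let copies = combine_changeset_copies(
            vec![1], // walking rev 1 is enough to reach the target
            children_count,
            1, // target_rev
            rev_info,
            &is_ancestor,
        );
        assert!(copies.is_empty()); // nothing was ever copied in this graph
    }
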
407 420
421 /// fetch previously computed information
422 ///
423 /// If no other children are expected to need this information, we drop it from
424 /// the cache.
425 ///
426 /// If parent is not part of the set we are expected to walk, return None.
427 fn get_and_clean_parent_copies(
428 all_copies: &mut HashMap<Revision, TimeStampedPathCopies>,
429 children_count: &mut HashMap<Revision, usize>,
430 parent_rev: Revision,
431 ) -> Option<TimeStampedPathCopies> {
432 let count = children_count.get_mut(&parent_rev)?;
433 *count -= 1;
434 if *count == 0 {
435 match all_copies.remove(&parent_rev) {
436 Some(c) => Some(c),
437 None => Some(TimeStampedPathCopies::default()),
438 }
439 } else {
440 match all_copies.get(&parent_rev) {
441 Some(c) => Some(c.clone()),
442 None => Some(TimeStampedPathCopies::default()),
443 }
444 }
445 }
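
The decrement-and-take logic above is a general count-based eviction pattern: the last expected reader takes ownership of the cached value, while earlier readers pay for a clone. A hedged miniature with simplified types (the real function above additionally substitutes an empty map when the cache has no entry for a tracked parent):

    use std::collections::HashMap;

    /// Hypothetical miniature: `cache` holds computed values, and
    /// `remaining_readers` tracks how many consumers still need each key.
    fn take_or_clone<V: Clone>(
        cache: &mut HashMap<i32, V>,
        remaining_readers: &mut HashMap<i32, usize>,
        key: i32,
    ) -> Option<V> {
        // `None` here means the key is not tracked at all
        let count = remaining_readers.get_mut(&key)?;
        *count -= 1;
        if *count == 0 {
            cache.remove(&key) // final reader: move the value out
        } else {
            cache.get(&key).cloned() // earlier readers: pay for a clone
        }
    }
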
446
408 447 /// Combine ChangedFiles with some existing PathCopies information and return
409 448 /// the result
410 449 fn add_from_changes(
411 450 base_copies: &TimeStampedPathCopies,
412 451 changes: &ChangedFiles,
413 452 parent: Parent,
414 453 current_rev: Revision,
415 454 ) -> TimeStampedPathCopies {
416 455 let mut copies = base_copies.clone();
417 456 for action in changes.iter_actions(parent) {
418 457 match action {
419 458 Action::Copied(dest, source) => {
420 459 let entry;
421 460 if let Some(v) = base_copies.get(source) {
422 461 entry = match &v.path {
423 462 Some(path) => Some((*(path)).to_owned()),
424 463 None => Some(source.to_owned()),
425 464 }
426 465 } else {
427 466 entry = Some(source.to_owned());
428 467 }
429 468 // Each new entry is introduced by the current revision; we
430 469 // record this information as we will need it to make
431 470 // the right decision when merging conflicting copy
432 471 // information. See merge_copies_dict for details.
433 472 let ttpc = TimeStampedPathCopy {
434 473 rev: current_rev,
435 474 path: entry,
436 475 };
437 476 copies.insert(dest.to_owned(), ttpc);
438 477 }
439 478 Action::Removed(f) => {
440 479 // We must drop copy information for removed files.
441 480 //
442 481 // We need to explicitly record them as dropped to
443 482 // propagate this information when merging two
444 483 // TimeStampedPathCopies objects.
445 484 if copies.contains_key(f.as_ref()) {
446 485 let ttpc = TimeStampedPathCopy {
447 486 rev: current_rev,
448 487 path: None,
449 488 };
450 489 copies.insert(f.to_owned(), ttpc);
451 490 }
452 491 }
453 492 }
454 493 }
455 494 copies
456 495 }
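
The `Removed` branch above is a tombstone pattern: instead of deleting the entry, it overwrites it with a `None` path stamped with the removing revision, so the deletion still carries weight when two maps are merged later. A hedged miniature with simplified types:

    use std::collections::HashMap;

    /// (revision that wrote the entry, copy source; `None` = tombstone)
    type Stamped = (i32, Option<String>);

    /// Hypothetical miniature of the removal handling above.
    fn record_removal(
        copies: &mut HashMap<String, Stamped>,
        rev: i32,
        file: &str,
    ) {
        // only files that carry copy information need a tombstone
        if copies.contains_key(file) {
            copies.insert(file.to_owned(), (rev, None));
        }
    }
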
457 496
458 497 /// merge two copies-mapping together, minor and major
459 498 ///
460 499 /// In case of conflict, the value from "major" will be picked, except in
461 500 /// some special cases. See inline documentation for details.
462 501 fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
463 502 mut minor: TimeStampedPathCopies,
464 503 mut major: TimeStampedPathCopies,
465 504 changes: &ChangedFiles,
466 505 oracle: &mut AncestorOracle<A>,
467 506 ) -> TimeStampedPathCopies {
468 507 // This closure exists as temporary help while multiple developers are
469 508 // actively working on this code. Feel free to re-inline it once this
470 509 // code is more settled.
471 510 let mut cmp_value =
472 511 |dest: &PathToken,
473 512 src_minor: &TimeStampedPathCopy,
474 513 src_major: &TimeStampedPathCopy| {
475 514 compare_value(changes, oracle, dest, src_minor, src_major)
476 515 };
477 516 if minor.is_empty() {
478 517 major
479 518 } else if major.is_empty() {
480 519 minor
481 520 } else if minor.len() * 2 < major.len() {
482 521 // Let's say we are merging two TimeStampedPathCopies instances A and B.
483 522 //
484 523 // If A contains N items, the merge result will never contain more
485 524 // than N values different from the ones in A.
486 525 //
487 526 // If B contains M items, with M > N, the merge result will always
488 527 // contain a minimum of M - N values different from the ones in
489 528 // A.
490 529 //
491 530 // As a result, if N < (M - N), we know that simply iterating over A will
492 531 // yield fewer differences than iterating over the difference
493 532 // between A and B.
494 533 //
495 534 // This helps performance a lot in cases where a tiny
496 535 // TimeStampedPathCopies is merged with a much larger one.
497 536 for (dest, src_minor) in minor {
498 537 let src_major = major.get(&dest);
499 538 match src_major {
500 539 None => major.insert(dest, src_minor),
501 540 Some(src_major) => {
502 541 match cmp_value(&dest, &src_minor, src_major) {
503 542 MergePick::Any | MergePick::Major => None,
504 543 MergePick::Minor => major.insert(dest, src_minor),
505 544 }
506 545 }
507 546 };
508 547 }
509 548 major
510 549 } else if major.len() * 2 < minor.len() {
511 550 // This uses the same rationale as the previous block.
512 551 // (Check the previous block's documentation for details.)
513 552 for (dest, src_major) in major {
514 553 let src_minor = minor.get(&dest);
515 554 match src_minor {
516 555 None => minor.insert(dest, src_major),
517 556 Some(src_minor) => {
518 557 match cmp_value(&dest, src_minor, &src_major) {
519 558 MergePick::Any | MergePick::Minor => None,
520 559 MergePick::Major => minor.insert(dest, src_major),
521 560 }
522 561 }
523 562 };
524 563 }
525 564 minor
526 565 } else {
527 566 let mut override_minor = Vec::new();
528 567 let mut override_major = Vec::new();
529 568
530 569 let mut to_major = |k: &PathToken, v: &TimeStampedPathCopy| {
531 570 override_major.push((k.clone(), v.clone()))
532 571 };
533 572 let mut to_minor = |k: &PathToken, v: &TimeStampedPathCopy| {
534 573 override_minor.push((k.clone(), v.clone()))
535 574 };
536 575
537 576 // The diff function leverages detection of identical subparts when
538 577 // minor and major have some common ancestors. This makes it very
539 578 // fast in most cases.
540 579 //
541 580 // In cases where the two maps are vastly different in size, the current
542 581 // approach is still slowish because the iteration will iterate over
543 582 // all the "exclusive" content of the larger one. This situation can be
544 583 // frequent when the subgraph of revisions we are processing has a lot
545 584 // of roots. Each root adds its own fully new map to the mix (and
546 585 // likely a small map, if the path from the root to the "main path" is
547 586 // short).
548 587 //
549 588 // We could do better by detecting such situations and processing them
550 589 // differently.
551 590 for d in minor.diff(&major) {
552 591 match d {
553 592 DiffItem::Add(k, v) => to_minor(k, v),
554 593 DiffItem::Remove(k, v) => to_major(k, v),
555 594 DiffItem::Update { old, new } => {
556 595 let (dest, src_major) = new;
557 596 let (_, src_minor) = old;
558 597 match cmp_value(dest, src_minor, src_major) {
559 598 MergePick::Major => to_minor(dest, src_major),
560 599 MergePick::Minor => to_major(dest, src_minor),
561 600 // If the two entries are identical, no need to do
562 601 // anything (but diff should not have yielded them)
563 602 MergePick::Any => unreachable!(),
564 603 }
565 604 }
566 605 };
567 606 }
568 607
569 608 let updates;
570 609 let mut result;
571 610 if override_major.is_empty() {
572 611 result = major
573 612 } else if override_minor.is_empty() {
574 613 result = minor
575 614 } else {
576 615 if override_minor.len() < override_major.len() {
577 616 updates = override_minor;
578 617 result = minor;
579 618 } else {
580 619 updates = override_major;
581 620 result = major;
582 621 }
583 622 for (k, v) in updates {
584 623 result.insert(k, v);
585 624 }
586 625 }
587 626 result
588 627 }
589 628 }
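
The two size-ratio branches above implement a classic asymmetric merge: iterate over the much smaller map so the work is bounded by its size rather than the larger one's. A hedged standalone sketch of the same idea using the standard entry API; `keep_major` stands in for `compare_value`:

    use std::collections::hash_map::Entry;
    use std::collections::HashMap;

    /// Merge `minor` into `major`; `keep_major` decides conflicts.
    fn merge_prefer_major<K: std::hash::Hash + Eq, V>(
        minor: HashMap<K, V>,
        mut major: HashMap<K, V>,
        keep_major: impl Fn(&V, &V) -> bool,
    ) -> HashMap<K, V> {
        // iterating `minor` bounds the work by the smaller side's size
        for (key, minor_value) in minor {
            match major.entry(key) {
                Entry::Vacant(slot) => {
                    // only present in minor: keep it as-is
                    slot.insert(minor_value);
                }
                Entry::Occupied(mut slot) => {
                    // conflict: overwrite only when minor should prevail
                    if !keep_major(&minor_value, slot.get()) {
                        slot.insert(minor_value);
                    }
                }
            }
        }
        major
    }
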
590 629
591 630 /// represents the side that should prevail when merging two
592 631 /// TimeStampedPathCopies
593 632 enum MergePick {
594 633 /// The "major" (p1) side prevails
595 634 Major,
596 635 /// The "minor" (p2) side prevails
597 636 Minor,
598 637 /// Any side could be used (because they are the same)
599 638 Any,
600 639 }
601 640
602 641 /// decide which side prevails in case of conflicting values
603 642 #[allow(clippy::if_same_then_else)]
604 643 fn compare_value<A: Fn(Revision, Revision) -> bool>(
605 644 changes: &ChangedFiles,
606 645 oracle: &mut AncestorOracle<A>,
607 646 dest: &PathToken,
608 647 src_minor: &TimeStampedPathCopy,
609 648 src_major: &TimeStampedPathCopy,
610 649 ) -> MergePick {
611 650 if src_major.path == src_minor.path {
611 650 // we have the same value, but from another source;
613 652 if src_major.rev == src_minor.rev {
613 652 // If the two entries are identical, they are both valid
615 654 MergePick::Any
616 655 } else if oracle.is_ancestor(src_major.rev, src_minor.rev) {
617 656 MergePick::Minor
618 657 } else {
619 658 MergePick::Major
620 659 }
621 660 } else if src_major.rev == src_minor.rev {
622 661 // We cannot get copy information for both p1 and p2 in the
623 662 // same rev, so this situation should be impossible.
624 663 unreachable!(
625 664 "conflict information from p1 and p2 in the same revision"
626 665 );
627 666 } else {
628 667 let action = changes.get_merge_case(&dest);
629 668 if src_major.path.is_none() && action == MergeCase::Salvaged {
630 669 // If the file is "deleted" in the major side but was
631 670 // salvaged by the merge, we keep the minor side alive
632 671 MergePick::Minor
633 672 } else if src_minor.path.is_none() && action == MergeCase::Salvaged {
634 673 // If the file is "deleted" in the minor side but was
635 674 // salvaged by the merge, unconditionally preserve the
636 675 // major side.
637 676 MergePick::Major
638 677 } else if action == MergeCase::Merged {
639 678 // If the file was actively merged, copy information
640 679 // from each side might conflict. The major side will
641 680 // win such conflict.
642 681 MergePick::Major
643 682 } else if oracle.is_ancestor(src_major.rev, src_minor.rev) {
644 683 // If the minor side is strictly newer than the major
645 684 // side, it should be kept.
646 685 MergePick::Minor
647 686 } else if src_major.path.is_some() {
648 687 // without any special case, the "major" value wins
649 688 // over the "minor" one.
650 689 MergePick::Major
651 690 } else if oracle.is_ancestor(src_minor.rev, src_major.rev) {
652 691 // the "minor" rev is an ancestor of "major": the major
653 692 // side is newer and its value (a deletion) should
654 693 // overwrite
655 694 MergePick::Major
656 695 } else {
657 696 // major version is None (so the file was deleted on
658 697 // that branch) and that branch is independent (neither
659 698 // minor nor major is an ancestor of the other one).
660 699 // We preserve the minor side's
661 700 // information about the new file.
662 701 MergePick::Minor
663 702 }
664 703 }
665 704 }
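
For quick reference, the decision ladder above reduces to the following (a summary derived only from the code itself): with identical paths, identical stamps give `Any`, otherwise the newer stamp wins, falling back to `Major` when ancestry is unknown; with different paths, a salvaged deletion hands victory to the surviving side, actively merged files fall to `Major`, a strictly newer minor wins, `Major` wins while it still has a path or is known to be newer than minor, and the remaining case (an independent deletion on the major side) keeps `Minor`.
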
@@ -1,153 +1,148
1 1 use cpython::ObjectProtocol;
2 2 use cpython::PyBool;
3 3 use cpython::PyBytes;
4 4 use cpython::PyDict;
5 5 use cpython::PyList;
6 6 use cpython::PyModule;
7 7 use cpython::PyObject;
8 8 use cpython::PyResult;
9 9 use cpython::PyTuple;
10 10 use cpython::Python;
11 11
12 12 use hg::copy_tracing::combine_changeset_copies;
13 13 use hg::copy_tracing::ChangedFiles;
14 14 use hg::copy_tracing::DataHolder;
15 15 use hg::copy_tracing::RevInfo;
16 16 use hg::copy_tracing::RevInfoMaker;
17 17 use hg::Revision;
18 18
19 19 /// Combines copies information contained in revisions `revs` to build a copy
20 20 /// map.
21 21 ///
22 22 /// See mercurial/copies.py for details
23 23 pub fn combine_changeset_copies_wrapper(
24 24 py: Python,
25 25 revs: PyList,
26 children: PyDict,
26 children_count: PyDict,
27 27 target_rev: Revision,
28 28 rev_info: PyObject,
29 29 is_ancestor: PyObject,
30 30 ) -> PyResult<PyDict> {
31 31 let revs: PyResult<_> =
32 32 revs.iter(py).map(|r| Ok(r.extract(py)?)).collect();
33 33
34 34 // Wrap the `is_ancestor` python callback as a Rust closure
35 35 //
36 36 // No errors are expected from the Python side, and they should only
37 37 // happen in case of programming error or severe data corruption. Such
38 38 // errors will raise panics that the rust-cpython harness will turn into
39 39 // Python exceptions.
40 40 let is_ancestor_wrap = |anc: Revision, desc: Revision| -> bool {
41 41 is_ancestor
42 42 .call(py, (anc, desc), None)
43 43 .expect(
44 44 "rust-copy-tracing: python call to `is_ancestor` \
45 45 failed",
46 46 )
47 47 .cast_into::<PyBool>(py)
48 48 .expect(
49 49 "rust-copy-tracing: python call to `is_ancestor` \
50 50 returned unexpected non-Bool value",
51 51 )
52 52 .is_true()
53 53 };
54 54
55 55 // Wrap the `rev_info_maker` python callback as a Rust closure
56 56 //
57 57 // No errors are expected from the Python side, and they will should only
58 58 // happens in case of programing error or severe data corruption. Such
59 59 // errors will raise panic and the rust-cpython harness will turn them into
60 60 // Python exception.
61 61 let rev_info_maker: RevInfoMaker<PyBytes> =
62 62 Box::new(|rev: Revision, d: &mut DataHolder<PyBytes>| -> RevInfo {
63 63 let res: PyTuple = rev_info
64 64 .call(py, (rev,), None)
65 65 .expect("rust-copy-tracing: python call to `rev_info` failed")
66 66 .cast_into(py)
67 67 .expect(
68 68 "rust-copy_tracing: python call to `rev_info` returned \
69 69 unexpected non-Tuple value",
70 70 );
71 71 let p1 = res.get_item(py, 0).extract(py).expect(
72 72 "rust-copy-tracing: rev_info return is invalid, first item \
73 73 is a not a revision",
74 74 );
75 75 let p2 = res.get_item(py, 1).extract(py).expect(
76 76 "rust-copy-tracing: rev_info return is invalid, second item \
77 77 is not a revision",
78 78 );
79 79
80 80 let files = match res.get_item(py, 2).extract::<PyBytes>(py) {
81 81 Ok(raw) => {
82 82 // Give responsibility for the raw bytes lifetime to
83 83 // hg-core
84 84 d.data = Some(raw);
85 85 let addrs = d.data.as_ref().expect(
86 86 "rust-copy-tracing: failed to get a reference to the \
87 87 raw bytes for copy data").data(py);
88 88 ChangedFiles::new(addrs)
89 89 }
90 90 // value was presumably None, meaning there was no copy data.
91 91 Err(_) => ChangedFiles::new_empty(),
92 92 };
93 93
94 94 (p1, p2, files)
95 95 });
96 let children: PyResult<_> = children
96 let children_count: PyResult<_> = children_count
97 97 .items(py)
98 98 .iter()
99 .map(|(k, v)| {
100 let v: &PyList = v.cast_as(py)?;
101 let v: PyResult<_> =
102 v.iter(py).map(|child| Ok(child.extract(py)?)).collect();
103 Ok((k.extract(py)?, v?))
104 })
99 .map(|(k, v)| Ok((k.extract(py)?, v.extract(py)?)))
105 100 .collect();
106 101
107 102 let res = combine_changeset_copies(
108 103 revs?,
109 children?,
104 children_count?,
110 105 target_rev,
111 106 rev_info_maker,
112 107 &is_ancestor_wrap,
113 108 );
114 109 let out = PyDict::new(py);
115 110 for (dest, source) in res.into_iter() {
116 111 out.set_item(
117 112 py,
118 113 PyBytes::new(py, &dest.into_vec()),
119 114 PyBytes::new(py, &source.into_vec()),
120 115 )?;
121 116 }
122 117 Ok(out)
123 118 }
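
The closure-wrapping pattern above (a Python callable turned into a plain Rust `Fn`, panicking on unexpected Python failures so the harness re-raises them as exceptions) can be factored out. A hedged sketch using only rust-cpython calls that already appear above; `wrap_predicate` itself is hypothetical:

    use cpython::{ObjectProtocol, PyBool, PyObject, Python};
    use hg::Revision;

    /// Hypothetical helper, not part of the module above: turn a Python
    /// predicate into a plain Rust closure. Panics on bad input; the
    /// rust-cpython harness re-raises panics as Python exceptions.
    fn wrap_predicate<'p>(
        py: Python<'p>,
        callback: &'p PyObject,
    ) -> impl Fn(Revision, Revision) -> bool + 'p {
        move |a: Revision, b: Revision| {
            callback
                .call(py, (a, b), None)
                .expect("python predicate call failed")
                .cast_into::<PyBool>(py)
                .expect("python predicate returned a non-bool")
                .is_true()
        }
    }
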
124 119
125 120 /// Create the module, with `__package__` given from parent
126 121 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
127 122 let dotted_name = &format!("{}.copy_tracing", package);
128 123 let m = PyModule::new(py, dotted_name)?;
129 124
130 125 m.add(py, "__package__", package)?;
131 126 m.add(py, "__doc__", "Copy tracing - Rust implementation")?;
132 127
133 128 m.add(
134 129 py,
135 130 "combine_changeset_copies",
136 131 py_fn!(
137 132 py,
138 133 combine_changeset_copies_wrapper(
139 134 revs: PyList,
140 135 children_count: PyDict,
141 136 target_rev: Revision,
142 137 rev_info: PyObject,
143 138 is_ancestor: PyObject
144 139 )
145 140 ),
146 141 )?;
147 142
148 143 let sys = PyModule::import(py, "sys")?;
149 144 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
150 145 sys_modules.set_item(py, dotted_name, &m)?;
151 146
152 147 Ok(m)
153 148 }