copies: use dedicated `_revinfo_getter` function and call...
marmoute
r46215:4f876e6b default
@@ -1,1095 +1,1125
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12
13 13 from .i18n import _
14 14
15 15
16 16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 17
18 18 from . import (
19 19 match as matchmod,
20 20 node,
21 21 pathutil,
22 22 pycompat,
23 23 util,
24 24 )
25 25
26 26
27 27 from .utils import stringutil
28 28
29 29
30 30 def _filter(src, dst, t):
31 31 """filters out invalid copies after chaining"""
32 32
33 33 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
34 34 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
35 35 # in the following table (not including trivial cases). For example, case 2
36 36 # is where a file existed in 'src' and remained under that name in 'mid' and
37 37 # then was renamed between 'mid' and 'dst'.
38 38 #
39 39 # case src mid dst result
40 40 # 1 x y - -
41 41 # 2 x y y x->y
42 42 # 3 x y x -
43 43 # 4 x y z x->z
44 44 # 5 - x y -
45 45 # 6 x x y x->y
46 46 #
47 47 # _chain() takes care of chaining the copies in 'a' and 'b', but it
48 48 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
49 49 # between 5 and 6, so it includes all cases in its result.
50 50 # Cases 1, 3, and 5 are then removed by _filter().
51 51
52 52 for k, v in list(t.items()):
53 53 # remove copies from files that didn't exist
54 54 if v not in src:
55 55 del t[k]
56 56 # remove criss-crossed copies
57 57 elif k in src and v in dst:
58 58 del t[k]
59 59 # remove copies to files that were then removed
60 60 elif k not in dst:
61 61 del t[k]
62 62
63 63
64 64 def _chain(prefix, suffix):
65 65 """chain two sets of copies 'prefix' and 'suffix'"""
66 66 result = prefix.copy()
67 67 for key, value in pycompat.iteritems(suffix):
68 68 result[key] = prefix.get(value, value)
69 69 return result
70 70
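To make the case table in `_filter()` concrete, here is a minimal self-contained sketch: sets of filenames stand in for the `src`/`dst` contexts, and the two helpers mirror `_chain()` and `_filter()` above (simplified, hypothetical re-implementations, not the module's API):

```python
# Minimal sketch: chain copies from 'src' to 'mid' (prefix) with copies from
# 'mid' to 'dst' (suffix), then filter, using sets of filenames as contexts.
def chain(prefix, suffix):
    result = prefix.copy()
    for key, value in suffix.items():
        # resolve transitively: if 'value' was itself a copy, keep its origin
        result[key] = prefix.get(value, value)
    return result

def filter_copies(src, dst, t):
    for k, v in list(t.items()):
        if v not in src:                # case 5: source never existed in 'src'
            del t[k]
        elif k in src and v in dst:     # case 3: criss-crossed copy
            del t[k]
        elif k not in dst:              # case 1: destination was later removed
            del t[k]

src, dst = {'x'}, {'y'}      # files present in 'src' and 'dst'
prefix = {'y': 'x'}          # 'x' was renamed to 'y' between src and mid
suffix = {}                  # no further copies between mid and dst
t = chain(prefix, suffix)
filter_copies(src, dst, t)
print(t)                     # {'y': 'x'} -- case 2 of the table
```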
71 71
72 72 def _tracefile(fctx, am, basemf):
73 73 """return file context that is the ancestor of fctx present in ancestor
74 74 manifest am
75 75
76 76 Note: we used to try and stop after a given limit; however, checking if that
77 77 limit is reached turned out to be very expensive. We are better off
78 78 disabling that feature."""
79 79
80 80 for f in fctx.ancestors():
81 81 path = f.path()
82 82 if am.get(path, None) == f.filenode():
83 83 return path
84 84 if basemf and basemf.get(path, None) == f.filenode():
85 85 return path
86 86
87 87
88 88 def _dirstatecopies(repo, match=None):
89 89 ds = repo.dirstate
90 90 c = ds.copies().copy()
91 91 for k in list(c):
92 92 if ds[k] not in b'anm' or (match and not match(k)):
93 93 del c[k]
94 94 return c
95 95
96 96
97 97 def _computeforwardmissing(a, b, match=None):
98 98 """Computes which files are in b but not a.
99 99 This is its own function so extensions can easily wrap this call to see what
100 100 files _forwardcopies is about to process.
101 101 """
102 102 ma = a.manifest()
103 103 mb = b.manifest()
104 104 return mb.filesnotin(ma, match=match)
105 105
106 106
107 107 def usechangesetcentricalgo(repo):
108 108 """Checks if we should use changeset-centric copy algorithms"""
109 109 if repo.filecopiesmode == b'changeset-sidedata':
110 110 return True
111 111 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
112 112 changesetsource = (b'changeset-only', b'compatibility')
113 113 return readfrom in changesetsource
114 114
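For reference, the setting read above lives under `[experimental]` in an hgrc. An illustrative snippet selecting the changeset-centric algorithms via the compatibility mode (value taken from the sources accepted by this function):

```
[experimental]
copies.read-from = compatibility
```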
115 115
116 116 def _committedforwardcopies(a, b, base, match):
117 117 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
118 118 # files might have to be traced back to the fctx parent of the last
119 119 # one-side-only changeset, but not further back than that
120 120 repo = a._repo
121 121
122 122 if usechangesetcentricalgo(repo):
123 123 return _changesetforwardcopies(a, b, match)
124 124
125 125 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
126 126 dbg = repo.ui.debug
127 127 if debug:
128 128 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
129 129 am = a.manifest()
130 130 basemf = None if base is None else base.manifest()
131 131
132 132 # find where new files came from
133 133 # we currently don't try to find where old files went, too expensive
134 134 # this means we can miss a case like 'hg rm b; hg cp a b'
135 135 cm = {}
136 136
137 137 # Computing the forward missing is quite expensive on large manifests, since
138 138 # it compares the entire manifests. We can optimize it in the common use
139 139 # case of computing what copies are in a commit versus its parent (like
140 140 # during a rebase or histedit). Note, we exclude merge commits from this
141 141 # optimization, since the ctx.files() for a merge commit is not correct for
142 142 # this comparison.
143 143 forwardmissingmatch = match
144 144 if b.p1() == a and b.p2().node() == node.nullid:
145 145 filesmatcher = matchmod.exact(b.files())
146 146 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
147 147 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
148 148
149 149 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
150 150
151 151 if debug:
152 152 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
153 153
154 154 for f in sorted(missing):
155 155 if debug:
156 156 dbg(b'debug.copies: tracing file: %s\n' % f)
157 157 fctx = b[f]
158 158 fctx._ancestrycontext = ancestrycontext
159 159
160 160 if debug:
161 161 start = util.timer()
162 162 opath = _tracefile(fctx, am, basemf)
163 163 if opath:
164 164 if debug:
165 165 dbg(b'debug.copies: rename of: %s\n' % opath)
166 166 cm[f] = opath
167 167 if debug:
168 168 dbg(
169 169 b'debug.copies: time: %f seconds\n'
170 170 % (util.timer() - start)
171 171 )
172 172 return cm
173 173
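The optimization above narrows the manifest comparison by intersecting the caller's matcher with an exact matcher over `b.files()`. A small hedged sketch of that intersection using `mercurial.match` directly (toy file list; `intersectmatchers()` treats a `None` side as "no restriction"):

```python
from mercurial import match as matchmod

# exact matcher over the files the child commit reports as touched
filesmatcher = matchmod.exact([b'a.txt', b'b.txt'])
# intersect with the caller's matcher; None means "match everything"
m = matchmod.intersectmatchers(None, filesmatcher)
print(m(b'a.txt'))  # True: only listed files can be "forward missing"
print(m(b'c.txt'))  # False
```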
174 174
175 175 def _revinfo_getter(repo):
176 176 """return a function that return multiple data given a <rev>"i
177 177
178 178 * p1: revision number of first parent
179 179 * p2: revision number of first parent
180 180 * p1copies: mapping of copies from p1
181 181 * p2copies: mapping of copies from p2
182 182 * removed: a list of removed files
183 183 * ismerged: a callback to know if file was merged in that revision
184 184 """
185 185 cl = repo.changelog
186 186 parents = cl.parentrevs
187 187
188 188 def get_ismerged(rev):
189 189 ctx = repo[rev]
190 190
191 191 def ismerged(path):
192 192 if path not in ctx.files():
193 193 return False
194 194 fctx = ctx[path]
195 195 parents = fctx._filelog.parents(fctx._filenode)
196 196 nb_parents = 0
197 197 for n in parents:
198 198 if n != node.nullid:
199 199 nb_parents += 1
200 200 return nb_parents >= 2
201 201
202 202 return ismerged
203 203
204 if repo.filecopiesmode == b'changeset-sidedata':
205 changelogrevision = cl.changelogrevision
206 flags = cl.flags
204 changelogrevision = cl.changelogrevision
205 flags = cl.flags
207 206
208 # A small cache to avoid doing the work twice for merges
209 #
210 # In the vast majority of cases, if we ask information for a revision
211 # about 1 parent, we'll later ask it for the other. So it makes sense to
212 # keep the information around when reaching the first parent of a merge
213 # and dropping it after it was provided for the second parent.
214 #
215 # There are cases where only one parent of the merge will be walked. This
216 # happens when the "destination" of the copy tracing is a descendant of a
217 # new root, not common with the "source". In that case, we will only walk
218 # through merge parents that are descendants of changesets common
219 # between "source" and "destination".
220 #
221 # With the current implementation, if such changesets have copy
222 # information, we'll keep them in memory until the end of
223 # _changesetforwardcopies. We don't expect the case to be frequent
224 # enough to matter.
225 #
226 # In addition, it would be possible to reach a pathological case, where
227 # many first parents are met before any second parent is reached. In
228 # that case the cache could grow. If this ever becomes an issue, one can
229 # safely introduce a maximum cache size. This would trade extra CPU/IO
230 # time to save memory.
231 merge_caches = {}
207 # A small cache to avoid doing the work twice for merges
208 #
209 # In the vast majority of cases, if we ask information for a revision
210 # about 1 parent, we'll later ask it for the other. So it makes sense to
211 # keep the information around when reaching the first parent of a merge
212 # and dropping it after it was provided for the second parent.
213 #
214 # There are cases where only one parent of the merge will be walked. This
215 # happens when the "destination" of the copy tracing is a descendant of a
216 # new root, not common with the "source". In that case, we will only walk
217 # through merge parents that are descendants of changesets common
218 # between "source" and "destination".
219 #
220 # With the current implementation, if such changesets have copy
221 # information, we'll keep them in memory until the end of
222 # _changesetforwardcopies. We don't expect the case to be frequent
223 # enough to matter.
224 #
225 # In addition, it would be possible to reach a pathological case, where
226 # many first parents are met before any second parent is reached. In
227 # that case the cache could grow. If this ever becomes an issue, one can
228 # safely introduce a maximum cache size. This would trade extra CPU/IO
229 # time to save memory.
230 merge_caches = {}
232 231
233 def revinfo(rev):
234 p1, p2 = parents(rev)
235 value = None
236 if flags(rev) & REVIDX_SIDEDATA:
237 e = merge_caches.pop(rev, None)
238 if e is not None:
239 return e
240 c = changelogrevision(rev)
241 p1copies = c.p1copies
242 p2copies = c.p2copies
243 removed = c.filesremoved
244 if p1 != node.nullrev and p2 != node.nullrev:
245 # XXX in some cases we over-cache; ignore for now
246 value = merge_caches[rev] = (
247 p1,
248 p2,
249 p1copies,
250 p2copies,
251 removed,
252 get_ismerged(rev),
253 )
254 else:
255 p1copies = {}
256 p2copies = {}
257 removed = []
232 def revinfo(rev):
233 p1, p2 = parents(rev)
234 value = None
235 if flags(rev) & REVIDX_SIDEDATA:
236 e = merge_caches.pop(rev, None)
237 if e is not None:
238 return e
239 c = changelogrevision(rev)
240 p1copies = c.p1copies
241 p2copies = c.p2copies
242 removed = c.filesremoved
243 if p1 != node.nullrev and p2 != node.nullrev:
244 # XXX in some cases we over-cache; ignore for now
245 value = merge_caches[rev] = (
246 p1,
247 p2,
248 p1copies,
249 p2copies,
250 removed,
251 get_ismerged(rev),
252 )
253 else:
254 p1copies = {}
255 p2copies = {}
256 removed = []
258 257
259 if value is None:
260 value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev))
261 return value
262
263 else:
264
265 def revinfo(rev):
266 p1, p2 = parents(rev)
267 ctx = repo[rev]
268 p1copies, p2copies = ctx._copies
269 removed = ctx.filesremoved()
270 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
258 if value is None:
259 value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev))
260 return value
271 261
272 262 return revinfo
273 263
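The `merge_caches` dict above implements a one-shot cache: a merge revision's data is stored when it is first requested (for one parent) and removed by `pop()` when requested again (for the other parent). A tiny hypothetical sketch of the same pattern, with made-up helper names:

```python
# Hypothetical sketch of the pop-based one-shot cache used by revinfo():
# the first caller computes and stores, the second caller consumes the entry.
cache = {}

def expensive(rev):
    print('computing %d' % rev)
    return rev * 2

def get(rev):
    value = cache.pop(rev, None)          # consume a cached entry if present
    if value is not None:
        return value
    value = cache[rev] = expensive(rev)   # keep it around for the other parent
    return value

get(7)   # computes and caches
get(7)   # served from the cache; the entry is dropped
get(7)   # computes again
```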
274 264
275 265 def _changesetforwardcopies(a, b, match):
276 266 if a.rev() in (node.nullrev, b.rev()):
277 267 return {}
278 268
279 269 repo = a.repo().unfiltered()
280 270 children = {}
281 revinfo = _revinfo_getter(repo)
282 271
283 272 cl = repo.changelog
284 273 isancestor = cl.isancestorrev # XXX we should add caching to this.
285 274 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
286 275 mrset = set(missingrevs)
287 276 roots = set()
288 277 for r in missingrevs:
289 278 for p in cl.parentrevs(r):
290 279 if p == node.nullrev:
291 280 continue
292 281 if p not in children:
293 282 children[p] = [r]
294 283 else:
295 284 children[p].append(r)
296 285 if p not in mrset:
297 286 roots.add(p)
298 287 if not roots:
299 288 # no common revision to track copies from
300 289 return {}
301 290 min_root = min(roots)
302 291
303 292 from_head = set(
304 293 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
305 294 )
306 295
307 296 iterrevs = set(from_head)
308 297 iterrevs &= mrset
309 298 iterrevs.update(roots)
310 299 iterrevs.remove(b.rev())
311 300 revs = sorted(iterrevs)
312 301
313 302 if repo.filecopiesmode == b'changeset-sidedata':
303 revinfo = _revinfo_getter(repo)
314 304 return _combine_changeset_copies(
315 305 revs, children, b.rev(), revinfo, match, isancestor
316 306 )
317 307 else:
308 revinfo = _revinfo_getter_extra(repo)
318 309 return _combine_changeset_copies_extra(
319 310 revs, children, b.rev(), revinfo, match, isancestor
320 311 )
321 312
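`_changesetforwardcopies` first inverts `parentrevs` into a `{parent: [children]}` mapping over the missing revisions, collecting as roots those parents that fall outside the missing set. A standalone sketch of that inversion, with made-up revision data in place of `cl.parentrevs`:

```python
# Sketch: invert parent pointers into a children mapping, as done above.
nullrev = -1
parentrevs = {2: (1, nullrev), 3: (2, nullrev), 4: (2, 3)}  # fake changelog
missingrevs = [2, 3, 4]   # revisions only reachable from the head, not the base

children, roots, mrset = {}, set(), set(missingrevs)
for r in missingrevs:
    for p in parentrevs[r]:
        if p == nullrev:
            continue
        children.setdefault(p, []).append(r)
        if p not in mrset:
            roots.add(p)   # a parent outside the missing set is a root

print(children)  # {1: [2], 2: [3, 4], 3: [4]}
print(roots)     # {1}
```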
322 313
323 314 def _combine_changeset_copies(
324 315 revs, children, targetrev, revinfo, match, isancestor
325 316 ):
326 317 """combine the copies information for each item of iterrevs
327 318
328 319 revs: sorted iterable of revision to visit
329 320 children: a {parent: [children]} mapping.
330 321 targetrev: the final copies destination revision (not in iterrevs)
331 322 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
332 323 match: a matcher
333 324
334 325 It returns the aggregated copies information for `targetrev`.
335 326 """
336 327 all_copies = {}
337 328 alwaysmatch = match.always()
338 329 for r in revs:
339 330 copies = all_copies.pop(r, None)
340 331 if copies is None:
341 332 # this is a root
342 333 copies = {}
343 334 for i, c in enumerate(children[r]):
344 335 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
345 336 if r == p1:
346 337 parent = 1
347 338 childcopies = p1copies
348 339 else:
349 340 assert r == p2
350 341 parent = 2
351 342 childcopies = p2copies
352 343 if not alwaysmatch:
353 344 childcopies = {
354 345 dst: src for dst, src in childcopies.items() if match(dst)
355 346 }
356 347 newcopies = copies
357 348 if childcopies:
358 349 newcopies = copies.copy()
359 350 for dest, source in pycompat.iteritems(childcopies):
360 351 prev = copies.get(source)
361 352 if prev is not None and prev[1] is not None:
362 353 source = prev[1]
363 354 newcopies[dest] = (c, source)
364 355 assert newcopies is not copies
365 356 for f in removed:
366 357 if f in newcopies:
367 358 if newcopies is copies:
368 359 # copy on write to avoid affecting potential other
369 360 # branches. When there are no other branches, this
370 361 # could be avoided.
371 362 newcopies = copies.copy()
372 363 newcopies[f] = (c, None)
373 364 othercopies = all_copies.get(c)
374 365 if othercopies is None:
375 366 all_copies[c] = newcopies
376 367 else:
377 368 # we are the second parent to work on c, we need to merge our
378 369 # work with the other.
379 370 #
380 371 # In case of conflict, parent 1 takes precedence over parent 2.
381 372 # This is an arbitrary choice made anew when implementing
382 373 # changeset-based copies. It was made without regard to
383 374 # potential filelog-related behavior.
384 375 if parent == 1:
385 376 _merge_copies_dict(
386 377 othercopies, newcopies, isancestor, ismerged
387 378 )
388 379 else:
389 380 _merge_copies_dict(
390 381 newcopies, othercopies, isancestor, ismerged
391 382 )
392 383 all_copies[c] = newcopies
393 384
394 385 final_copies = {}
395 386 for dest, (tt, source) in all_copies[targetrev].items():
396 387 if source is not None:
397 388 final_copies[dest] = source
398 389 return final_copies
399 390
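The loop above propagates copies rev by rev, recording `(rev, source)` pairs and using a `None` source as a deletion marker that the final pass discards. A self-contained sketch of that propagation on a toy linear history (all data below is invented for illustration):

```python
# Sketch: propagate copies downward, resolving transitively and tombstoning
# removed destinations, as _combine_changeset_copies does above.
all_copies = {1: {}}                          # root starts with no copies
graph = {1: [2], 2: [3]}                      # toy history: 1 -> 2 -> 3
childcopies = {2: {'b': 'a'}, 3: {'c': 'b'}}  # per-child copy records
removed = {2: [], 3: ['b']}

for r in (1, 2):
    copies = all_copies.pop(r)
    for c in graph[r]:
        newcopies = copies.copy()
        for dest, source in childcopies[c].items():
            prev = copies.get(source)
            if prev is not None and prev[1] is not None:
                source = prev[1]              # resolve back to the origin
            newcopies[dest] = (c, source)
        for f in removed[c]:
            if f in newcopies:
                newcopies[f] = (c, None)      # tombstone: target was removed
        all_copies[c] = newcopies

final = {d: s for d, (_, s) in all_copies[3].items() if s is not None}
print(final)  # {'c': 'a'}
```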
400 391
401 392 def _merge_copies_dict(minor, major, isancestor, ismerged):
402 393 """merge two copies-mapping together, minor and major
403 394
404 395 In case of conflict, value from "major" will be picked.
405 396
406 397 - `isancestor(low_rev, high_rev)`: callable returning True if `low_rev` is an
407 398 ancestor of `high_rev`,
408 399
409 400 - `ismerged(path)`: callable returning True if `path` has been merged in the
410 401 current revision,
411 402 """
412 403 for dest, value in major.items():
413 404 other = minor.get(dest)
414 405 if other is None:
415 406 minor[dest] = value
416 407 else:
417 408 new_tt = value[0]
418 409 other_tt = other[0]
419 410 if value[1] == other[1]:
420 411 continue
421 412 # content from "major" wins, unless it is older
422 413 # than the branch point or there is a merge
423 414 if (
424 415 new_tt == other_tt
425 416 or not isancestor(new_tt, other_tt)
426 417 or ismerged(dest)
427 418 ):
428 419 minor[dest] = value
429 420
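A small worked example of the conflict rule above, with stub `isancestor`/`ismerged` callables (all names and data here are illustrative, and the helper is a simplified re-statement of `_merge_copies_dict`, not the module's API):

```python
# Sketch: 'major' wins unless its entry is older than the branch point,
# i.e. its revision is an ancestor of the 'minor' entry's revision.
def merge_copies_dict(minor, major, isancestor, ismerged):
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
        elif value[1] != other[1]:
            new_tt, other_tt = value[0], other[0]
            if (
                new_tt == other_tt
                or not isancestor(new_tt, other_tt)
                or ismerged(dest)
            ):
                minor[dest] = value

isancestor = lambda low, high: low < high   # toy: lower rev == ancestor
ismerged = lambda path: False
minor = {'f': (5, 'a')}                     # copy recorded at rev 5
major = {'f': (3, 'b')}                     # conflicting copy at rev 3
merge_copies_dict(minor, major, isancestor, ismerged)
print(minor)  # {'f': (5, 'a')}: rev 3 predates rev 5, so minor's entry wins
```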
430 421
422 def _revinfo_getter_extra(repo):
423 """return a function that return multiple data given a <rev>"i
424
425 * p1: revision number of first parent
426 * p2: revision number of first parent
427 * p1copies: mapping of copies from p1
428 * p2copies: mapping of copies from p2
429 * removed: a list of removed files
430 * ismerged: a callback to know if file was merged in that revision
431 """
432 cl = repo.changelog
433 parents = cl.parentrevs
434
435 def get_ismerged(rev):
436 ctx = repo[rev]
437
438 def ismerged(path):
439 if path not in ctx.files():
440 return False
441 fctx = ctx[path]
442 parents = fctx._filelog.parents(fctx._filenode)
443 nb_parents = 0
444 for n in parents:
445 if n != node.nullid:
446 nb_parents += 1
447 return nb_parents >= 2
448
449 return ismerged
450
451 def revinfo(rev):
452 p1, p2 = parents(rev)
453 ctx = repo[rev]
454 p1copies, p2copies = ctx._copies
455 removed = ctx.filesremoved()
456 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
457
458 return revinfo
459
460
431 461 def _combine_changeset_copies_extra(
432 462 revs, children, targetrev, revinfo, match, isancestor
433 463 ):
434 464 """version of `_combine_changeset_copies` that works with the Google
435 465 specific "extra" based storage for copy information"""
436 466 all_copies = {}
437 467 alwaysmatch = match.always()
438 468 for r in revs:
439 469 copies = all_copies.pop(r, None)
440 470 if copies is None:
441 471 # this is a root
442 472 copies = {}
443 473 for i, c in enumerate(children[r]):
444 474 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
445 475 if r == p1:
446 476 parent = 1
447 477 childcopies = p1copies
448 478 else:
449 479 assert r == p2
450 480 parent = 2
451 481 childcopies = p2copies
452 482 if not alwaysmatch:
453 483 childcopies = {
454 484 dst: src for dst, src in childcopies.items() if match(dst)
455 485 }
456 486 newcopies = copies
457 487 if childcopies:
458 488 newcopies = copies.copy()
459 489 for dest, source in pycompat.iteritems(childcopies):
460 490 prev = copies.get(source)
461 491 if prev is not None and prev[1] is not None:
462 492 source = prev[1]
463 493 newcopies[dest] = (c, source)
464 494 assert newcopies is not copies
465 495 for f in removed:
466 496 if f in newcopies:
467 497 if newcopies is copies:
468 498 # copy on write to avoid affecting potential other
469 499 # branches. When there are no other branches, this
470 500 # could be avoided.
471 501 newcopies = copies.copy()
472 502 newcopies[f] = (c, None)
473 503 othercopies = all_copies.get(c)
474 504 if othercopies is None:
475 505 all_copies[c] = newcopies
476 506 else:
477 507 # we are the second parent to work on c, we need to merge our
478 508 # work with the other.
479 509 #
480 510 # In case of conflict, parent 1 takes precedence over parent 2.
481 511 # This is an arbitrary choice made anew when implementing
482 512 # changeset-based copies. It was made without regard to
483 513 # potential filelog-related behavior.
484 514 if parent == 1:
485 515 _merge_copies_dict_extra(
486 516 othercopies, newcopies, isancestor, ismerged
487 517 )
488 518 else:
489 519 _merge_copies_dict_extra(
490 520 newcopies, othercopies, isancestor, ismerged
491 521 )
492 522 all_copies[c] = newcopies
493 523
494 524 final_copies = {}
495 525 for dest, (tt, source) in all_copies[targetrev].items():
496 526 if source is not None:
497 527 final_copies[dest] = source
498 528 return final_copies
499 529
500 530
501 531 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
502 532 """version of `_merge_copies_dict` that works with the Google
503 533 specific "extra" based storage for copy information"""
504 534 for dest, value in major.items():
505 535 other = minor.get(dest)
506 536 if other is None:
507 537 minor[dest] = value
508 538 else:
509 539 new_tt = value[0]
510 540 other_tt = other[0]
511 541 if value[1] == other[1]:
512 542 continue
513 543 # content from "major" wins, unless it is older
514 544 # than the branch point or there is a merge
515 545 if (
516 546 new_tt == other_tt
517 547 or not isancestor(new_tt, other_tt)
518 548 or ismerged(dest)
519 549 ):
520 550 minor[dest] = value
521 551
522 552
523 553 def _forwardcopies(a, b, base=None, match=None):
524 554 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
525 555
526 556 if base is None:
527 557 base = a
528 558 match = a.repo().narrowmatch(match)
529 559 # check for working copy
530 560 if b.rev() is None:
531 561 cm = _committedforwardcopies(a, b.p1(), base, match)
532 562 # combine copies from dirstate if necessary
533 563 copies = _chain(cm, _dirstatecopies(b._repo, match))
534 564 else:
535 565 copies = _committedforwardcopies(a, b, base, match)
536 566 return copies
537 567
538 568
539 569 def _backwardrenames(a, b, match):
540 570 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
541 571 return {}
542 572
543 573 # Even though we're not taking copies into account, 1:n rename situations
544 574 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
545 575 # arbitrarily pick one of the renames.
546 576 # We don't want to pass in "match" here, since that would filter
547 577 # the destination by it. Since we're reversing the copies, we want
548 578 # to filter the source instead.
549 579 f = _forwardcopies(b, a)
550 580 r = {}
551 581 for k, v in sorted(pycompat.iteritems(f)):
552 582 if match and not match(v):
553 583 continue
554 584 # remove copies
555 585 if v in a:
556 586 continue
557 587 r[v] = k
558 588 return r
559 589
560 590
561 591 def pathcopies(x, y, match=None):
562 592 """find {dst@y: src@x} copy mapping for directed compare"""
563 593 repo = x._repo
564 594 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
565 595 if debug:
566 596 repo.ui.debug(
567 597 b'debug.copies: searching copies from %s to %s\n' % (x, y)
568 598 )
569 599 if x == y or not x or not y:
570 600 return {}
571 601 if y.rev() is None and x == y.p1():
572 602 if debug:
573 603 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
574 604 # short-circuit to avoid issues with merge states
575 605 return _dirstatecopies(repo, match)
576 606 a = y.ancestor(x)
577 607 if a == x:
578 608 if debug:
579 609 repo.ui.debug(b'debug.copies: search mode: forward\n')
580 610 copies = _forwardcopies(x, y, match=match)
581 611 elif a == y:
582 612 if debug:
583 613 repo.ui.debug(b'debug.copies: search mode: backward\n')
584 614 copies = _backwardrenames(x, y, match=match)
585 615 else:
586 616 if debug:
587 617 repo.ui.debug(b'debug.copies: search mode: combined\n')
588 618 base = None
589 619 if a.rev() != node.nullrev:
590 620 base = x
591 621 copies = _chain(
592 622 _backwardrenames(x, a, match=match),
593 623 _forwardcopies(a, y, base, match=match),
594 624 )
595 625 _filter(x, y, copies)
596 626 return copies
597 627
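A hedged usage sketch, calling `pathcopies()` from a small script against a local repository. The repository path and revision names are placeholders, and these are internal Mercurial modules whose APIs may change between releases:

```python
# Sketch: query copy information between two revisions of a local repo.
from mercurial import copies, hg, ui as uimod

repo = hg.repository(uimod.ui.load(), b'/path/to/repo')
x, y = repo[b'v1.0'], repo[b'tip']       # x should predate y here
for dst, src in sorted(copies.pathcopies(x, y).items()):
    print('%s <- %s' % (dst.decode(), src.decode()))
```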
598 628
599 629 def mergecopies(repo, c1, c2, base):
600 630 """
601 631 Finds moves and copies between context c1 and c2 that are relevant for
602 632 merging. 'base' will be used as the merge base.
603 633
604 634 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
605 635 files that were moved/copied in one merge parent and modified in another.
606 636 For example:
607 637
608 638 o ---> 4 another commit
609 639 |
610 640 | o ---> 3 commit that modifies a.txt
611 641 | /
612 642 o / ---> 2 commit that moves a.txt to b.txt
613 643 |/
614 644 o ---> 1 merge base
615 645
616 646 If we try to rebase revision 3 on revision 4, since there is no a.txt in
617 647 revision 4, and if the user has copytrace disabled, we print the following
618 648 message:
619 649
620 650 ```other changed <file> which local deleted```
621 651
622 652 Returns a tuple where:
623 653
624 654 "branch_copies" an instance of branch_copies.
625 655
626 656 "diverge" is a mapping of source name -> list of destination names
627 657 for divergent renames.
628 658
629 659 This function calls different copytracing algorithms based on config.
630 660 """
631 661 # avoid silly behavior for update from empty dir
632 662 if not c1 or not c2 or c1 == c2:
633 663 return branch_copies(), branch_copies(), {}
634 664
635 665 narrowmatch = c1.repo().narrowmatch()
636 666
637 667 # avoid silly behavior for parent -> working dir
638 668 if c2.node() is None and c1.node() == repo.dirstate.p1():
639 669 return (
640 670 branch_copies(_dirstatecopies(repo, narrowmatch)),
641 671 branch_copies(),
642 672 {},
643 673 )
644 674
645 675 copytracing = repo.ui.config(b'experimental', b'copytrace')
646 676 if stringutil.parsebool(copytracing) is False:
647 677 # stringutil.parsebool() returns None when it is unable to parse the
648 678 # value, so we only disable copytracing when it is explicitly set to False
649 679 return branch_copies(), branch_copies(), {}
650 680
651 681 if usechangesetcentricalgo(repo):
652 682 # The heuristics don't make sense when we need changeset-centric algos
653 683 return _fullcopytracing(repo, c1, c2, base)
654 684
655 685 # Copy trace disabling is explicitly below the node == p1 logic above
656 686 # because the logic above is required for a simple copy to be kept across a
657 687 # rebase.
658 688 if copytracing == b'heuristics':
659 689 # Do full copytracing if only non-public revisions are involved as
660 690 # that will be fast enough and will also cover the copies which could
661 691 # be missed by heuristics
662 692 if _isfullcopytraceable(repo, c1, base):
663 693 return _fullcopytracing(repo, c1, c2, base)
664 694 return _heuristicscopytracing(repo, c1, c2, base)
665 695 else:
666 696 return _fullcopytracing(repo, c1, c2, base)
667 697
668 698
669 699 def _isfullcopytraceable(repo, c1, base):
670 700 """ Checks that if base, source and destination are all no-public branches,
671 701 if yes let's use the full copytrace algorithm for increased capabilities
672 702 since it will be fast enough.
673 703
674 704 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
674 704 the number of changesets from c1 to base; if the number of changesets is
675 705 more than the limit, the full copytracing algorithm won't be used.
677 707 """
678 708 if c1.rev() is None:
679 709 c1 = c1.p1()
680 710 if c1.mutable() and base.mutable():
681 711 sourcecommitlimit = repo.ui.configint(
682 712 b'experimental', b'copytrace.sourcecommitlimit'
683 713 )
684 714 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
685 715 return commits < sourcecommitlimit
686 716 return False
687 717
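These knobs live under `[experimental]` in an hgrc. An illustrative configuration combining the copytrace settings discussed in this file (the values are examples, not recommendations; 100 is the documented default for `movecandidateslimit` further below):

```
[experimental]
copytrace = heuristics
copytrace.sourcecommitlimit = 100
copytrace.movecandidateslimit = 100
```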
688 718
689 719 def _checksinglesidecopies(
690 720 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
691 721 ):
692 722 if src not in m2:
693 723 # deleted on side 2
694 724 if src not in m1:
695 725 # renamed on side 1, deleted on side 2
696 726 renamedelete[src] = dsts1
697 727 elif src not in mb:
698 728 # Work around the "short-circuit to avoid issues with merge states"
699 729 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
700 730 # destination doesn't exist in y.
701 731 pass
702 732 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
703 733 return
704 734 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
705 735 # modified on side 2
706 736 for dst in dsts1:
707 737 copy[dst] = src
708 738
709 739
710 740 class branch_copies(object):
711 741 """Information about copies made on one side of a merge/graft.
712 742
713 743 "copy" is a mapping from destination name -> source name,
714 744 where source is in c1 and destination is in c2 or vice-versa.
715 745
716 746 "movewithdir" is a mapping from source name -> destination name,
717 747 where the file at source present in one context but not the other
718 748 needs to be moved to destination by the merge process, because the
719 749 other context moved the directory it is in.
720 750
721 751 "renamedelete" is a mapping of source name -> list of destination
722 752 names for files deleted in c1 that were renamed in c2 or vice-versa.
723 753
724 754 "dirmove" is a mapping of detected source dir -> destination dir renames.
725 755 This is needed for handling changes to new files previously grafted into
726 756 renamed directories.
727 757 """
728 758
729 759 def __init__(
730 760 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
731 761 ):
732 762 self.copy = {} if copy is None else copy
733 763 self.renamedelete = {} if renamedelete is None else renamedelete
734 764 self.dirmove = {} if dirmove is None else dirmove
735 765 self.movewithdir = {} if movewithdir is None else movewithdir
736 766
737 767 def __repr__(self):
738 768 return (
739 769 '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
740 770 % (self.copy, self.renamedelete, self.dirmove, self.movewithdir,)
741 771 )
742 772
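A minimal illustration of constructing and inspecting a `branch_copies` instance; the file names below are toy data:

```python
# Toy illustration: a branch_copies carrying one detected copy and one
# directory move; unspecified mappings default to empty dicts.
bc = branch_copies(
    copy={b'new/b.txt': b'a.txt'},   # destination -> source
    dirmove={b'old/': b'new/'},      # source dir -> destination dir
)
print(bc.copy)                          # {b'new/b.txt': b'a.txt'}
print(bc.renamedelete, bc.movewithdir)  # {} {}
```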
743 773
744 774 def _fullcopytracing(repo, c1, c2, base):
745 775 """ The full copytracing algorithm which finds all the new files that were
746 776 added from merge base up to the top commit and for each file it checks if
747 777 this file was copied from another file.
748 778
749 779 This is pretty slow when a lot of changesets are involved but will track all
750 780 the copies.
751 781 """
752 782 m1 = c1.manifest()
753 783 m2 = c2.manifest()
754 784 mb = base.manifest()
755 785
756 786 copies1 = pathcopies(base, c1)
757 787 copies2 = pathcopies(base, c2)
758 788
759 789 if not (copies1 or copies2):
760 790 return branch_copies(), branch_copies(), {}
761 791
762 792 inversecopies1 = {}
763 793 inversecopies2 = {}
764 794 for dst, src in copies1.items():
765 795 inversecopies1.setdefault(src, []).append(dst)
766 796 for dst, src in copies2.items():
767 797 inversecopies2.setdefault(src, []).append(dst)
768 798
769 799 copy1 = {}
770 800 copy2 = {}
771 801 diverge = {}
772 802 renamedelete1 = {}
773 803 renamedelete2 = {}
774 804 allsources = set(inversecopies1) | set(inversecopies2)
775 805 for src in allsources:
776 806 dsts1 = inversecopies1.get(src)
777 807 dsts2 = inversecopies2.get(src)
778 808 if dsts1 and dsts2:
779 809 # copied/renamed on both sides
780 810 if src not in m1 and src not in m2:
781 811 # renamed on both sides
782 812 dsts1 = set(dsts1)
783 813 dsts2 = set(dsts2)
784 814 # If there's some overlap in the rename destinations, we
785 815 # consider it not divergent. For example, if side 1 copies 'a'
786 816 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
787 817 # and 'd' and deletes 'a'.
788 818 if dsts1 & dsts2:
789 819 for dst in dsts1 & dsts2:
790 820 copy1[dst] = src
791 821 copy2[dst] = src
792 822 else:
793 823 diverge[src] = sorted(dsts1 | dsts2)
794 824 elif src in m1 and src in m2:
795 825 # copied on both sides
796 826 dsts1 = set(dsts1)
797 827 dsts2 = set(dsts2)
798 828 for dst in dsts1 & dsts2:
799 829 copy1[dst] = src
800 830 copy2[dst] = src
801 831 # TODO: Handle cases where it was renamed on one side and copied
802 832 # on the other side
803 833 elif dsts1:
804 834 # copied/renamed only on side 1
805 835 _checksinglesidecopies(
806 836 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
807 837 )
808 838 elif dsts2:
809 839 # copied/renamed only on side 2
810 840 _checksinglesidecopies(
811 841 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
812 842 )
813 843
814 844 # find interesting file sets from manifests
815 845 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
816 846 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
817 847 u1 = sorted(addedinm1 - addedinm2)
818 848 u2 = sorted(addedinm2 - addedinm1)
819 849
820 850 header = b" unmatched files in %s"
821 851 if u1:
822 852 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
823 853 if u2:
824 854 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
825 855
826 856 if repo.ui.debugflag:
827 857 renamedeleteset = set()
828 858 divergeset = set()
829 859 for dsts in diverge.values():
830 860 divergeset.update(dsts)
831 861 for dsts in renamedelete1.values():
832 862 renamedeleteset.update(dsts)
833 863 for dsts in renamedelete2.values():
834 864 renamedeleteset.update(dsts)
835 865
836 866 repo.ui.debug(
837 867 b" all copies found (* = to merge, ! = divergent, "
838 868 b"% = renamed and deleted):\n"
839 869 )
840 870 for side, copies in ((b"local", copies1), (b"remote", copies2)):
841 871 if not copies:
842 872 continue
843 873 repo.ui.debug(b" on %s side:\n" % side)
844 874 for f in sorted(copies):
845 875 note = b""
846 876 if f in copy1 or f in copy2:
847 877 note += b"*"
848 878 if f in divergeset:
849 879 note += b"!"
850 880 if f in renamedeleteset:
851 881 note += b"%"
852 882 repo.ui.debug(
853 883 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
854 884 )
855 885 del renamedeleteset
856 886 del divergeset
857 887
858 888 repo.ui.debug(b" checking for directory renames\n")
859 889
860 890 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
861 891 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
862 892
863 893 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
864 894 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
865 895
866 896 return branch_copies1, branch_copies2, diverge
867 897
868 898
869 899 def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
870 900 """Finds moved directories and files that should move with them.
871 901
872 902 ctx: the context for one of the sides
873 903 copy: files copied on the same side (as ctx)
874 904 fullcopy: files copied on the same side (as ctx), including those that
875 905 merge.manifestmerge() won't care about
876 906 addedfiles: added files on the other side (compared to ctx)
877 907 """
878 908 # generate a directory move map
879 909 d = ctx.dirs()
880 910 invalid = set()
881 911 dirmove = {}
882 912
883 913 # examine each file copy for a potential directory move, which is
884 914 # when all the files in a directory are moved to a new directory
885 915 for dst, src in pycompat.iteritems(fullcopy):
886 916 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
887 917 if dsrc in invalid:
888 918 # already seen to be uninteresting
889 919 continue
890 920 elif dsrc in d and ddst in d:
891 921 # directory wasn't entirely moved locally
892 922 invalid.add(dsrc)
893 923 elif dsrc in dirmove and dirmove[dsrc] != ddst:
894 924 # files from the same directory moved to two different places
895 925 invalid.add(dsrc)
896 926 else:
897 927 # looks good so far
898 928 dirmove[dsrc] = ddst
899 929
900 930 for i in invalid:
901 931 if i in dirmove:
902 932 del dirmove[i]
903 933 del d, invalid
904 934
905 935 if not dirmove:
906 936 return {}, {}
907 937
908 938 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
909 939
910 940 for d in dirmove:
911 941 repo.ui.debug(
912 942 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
913 943 )
914 944
915 945 movewithdir = {}
916 946 # check unaccounted nonoverlapping files against directory moves
917 947 for f in addedfiles:
918 948 if f not in fullcopy:
919 949 for d in dirmove:
920 950 if f.startswith(d):
921 951 # new file added in a directory that was moved, move it
922 952 df = dirmove[d] + f[len(d) :]
923 953 if df not in copy:
924 954 movewithdir[f] = df
925 955 repo.ui.debug(
926 956 b" pending file src: '%s' -> dst: '%s'\n"
927 957 % (f, df)
928 958 )
929 959 break
930 960
931 961 return dirmove, movewithdir
932 962
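A self-contained sketch of the directory-move detection above, on toy data (`posixpath.dirname` stands in for `pathutil.dirname`, and `existing_dirs` stands in for the context's directory set):

```python
# Sketch: a directory counts as moved only if *all* copies out of it agree
# on one destination directory and the source directory no longer exists.
import posixpath

fullcopy = {'new/x': 'old/x', 'new/y': 'old/y'}   # dst -> src
existing_dirs = {'new'}                           # 'old' is gone in this ctx

dirmove, invalid = {}, set()
for dst, src in fullcopy.items():
    dsrc, ddst = posixpath.dirname(src), posixpath.dirname(dst)
    if dsrc in invalid:
        continue                    # already seen to be uninteresting
    elif dsrc in existing_dirs and ddst in existing_dirs:
        invalid.add(dsrc)           # source dir wasn't entirely moved
    elif dsrc in dirmove and dirmove[dsrc] != ddst:
        invalid.add(dsrc)           # moved to two different places
    else:
        dirmove[dsrc] = ddst
for i in invalid:
    dirmove.pop(i, None)

print(dirmove)  # {'old': 'new'}
```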
933 963
934 964 def _heuristicscopytracing(repo, c1, c2, base):
935 965 """ Fast copytracing using filename heuristics
936 966
937 967 Assumes that moves or renames are of the following two types:
938 968
939 969 1) Inside a directory only (same directory name but different filenames)
940 970 2) Move from one directory to another
941 971 (same filenames but different directory names)
942 972
943 973 Works only when there are no merge commits in the "source branch".
944 974 The source branch is the commits from base up to c2, not including base.
945 975
946 976 If merge is involved it falls back to _fullcopytracing().
947 977
948 978 Can be used by setting the following config:
949 979
950 980 [experimental]
951 981 copytrace = heuristics
952 982
953 983 In some cases the copy/move candidates found by heuristics can be very large
954 984 in number and that will make the algorithm slow. The number of possible
955 985 candidates to check can be limited by using the config
956 986 `experimental.copytrace.movecandidateslimit` which defaults to 100.
957 987 """
958 988
959 989 if c1.rev() is None:
960 990 c1 = c1.p1()
961 991 if c2.rev() is None:
962 992 c2 = c2.p1()
963 993
964 994 changedfiles = set()
965 995 m1 = c1.manifest()
966 996 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
967 997 # If base is not in c2 branch, we switch to fullcopytracing
968 998 repo.ui.debug(
969 999 b"switching to full copytracing as base is not "
970 1000 b"an ancestor of c2\n"
971 1001 )
972 1002 return _fullcopytracing(repo, c1, c2, base)
973 1003
974 1004 ctx = c2
975 1005 while ctx != base:
976 1006 if len(ctx.parents()) == 2:
977 1007 # To keep things simple let's not handle merges
978 1008 repo.ui.debug(b"switching to full copytracing because of merges\n")
979 1009 return _fullcopytracing(repo, c1, c2, base)
980 1010 changedfiles.update(ctx.files())
981 1011 ctx = ctx.p1()
982 1012
983 1013 copies2 = {}
984 1014 cp = _forwardcopies(base, c2)
985 1015 for dst, src in pycompat.iteritems(cp):
986 1016 if src in m1:
987 1017 copies2[dst] = src
988 1018
989 1019 # file is missing if it isn't present in the destination, but is present in
990 1020 # the base and present in the source.
991 1021 # Presence in the base is important to exclude added files, presence in the
992 1022 # source is important to exclude removed files.
993 1023 filt = lambda f: f not in m1 and f in base and f in c2
994 1024 missingfiles = [f for f in changedfiles if filt(f)]
995 1025
996 1026 copies1 = {}
997 1027 if missingfiles:
998 1028 basenametofilename = collections.defaultdict(list)
999 1029 dirnametofilename = collections.defaultdict(list)
1000 1030
1001 1031 for f in m1.filesnotin(base.manifest()):
1002 1032 basename = os.path.basename(f)
1003 1033 dirname = os.path.dirname(f)
1004 1034 basenametofilename[basename].append(f)
1005 1035 dirnametofilename[dirname].append(f)
1006 1036
1007 1037 for f in missingfiles:
1008 1038 basename = os.path.basename(f)
1009 1039 dirname = os.path.dirname(f)
1010 1040 samebasename = basenametofilename[basename]
1011 1041 samedirname = dirnametofilename[dirname]
1012 1042 movecandidates = samebasename + samedirname
1013 1043 # f is guaranteed to be present in c2, that's why
1014 1044 # c2.filectx(f) won't fail
1015 1045 f2 = c2.filectx(f)
1016 1046 # we can have a lot of candidates which can slow down the heuristics
1017 1047 # config value to limit the number of candidates moves to check
1018 1048 maxcandidates = repo.ui.configint(
1019 1049 b'experimental', b'copytrace.movecandidateslimit'
1020 1050 )
1021 1051
1022 1052 if len(movecandidates) > maxcandidates:
1023 1053 repo.ui.status(
1024 1054 _(
1025 1055 b"skipping copytracing for '%s', more "
1026 1056 b"candidates than the limit: %d\n"
1027 1057 )
1028 1058 % (f, len(movecandidates))
1029 1059 )
1030 1060 continue
1031 1061
1032 1062 for candidate in movecandidates:
1033 1063 f1 = c1.filectx(candidate)
1034 1064 if _related(f1, f2):
1035 1065 # if there are a few related copies then we'll merge
1036 1066 # changes into all of them. This matches the behaviour
1037 1067 # of upstream copytracing
1038 1068 copies1[candidate] = f
1039 1069
1040 1070 return branch_copies(copies1), branch_copies(copies2), {}
1041 1071
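The candidate search above indexes new files by basename and by dirname, so a missing file only needs to be compared against files that share one of the two. A standalone sketch of that indexing, with invented file names:

```python
# Sketch: index added files by basename and dirname, then gather move
# candidates for a missing file, as the heuristics above do.
import collections
import os

added_in_c1 = ['src/app/main.py', 'lib/util.py']
missing = 'app/main.py'     # present in base and c2, gone from c1

basenametofilename = collections.defaultdict(list)
dirnametofilename = collections.defaultdict(list)
for f in added_in_c1:
    basenametofilename[os.path.basename(f)].append(f)
    dirnametofilename[os.path.dirname(f)].append(f)

candidates = (
    basenametofilename[os.path.basename(missing)]
    + dirnametofilename[os.path.dirname(missing)]
)
print(candidates)  # ['src/app/main.py']: shares the basename 'main.py'
```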
1042 1072
1043 1073 def _related(f1, f2):
1044 1074 """return True if f1 and f2 filectx have a common ancestor
1045 1075
1046 1076 Walk back to common ancestor to see if the two files originate
1047 1077 from the same file. Since workingfilectx's rev() is None it messes
1048 1078 up the integer comparison logic, hence the pre-step check for
1049 1079 None (f1 and f2 can only be workingfilectx's initially).
1050 1080 """
1051 1081
1052 1082 if f1 == f2:
1053 1083 return True # a match
1054 1084
1055 1085 g1, g2 = f1.ancestors(), f2.ancestors()
1056 1086 try:
1057 1087 f1r, f2r = f1.linkrev(), f2.linkrev()
1058 1088
1059 1089 if f1r is None:
1060 1090 f1 = next(g1)
1061 1091 if f2r is None:
1062 1092 f2 = next(g2)
1063 1093
1064 1094 while True:
1065 1095 f1r, f2r = f1.linkrev(), f2.linkrev()
1066 1096 if f1r > f2r:
1067 1097 f1 = next(g1)
1068 1098 elif f2r > f1r:
1069 1099 f2 = next(g2)
1070 1100 else: # f1 and f2 point to files in the same linkrev
1071 1101 return f1 == f2 # true if they point to the same file
1072 1102 except StopIteration:
1073 1103 return False
1074 1104
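The loop above is a two-pointer walk: whichever side currently has the higher linkrev steps back to its next ancestor, until both sides land on the same linkrev and can be compared directly. A sketch on toy ancestor chains (each chain is invented data, newest entry first):

```python
# Sketch: two-pointer walk over ancestor chains ordered by decreasing linkrev.
# Each chain yields (linkrev, file_id) pairs, newest first.
chain1 = iter([(9, 'f1@9'), (4, 'common@4')])
chain2 = iter([(7, 'f2@7'), (4, 'common@4')])

def related(g1, g2):
    try:
        f1, f2 = next(g1), next(g2)
        while True:
            if f1[0] > f2[0]:
                f1 = next(g1)        # side 1 is newer: step it back
            elif f2[0] > f1[0]:
                f2 = next(g2)        # side 2 is newer: step it back
            else:
                return f1 == f2      # same linkrev: same file?
    except StopIteration:
        return False                 # ran out of ancestors: unrelated

print(related(chain1, chain2))  # True
```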
1075 1105
1076 1106 def graftcopies(wctx, ctx, base):
1077 1107 """reproduce copies between base and ctx in the wctx
1078 1108
1079 1109 Unlike mergecopies(), this function will only consider copies between base
1080 1110 and ctx; it will ignore copies between base and wctx. Also unlike
1081 1111 mergecopies(), this function will apply copies to the working copy (instead
1082 1112 of just returning information about the copies). That makes it cheaper
1083 1113 (especially in the common case of base==ctx.p1()) and useful also when
1084 1114 experimental.copytrace=off.
1085 1115
1086 1116 merge.update() will have already marked most copies, but it will only
1087 1117 mark copies if it thinks the source files are related (see
1088 1118 merge._related()). It will also not mark copies if the file wasn't modified
1089 1119 on the local side. This function adds the copies that were "missed"
1090 1120 by merge.update().
1091 1121 """
1092 1122 new_copies = pathcopies(base, ctx)
1093 1123 _filter(wctx.p1(), wctx, new_copies)
1094 1124 for dst, src in pycompat.iteritems(new_copies):
1095 1125 wctx[dst].markcopied(src)