##// END OF EJS Templates
copies: simplify the handling of merges...
marmoute -
r43546:32187ae9 default
parent child Browse files
Show More
@@ -1,927 +1,931 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16
17 17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18 18
19 19 from . import (
20 20 error,
21 21 match as matchmod,
22 22 node,
23 23 pathutil,
24 24 pycompat,
25 25 util,
26 26 )
27 27
28 28 from .revlogutils import sidedata as sidedatamod
29 29
30 30 from .utils import stringutil
31 31
32 32
33 33 def _filter(src, dst, t):
34 34 """filters out invalid copies after chaining"""
35 35
36 36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 38 # in the following table (not including trivial cases). For example, case 2
39 39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 40 # then was renamed between 'mid' and 'dst'.
41 41 #
42 42 # case src mid dst result
43 43 # 1 x y - -
44 44 # 2 x y y x->y
45 45 # 3 x y x -
46 46 # 4 x y z x->z
47 47 # 5 - x y -
48 48 # 6 x x y x->y
49 49 #
50 50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 52 # between 5 and 6, so it includes all cases in its result.
53 53 # Cases 1, 3, and 5 are then removed by _filter().
54 54
55 55 for k, v in list(t.items()):
56 56 # remove copies from files that didn't exist
57 57 if v not in src:
58 58 del t[k]
59 59 # remove criss-crossed copies
60 60 elif k in src and v in dst:
61 61 del t[k]
62 62 # remove copies to files that were then removed
63 63 elif k not in dst:
64 64 del t[k]
65 65
66 66
67 67 def _chain(a, b):
68 68 """chain two sets of copies 'a' and 'b'"""
69 69 t = a.copy()
70 70 for k, v in pycompat.iteritems(b):
71 71 if v in t:
72 72 t[k] = t[v]
73 73 else:
74 74 t[k] = v
75 75 return t
76 76
77 77
78 78 def _tracefile(fctx, am, basemf):
79 79 """return file context that is the ancestor of fctx present in ancestor
80 80 manifest am
81 81
82 82 Note: we used to try and stop after a given limit, however checking if that
83 83 limit is reached turned out to be very expensive. we are better off
84 84 disabling that feature."""
85 85
86 86 for f in fctx.ancestors():
87 87 path = f.path()
88 88 if am.get(path, None) == f.filenode():
89 89 return path
90 90 if basemf and basemf.get(path, None) == f.filenode():
91 91 return path
92 92
93 93
94 94 def _dirstatecopies(repo, match=None):
95 95 ds = repo.dirstate
96 96 c = ds.copies().copy()
97 97 for k in list(c):
98 98 if ds[k] not in b'anm' or (match and not match(k)):
99 99 del c[k]
100 100 return c
101 101
102 102
103 103 def _computeforwardmissing(a, b, match=None):
104 104 """Computes which files are in b but not a.
105 105 This is its own function so extensions can easily wrap this call to see what
106 106 files _forwardcopies is about to process.
107 107 """
108 108 ma = a.manifest()
109 109 mb = b.manifest()
110 110 return mb.filesnotin(ma, match=match)
111 111
112 112
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage always implies changeset-centric copy data
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
120 120
121 121
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        # copy metadata is read from the changesets, not the filelogs
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        # single-parent child of 'a': only its touched files can be new
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # one shared ancestry context is attached to every traced fctx below,
    # so each file's ancestor walk can reuse it
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            # f in 'b' was traced back to opath in 'a': record the copy
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
179 179
180 180
def _changesetforwardcopies(a, b, match):
    """find {dst@b: src@a} copies using changeset-stored copy metadata,
    where 'a' is an ancestor of 'b'

    Walks the revisions between a and b from the roots downward.  Pending
    per-revision copies dicts live in 'all_copies', keyed by revision
    number, so that a merge revision's dict is combined once both of its
    relevant parents have been processed.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo()
    children = {}
    cl = repo.changelog
    # revisions on the path from a to b (excluding a itself)
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    # roots: parents of missing revisions that are outside the missing set
    roots = set(children) - set(missingrevs)
    work = list(roots)
    all_copies = {r: {} for r in roots}
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        # process revisions in topological (ascending revnum) order
        r = heapq.heappop(work)
        copies = all_copies.pop(r)
        if r == b.rev():
            return copies
        for i, c in enumerate(children[r]):
            childctx = repo[c]
            if r == childctx.p1().rev():
                parent = 1
                childcopies = childctx.p1copies()
            else:
                assert r == childctx.p2().rev()
                parent = 2
                childcopies = childctx.p2copies()
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            # a file removed in the child can no longer be a copy destination
            for f in childctx.filesremoved():
                if f in newcopies:
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                # first parent to reach c: queue c for processing
                heapq.heappush(work, c)
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    # b.rev() is reachable from the roots, so the loop above must return;
    # falling through here indicates a traversal bug
    assert False
250 254
251 255
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is None:
        # working copy: trace up to its committed parent, then overlay the
        # uncommitted copies recorded in the dirstate
        committed = _committedforwardcopies(a, b.p1(), base, match)
        return _chain(committed, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, base, match)
266 270
267 271
def _backwardrenames(a, b, match):
    """find renames from 'b' back to its ancestor 'a'"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    renames = {}
    for new, old in sorted(pycompat.iteritems(forward)):
        if match and not match(old):
            continue
        if old in a:
            # still present under the old name in 'a': a copy, not a rename
            continue
        renames[old] = new
    return renames
288 292
289 293
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        # trivial comparison: nothing can have been copied
        return {}
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: walk copies forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        # y is an ancestor of x: walk renames backward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated contexts: go back to the common ancestor, then forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    # drop chained entries that are invalid for the x -> y comparison
    _filter(x, y, copies)
    return copies
324 328
325 329
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
404 408
405 409
406 410 def _isfullcopytraceable(repo, c1, base):
407 411 """ Checks that if base, source and destination are all no-public branches,
408 412 if yes let's use the full copytrace algorithm for increased capabilities
409 413 since it will be fast enough.
410 414
411 415 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
412 416 number of changesets from c1 to base such that if number of changesets are
413 417 more than the limit, full copytracing algorithm won't be used.
414 418 """
415 419 if c1.rev() is None:
416 420 c1 = c1.p1()
417 421 if c1.mutable() and base.mutable():
418 422 sourcecommitlimit = repo.ui.configint(
419 423 b'experimental', b'copytrace.sourcecommitlimit'
420 424 )
421 425 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
422 426 return commits < sourcecommitlimit
423 427 return False
424 428
425 429
426 430 def _checksinglesidecopies(
427 431 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
428 432 ):
429 433 if src not in m2:
430 434 # deleted on side 2
431 435 if src not in m1:
432 436 # renamed on side 1, deleted on side 2
433 437 renamedelete[src] = dsts1
434 438 elif m2[src] != mb[src]:
435 439 if not _related(c2[src], base[src]):
436 440 return
437 441 # modified on side 2
438 442 for dst in dsts1:
439 443 if dst not in m2:
440 444 # dst not added on side 2 (handle as regular
441 445 # "both created" case in manifestmerge otherwise)
442 446 copy[dst] = src
443 447
444 448
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # per-side {dst: src} maps from the merge base
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert them into {src: [dst, ...]} so divergence can be detected
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flat destination sets, used only for the debug annotations below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so prefix tests below are unambiguous
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
609 613
610 614
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working-copy contexts have no revision: use their first parent instead
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base and c2 (exclusive of base)
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index files added on the c1 side by basename and by directory,
        # matching the two rename shapes described in the docstring
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
718 722
719 723
720 724 def _related(f1, f2):
721 725 """return True if f1 and f2 filectx have a common ancestor
722 726
723 727 Walk back to common ancestor to see if the two files originate
724 728 from the same file. Since workingfilectx's rev() is None it messes
725 729 up the integer comparison logic, hence the pre-step check for
726 730 None (f1 and f2 can only be workingfilectx's initially).
727 731 """
728 732
729 733 if f1 == f2:
730 734 return True # a match
731 735
732 736 g1, g2 = f1.ancestors(), f2.ancestors()
733 737 try:
734 738 f1r, f2r = f1.linkrev(), f2.linkrev()
735 739
736 740 if f1r is None:
737 741 f1 = next(g1)
738 742 if f2r is None:
739 743 f2 = next(g2)
740 744
741 745 while True:
742 746 f1r, f2r = f1.linkrev(), f2.linkrev()
743 747 if f1r > f2r:
744 748 f1 = next(g1)
745 749 elif f2r > f1r:
746 750 f2 = next(g2)
747 751 else: # f1 and f2 point to files in the same linkrev
748 752 return f1 == f2 # true if they point to the same file
749 753 except StopIteration:
750 754 return False
751 755
752 756
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    tracing = ctraceconfig == b'heuristics' or bctrace or bctrace is None
    if skiprev is not None and tracing:
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude or dst not in wctx:
            continue
        wctx[dst].markcopied(src)
777 781
778 782
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset
    """
    # a touched file absent from every parent was added here
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
787 791
788 792
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    # a touched file missing from the changeset itself was removed
    return [f for f in ctx.files() if f not in ctx]
797 801
798 802
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    p1copies, p2copies = {}, {}
    for dst in ctx.files():
        # skip files outside the narrowspec or removed in this changeset
        if not narrowmatch(dst):
            continue
        if dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        # attribute the copy to whichever parent holds the matching source
        if src in parent1 and parent1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in parent2 and parent2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
823 827
824 828
def encodecopies(files, copies):
    """encode 'copies' as newline-separated "<index>\\0<source>" pairs,
    where <index> points into the sorted 'files' list"""
    encoded = [
        b'%d\0%s' % (i, copies[dst])
        for i, dst in enumerate(files)
        if dst in copies
    ]
    if len(encoded) != len(copies):
        # some destination was not found in 'files': the caller passed
        # inconsistent data
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(encoded)
835 839
836 840
def decodecopies(files, data):
    """decode a blob produced by encodecopies() back into a {dst: src} dict

    Returns None when the data does not parse.
    """
    try:
        copies = {}
        if data:
            for line in data.split(b'\n'):
                strindex, src = line.split(b'\0')
                copies[files[int(strindex)]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
852 856
853 857
def encodefileindices(files, subset):
    """encode 'subset' as newline-separated indices into 'files'"""
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % i for i, f in enumerate(files) if f in wanted
    )
861 865
862 866
def decodefileindices(files, data):
    """decode a blob produced by encodefileindices() into a file list

    Returns None when the data is malformed or an index is out of range.
    """
    try:
        subset = []
        if data:
            for strindex in data.split(b'\n'):
                i = int(strindex)
                if not (0 <= i < len(files)):
                    return None
                subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
878 882
879 883
def _getsidedata(srcrepo, rev):
    """compute the copy-related sidedata entries for revision 'rev'"""
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([filescopies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        # encode each piece against the sorted file list and keep only the
        # non-empty blobs
        encoded = {
            sidedatamod.SD_P1COPIES: encodecopies(sortedfiles, p1copies),
            sidedatamod.SD_P2COPIES: encodecopies(sortedfiles, p2copies),
            sidedatamod.SD_FILESADDED: encodefileindices(
                sortedfiles, filesadded
            ),
            sidedatamod.SD_FILESREMOVED: encodefileindices(
                sortedfiles, filesremoved
            ),
        }
        sidedata = {key: blob for key, blob in encoded.items() if blob}
    return sidedata
902 906
903 907
def getsidedataadder(srcrepo, destrepo):
    """return a sidedata companion that adds copy sidedata to changelog revs"""

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        # only the changelog carries a 'filteredrevs' attribute
        if util.safehasattr(revlog, 'filteredrevs'):
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion
912 916
913 917
def getsidedataremover(srcrepo, destrepo):
    """return a sidedata companion that strips copy sidedata from changelog revs"""

    def sidedatacompanion(revlog, rev):
        f = ()
        # only the changelog carries a 'filteredrevs' attribute; strip only
        # revisions that actually have sidedata attached
        if util.safehasattr(revlog, 'filteredrevs') and (
            revlog.flags(rev) & REVIDX_SIDEDATA
        ):
            f = (
                sidedatamod.SD_P1COPIES,
                sidedatamod.SD_P2COPIES,
                sidedatamod.SD_FILESADDED,
                sidedatamod.SD_FILESREMOVED,
            )
        return False, f, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now