##// END OF EJS Templates
pathcopies: give up any optimization based on `introrev`...
marmoute -
r43469:c16fe77e default
parent child Browse files
Show More
@@ -1,1009 +1,1011 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16
17 17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18 18
19 19 from . import (
20 20 error,
21 21 match as matchmod,
22 22 node,
23 23 pathutil,
24 24 pycompat,
25 25 util,
26 26 )
27 27
28 28 from .revlogutils import sidedata as sidedatamod
29 29
30 30 from .utils import stringutil
31 31
32 32
def _findlimit(repo, ctxa, ctxb):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.

    ctxa/ctxb may be working-directory contexts (rev() is None); those are
    represented internally by the node.wdirrev sentinel and walked through
    their parents.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    # on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    # - add unseen parents to side map
    # - clear side of any parent that has children on different sides
    # - track number of interesting revs that might still be on a side
    # - track the lowest interesting rev seen
    # - quit when interesting revs is zero

    cl = repo.changelog
    wdirparents = None
    a = ctxa.rev()
    b = ctxb.rev()
    if a is None:
        # working directory: remember real parents, use the sentinel rev
        wdirparents = (ctxa.p1(), ctxa.p2())
        a = node.wdirrev
    if b is None:
        # at most one of the two contexts may be the working directory
        assert not wdirparents
        wdirparents = (ctxb.p1(), ctxb.p2())
        b = node.wdirrev

    side = {a: -1, b: 1}
    # revs are negated so the min-heap pops the *highest* revision first,
    # giving a reverse-topological walk
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = node.wdirrev

    while interesting:
        r = -(heapq.heappop(visit))
        if r == node.wdirrev:
            parents = [pctx.rev() for pctx in wdirparents]
        else:
            parents = cl.parentrevs(r)
            if parents[1] == node.nullrev:
                parents = parents[:1]
        for p in parents:
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
        if side[r]:
            limit = r  # lowest rev visited
            interesting -= 1

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
116 116
117 117
118 118 def _filter(src, dst, t):
119 119 """filters out invalid copies after chaining"""
120 120
121 121 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
122 122 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
123 123 # in the following table (not including trivial cases). For example, case 2
124 124 # is where a file existed in 'src' and remained under that name in 'mid' and
125 125 # then was renamed between 'mid' and 'dst'.
126 126 #
127 127 # case src mid dst result
128 128 # 1 x y - -
129 129 # 2 x y y x->y
130 130 # 3 x y x -
131 131 # 4 x y z x->z
132 132 # 5 - x y -
133 133 # 6 x x y x->y
134 134 #
135 135 # _chain() takes care of chaining the copies in 'a' and 'b', but it
136 136 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
137 137 # between 5 and 6, so it includes all cases in its result.
138 138 # Cases 1, 3, and 5 are then removed by _filter().
139 139
140 140 for k, v in list(t.items()):
141 141 # remove copies from files that didn't exist
142 142 if v not in src:
143 143 del t[k]
144 144 # remove criss-crossed copies
145 145 elif k in src and v in dst:
146 146 del t[k]
147 147 # remove copies to files that were then removed
148 148 elif k not in dst:
149 149 del t[k]
150 150
151 151
def _chain(a, b):
    """chain two sets of copies 'a' and 'b'"""
    chained = dict(a)
    for dest, source in b.items():
        # when the source of a copy in 'b' is itself a copy target in 'a',
        # collapse the chain so the result points at the ultimate origin
        chained[dest] = chained.get(source, source)
    return chained
161 161
162 162
163 163 def _tracefile(fctx, am, basemf, limit):
164 164 """return file context that is the ancestor of fctx present in ancestor
165 manifest am, stopping after the first ancestor lower than limit"""
165 manifest am
166
167 Note: we used to try and stop after a given limit, however checking if that
168 limit is reached turned out to be very expensive. we are better off
169 disabling that feature."""
166 170
167 171 for f in fctx.ancestors():
168 172 path = f.path()
169 173 if am.get(path, None) == f.filenode():
170 174 return path
171 175 if basemf and basemf.get(path, None) == f.filenode():
172 176 return path
173 if not f.isintroducedafter(limit):
174 return None
175 177
176 178
177 179 def _dirstatecopies(repo, match=None):
178 180 ds = repo.dirstate
179 181 c = ds.copies().copy()
180 182 for k in list(c):
181 183 if ds[k] not in b'anm' or (match and not match(k)):
182 184 del c[k]
183 185 return c
184 186
185 187
186 188 def _computeforwardmissing(a, b, match=None):
187 189 """Computes which files are in b but not a.
188 190 This is its own function so extensions can easily wrap this call to see what
189 191 files _forwardcopies is about to process.
190 192 """
191 193 ma = a.manifest()
192 194 mb = b.manifest()
193 195 return mb.filesnotin(ma, match=match)
194 196
195 197
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage always implies the changeset-centric algorithm
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    # otherwise honour the experimental read-from configuration
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
203 205
204 206
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst@b: src@a} mapping of detected copies.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    limit = _findlimit(repo, a, b)
    if debug:
        dbg(b'debug.copies: search limit: %d\n' % limit)
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared ancestry context speeds up repeated ancestor walks below
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf, limit)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
265 267
266 268
def _changesetforwardcopies(a, b, match):
    """changeset-centric version of _committedforwardcopies()

    Walks the changesets between a and b, combining the per-changeset
    p1copies/p2copies data, and returns a {dst@b: src@a} mapping.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo()
    children = {}
    cl = repo.changelog
    # revisions on the b side only; copies must be traced through them
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    roots = set(children) - set(missingrevs)
    # 'work' contains 3-tuples of a (revision number, parent number, copies).
    # The parent number is only used for knowing which parent the copies dict
    # came from.
    # NOTE: To reduce costly copying the 'copies' dicts, we reuse the same
    # instance for *one* of the child nodes (the last one). Once an instance
    # has been put on the queue, it is thus no longer safe to modify it.
    # Conversely, it *is* safe to modify an instance popped off the queue.
    work = [(r, 1, {}) for r in roots]
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        r, i1, copies = heapq.heappop(work)
        if work and work[0][0] == r:
            # We are tracing copies from both parents
            r, i2, copies2 = heapq.heappop(work)
            for dst, src in copies2.items():
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                if dst not in copies:
                    # If it was copied on the p1 side, leave it as copied from
                    # that side, even if it was also copied on the p2 side.
                    copies[dst] = copies2[dst]
        if r == b.rev():
            return copies
        for i, c in enumerate(children[r]):
            childctx = repo[c]
            if r == childctx.p1().rev():
                parent = 1
                childcopies = childctx.p1copies()
            else:
                assert r == childctx.p2().rev()
                parent = 2
                childcopies = childctx.p2copies()
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            for f in childctx.filesremoved():
                if f in newcopies:
                    del newcopies[f]
            heapq.heappush(work, (c, parent, newcopies))
    # b.rev() is reachable from every root, so the loop must return
    assert False
336 338
337 339
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b

    `base` defaults to `a` and is the merge base used when validating
    copies; `match` narrows the set of considered files.
    """

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        # trace committed copies up to b's first parent...
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies
352 354
353 355
def _backwardrenames(a, b, match):
    """find renames from descendant b back to ancestor a

    Returns a {src@b: dst@a} mapping obtained by inverting the forward
    copies; plain copies (source still present in a) are dropped so only
    true renames remain. Returns {} when copytracing is disabled.
    """
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
374 376
375 377
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Depending on the ancestry relationship between x and y, this searches
    forward (x is an ancestor of y), backward (y is an ancestor of x), or
    combined through their common ancestor.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        # walk back from x to the ancestor, then forward to y
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    # drop copies invalidated by the chaining (see _filter)
    _filter(x, y, copies)
    return copies
410 412
411 413
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o ---> 4 another commit
    |
    | o ---> 3 commit that modifies a.txt
    | /
    o / ---> 2 commit that moves a.txt to b.txt
    |/
    o ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
490 492
491 493
def _isfullcopytraceable(repo, c1, base):
    """ Checks that if base, source and destination are all no-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    # working directory: judge by its first parent instead
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False
510 512
511 513
def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    """handle a copy/rename of `src` that happened on only one side

    `dsts1` are the copy destinations on side 1; m1/m2/mb are the side-1,
    side-2 and base manifests. Updates the `copy` and `renamedelete`
    mappings in place.
    """
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif m2[src] != mb[src]:
        # bail out unless the files actually share ancestry
        if not _related(c2[src], base[src]):
            return
        # modified on side 2
        for dst in dsts1:
            if dst not in m2:
                # dst not added on side 2 (handle as regular
                # "both created" case in manifestmerge otherwise)
                copy[dst] = src
529 531
530 532
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns the same five dicts as mergecopies().
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert to {source: [destinations]} so both sides can be compared per
    # copy source
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
695 697
696 698
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working directory: operate on the first parent instead
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index new files on the c1 side by basename and by directory
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
804 806
805 807
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        # step working-copy contexts back to their first committed ancestor
        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        # lockstep walk: always advance the side with the higher linkrev
        # until the two sides meet in the same linkrev
        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        # one side ran out of ancestors: no common origin
        return False
837 839
838 840
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            continue
        # only mark files that actually exist in the working context
        if dst in wctx:
            wctx[dst].markcopied(src)
863 865
864 866
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset
    """
    parents = ctx.parents()
    # a touched file is "added" when no parent knows about it
    return [f for f in ctx.files() if not any(f in p for p in parents)]
873 875
874 876
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    # a file is "removed" when it is touched by the changeset but no
    # longer present in it
    return [f for f in ctx.files() if f not in ctx]
883 885
884 886
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    copiesp1 = {}
    copiesp2 = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    inside = ctx._repo.narrowmatch()
    for dest in ctx.files():
        # ignore files outside the narrowspec and files not present in ctx
        if not inside(dest) or dest not in ctx:
            continue
        rename = ctx[dest].renamed()
        if not rename:
            continue
        source, sourcenode = rename
        # attribute the copy to whichever parent actually holds that
        # exact file revision
        if source in parent1 and parent1[source].filenode() == sourcenode:
            copiesp1[dest] = source
        elif source in parent2 and parent2[source].filenode() == sourcenode:
            copiesp2[dest] = source
    return copiesp1, copiesp2
909 911
910 912
def encodecopies(files, copies):
    """encode a {dst: src} copies dict against a sorted file list

    Each copy is stored as `<index>\\0<source>` where `<index>` is the
    position of the destination in `files`; entries are newline joined.
    """
    entries = [
        b'%d\0%s' % (idx, copies[path])
        for idx, path in enumerate(files)
        if path in copies
    ]
    # every copy destination must appear in `files`, otherwise the
    # encoding silently drops data
    if len(entries) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
921 923
922 924
def decodecopies(files, data):
    """decode the output of `encodecopies` back into a {dst: src} dict

    Returns None when the payload cannot be parsed (e.g. an extension
    reused the key with a different value syntax).
    """
    copies = {}
    if not data:
        return copies
    try:
        for entry in data.split(b'\n'):
            rawindex, src = entry.split(b'\0')
            copies[files[int(rawindex)]] = src
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
    return copies
938 940
939 941
def encodefileindices(files, subset):
    """encode `subset` as the newline-joined indices of its members in `files`"""
    # set lookup keeps this O(len(files)) instead of O(files * subset)
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, path in enumerate(files) if path in wanted
    )
947 949
948 950
def decodefileindices(files, data):
    """decode the output of `encodefileindices` back into a file list

    Returns None when the payload cannot be parsed (e.g. an extension
    reused the key with a different value syntax) or an index is out of
    range for `files`.
    """
    if not data:
        return []
    nfiles = len(files)
    subset = []
    try:
        for token in data.split(b'\n'):
            idx = int(token)
            if not (0 <= idx < nfiles):
                return None
            subset.append(files[idx])
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
    return subset
964 966
965 967
def _getsidedata(srcrepo, rev):
    """compute the copy-tracing sidedata for `rev` in `srcrepo`

    Returns a {sidedata-key: encoded-value} dict with the four copy
    related entries, or an empty dict when the changeset carries no copy
    information at all.
    """
    ctx = srcrepo[rev]
    p1copies, p2copies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # BUG FIX: the previous code tested `any([filescopies, ...])` where
    # `filescopies` was the (p1copies, p2copies) 2-tuple; a non-empty
    # tuple is always truthy, so the guard never skipped and every
    # changeset got four empty sidedata entries. Test the four
    # containers individually so empty changesets store nothing.
    if any([p1copies, p2copies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        sidedata[sidedatamod.SD_P1COPIES] = p1copies
        sidedata[sidedatamod.SD_P2COPIES] = p2copies
        sidedata[sidedatamod.SD_FILESADDED] = filesadded
        sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
984 986
985 987
def getsidedataadder(srcrepo, destrepo):
    """return a sidedata companion that adds copy sidedata on upgrade"""

    def sidedatacompanion(revlog, rev):
        # only the changelog carries copy sidedata; every other revlog
        # passes through untouched
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            return False, (), _getsidedata(srcrepo, rev)
        return False, (), {}

    return sidedatacompanion
994 996
995 997
def getsidedataremover(srcrepo, destrepo):
    """return a sidedata companion that strips copy sidedata on downgrade"""

    def sidedatacompanion(revlog, rev):
        toremove = ()
        ischangelog = util.safehasattr(revlog, 'filteredrevs')
        # only changelog revisions that actually carry sidedata need
        # their copy keys dropped
        if ischangelog and revlog.flags(rev) & REVIDX_SIDEDATA:
            toremove = (
                sidedatamod.SD_P1COPIES,
                sidedatamod.SD_P2COPIES,
                sidedatamod.SD_FILESADDED,
                sidedatamod.SD_FILESREMOVED,
            )
        return False, toremove, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now