##// END OF EJS Templates
copies: use an unfiltered repository for the changeset centric algorithm...
marmoute -
r43550:00de32aa default
parent child Browse files
Show More
@@ -1,955 +1,955 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16
17 17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18 18
19 19 from . import (
20 20 error,
21 21 match as matchmod,
22 22 node,
23 23 pathutil,
24 24 pycompat,
25 25 util,
26 26 )
27 27
28 28 from .revlogutils import sidedata as sidedatamod
29 29
30 30 from .utils import stringutil
31 31
32 32
33 33 def _filter(src, dst, t):
34 34 """filters out invalid copies after chaining"""
35 35
36 36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 38 # in the following table (not including trivial cases). For example, case 2
39 39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 40 # then was renamed between 'mid' and 'dst'.
41 41 #
42 42 # case src mid dst result
43 43 # 1 x y - -
44 44 # 2 x y y x->y
45 45 # 3 x y x -
46 46 # 4 x y z x->z
47 47 # 5 - x y -
48 48 # 6 x x y x->y
49 49 #
50 50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 52 # between 5 and 6, so it includes all cases in its result.
53 53 # Cases 1, 3, and 5 are then removed by _filter().
54 54
55 55 for k, v in list(t.items()):
56 56 # remove copies from files that didn't exist
57 57 if v not in src:
58 58 del t[k]
59 59 # remove criss-crossed copies
60 60 elif k in src and v in dst:
61 61 del t[k]
62 62 # remove copies to files that were then removed
63 63 elif k not in dst:
64 64 del t[k]
65 65
66 66
def _chain(a, b):
    """chain two sets of copies 'a' and 'b'"""
    combined = a.copy()
    for dest, source in pycompat.iteritems(b):
        # collapse transitive renames: when the second-step source is itself
        # a first-step destination, point straight at the original source
        if source in combined:
            combined[dest] = combined[source]
        else:
            combined[dest] = source
    return combined
76 76
77 77
78 78 def _tracefile(fctx, am, basemf):
79 79 """return file context that is the ancestor of fctx present in ancestor
80 80 manifest am
81 81
82 82 Note: we used to try and stop after a given limit, however checking if that
83 83 limit is reached turned out to be very expensive. we are better off
84 84 disabling that feature."""
85 85
86 86 for f in fctx.ancestors():
87 87 path = f.path()
88 88 if am.get(path, None) == f.filenode():
89 89 return path
90 90 if basemf and basemf.get(path, None) == f.filenode():
91 91 return path
92 92
93 93
94 94 def _dirstatecopies(repo, match=None):
95 95 ds = repo.dirstate
96 96 c = ds.copies().copy()
97 97 for k in list(c):
98 98 if ds[k] not in b'anm' or (match and not match(k)):
99 99 del c[k]
100 100 return c
101 101
102 102
103 103 def _computeforwardmissing(a, b, match=None):
104 104 """Computes which files are in b but not a.
105 105 This is its own function so extensions can easily wrap this call to see what
106 106 files _forwardcopies is about to process.
107 107 """
108 108 ma = a.manifest()
109 109 mb = b.manifest()
110 110 return mb.filesnotin(ma, match=match)
111 111
112 112
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage always implies changeset-centric data
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
120 120
121 121
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    # changeset-centric data takes a completely different code path
    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files to amortize the
    # ancestor computation
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
179 179
180 180
181 181 def _revinfogetter(repo):
182 182 """return a function that return multiple data given a <rev>"i
183 183
184 184 * p1: revision number of first parent
185 185 * p2: revision number of first parent
186 186 * p1copies: mapping of copies from p1
187 187 * p2copies: mapping of copies from p2
188 188 * removed: a list of removed files
189 189 """
190 190 cl = repo.changelog
191 191 parents = cl.parentrevs
192 192
193 193 def revinfo(rev):
194 194 p1, p2 = parents(rev)
195 195 ctx = repo[rev]
196 196 p1copies, p2copies = ctx._copies
197 197 removed = ctx.filesremoved()
198 198 return p1, p2, p1copies, p2copies, removed
199 199
200 200 return revinfo
201 201
202 202
def _changesetforwardcopies(a, b, match):
    """find {dst@b: src@a} copies using changeset-stored copy data

    `a` is expected to be an ancestor of `b` (see _forwardcopies).
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    # changeset-centric data is indexed by revision number, so work on an
    # unfiltered repository to keep every revision reachable
    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    # revisions between a (exclusive) and b (inclusive)
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    # build a parent -> children map restricted to the missing range
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    roots = set(children) - set(missingrevs)
    # walk from the roots towards b in revision order, propagating the
    # accumulated copies to each child
    work = list(roots)
    all_copies = {r: {} for r in roots}
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        r = heapq.heappop(work)
        copies = all_copies.pop(r)
        if r == b.rev():
            return copies
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            # a removal invalidates any copy record for that file
            for f in removed:
                if f in newcopies:
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                heapq.heappush(work, c)
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    # the walk must reach b.rev() (a descendant of every root) and return
    # before the queue drains
    assert False
278 278
279 279
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)
    # b is the working copy: trace up to its first committed parent, then
    # layer the dirstate copies on top
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
294 294
295 295
def _backwardrenames(a, b, match):
    """find renames of b's files back to their names in ancestor a"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    backward = {}
    for dest, source in sorted(pycompat.iteritems(forward)):
        if match and not match(source):
            continue
        # drop plain copies: the source still exists in 'a'
        if source in a:
            continue
        backward[source] = dest
    return backward
316 316
317 317
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    # dispatch on how x and y relate: ancestor, descendant, or divergent
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        # trace backward from x to the common ancestor, then forward to y;
        # when the ancestor is a real revision, pass x as the extra base
        # manifest for the forward trace
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
352 352
353 353
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o ---> 4 another commit
    |
    | o ---> 3 commit that modifies a.txt
    | /
    o / ---> 2 commit that moves a.txt to b.txt
    |/
    o ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
432 432
433 433
434 434 def _isfullcopytraceable(repo, c1, base):
435 435 """ Checks that if base, source and destination are all no-public branches,
436 436 if yes let's use the full copytrace algorithm for increased capabilities
437 437 since it will be fast enough.
438 438
439 439 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
440 440 number of changesets from c1 to base such that if number of changesets are
441 441 more than the limit, full copytracing algorithm won't be used.
442 442 """
443 443 if c1.rev() is None:
444 444 c1 = c1.p1()
445 445 if c1.mutable() and base.mutable():
446 446 sourcecommitlimit = repo.ui.configint(
447 447 b'experimental', b'copytrace.sourcecommitlimit'
448 448 )
449 449 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
450 450 return commits < sourcecommitlimit
451 451 return False
452 452
453 453
454 454 def _checksinglesidecopies(
455 455 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
456 456 ):
457 457 if src not in m2:
458 458 # deleted on side 2
459 459 if src not in m1:
460 460 # renamed on side 1, deleted on side 2
461 461 renamedelete[src] = dsts1
462 462 elif m2[src] != mb[src]:
463 463 if not _related(c2[src], base[src]):
464 464 return
465 465 # modified on side 2
466 466 for dst in dsts1:
467 467 if dst not in m2:
468 468 # dst not added on side 2 (handle as regular
469 469 # "both created" case in manifestmerge otherwise)
470 470 copy[dst] = src
471 471
472 472
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert each side's {dst: src} mapping into {src: [dsts]}
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flatten the value lists for quick membership tests below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash directory prefixes for startswith() below
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
637 637
638 638
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
    (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

    [experimental]
    copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # resolve working-copy contexts to their first committed parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    # collect every file touched between base (exclusive) and c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index the files added in c1 (relative to base) by their basename
        # and dirname, the two heuristic match keys
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
746 746
747 747
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        # step off a working-copy context (linkrev None) onto its first
        # real ancestor before the integer comparisons below
        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        # repeatedly move the side with the larger linkrev backwards until
        # both sides land on the same linkrev; running out of ancestors
        # (StopIteration) means the files are unrelated
        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        return False
779 779
780 780
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            continue
        # only mark destinations that exist in the working context
        if dst in wctx:
            wctx[dst].markcopied(src)
805 805
806 806
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset
    """
    # a file is "added" when the changeset touches it and no parent has it
    return [
        f for f in ctx.files() if not any(f in p for p in ctx.parents())
    ]
815 815
816 816
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    # touched by the changeset but no longer present in it => removed
    return [f for f in ctx.files() if f not in ctx]
825 825
826 826
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrow spec or absent from the changeset
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        # attribute the copy to whichever parent holds the matching filenode,
        # preferring p1
        for parent, target in ((p1, p1copies), (p2, p2copies)):
            if src in parent and parent[src].filenode() == srcnode:
                target[dst] = src
                break
    return p1copies, p2copies
851 851
852 852
def encodecopies(files, copies):
    """encode a {dst: src} copy mapping relative to an ordered file list

    Each entry becomes "<index-of-dst>\\0<src>"; entries are newline-joined.
    Raises ProgrammingError when a copy target is absent from `files`."""
    items = [
        b'%d\0%s' % (i, copies[dst])
        for i, dst in enumerate(files)
        if dst in copies
    ]
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)
863 863
864 864
def decodecopies(files, data):
    """decode a copy mapping produced by encodecopies()

    Returns {dst: src}, or None when the payload does not parse."""
    copies = {}
    if not data:
        return copies
    try:
        for entry in data.split(b'\n'):
            strindex, src = entry.split(b'\0')
            copies[files[int(strindex)]] = src
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
    return copies
880 880
881 881
def encodefileindices(files, subset):
    """encode the indices (within `files`) of the entries of `subset`,
    newline-separated"""
    members = set(subset)
    return b'\n'.join(
        b'%d' % i for i, f in enumerate(files) if f in members
    )
889 889
890 890
def decodefileindices(files, data):
    """decode an index list produced by encodefileindices()

    Returns the selected file names, or None when the payload does not
    parse or an index is out of range."""
    subset = []
    if not data:
        return subset
    try:
        for strindex in data.split(b'\n'):
            i = int(strindex)
            # reject out-of-range indices instead of raising
            if i < 0 or i >= len(files):
                return None
            subset.append(files[i])
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
    return subset
906 906
907 907
def _getsidedata(srcrepo, rev):
    """compute the copy-related sidedata entries for revision `rev`

    Returns a {sidedata-key: encoded bytes} mapping; entries whose encoding
    is empty are omitted.
    """
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # NOTE(review): `filescopies` is a 2-tuple of dicts, so this `any()` is
    # always true; harmless since empty encodings are skipped below, but the
    # guard only really short-circuits on the added/removed lists.
    if any([filescopies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
930 930
931 931
def getsidedataadder(srcrepo, destrepo):
    """return a sidedata companion that computes copy sidedata for each
    changelog revision"""

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        # only the changelog (identified by its filteredrevs attribute)
        # carries copy sidedata
        if util.safehasattr(revlog, 'filteredrevs'):
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion
940 940
941 941
def getsidedataremover(srcrepo, destrepo):
    """return a sidedata companion that strips copy sidedata from changelog
    revisions that carry it"""

    def sidedatacompanion(revlog, rev):
        removals = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                removals = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, removals, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now