##// END OF EJS Templates
sidedatacopies: directly fetch copies information from sidedata...
marmoute -
r43551:675c776f default
parent child Browse files
Show More
@@ -1,955 +1,968 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16
17 17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18 18
19 19 from . import (
20 20 error,
21 21 match as matchmod,
22 22 node,
23 23 pathutil,
24 24 pycompat,
25 25 util,
26 26 )
27 27
28 28 from .revlogutils import sidedata as sidedatamod
29 29
30 30 from .utils import stringutil
31 31
32 32
33 33 def _filter(src, dst, t):
34 34 """filters out invalid copies after chaining"""
35 35
36 36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 38 # in the following table (not including trivial cases). For example, case 2
39 39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 40 # then was renamed between 'mid' and 'dst'.
41 41 #
42 42 # case src mid dst result
43 43 # 1 x y - -
44 44 # 2 x y y x->y
45 45 # 3 x y x -
46 46 # 4 x y z x->z
47 47 # 5 - x y -
48 48 # 6 x x y x->y
49 49 #
50 50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 52 # between 5 and 6, so it includes all cases in its result.
53 53 # Cases 1, 3, and 5 are then removed by _filter().
54 54
55 55 for k, v in list(t.items()):
56 56 # remove copies from files that didn't exist
57 57 if v not in src:
58 58 del t[k]
59 59 # remove criss-crossed copies
60 60 elif k in src and v in dst:
61 61 del t[k]
62 62 # remove copies to files that were then removed
63 63 elif k not in dst:
64 64 del t[k]
65 65
66 66
def _chain(a, b):
    """chain two sets of copies 'a' and 'b'"""
    chained = a.copy()
    for dst, src in pycompat.iteritems(b):
        # if the source of the 'b' copy is itself a copy destination in
        # 'a' (or an earlier 'b' entry), collapse the two hops into one
        chained[dst] = chained.get(src, src)
    return chained
76 76
77 77
78 78 def _tracefile(fctx, am, basemf):
79 79 """return file context that is the ancestor of fctx present in ancestor
80 80 manifest am
81 81
82 82 Note: we used to try and stop after a given limit, however checking if that
83 83 limit is reached turned out to be very expensive. we are better off
84 84 disabling that feature."""
85 85
86 86 for f in fctx.ancestors():
87 87 path = f.path()
88 88 if am.get(path, None) == f.filenode():
89 89 return path
90 90 if basemf and basemf.get(path, None) == f.filenode():
91 91 return path
92 92
93 93
94 94 def _dirstatecopies(repo, match=None):
95 95 ds = repo.dirstate
96 96 c = ds.copies().copy()
97 97 for k in list(c):
98 98 if ds[k] not in b'anm' or (match and not match(k)):
99 99 del c[k]
100 100 return c
101 101
102 102
103 103 def _computeforwardmissing(a, b, match=None):
104 104 """Computes which files are in b but not a.
105 105 This is its own function so extensions can easily wrap this call to see what
106 106 files _forwardcopies is about to process.
107 107 """
108 108 ma = a.manifest()
109 109 mb = b.manifest()
110 110 return mb.filesnotin(ma, match=match)
111 111
112 112
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage implies changeset-centric data, no need to ask the ui
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
120 120
121 121
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst-in-b: src-in-a} mapping of detected copies/renames.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    # changeset-centric data supersedes the filelog-based tracing below
    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # computed once and attached to every traced fctx below, so each file
    # trace reuses the same ancestry set instead of recomputing it
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
179 179
180 180
181 181 def _revinfogetter(repo):
182 182 """return a function that return multiple data given a <rev>"i
183 183
184 184 * p1: revision number of first parent
185 185 * p2: revision number of first parent
186 186 * p1copies: mapping of copies from p1
187 187 * p2copies: mapping of copies from p2
188 188 * removed: a list of removed files
189 189 """
190 190 cl = repo.changelog
191 191 parents = cl.parentrevs
192 192
193 def revinfo(rev):
194 p1, p2 = parents(rev)
195 ctx = repo[rev]
196 p1copies, p2copies = ctx._copies
197 removed = ctx.filesremoved()
198 return p1, p2, p1copies, p2copies, removed
193 if repo.filecopiesmode == b'changeset-sidedata':
194 changelogrevision = cl.changelogrevision
195
196 def revinfo(rev):
197 p1, p2 = parents(rev)
198 c = changelogrevision(rev)
199 p1copies = c.p1copies
200 p2copies = c.p2copies
201 removed = c.filesremoved
202 return p1, p2, p1copies, p2copies, removed
203
204 else:
205
206 def revinfo(rev):
207 p1, p2 = parents(rev)
208 ctx = repo[rev]
209 p1copies, p2copies = ctx._copies
210 removed = ctx.filesremoved()
211 return p1, p2, p1copies, p2copies, removed
199 212
200 213 return revinfo
201 214
202 215
def _changesetforwardcopies(a, b, match):
    """return {dst: src} copies from changeset `a` to its descendant `b`

    Copy information is read per changeset (via _revinfogetter) and chained
    down the revision graph from `a` to `b`.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    # build a parent -> children mapping limited to the revisions to walk
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    roots = set(children) - set(missingrevs)
    work = list(roots)
    all_copies = {r: {} for r in roots}
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        # min-heap: process revisions in ascending order so parents are
        # handled before their children
        r = heapq.heappop(work)
        copies = all_copies.pop(r)
        if r == b.rev():
            return copies
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            # pick the copies recorded against whichever parent `r` is
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            # a removal invalidates any copy chained through that file
            for f in removed:
                if f in newcopies:
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                heapq.heappush(work, c)
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    # unreachable: the heap walk always reaches b.rev() and returns above
    assert False
278 291
279 292
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is None:
        # working copy: trace up to the first parent, then layer the
        # uncommitted dirstate copies on top
        committed = _committedforwardcopies(a, b.p1(), base, match)
        return _chain(committed, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, base, match)
294 307
295 308
def _backwardrenames(a, b, match):
    """find renames seen when walking from b back to a (reversed copies)"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    renames = {}
    for fwd_dst, fwd_src in sorted(pycompat.iteritems(_forwardcopies(b, a))):
        if match and not match(fwd_src):
            continue
        if fwd_src in a:
            # the source still exists in a: a copy, not a rename
            continue
        renames[fwd_src] = fwd_dst
    return renames
316 329
317 330
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    ancestor = y.ancestor(x)
    if ancestor == x:
        # x is an ancestor of y: plain forward walk
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif ancestor == y:
        # y is an ancestor of x: reverse the forward copies
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # x and y are on different branches: go back to the common
        # ancestor, then forward again
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = x if ancestor.rev() != node.nullrev else None
        copies = _chain(
            _backwardrenames(x, ancestor, match=match),
            _forwardcopies(ancestor, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
352 365
353 366
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

      o          ---> 4 another commit
      |
      | o        ---> 3 commit that modifies a.txt
      |/
      o          ---> 2 commit that moves a.txt to b.txt
      |
      o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
432 445
433 446
434 447 def _isfullcopytraceable(repo, c1, base):
435 448 """ Checks that if base, source and destination are all no-public branches,
436 449 if yes let's use the full copytrace algorithm for increased capabilities
437 450 since it will be fast enough.
438 451
439 452 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
440 453 number of changesets from c1 to base such that if number of changesets are
441 454 more than the limit, full copytracing algorithm won't be used.
442 455 """
443 456 if c1.rev() is None:
444 457 c1 = c1.p1()
445 458 if c1.mutable() and base.mutable():
446 459 sourcecommitlimit = repo.ui.configint(
447 460 b'experimental', b'copytrace.sourcecommitlimit'
448 461 )
449 462 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
450 463 return commits < sourcecommitlimit
451 464 return False
452 465
453 466
454 467 def _checksinglesidecopies(
455 468 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
456 469 ):
457 470 if src not in m2:
458 471 # deleted on side 2
459 472 if src not in m1:
460 473 # renamed on side 1, deleted on side 2
461 474 renamedelete[src] = dsts1
462 475 elif m2[src] != mb[src]:
463 476 if not _related(c2[src], base[src]):
464 477 return
465 478 # modified on side 2
466 479 for dst in dsts1:
467 480 if dst not in m2:
468 481 # dst not added on side 2 (handle as regular
469 482 # "both created" case in manifestmerge otherwise)
470 483 copy[dst] = src
471 484
472 485
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # per-side {dst: src} copies since the merge base
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert to {src: [dst, ...]} so both sides can be compared per source
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
        # TODO: Handle cases where it was renamed on one side and copied
        # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flattened destination sets, used only for the debug output below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
637 650
638 651
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
                    (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    # collect every file touched between base and c2 (linear walk)
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index files added on side 1 by basename and by directory, so each
        # missing file can be matched against same-name/same-dir candidates
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
746 759
747 760
748 761 def _related(f1, f2):
749 762 """return True if f1 and f2 filectx have a common ancestor
750 763
751 764 Walk back to common ancestor to see if the two files originate
752 765 from the same file. Since workingfilectx's rev() is None it messes
753 766 up the integer comparison logic, hence the pre-step check for
754 767 None (f1 and f2 can only be workingfilectx's initially).
755 768 """
756 769
757 770 if f1 == f2:
758 771 return True # a match
759 772
760 773 g1, g2 = f1.ancestors(), f2.ancestors()
761 774 try:
762 775 f1r, f2r = f1.linkrev(), f2.linkrev()
763 776
764 777 if f1r is None:
765 778 f1 = next(g1)
766 779 if f2r is None:
767 780 f2 = next(g2)
768 781
769 782 while True:
770 783 f1r, f2r = f1.linkrev(), f2.linkrev()
771 784 if f1r > f2r:
772 785 f1 = next(g1)
773 786 elif f2r > f1r:
774 787 f2 = next(g2)
775 788 else: # f1 and f2 point to files in the same linkrev
776 789 return f1 == f2 # true if they point to the same file
777 790 except StopIteration:
778 791 return False
779 792
780 793
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    exclude = {}
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude or dst not in wctx:
            continue
        wctx[dst].markcopied(src)
805 818
806 819
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset

    A file is added when it is touched by the changeset and absent from
    every parent.
    """
    return [
        f for f in ctx.files() if not any(f in p for p in ctx.parents())
    ]
815 828
816 829
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset

    A file is removed when it is touched by the changeset but no longer
    present in it.
    """
    return [f for f in ctx.files() if f not in ctx]
825 838
826 839
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries
    (p1copies, p2copies), each in the form `{newname: oldname}`.
    """
    copies_p1 = {}
    copies_p2 = {}
    parent1, parent2 = ctx.p1(), ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrow spec or absent from the changeset
        if not narrowmatch(dst):
            continue
        if dst not in ctx:
            continue
        rename = ctx[dst].renamed()
        if not rename:
            continue
        src, srcnode = rename
        # attribute the copy to whichever parent actually holds the source
        if src in parent1 and parent1[src].filenode() == srcnode:
            copies_p1[dst] = src
        elif src in parent2 and parent2[src].filenode() == srcnode:
            copies_p2[dst] = src
    return copies_p1, copies_p2
851 864
852 865
def encodecopies(files, copies):
    """encode a {dst: src} copies mapping against the `files` list

    Each copy becomes "<index-of-dst>\\0<src>"; entries are newline-joined.
    Raises ProgrammingError when a copy destination is not listed in `files`.
    """
    entries = [
        b'%d\0%s' % (idx, copies[fname])
        for idx, fname in enumerate(files)
        if fname in copies
    ]
    if len(entries) != len(copies):
        # some destination never matched a file index: caller bug
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
863 876
864 877
def decodecopies(files, data):
    """decode a copies blob produced by encodecopies()

    Returns a {dst: src} dict, or None when the payload does not parse.
    """
    if not data:
        return {}
    try:
        copies = {}
        for line in data.split(b'\n'):
            rawindex, src = line.split(b'\0')
            copies[files[int(rawindex)]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
880 893
881 894
def encodefileindices(files, subset):
    """encode the members of `subset` as newline-separated indices into `files`"""
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, fname in enumerate(files) if fname in wanted
    )
889 902
890 903
def decodefileindices(files, data):
    """decode an index blob produced by encodefileindices()

    Returns the selected files as a list, or None when the payload is not a
    valid index list for `files`.
    """
    if not data:
        return []
    selected = []
    try:
        for token in data.split(b'\n'):
            index = int(token)
            if not (0 <= index < len(files)):
                return None
            selected.append(files[index])
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
    return selected
906 919
907 920
def _getsidedata(srcrepo, rev):
    """compute the copies-related sidedata for revision `rev` of `srcrepo`

    Returns a {sidedata-key: encoded-bytes} dict; empty when the changeset
    carries no copy/added/removed information.
    """
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # NOTE(review): `filescopies` is a 2-tuple and therefore always truthy,
    # so this any() is always True; the inner `if` guards below still keep
    # empty entries out of the result.
    if any([filescopies, filesadded, filesremoved]):
        # all entries are encoded against the sorted file list of the changeset
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
930 943
931 944
def getsidedataadder(srcrepo, destrepo):
    """return a sidedata companion that computes copies sidedata entries"""

    def sidedatacompanion(revlog, rev):
        # only the changelog (detected via its `filteredrevs` attribute)
        # carries copies sidedata
        if util.safehasattr(revlog, 'filteredrevs'):
            return False, (), _getsidedata(srcrepo, rev)
        return False, (), {}

    return sidedatacompanion
940 953
941 954
def getsidedataremover(srcrepo, destrepo):
    """return a sidedata companion that strips copies sidedata entries"""
    # the full set of copies-related sidedata keys to drop
    allkeys = (
        sidedatamod.SD_P1COPIES,
        sidedatamod.SD_P2COPIES,
        sidedatamod.SD_FILESADDED,
        sidedatamod.SD_FILESREMOVED,
    )

    def sidedatacompanion(revlog, rev):
        toremove = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                toremove = allkeys
        return False, toremove, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now