sidedatacopies: fast path data fetching if revision has no sidedata...
marmoute
r43591:8a292526 default
@@ -1,968 +1,974 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16
17 17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18 18
19 19 from . import (
20 20 error,
21 21 match as matchmod,
22 22 node,
23 23 pathutil,
24 24 pycompat,
25 25 util,
26 26 )
27 27
28 28 from .revlogutils import sidedata as sidedatamod
29 29
30 30 from .utils import stringutil
31 31
32 32
33 33 def _filter(src, dst, t):
34 34 """filters out invalid copies after chaining"""
35 35
36 36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 38 # in the following table (not including trivial cases). For example, case 2
39 39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 40 # then was renamed between 'mid' and 'dst'.
41 41 #
42 42 # case src mid dst result
43 43 # 1 x y - -
44 44 # 2 x y y x->y
45 45 # 3 x y x -
46 46 # 4 x y z x->z
47 47 # 5 - x y -
48 48 # 6 x x y x->y
49 49 #
50 50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 52 # between 5 and 6, so it includes all cases in its result.
53 53 # Cases 1, 3, and 5 are then removed by _filter().
54 54
55 55 for k, v in list(t.items()):
56 56 # remove copies from files that didn't exist
57 57 if v not in src:
58 58 del t[k]
59 59 # remove criss-crossed copies
60 60 elif k in src and v in dst:
61 61 del t[k]
62 62 # remove copies to files that were then removed
63 63 elif k not in dst:
64 64 del t[k]
65 65
66 66
67 67 def _chain(a, b):
68 68 """chain two sets of copies 'a' and 'b'"""
69 69 t = a.copy()
70 70 for k, v in pycompat.iteritems(b):
71 71 if v in t:
72 72 t[k] = t[v]
73 73 else:
74 74 t[k] = v
75 75 return t
76 76
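A minimal sketch of how _chain() and _filter() combine two legs of history; the file names are hypothetical, and plain sets stand in for the manifest-like 'src' and 'dst' arguments:

    a = {b'mid.txt': b'old.txt'}   # src -> mid: old.txt renamed to mid.txt
    b = {b'new.txt': b'mid.txt'}   # mid -> dst: mid.txt renamed to new.txt
    t = _chain(a, b)
    # t == {b'mid.txt': b'old.txt', b'new.txt': b'old.txt'}
    _filter({b'old.txt'}, {b'new.txt'}, t)
    # the intermediate entry is pruned (mid.txt is gone from dst),
    # leaving t == {b'new.txt': b'old.txt'}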
77 77
78 78 def _tracefile(fctx, am, basemf):
79 79 """return file context that is the ancestor of fctx present in ancestor
80 80 manifest am
81 81
82 82 Note: we used to try to stop after a given limit, but checking whether that
83 83 limit is reached turned out to be very expensive, so we are better off
84 84 disabling that feature."""
85 85
86 86 for f in fctx.ancestors():
87 87 path = f.path()
88 88 if am.get(path, None) == f.filenode():
89 89 return path
90 90 if basemf and basemf.get(path, None) == f.filenode():
91 91 return path
92 92
93 93
94 94 def _dirstatecopies(repo, match=None):
95 95 ds = repo.dirstate
96 96 c = ds.copies().copy()
97 97 for k in list(c):
98 98 if ds[k] not in b'anm' or (match and not match(k)):
99 99 del c[k]
100 100 return c
101 101
102 102
103 103 def _computeforwardmissing(a, b, match=None):
104 104 """Computes which files are in b but not a.
105 105 This is its own function so extensions can easily wrap this call to see what
106 106 files _forwardcopies is about to process.
107 107 """
108 108 ma = a.manifest()
109 109 mb = b.manifest()
110 110 return mb.filesnotin(ma, match=match)
111 111
112 112
113 113 def usechangesetcentricalgo(repo):
114 114 """Checks if we should use changeset-centric copy algorithms"""
115 115 if repo.filecopiesmode == b'changeset-sidedata':
116 116 return True
117 117 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
118 118 changesetsource = (b'changeset-only', b'compatibility')
119 119 return readfrom in changesetsource
120 120
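For reference, the changeset-centric algorithms are selected either by the repository's filecopiesmode (the b'changeset-sidedata' case) or by one of the two config values tested above, e.g. an hgrc entry such as:

    [experimental]
    copies.read-from = changeset-only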
121 121
122 122 def _committedforwardcopies(a, b, base, match):
123 123 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
124 124 # files might have to be traced back to the fctx parent of the last
125 125 # one-side-only changeset, but not further back than that
126 126 repo = a._repo
127 127
128 128 if usechangesetcentricalgo(repo):
129 129 return _changesetforwardcopies(a, b, match)
130 130
131 131 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
132 132 dbg = repo.ui.debug
133 133 if debug:
134 134 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
135 135 am = a.manifest()
136 136 basemf = None if base is None else base.manifest()
137 137
138 138 # find where new files came from
139 139 # we currently don't try to find where old files went, too expensive
140 140 # this means we can miss a case like 'hg rm b; hg cp a b'
141 141 cm = {}
142 142
143 143 # Computing the forward missing is quite expensive on large manifests, since
144 144 # it compares the entire manifests. We can optimize it in the common use
145 145 # case of computing what copies are in a commit versus its parent (like
146 146 # during a rebase or histedit). Note, we exclude merge commits from this
147 147 # optimization, since the ctx.files() for a merge commit is not correct for
148 148 # this comparison.
149 149 forwardmissingmatch = match
150 150 if b.p1() == a and b.p2().node() == node.nullid:
151 151 filesmatcher = matchmod.exact(b.files())
152 152 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
153 153 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
154 154
155 155 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
156 156
157 157 if debug:
158 158 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
159 159
160 160 for f in sorted(missing):
161 161 if debug:
162 162 dbg(b'debug.copies: tracing file: %s\n' % f)
163 163 fctx = b[f]
164 164 fctx._ancestrycontext = ancestrycontext
165 165
166 166 if debug:
167 167 start = util.timer()
168 168 opath = _tracefile(fctx, am, basemf)
169 169 if opath:
170 170 if debug:
171 171 dbg(b'debug.copies: rename of: %s\n' % opath)
172 172 cm[f] = opath
173 173 if debug:
174 174 dbg(
175 175 b'debug.copies: time: %f seconds\n'
176 176 % (util.timer() - start)
177 177 )
178 178 return cm
179 179
180 180
181 181 def _revinfogetter(repo):
182 182 """return a function that returns multiple data given a <rev>:
183 183
184 184 * p1: revision number of first parent
185 185 * p2: revision number of second parent
186 186 * p1copies: mapping of copies from p1
187 187 * p2copies: mapping of copies from p2
188 188 * removed: a list of removed files
189 189 """
190 190 cl = repo.changelog
191 191 parents = cl.parentrevs
192 192
193 193 if repo.filecopiesmode == b'changeset-sidedata':
194 194 changelogrevision = cl.changelogrevision
195 flags = cl.flags
195 196
196 197 def revinfo(rev):
197 198 p1, p2 = parents(rev)
199 if flags(rev) & REVIDX_SIDEDATA:
198 200 c = changelogrevision(rev)
199 201 p1copies = c.p1copies
200 202 p2copies = c.p2copies
201 203 removed = c.filesremoved
204 else:
205 p1copies = {}
206 p2copies = {}
207 removed = ()
202 208 return p1, p2, p1copies, p2copies, removed
203 209
204 210 else:
205 211
206 212 def revinfo(rev):
207 213 p1, p2 = parents(rev)
208 214 ctx = repo[rev]
209 215 p1copies, p2copies = ctx._copies
210 216 removed = ctx.filesremoved()
211 217 return p1, p2, p1copies, p2copies, removed
212 218
213 219 return revinfo
214 220
215 221
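A sketch of how the returned closure is consumed (this mirrors what _changesetforwardcopies() below does):

    revinfo = _revinfogetter(repo)
    p1, p2, p1copies, p2copies, removed = revinfo(rev)
    # In the sidedata case, a revision whose flags lack REVIDX_SIDEDATA
    # skips the changelogrevision() parsing entirely and yields empty
    # copy data -- the fast path this change introduces.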
216 222 def _changesetforwardcopies(a, b, match):
217 223 if a.rev() in (node.nullrev, b.rev()):
218 224 return {}
219 225
220 226 repo = a.repo().unfiltered()
221 227 children = {}
222 228 revinfo = _revinfogetter(repo)
223 229
224 230 cl = repo.changelog
225 231 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
226 232 for r in missingrevs:
227 233 for p in cl.parentrevs(r):
228 234 if p == node.nullrev:
229 235 continue
230 236 if p not in children:
231 237 children[p] = [r]
232 238 else:
233 239 children[p].append(r)
234 240
235 241 roots = set(children) - set(missingrevs)
236 242 work = list(roots)
237 243 all_copies = {r: {} for r in roots}
238 244 heapq.heapify(work)
239 245 alwaysmatch = match.always()
240 246 while work:
241 247 r = heapq.heappop(work)
242 248 copies = all_copies.pop(r)
243 249 if r == b.rev():
244 250 return copies
245 251 for i, c in enumerate(children[r]):
246 252 p1, p2, p1copies, p2copies, removed = revinfo(c)
247 253 if r == p1:
248 254 parent = 1
249 255 childcopies = p1copies
250 256 else:
251 257 assert r == p2
252 258 parent = 2
253 259 childcopies = p2copies
254 260 if not alwaysmatch:
255 261 childcopies = {
256 262 dst: src for dst, src in childcopies.items() if match(dst)
257 263 }
258 264 # Copy the dict only if later iterations will also need it
259 265 if i != len(children[r]) - 1:
260 266 newcopies = copies.copy()
261 267 else:
262 268 newcopies = copies
263 269 if childcopies:
264 270 newcopies = _chain(newcopies, childcopies)
265 271 for f in removed:
266 272 if f in newcopies:
267 273 del newcopies[f]
268 274 othercopies = all_copies.get(c)
269 275 if othercopies is None:
270 276 heapq.heappush(work, c)
271 277 all_copies[c] = newcopies
272 278 else:
273 279 # we are the second parent to work on c, we need to merge our
274 280 # work with the other.
275 281 #
276 282 # Unlike when copies are stored in the filelog, we consider
277 283 # it a copy even if the destination already existed on the
278 284 # other branch. It's simply too expensive to check if the
279 285 # file existed in the manifest.
280 286 #
281 287 # In case of conflict, parent 1 takes precedence over parent 2.
282 288 # This is an arbitrary choice made anew when implementing
283 289 # changeset-based copies. It was made without regard to
284 290 # potential filelog-related behavior.
285 291 if parent == 1:
286 292 othercopies.update(newcopies)
287 293 else:
288 294 newcopies.update(othercopies)
289 295 all_copies[c] = newcopies
290 296 assert False
291 297
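The precedence rule above boils down to a plain dict merge; a sketch with hypothetical mappings:

    from_p1 = {b'renamed.txt': b'via-p1.txt'}
    from_p2 = {b'renamed.txt': b'via-p2.txt'}
    merged = dict(from_p2)
    merged.update(from_p1)
    # merged == {b'renamed.txt': b'via-p1.txt'}: parent 1 wins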
292 298
293 299 def _forwardcopies(a, b, base=None, match=None):
294 300 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
295 301
296 302 if base is None:
297 303 base = a
298 304 match = a.repo().narrowmatch(match)
299 305 # check for working copy
300 306 if b.rev() is None:
301 307 cm = _committedforwardcopies(a, b.p1(), base, match)
302 308 # combine copies from dirstate if necessary
303 309 copies = _chain(cm, _dirstatecopies(b._repo, match))
304 310 else:
305 311 copies = _committedforwardcopies(a, b, base, match)
306 312 return copies
307 313
308 314
309 315 def _backwardrenames(a, b, match):
310 316 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
311 317 return {}
312 318
313 319 # Even though we're not taking copies into account, 1:n rename situations
314 320 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
315 321 # arbitrarily pick one of the renames.
316 322 # We don't want to pass in "match" here, since that would filter
317 323 # the destination by it. Since we're reversing the copies, we want
318 324 # to filter the source instead.
319 325 f = _forwardcopies(b, a)
320 326 r = {}
321 327 for k, v in sorted(pycompat.iteritems(f)):
322 328 if match and not match(v):
323 329 continue
324 330 # remove copies
325 331 if v in a:
326 332 continue
327 333 r[v] = k
328 334 return r
329 335
330 336
331 337 def pathcopies(x, y, match=None):
332 338 """find {dst@y: src@x} copy mapping for directed compare"""
333 339 repo = x._repo
334 340 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
335 341 if debug:
336 342 repo.ui.debug(
337 343 b'debug.copies: searching copies from %s to %s\n' % (x, y)
338 344 )
339 345 if x == y or not x or not y:
340 346 return {}
341 347 a = y.ancestor(x)
342 348 if a == x:
343 349 if debug:
344 350 repo.ui.debug(b'debug.copies: search mode: forward\n')
345 351 if y.rev() is None and x == y.p1():
346 352 # short-circuit to avoid issues with merge states
347 353 return _dirstatecopies(repo, match)
348 354 copies = _forwardcopies(x, y, match=match)
349 355 elif a == y:
350 356 if debug:
351 357 repo.ui.debug(b'debug.copies: search mode: backward\n')
352 358 copies = _backwardrenames(x, y, match=match)
353 359 else:
354 360 if debug:
355 361 repo.ui.debug(b'debug.copies: search mode: combined\n')
356 362 base = None
357 363 if a.rev() != node.nullrev:
358 364 base = x
359 365 copies = _chain(
360 366 _backwardrenames(x, a, match=match),
361 367 _forwardcopies(a, y, base, match=match),
362 368 )
363 369 _filter(x, y, copies)
364 370 return copies
365 371
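A sketch of a directed query with pathcopies() (the revision identifiers are hypothetical); it picks forward, backward, or combined mode depending on how x and y are related:

    copies = pathcopies(repo[b'1.0'], repo[b'tip'])
    # copies maps {dst@tip: src@1.0}, e.g. {b'new.txt': b'old.txt'}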
366 372
367 373 def mergecopies(repo, c1, c2, base):
368 374 """
369 375 Finds moves and copies between context c1 and c2 that are relevant for
370 376 merging. 'base' will be used as the merge base.
371 377
372 378 Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
373 379 files that were moved/copied in one merge parent and modified in another.
374 380 For example:
375 381
376 382 o ---> 4 another commit
377 383 |
378 384 | o ---> 3 commit that modifies a.txt
379 385 | /
380 386 o / ---> 2 commit that moves a.txt to b.txt
381 387 |/
382 388 o ---> 1 merge base
383 389
384 390 If we try to rebase revision 3 onto revision 4, there is no a.txt in
385 391 revision 4, so if the user has copytrace disabled, we print the following
386 392 message:
387 393
388 394 ```other changed <file> which local deleted```
389 395
390 396 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
391 397 "dirmove".
392 398
393 399 "copy" is a mapping from destination name -> source name,
394 400 where source is in c1 and destination is in c2 or vice-versa.
395 401
396 402 "movewithdir" is a mapping from source name -> destination name,
397 403 where the file at source present in one context but not the other
398 404 needs to be moved to destination by the merge process, because the
399 405 other context moved the directory it is in.
400 406
401 407 "diverge" is a mapping of source name -> list of destination names
402 408 for divergent renames.
403 409
404 410 "renamedelete" is a mapping of source name -> list of destination
405 411 names for files deleted in c1 that were renamed in c2 or vice-versa.
406 412
407 413 "dirmove" is a mapping of detected source dir -> destination dir renames.
408 414 This is needed for handling changes to new files previously grafted into
409 415 renamed directories.
410 416
411 417 This function calls different copytracing algorithms based on config.
412 418 """
413 419 # avoid silly behavior for update from empty dir
414 420 if not c1 or not c2 or c1 == c2:
415 421 return {}, {}, {}, {}, {}
416 422
417 423 narrowmatch = c1.repo().narrowmatch()
418 424
419 425 # avoid silly behavior for parent -> working dir
420 426 if c2.node() is None and c1.node() == repo.dirstate.p1():
421 427 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
422 428
423 429 copytracing = repo.ui.config(b'experimental', b'copytrace')
424 430 if stringutil.parsebool(copytracing) is False:
425 431 # stringutil.parsebool() returns None when it is unable to parse the
426 432 # value, so we keep copytracing enabled in such cases
427 433 return {}, {}, {}, {}, {}
428 434
429 435 if usechangesetcentricalgo(repo):
430 436 # The heuristics don't make sense when we need changeset-centric algos
431 437 return _fullcopytracing(repo, c1, c2, base)
432 438
433 439 # Copy trace disabling is explicitly below the node == p1 logic above
434 440 # because the logic above is required for a simple copy to be kept across a
435 441 # rebase.
436 442 if copytracing == b'heuristics':
437 443 # Do full copytracing if only non-public revisions are involved as
438 444 # that will be fast enough and will also cover the copies which could
439 445 # be missed by heuristics
440 446 if _isfullcopytraceable(repo, c1, base):
441 447 return _fullcopytracing(repo, c1, c2, base)
442 448 return _heuristicscopytracing(repo, c1, c2, base)
443 449 else:
444 450 return _fullcopytracing(repo, c1, c2, base)
445 451
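A sketch of unpacking the five dictionaries documented above:

    copy, movewithdir, diverge, renamedelete, dirmove = mergecopies(
        repo, c1, c2, base
    )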
446 452
447 453 def _isfullcopytraceable(repo, c1, base):
448 454 """ Checks whether base, source and destination are all non-public branches;
449 455 if so, we use the full copytrace algorithm for increased capabilities,
450 456 since it will be fast enough.
451 457
452 458 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
453 459 the number of changesets from c1 to base; if the number of changesets exceeds
454 460 the limit, the full copytracing algorithm won't be used.
455 461 """
456 462 if c1.rev() is None:
457 463 c1 = c1.p1()
458 464 if c1.mutable() and base.mutable():
459 465 sourcecommitlimit = repo.ui.configint(
460 466 b'experimental', b'copytrace.sourcecommitlimit'
461 467 )
462 468 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
463 469 return commits < sourcecommitlimit
464 470 return False
465 471
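The relevant knobs live in hgrc; a sketch (the limit value here is illustrative, not the default):

    [experimental]
    copytrace = heuristics
    copytrace.sourcecommitlimit = 50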
466 472
467 473 def _checksinglesidecopies(
468 474 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
469 475 ):
470 476 if src not in m2:
471 477 # deleted on side 2
472 478 if src not in m1:
473 479 # renamed on side 1, deleted on side 2
474 480 renamedelete[src] = dsts1
475 481 elif m2[src] != mb[src]:
476 482 if not _related(c2[src], base[src]):
477 483 return
478 484 # modified on side 2
479 485 for dst in dsts1:
480 486 if dst not in m2:
481 487 # dst not added on side 2 (handle as regular
482 488 # "both created" case in manifestmerge otherwise)
483 489 copy[dst] = src
484 490
485 491
486 492 def _fullcopytracing(repo, c1, c2, base):
487 493 """ The full copytracing algorithm, which finds all the new files that were
488 494 added from the merge base up to the top commit and, for each file, checks if
489 495 it was copied from another file.
490 496
491 497 This is pretty slow when a lot of changesets are involved but will track all
492 498 the copies.
493 499 """
494 500 m1 = c1.manifest()
495 501 m2 = c2.manifest()
496 502 mb = base.manifest()
497 503
498 504 copies1 = pathcopies(base, c1)
499 505 copies2 = pathcopies(base, c2)
500 506
501 507 inversecopies1 = {}
502 508 inversecopies2 = {}
503 509 for dst, src in copies1.items():
504 510 inversecopies1.setdefault(src, []).append(dst)
505 511 for dst, src in copies2.items():
506 512 inversecopies2.setdefault(src, []).append(dst)
507 513
508 514 copy = {}
509 515 diverge = {}
510 516 renamedelete = {}
511 517 allsources = set(inversecopies1) | set(inversecopies2)
512 518 for src in allsources:
513 519 dsts1 = inversecopies1.get(src)
514 520 dsts2 = inversecopies2.get(src)
515 521 if dsts1 and dsts2:
516 522 # copied/renamed on both sides
517 523 if src not in m1 and src not in m2:
518 524 # renamed on both sides
519 525 dsts1 = set(dsts1)
520 526 dsts2 = set(dsts2)
521 527 # If there's some overlap in the rename destinations, we
522 528 # consider it not divergent. For example, if side 1 copies 'a'
523 529 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
524 530 # and 'd' and deletes 'a'.
525 531 if dsts1 & dsts2:
526 532 for dst in dsts1 & dsts2:
527 533 copy[dst] = src
528 534 else:
529 535 diverge[src] = sorted(dsts1 | dsts2)
530 536 elif src in m1 and src in m2:
531 537 # copied on both sides
532 538 dsts1 = set(dsts1)
533 539 dsts2 = set(dsts2)
534 540 for dst in dsts1 & dsts2:
535 541 copy[dst] = src
536 542 # TODO: Handle cases where it was renamed on one side and copied
537 543 # on the other side
538 544 elif dsts1:
539 545 # copied/renamed only on side 1
540 546 _checksinglesidecopies(
541 547 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
542 548 )
543 549 elif dsts2:
544 550 # copied/renamed only on side 2
545 551 _checksinglesidecopies(
546 552 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
547 553 )
548 554
549 555 renamedeleteset = set()
550 556 divergeset = set()
551 557 for dsts in diverge.values():
552 558 divergeset.update(dsts)
553 559 for dsts in renamedelete.values():
554 560 renamedeleteset.update(dsts)
555 561
556 562 # find interesting file sets from manifests
557 563 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
558 564 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
559 565 u1 = sorted(addedinm1 - addedinm2)
560 566 u2 = sorted(addedinm2 - addedinm1)
561 567
562 568 header = b" unmatched files in %s"
563 569 if u1:
564 570 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
565 571 if u2:
566 572 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
567 573
568 574 fullcopy = copies1.copy()
569 575 fullcopy.update(copies2)
570 576 if not fullcopy:
571 577 return copy, {}, diverge, renamedelete, {}
572 578
573 579 if repo.ui.debugflag:
574 580 repo.ui.debug(
575 581 b" all copies found (* = to merge, ! = divergent, "
576 582 b"% = renamed and deleted):\n"
577 583 )
578 584 for f in sorted(fullcopy):
579 585 note = b""
580 586 if f in copy:
581 587 note += b"*"
582 588 if f in divergeset:
583 589 note += b"!"
584 590 if f in renamedeleteset:
585 591 note += b"%"
586 592 repo.ui.debug(
587 593 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
588 594 )
589 595 del divergeset
590 596
591 597 repo.ui.debug(b" checking for directory renames\n")
592 598
593 599 # generate a directory move map
594 600 d1, d2 = c1.dirs(), c2.dirs()
595 601 invalid = set()
596 602 dirmove = {}
597 603
598 604 # examine each file copy for a potential directory move, which is
599 605 # when all the files in a directory are moved to a new directory
600 606 for dst, src in pycompat.iteritems(fullcopy):
601 607 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
602 608 if dsrc in invalid:
603 609 # already seen to be uninteresting
604 610 continue
605 611 elif dsrc in d1 and ddst in d1:
606 612 # directory wasn't entirely moved locally
607 613 invalid.add(dsrc)
608 614 elif dsrc in d2 and ddst in d2:
609 615 # directory wasn't entirely moved remotely
610 616 invalid.add(dsrc)
611 617 elif dsrc in dirmove and dirmove[dsrc] != ddst:
612 618 # files from the same directory moved to two different places
613 619 invalid.add(dsrc)
614 620 else:
615 621 # looks good so far
616 622 dirmove[dsrc] = ddst
617 623
618 624 for i in invalid:
619 625 if i in dirmove:
620 626 del dirmove[i]
621 627 del d1, d2, invalid
622 628
623 629 if not dirmove:
624 630 return copy, {}, diverge, renamedelete, {}
625 631
626 632 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
627 633
628 634 for d in dirmove:
629 635 repo.ui.debug(
630 636 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
631 637 )
632 638
633 639 movewithdir = {}
634 640 # check unaccounted nonoverlapping files against directory moves
635 641 for f in u1 + u2:
636 642 if f not in fullcopy:
637 643 for d in dirmove:
638 644 if f.startswith(d):
639 645 # new file added in a directory that was moved, move it
640 646 df = dirmove[d] + f[len(d) :]
641 647 if df not in copy:
642 648 movewithdir[f] = df
643 649 repo.ui.debug(
644 650 b" pending file src: '%s' -> dst: '%s'\n"
645 651 % (f, df)
646 652 )
647 653 break
648 654
649 655 return copy, movewithdir, diverge, renamedelete, dirmove
650 656
651 657
652 658 def _heuristicscopytracing(repo, c1, c2, base):
653 659 """ Fast copytracing using filename heuristics
654 660
655 661 Assumes that moves or renames are of following two types:
656 662
657 663 1) Inside a directory only (same directory name but different filenames)
658 664 2) Move from one directory to another
659 665 (same filenames but different directory names)
660 666
661 667 Works only when there are no merge commits in the "source branch".
662 668 The source branch is the commits from base up to c2, not including base.
663 669 
664 670 If a merge is involved, it falls back to _fullcopytracing().
665 671
666 672 Can be used by setting the following config:
667 673
668 674 [experimental]
669 675 copytrace = heuristics
670 676
671 677 In some cases the copy/move candidates found by the heuristics can be very
672 678 numerous, and that will make the algorithm slow. The number of possible
673 679 candidates to check can be limited by using the config
674 680 `experimental.copytrace.movecandidateslimit` which defaults to 100.
675 681 """
676 682
677 683 if c1.rev() is None:
678 684 c1 = c1.p1()
679 685 if c2.rev() is None:
680 686 c2 = c2.p1()
681 687
682 688 copies = {}
683 689
684 690 changedfiles = set()
685 691 m1 = c1.manifest()
686 692 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
687 693 # If base is not in c2 branch, we switch to fullcopytracing
688 694 repo.ui.debug(
689 695 b"switching to full copytracing as base is not "
690 696 b"an ancestor of c2\n"
691 697 )
692 698 return _fullcopytracing(repo, c1, c2, base)
693 699
694 700 ctx = c2
695 701 while ctx != base:
696 702 if len(ctx.parents()) == 2:
697 703 # To keep things simple let's not handle merges
698 704 repo.ui.debug(b"switching to full copytracing because of merges\n")
699 705 return _fullcopytracing(repo, c1, c2, base)
700 706 changedfiles.update(ctx.files())
701 707 ctx = ctx.p1()
702 708
703 709 cp = _forwardcopies(base, c2)
704 710 for dst, src in pycompat.iteritems(cp):
705 711 if src in m1:
706 712 copies[dst] = src
707 713
708 714 # file is missing if it isn't present in the destination, but is present in
709 715 # the base and present in the source.
710 716 # Presence in the base is important to exclude added files, presence in the
711 717 # source is important to exclude removed files.
712 718 filt = lambda f: f not in m1 and f in base and f in c2
713 719 missingfiles = [f for f in changedfiles if filt(f)]
714 720
715 721 if missingfiles:
716 722 basenametofilename = collections.defaultdict(list)
717 723 dirnametofilename = collections.defaultdict(list)
718 724
719 725 for f in m1.filesnotin(base.manifest()):
720 726 basename = os.path.basename(f)
721 727 dirname = os.path.dirname(f)
722 728 basenametofilename[basename].append(f)
723 729 dirnametofilename[dirname].append(f)
724 730
725 731 for f in missingfiles:
726 732 basename = os.path.basename(f)
727 733 dirname = os.path.dirname(f)
728 734 samebasename = basenametofilename[basename]
729 735 samedirname = dirnametofilename[dirname]
730 736 movecandidates = samebasename + samedirname
731 737 # f is guaranteed to be present in c2, that's why
732 738 # c2.filectx(f) won't fail
733 739 f2 = c2.filectx(f)
734 740 # we can have a lot of candidates which can slow down the heuristics
735 741 # config value to limit the number of candidate moves to check
736 742 maxcandidates = repo.ui.configint(
737 743 b'experimental', b'copytrace.movecandidateslimit'
738 744 )
739 745
740 746 if len(movecandidates) > maxcandidates:
741 747 repo.ui.status(
742 748 _(
743 749 b"skipping copytracing for '%s', more "
744 750 b"candidates than the limit: %d\n"
745 751 )
746 752 % (f, len(movecandidates))
747 753 )
748 754 continue
749 755
750 756 for candidate in movecandidates:
751 757 f1 = c1.filectx(candidate)
752 758 if _related(f1, f2):
753 759 # if there are a few related copies then we'll merge
754 760 # changes into all of them. This matches the behaviour
755 761 # of upstream copytracing
756 762 copies[candidate] = f
757 763
758 764 return copies, {}, {}, {}, {}
759 765
760 766
761 767 def _related(f1, f2):
762 768 """return True if f1 and f2 filectx have a common ancestor
763 769
764 770 Walk back to common ancestor to see if the two files originate
765 771 from the same file. Since workingfilectx's rev() is None it messes
766 772 up the integer comparison logic, hence the pre-step check for
767 773 None (f1 and f2 can only be workingfilectx's initially).
768 774 """
769 775
770 776 if f1 == f2:
771 777 return True # a match
772 778
773 779 g1, g2 = f1.ancestors(), f2.ancestors()
774 780 try:
775 781 f1r, f2r = f1.linkrev(), f2.linkrev()
776 782
777 783 if f1r is None:
778 784 f1 = next(g1)
779 785 if f2r is None:
780 786 f2 = next(g2)
781 787
782 788 while True:
783 789 f1r, f2r = f1.linkrev(), f2.linkrev()
784 790 if f1r > f2r:
785 791 f1 = next(g1)
786 792 elif f2r > f1r:
787 793 f2 = next(g2)
788 794 else: # f1 and f2 point to files in the same linkrev
789 795 return f1 == f2 # true if they point to the same file
790 796 except StopIteration:
791 797 return False
792 798
793 799
794 800 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
795 801 """reproduce copies from fromrev to rev in the dirstate
796 802
797 803 If skiprev is specified, it's a revision that should be used to
798 804 filter copy records. Any copies that occur between fromrev and
799 805 skiprev will not be duplicated, even if they appear in the set of
800 806 copies between fromrev and rev.
801 807 """
802 808 exclude = {}
803 809 ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
804 810 bctrace = stringutil.parsebool(ctraceconfig)
805 811 if skiprev is not None and (
806 812 ctraceconfig == b'heuristics' or bctrace or bctrace is None
807 813 ):
808 814 # copytrace='off' skips this line, but not the entire function because
809 815 # the line below is O(size of the repo) during a rebase, while the rest
810 816 # of the function is much faster (and is required for carrying copy
811 817 # metadata across the rebase anyway).
812 818 exclude = pathcopies(repo[fromrev], repo[skiprev])
813 819 for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
814 820 if dst in exclude:
815 821 continue
816 822 if dst in wctx:
817 823 wctx[dst].markcopied(src)
818 824
819 825
820 826 def computechangesetfilesadded(ctx):
821 827 """return the list of files added in a changeset
822 828 """
823 829 added = []
824 830 for f in ctx.files():
825 831 if not any(f in p for p in ctx.parents()):
826 832 added.append(f)
827 833 return added
828 834
829 835
830 836 def computechangesetfilesremoved(ctx):
831 837 """return the list of files removed in a changeset
832 838 """
833 839 removed = []
834 840 for f in ctx.files():
835 841 if f not in ctx:
836 842 removed.append(f)
837 843 return removed
838 844
839 845
840 846 def computechangesetcopies(ctx):
841 847 """return the copies data for a changeset
842 848
843 849 The copies data are returned as a pair of dictionaries (p1copies, p2copies).
844 850 
845 851 Each dictionary is of the form: `{newname: oldname}`
846 852 """
847 853 p1copies = {}
848 854 p2copies = {}
849 855 p1 = ctx.p1()
850 856 p2 = ctx.p2()
851 857 narrowmatch = ctx._repo.narrowmatch()
852 858 for dst in ctx.files():
853 859 if not narrowmatch(dst) or dst not in ctx:
854 860 continue
855 861 copied = ctx[dst].renamed()
856 862 if not copied:
857 863 continue
858 864 src, srcnode = copied
859 865 if src in p1 and p1[src].filenode() == srcnode:
860 866 p1copies[dst] = src
861 867 elif src in p2 and p2[src].filenode() == srcnode:
862 868 p2copies[dst] = src
863 869 return p1copies, p2copies
864 870
865 871
866 872 def encodecopies(files, copies):
867 873 items = []
868 874 for i, dst in enumerate(files):
869 875 if dst in copies:
870 876 items.append(b'%d\0%s' % (i, copies[dst]))
871 877 if len(items) != len(copies):
872 878 raise error.ProgrammingError(
873 879 b'some copy targets missing from file list'
874 880 )
875 881 return b"\n".join(items)
876 882
877 883
878 884 def decodecopies(files, data):
879 885 try:
880 886 copies = {}
881 887 if not data:
882 888 return copies
883 889 for l in data.split(b'\n'):
884 890 strindex, src = l.split(b'\0')
885 891 i = int(strindex)
886 892 dst = files[i]
887 893 copies[dst] = src
888 894 return copies
889 895 except (ValueError, IndexError):
890 896 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
891 897 # used different syntax for the value.
892 898 return None
893 899
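A round-trip sketch of the index-based copies encoding (hypothetical file names); the entry b'1\x00a.txt' means "files[1] was copied from a.txt":

    files = sorted([b'a.txt', b'b.txt'])  # the changeset's sorted file list
    copies = {b'b.txt': b'a.txt'}         # b.txt was copied from a.txt
    data = encodecopies(files, copies)    # b'1\x00a.txt'
    assert decodecopies(files, data) == copies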
894 900
895 901 def encodefileindices(files, subset):
896 902 subset = set(subset)
897 903 indices = []
898 904 for i, f in enumerate(files):
899 905 if f in subset:
900 906 indices.append(b'%d' % i)
901 907 return b'\n'.join(indices)
902 908
903 909
904 910 def decodefileindices(files, data):
905 911 try:
906 912 subset = []
907 913 if not data:
908 914 return subset
909 915 for strindex in data.split(b'\n'):
910 916 i = int(strindex)
911 917 if i < 0 or i >= len(files):
912 918 return None
913 919 subset.append(files[i])
914 920 return subset
915 921 except (ValueError, IndexError):
916 922 # Perhaps someone had chosen the same key name (e.g. "added") and
917 923 # used different syntax for the value.
918 924 return None
919 925
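A matching round-trip sketch for the file-index encoding (hypothetical names):

    files = [b'a.txt', b'b.txt', b'c.txt']
    data = encodefileindices(files, [b'a.txt', b'c.txt'])  # b'0\n2'
    assert decodefileindices(files, data) == [b'a.txt', b'c.txt']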
920 926
921 927 def _getsidedata(srcrepo, rev):
922 928 ctx = srcrepo[rev]
923 929 filescopies = computechangesetcopies(ctx)
924 930 filesadded = computechangesetfilesadded(ctx)
925 931 filesremoved = computechangesetfilesremoved(ctx)
926 932 sidedata = {}
927 933 if any([filescopies, filesadded, filesremoved]):
928 934 sortedfiles = sorted(ctx.files())
929 935 p1copies, p2copies = filescopies
930 936 p1copies = encodecopies(sortedfiles, p1copies)
931 937 p2copies = encodecopies(sortedfiles, p2copies)
932 938 filesadded = encodefileindices(sortedfiles, filesadded)
933 939 filesremoved = encodefileindices(sortedfiles, filesremoved)
934 940 if p1copies:
935 941 sidedata[sidedatamod.SD_P1COPIES] = p1copies
936 942 if p2copies:
937 943 sidedata[sidedatamod.SD_P2COPIES] = p2copies
938 944 if filesadded:
939 945 sidedata[sidedatamod.SD_FILESADDED] = filesadded
940 946 if filesremoved:
941 947 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
942 948 return sidedata
943 949
944 950
945 951 def getsidedataadder(srcrepo, destrepo):
946 952 def sidedatacompanion(revlog, rev):
947 953 sidedata = {}
948 954 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
949 955 sidedata = _getsidedata(srcrepo, rev)
950 956 return False, (), sidedata
951 957
952 958 return sidedatacompanion
953 959
954 960
955 961 def getsidedataremover(srcrepo, destrepo):
956 962 def sidedatacompanion(revlog, rev):
957 963 f = ()
958 964 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
959 965 if revlog.flags(rev) & REVIDX_SIDEDATA:
960 966 f = (
961 967 sidedatamod.SD_P1COPIES,
962 968 sidedatamod.SD_P2COPIES,
963 969 sidedatamod.SD_FILESADDED,
964 970 sidedatamod.SD_FILESREMOVED,
965 971 )
966 972 return False, f, {}
967 973
968 974 return sidedatacompanion