copies: move _makegetfctx calls into checkcopies
Matt Mackall
r26657:1ae898cf default
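
In brief: before this change, each mergecopies call site built the 'getfctx' helper itself via _makegetfctx and passed it to checkcopies; after it, the call sites pass the changectx and checkcopies builds the helper internally. A minimal before/after sketch of the call pattern, with the argument names taken directly from the diff below:

    # before: caller constructs the filectx accessor and passes it in
    getfctx = _makegetfctx(c1)
    checkcopies(getfctx, f, m1, m2, ca, limit, diverge, copy1, fullcopy1)

    # after: caller passes the changectx; checkcopies calls _makegetfctx(ctx) itself
    checkcopies(c1, f, m1, m2, ca, limit, diverge, copy1, fullcopy1)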
@@ -1,544 +1,539 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11
12 12 from . import (
13 13 pathutil,
14 14 util,
15 15 )
16 16
17 17 def _findlimit(repo, a, b):
18 18 """
19 19 Find the last revision that needs to be checked to ensure that a full
20 20 transitive closure for file copies can be properly calculated.
21 21 Generally, this means finding the earliest revision number that's an
22 22 ancestor of a or b but not both, except when a or b is a direct descendant
23 23 of the other, in which case we can return the minimum revnum of a and b.
24 24 None if no such revision exists.
25 25 """
26 26
27 27 # basic idea:
28 28 # - mark a and b with different sides
29 29 # - if a parent's children are all on the same side, the parent is
30 30 # on that side, otherwise it is on no side
31 31 # - walk the graph in topological order with the help of a heap;
32 32 # - add unseen parents to side map
33 33 # - clear side of any parent that has children on different sides
34 34 # - track number of interesting revs that might still be on a side
35 35 # - track the lowest interesting rev seen
36 36 # - quit when interesting revs is zero
37 37
38 38 cl = repo.changelog
39 39 working = len(cl) # pseudo rev for the working directory
40 40 if a is None:
41 41 a = working
42 42 if b is None:
43 43 b = working
44 44
45 45 side = {a: -1, b: 1}
46 46 visit = [-a, -b]
47 47 heapq.heapify(visit)
48 48 interesting = len(visit)
49 49 hascommonancestor = False
50 50 limit = working
51 51
52 52 while interesting:
53 53 r = -heapq.heappop(visit)
54 54 if r == working:
55 55 parents = [cl.rev(p) for p in repo.dirstate.parents()]
56 56 else:
57 57 parents = cl.parentrevs(r)
58 58 for p in parents:
59 59 if p < 0:
60 60 continue
61 61 if p not in side:
62 62 # first time we see p; add it to visit
63 63 side[p] = side[r]
64 64 if side[p]:
65 65 interesting += 1
66 66 heapq.heappush(visit, -p)
67 67 elif side[p] and side[p] != side[r]:
68 68 # p was interesting but now we know better
69 69 side[p] = 0
70 70 interesting -= 1
71 71 hascommonancestor = True
72 72 if side[r]:
73 73 limit = r # lowest rev visited
74 74 interesting -= 1
75 75
76 76 if not hascommonancestor:
77 77 return None
78 78
79 79 # Consider the following flow (see test-commit-amend.t under issue4405):
80 80 # 1/ File 'a0' committed
81 81 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
82 82 # 3/ Move back to first commit
83 83 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
84 84 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
85 85 #
86 86 # During the amend in step five, we will be in this state:
87 87 #
88 88 # @ 3 temporary amend commit for a1-amend
89 89 # |
90 90 # o 2 a1-amend
91 91 # |
92 92 # | o 1 a1
93 93 # |/
94 94 # o 0 a0
95 95 #
96 96 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
97 97 # yet the filelog has the copy information in rev 1 and we will not look
98 98 # back far enough unless we also look at the a and b as candidates.
99 99 # This only occurs when a is a descendant of b or vice-versa.
100 100 return min(limit, a, b)
101 101
102 102 def _chain(src, dst, a, b):
103 103 '''chain two sets of copies a->b'''
104 104 t = a.copy()
105 105 for k, v in b.iteritems():
106 106 if v in t:
107 107 # found a chain
108 108 if t[v] != k:
109 109 # file wasn't renamed back to itself
110 110 t[k] = t[v]
111 111 if v not in dst:
112 112 # chain was a rename, not a copy
113 113 del t[v]
114 114 if v in src:
115 115 # file is a copy of an existing file
116 116 t[k] = v
117 117
118 118 # remove criss-crossed copies
119 119 for k, v in t.items():
120 120 if k in src and v in dst:
121 121 del t[k]
122 122
123 123 return t
124 124
125 125 def _tracefile(fctx, am, limit=-1):
126 126 '''return file context that is the ancestor of fctx present in ancestor
127 127 manifest am, stopping after the first ancestor lower than limit'''
128 128
129 129 for f in fctx.ancestors():
130 130 if am.get(f.path(), None) == f.filenode():
131 131 return f
132 132 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
133 133 return None
134 134
135 135 def _dirstatecopies(d):
136 136 ds = d._repo.dirstate
137 137 c = ds.copies().copy()
138 138 for k in c.keys():
139 139 if ds[k] not in 'anm':
140 140 del c[k]
141 141 return c
142 142
143 143 def _computeforwardmissing(a, b, match=None):
144 144 """Computes which files are in b but not a.
145 145 This is its own function so extensions can easily wrap this call to see what
146 146 files _forwardcopies is about to process.
147 147 """
148 148 ma = a.manifest()
149 149 mb = b.manifest()
150 150 if match:
151 151 ma = ma.matches(match)
152 152 mb = mb.matches(match)
153 153 return mb.filesnotin(ma)
154 154
155 155 def _forwardcopies(a, b, match=None):
156 156 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
157 157
158 158 # check for working copy
159 159 w = None
160 160 if b.rev() is None:
161 161 w = b
162 162 b = w.p1()
163 163 if a == b:
164 164 # short-circuit to avoid issues with merge states
165 165 return _dirstatecopies(w)
166 166
167 167 # files might have to be traced back to the fctx parent of the last
168 168 # one-side-only changeset, but not further back than that
169 169 limit = _findlimit(a._repo, a.rev(), b.rev())
170 170 if limit is None:
171 171 limit = -1
172 172 am = a.manifest()
173 173
174 174 # find where new files came from
175 175 # we currently don't try to find where old files went, too expensive
176 176 # this means we can miss a case like 'hg rm b; hg cp a b'
177 177 cm = {}
178 178 missing = _computeforwardmissing(a, b, match=match)
179 179 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
180 180 for f in missing:
181 181 fctx = b[f]
182 182 fctx._ancestrycontext = ancestrycontext
183 183 ofctx = _tracefile(fctx, am, limit)
184 184 if ofctx:
185 185 cm[f] = ofctx.path()
186 186
187 187 # combine copies from dirstate if necessary
188 188 if w is not None:
189 189 cm = _chain(a, w, cm, _dirstatecopies(w))
190 190
191 191 return cm
192 192
193 193 def _backwardrenames(a, b):
194 194 if a._repo.ui.configbool('experimental', 'disablecopytrace'):
195 195 return {}
196 196
197 197 # Even though we're not taking copies into account, 1:n rename situations
198 198 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
199 199 # arbitrarily pick one of the renames.
200 200 f = _forwardcopies(b, a)
201 201 r = {}
202 202 for k, v in sorted(f.iteritems()):
203 203 # remove copies
204 204 if v in a:
205 205 continue
206 206 r[v] = k
207 207 return r
208 208
209 209 def pathcopies(x, y, match=None):
210 210 '''find {dst@y: src@x} copy mapping for directed compare'''
211 211 if x == y or not x or not y:
212 212 return {}
213 213 a = y.ancestor(x)
214 214 if a == x:
215 215 return _forwardcopies(x, y, match=match)
216 216 if a == y:
217 217 return _backwardrenames(x, y)
218 218 return _chain(x, y, _backwardrenames(x, a),
219 219 _forwardcopies(a, y, match=match))
220 220
221 221 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
222 222 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
223 223 and c2. This is its own function so extensions can easily wrap this call
224 224 to see what files mergecopies is about to process.
225 225
226 226 Even though c1 and c2 are not used in this function, they are useful in
227 227 other extensions for being able to read the file nodes of the changed files.
228 228 """
229 229 u1 = sorted(addedinm1 - addedinm2)
230 230 u2 = sorted(addedinm2 - addedinm1)
231 231
232 232 if u1:
233 233 repo.ui.debug(" unmatched files in local:\n %s\n"
234 234 % "\n ".join(u1))
235 235 if u2:
236 236 repo.ui.debug(" unmatched files in other:\n %s\n"
237 237 % "\n ".join(u2))
238 238 return u1, u2
239 239
240 240 def _makegetfctx(ctx):
241 241 """return a 'getfctx' function suitable for checkcopies usage
242 242
243 243 We have to re-set up the function building 'filectx' for each
244 244 'checkcopies' to ensure the linkrev adjustment is properly set up for
245 245 each. Linkrev adjustment is important to avoid bugs in rename
246 246 detection. Moreover, having a proper '_ancestrycontext' setup ensures
247 247 the performance impact of this adjustment is kept limited. Without it,
248 248 each file could do a full dag traversal making the time complexity of
249 249 the operation explode (see issue4537).
250 250
251 251 This function exists here mostly to limit the impact on stable. Feel
252 252 free to refactor on default.
253 253 """
254 254 rev = ctx.rev()
255 255 repo = ctx._repo
256 256 ac = getattr(ctx, '_ancestrycontext', None)
257 257 if ac is None:
258 258 revs = [rev]
259 259 if rev is None:
260 260 revs = [p.rev() for p in ctx.parents()]
261 261 ac = repo.changelog.ancestors(revs, inclusive=True)
262 262 ctx._ancestrycontext = ac
263 263 def makectx(f, n):
264 264 if len(n) != 20: # in a working context?
265 265 if ctx.rev() is None:
266 266 return ctx.filectx(f)
267 267 return repo[None][f]
268 268 fctx = repo.filectx(f, fileid=n)
269 269 # setup only needed for filectx not created from a changectx
270 270 fctx._ancestrycontext = ac
271 271 fctx._descendantrev = rev
272 272 return fctx
273 273 return util.lrucachefunc(makectx)
274 274
275 275 def mergecopies(repo, c1, c2, ca):
276 276 """
277 277 Find moves and copies between context c1 and c2 that are relevant
278 278 for merging.
279 279
280 280 Returns four dicts: "copy", "movewithdir", "diverge", and
281 281 "renamedelete".
282 282
283 283 "copy" is a mapping from destination name -> source name,
284 284 where source is in c1 and destination is in c2 or vice-versa.
285 285
286 286 "movewithdir" is a mapping from source name -> destination name,
287 287 where the file at source, present in one context but not the other,
288 288 needs to be moved to destination by the merge process, because the
289 289 other context moved the directory it is in.
290 290
291 291 "diverge" is a mapping of source name -> list of destination names
292 292 for divergent renames.
293 293
294 294 "renamedelete" is a mapping of source name -> list of destination
295 295 names for files deleted in c1 that were renamed in c2 or vice-versa.
296 296 """
297 297 # avoid silly behavior for update from empty dir
298 298 if not c1 or not c2 or c1 == c2:
299 299 return {}, {}, {}, {}
300 300
301 301 # avoid silly behavior for parent -> working dir
302 302 if c2.node() is None and c1.node() == repo.dirstate.p1():
303 303 return repo.dirstate.copies(), {}, {}, {}
304 304
305 305 # Copy trace disabling is explicitly below the node == p1 logic above
306 306 # because the logic above is required for a simple copy to be kept across a
307 307 # rebase.
308 308 if repo.ui.configbool('experimental', 'disablecopytrace'):
309 309 return {}, {}, {}, {}
310 310
311 311 limit = _findlimit(repo, c1.rev(), c2.rev())
312 312 if limit is None:
313 313 # no common ancestor, no copies
314 314 return {}, {}, {}, {}
315 315 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
316 316
317 317 m1 = c1.manifest()
318 318 m2 = c2.manifest()
319 319 ma = ca.manifest()
320 320
321 321 copy1, copy2, = {}, {}
322 322 movewithdir1, movewithdir2 = {}, {}
323 323 fullcopy1, fullcopy2 = {}, {}
324 324 diverge = {}
325 325
326 326 addedinm1 = m1.filesnotin(ma)
327 327 addedinm2 = m2.filesnotin(ma)
328 328 u1, u2 = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
329 329
330 330 for f in u1:
331 getfctx = _makegetfctx(c1)
332 checkcopies(getfctx, f, m1, m2, ca, limit, diverge, copy1, fullcopy1)
331 checkcopies(c1, f, m1, m2, ca, limit, diverge, copy1, fullcopy1)
333 332
334 333 for f in u2:
335 getfctx = _makegetfctx(c2)
336 checkcopies(getfctx, f, m2, m1, ca, limit, diverge, copy2, fullcopy2)
334 checkcopies(c2, f, m2, m1, ca, limit, diverge, copy2, fullcopy2)
337 335
338 336 copy = dict(copy1.items() + copy2.items())
339 337 movewithdir = dict(movewithdir1.items() + movewithdir2.items())
340 338 fullcopy = dict(fullcopy1.items() + fullcopy2.items())
341 339
342 340 renamedelete = {}
343 341 renamedelete2 = set()
344 342 divergeset = set()
345 343 for of, fl in diverge.items():
346 344 if len(fl) == 1 or of in c1 or of in c2:
347 345 del diverge[of] # not actually divergent, or not a rename
348 346 if of not in c1 and of not in c2:
349 347 # renamed on one side, deleted on the other side, but filter
350 348 # out files that have been renamed and then deleted
351 349 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
352 350 renamedelete2.update(fl) # reverse map for below
353 351 else:
354 352 divergeset.update(fl) # reverse map for below
355 353
356 354 bothnew = sorted(addedinm1 & addedinm2)
357 355 if bothnew:
358 356 repo.ui.debug(" unmatched files new in both:\n %s\n"
359 357 % "\n ".join(bothnew))
360 358 bothdiverge, _copy, _fullcopy = {}, {}, {}
361 359 for f in bothnew:
362 getfctx = _makegetfctx(c1)
363 checkcopies(getfctx, f, m1, m2, ca, limit, bothdiverge,
364 _copy, _fullcopy)
365 getfctx = _makegetfctx(c2)
366 checkcopies(getfctx, f, m2, m1, ca, limit, bothdiverge,
367 _copy, _fullcopy)
360 checkcopies(c1, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
361 checkcopies(c2, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
368 362 for of, fl in bothdiverge.items():
369 363 if len(fl) == 2 and fl[0] == fl[1]:
370 364 copy[fl[0]] = of # not actually divergent, just matching renames
371 365
372 366 if fullcopy and repo.ui.debugflag:
373 367 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
374 368 "% = renamed and deleted):\n")
375 369 for f in sorted(fullcopy):
376 370 note = ""
377 371 if f in copy:
378 372 note += "*"
379 373 if f in divergeset:
380 374 note += "!"
381 375 if f in renamedelete2:
382 376 note += "%"
383 377 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
384 378 note))
385 379 del divergeset
386 380
387 381 if not fullcopy:
388 382 return copy, movewithdir, diverge, renamedelete
389 383
390 384 repo.ui.debug(" checking for directory renames\n")
391 385
392 386 # generate a directory move map
393 387 d1, d2 = c1.dirs(), c2.dirs()
394 388 # Hack for adding '', which is not otherwise added, to d1 and d2
395 389 d1.addpath('/')
396 390 d2.addpath('/')
397 391 invalid = set()
398 392 dirmove = {}
399 393
400 394 # examine each file copy for a potential directory move, which is
401 395 # when all the files in a directory are moved to a new directory
402 396 for dst, src in fullcopy.iteritems():
403 397 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
404 398 if dsrc in invalid:
405 399 # already seen to be uninteresting
406 400 continue
407 401 elif dsrc in d1 and ddst in d1:
408 402 # directory wasn't entirely moved locally
409 403 invalid.add(dsrc)
410 404 elif dsrc in d2 and ddst in d2:
411 405 # directory wasn't entirely moved remotely
412 406 invalid.add(dsrc)
413 407 elif dsrc in dirmove and dirmove[dsrc] != ddst:
414 408 # files from the same directory moved to two different places
415 409 invalid.add(dsrc)
416 410 else:
417 411 # looks good so far
418 412 dirmove[dsrc + "/"] = ddst + "/"
419 413
420 414 for i in invalid:
421 415 if i in dirmove:
422 416 del dirmove[i]
423 417 del d1, d2, invalid
424 418
425 419 if not dirmove:
426 420 return copy, movewithdir, diverge, renamedelete
427 421
428 422 for d in dirmove:
429 423 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
430 424 (d, dirmove[d]))
431 425
432 426 # check unaccounted nonoverlapping files against directory moves
433 427 for f in u1 + u2:
434 428 if f not in fullcopy:
435 429 for d in dirmove:
436 430 if f.startswith(d):
437 431 # new file added in a directory that was moved, move it
438 432 df = dirmove[d] + f[len(d):]
439 433 if df not in copy:
440 434 movewithdir[f] = df
441 435 repo.ui.debug((" pending file src: '%s' -> "
442 436 "dst: '%s'\n") % (f, df))
443 437 break
444 438
445 439 return copy, movewithdir, diverge, renamedelete
446 440
447 def checkcopies(getfctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
441 def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
448 442 """
449 443 check possible copies of f from m1 to m2
450 444
451 getfctx = function accepting (filename, node) that returns a filectx.
445 ctx = starting context for f in m1
452 446 f = the filename to check
453 447 m1 = the source manifest
454 448 m2 = the destination manifest
455 449 ca = the changectx of the common ancestor
456 450 limit = the rev number to not search beyond
457 451 diverge = record all diverges in this dict
458 452 copy = record all non-divergent copies in this dict
459 453 fullcopy = record all copies in this dict
460 454 """
461 455
462 456 ma = ca.manifest()
457 getfctx = _makegetfctx(ctx)
463 458
464 459 def _related(f1, f2, limit):
465 460 # Walk back to common ancestor to see if the two files originate
466 461 # from the same file. Since workingfilectx's rev() is None it messes
467 462 # up the integer comparison logic, hence the pre-step check for
468 463 # None (f1 and f2 can only be workingfilectx's initially).
469 464
470 465 if f1 == f2:
471 466 return f1 # a match
472 467
473 468 g1, g2 = f1.ancestors(), f2.ancestors()
474 469 try:
475 470 f1r, f2r = f1.linkrev(), f2.linkrev()
476 471
477 472 if f1r is None:
478 473 f1 = g1.next()
479 474 if f2r is None:
480 475 f2 = g2.next()
481 476
482 477 while True:
483 478 f1r, f2r = f1.linkrev(), f2.linkrev()
484 479 if f1r > f2r:
485 480 f1 = g1.next()
486 481 elif f2r > f1r:
487 482 f2 = g2.next()
488 483 elif f1 == f2:
489 484 return f1 # a match
490 485 elif f1r == f2r or f1r < limit or f2r < limit:
491 486 return False # copy no longer relevant
492 487 except StopIteration:
493 488 return False
494 489
495 490 of = None
496 491 seen = set([f])
497 492 for oc in getfctx(f, m1[f]).ancestors():
498 493 ocr = oc.linkrev()
499 494 of = oc.path()
500 495 if of in seen:
501 496 # check limit late - grab last rename before
502 497 if ocr < limit:
503 498 break
504 499 continue
505 500 seen.add(of)
506 501
507 502 fullcopy[f] = of # remember for dir rename detection
508 503 if of not in m2:
509 504 continue # no match, keep looking
510 505 if m2[of] == ma.get(of):
511 506 break # no merge needed, quit early
512 507 c2 = getfctx(of, m2[of])
513 508 cr = _related(oc, c2, ca.rev())
514 509 if cr and (of == f or of == c2.path()): # non-divergent
515 510 copy[f] = of
516 511 of = None
517 512 break
518 513
519 514 if of in ma:
520 515 diverge.setdefault(of, []).append(f)
521 516
522 517 def duplicatecopies(repo, rev, fromrev, skiprev=None):
523 518 '''reproduce copies from fromrev to rev in the dirstate
524 519
525 520 If skiprev is specified, it's a revision that should be used to
526 521 filter copy records. Any copies that occur between fromrev and
527 522 skiprev will not be duplicated, even if they appear in the set of
528 523 copies between fromrev and rev.
529 524 '''
530 525 exclude = {}
531 526 if (skiprev is not None and
532 527 not repo.ui.configbool('experimental', 'disablecopytrace')):
533 528 # disablecopytrace skips this line, but not the entire function because
534 529 # the line below is O(size of the repo) during a rebase, while the rest
535 530 # of the function is much faster (and is required for carrying copy
536 531 # metadata across the rebase anyway).
537 532 exclude = pathcopies(repo[fromrev], repo[skiprev])
538 533 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
539 534 # copies.pathcopies returns backward renames, so dst might not
540 535 # actually be in the dirstate
541 536 if dst in exclude:
542 537 continue
543 538 if repo.dirstate[dst] in "nma":
544 539 repo.dirstate.copy(src, dst)
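
For context, a rough sketch of how the entry points shown above are typically driven from a repository object; this is an illustration only, and repo, rev1, rev2, c1, c2 and ca below are hypothetical placeholders rather than anything introduced by this change:

    from mercurial import copies

    # {dst: src} copy mapping for a directed comparison of two changesets
    # (pathcopies dispatches to _forwardcopies/_backwardrenames/_chain above)
    cm = copies.pathcopies(repo[rev1], repo[rev2])

    # copies relevant to merging c1 and c2 with common ancestor ca, as four dicts
    copy, movewithdir, diverge, renamedelete = copies.mergecopies(repo, c1, c2, ca)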