##// END OF EJS Templates
mergecopies: reuse ancestry context when traversing file history (issue4537)...
Pierre-Yves David -
r24412:9372180b stable
parent child Browse files
Show More
@@ -1,472 +1,503 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import util
9 9 import heapq
10 10
11 11 def _nonoverlap(d1, d2, d3):
12 12 "Return list of elements in d1 not in d2 or d3"
13 13 return sorted([d for d in d1 if d not in d3 and d not in d2])
14 14
15 15 def _dirname(f):
16 16 s = f.rfind("/")
17 17 if s == -1:
18 18 return ""
19 19 return f[:s]
20 20
21 21 def _findlimit(repo, a, b):
22 22 """
23 23 Find the last revision that needs to be checked to ensure that a full
24 24 transitive closure for file copies can be properly calculated.
25 25 Generally, this means finding the earliest revision number that's an
26 26 ancestor of a or b but not both, except when a or b is a direct descendent
27 27 of the other, in which case we can return the minimum revnum of a and b.
28 28 None if no such revision exists.
29 29 """
30 30
31 31 # basic idea:
32 32 # - mark a and b with different sides
33 33 # - if a parent's children are all on the same side, the parent is
34 34 # on that side, otherwise it is on no side
35 35 # - walk the graph in topological order with the help of a heap;
36 36 # - add unseen parents to side map
37 37 # - clear side of any parent that has children on different sides
38 38 # - track number of interesting revs that might still be on a side
39 39 # - track the lowest interesting rev seen
40 40 # - quit when interesting revs is zero
41 41
42 42 cl = repo.changelog
43 43 working = len(cl) # pseudo rev for the working directory
44 44 if a is None:
45 45 a = working
46 46 if b is None:
47 47 b = working
48 48
49 49 side = {a: -1, b: 1}
50 50 visit = [-a, -b]
51 51 heapq.heapify(visit)
52 52 interesting = len(visit)
53 53 hascommonancestor = False
54 54 limit = working
55 55
56 56 while interesting:
57 57 r = -heapq.heappop(visit)
58 58 if r == working:
59 59 parents = [cl.rev(p) for p in repo.dirstate.parents()]
60 60 else:
61 61 parents = cl.parentrevs(r)
62 62 for p in parents:
63 63 if p < 0:
64 64 continue
65 65 if p not in side:
66 66 # first time we see p; add it to visit
67 67 side[p] = side[r]
68 68 if side[p]:
69 69 interesting += 1
70 70 heapq.heappush(visit, -p)
71 71 elif side[p] and side[p] != side[r]:
72 72 # p was interesting but now we know better
73 73 side[p] = 0
74 74 interesting -= 1
75 75 hascommonancestor = True
76 76 if side[r]:
77 77 limit = r # lowest rev visited
78 78 interesting -= 1
79 79
80 80 if not hascommonancestor:
81 81 return None
82 82
83 83 # Consider the following flow (see test-commit-amend.t under issue4405):
84 84 # 1/ File 'a0' committed
85 85 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
86 86 # 3/ Move back to first commit
87 87 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
88 88 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
89 89 #
90 90 # During the amend in step five, we will be in this state:
91 91 #
92 92 # @ 3 temporary amend commit for a1-amend
93 93 # |
94 94 # o 2 a1-amend
95 95 # |
96 96 # | o 1 a1
97 97 # |/
98 98 # o 0 a0
99 99 #
100 100 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
101 101 # yet the filelog has the copy information in rev 1 and we will not look
102 102 # back far enough unless we also look at the a and b as candidates.
103 103 # This only occurs when a is a descendent of b or visa-versa.
104 104 return min(limit, a, b)
105 105
106 106 def _chain(src, dst, a, b):
107 107 '''chain two sets of copies a->b'''
108 108 t = a.copy()
109 109 for k, v in b.iteritems():
110 110 if v in t:
111 111 # found a chain
112 112 if t[v] != k:
113 113 # file wasn't renamed back to itself
114 114 t[k] = t[v]
115 115 if v not in dst:
116 116 # chain was a rename, not a copy
117 117 del t[v]
118 118 if v in src:
119 119 # file is a copy of an existing file
120 120 t[k] = v
121 121
122 122 # remove criss-crossed copies
123 123 for k, v in t.items():
124 124 if k in src and v in dst:
125 125 del t[k]
126 126
127 127 return t
128 128
129 129 def _tracefile(fctx, am, limit=-1):
130 130 '''return file context that is the ancestor of fctx present in ancestor
131 131 manifest am, stopping after the first ancestor lower than limit'''
132 132
133 133 for f in fctx.ancestors():
134 134 if am.get(f.path(), None) == f.filenode():
135 135 return f
136 136 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
137 137 return None
138 138
139 139 def _dirstatecopies(d):
140 140 ds = d._repo.dirstate
141 141 c = ds.copies().copy()
142 142 for k in c.keys():
143 143 if ds[k] not in 'anm':
144 144 del c[k]
145 145 return c
146 146
147 147 def _forwardcopies(a, b):
148 148 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
149 149
150 150 # check for working copy
151 151 w = None
152 152 if b.rev() is None:
153 153 w = b
154 154 b = w.p1()
155 155 if a == b:
156 156 # short-circuit to avoid issues with merge states
157 157 return _dirstatecopies(w)
158 158
159 159 # files might have to be traced back to the fctx parent of the last
160 160 # one-side-only changeset, but not further back than that
161 161 limit = _findlimit(a._repo, a.rev(), b.rev())
162 162 if limit is None:
163 163 limit = -1
164 164 am = a.manifest()
165 165
166 166 # find where new files came from
167 167 # we currently don't try to find where old files went, too expensive
168 168 # this means we can miss a case like 'hg rm b; hg cp a b'
169 169 cm = {}
170 170 missing = set(b.manifest().iterkeys())
171 171 missing.difference_update(a.manifest().iterkeys())
172 172
173 173 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
174 174 for f in missing:
175 175 fctx = b[f]
176 176 fctx._ancestrycontext = ancestrycontext
177 177 ofctx = _tracefile(fctx, am, limit)
178 178 if ofctx:
179 179 cm[f] = ofctx.path()
180 180
181 181 # combine copies from dirstate if necessary
182 182 if w is not None:
183 183 cm = _chain(a, w, cm, _dirstatecopies(w))
184 184
185 185 return cm
186 186
187 187 def _backwardrenames(a, b):
188 188 # Even though we're not taking copies into account, 1:n rename situations
189 189 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
190 190 # arbitrarily pick one of the renames.
191 191 f = _forwardcopies(b, a)
192 192 r = {}
193 193 for k, v in sorted(f.iteritems()):
194 194 # remove copies
195 195 if v in a:
196 196 continue
197 197 r[v] = k
198 198 return r
199 199
200 200 def pathcopies(x, y):
201 201 '''find {dst@y: src@x} copy mapping for directed compare'''
202 202 if x == y or not x or not y:
203 203 return {}
204 204 a = y.ancestor(x)
205 205 if a == x:
206 206 return _forwardcopies(x, y)
207 207 if a == y:
208 208 return _backwardrenames(x, y)
209 209 return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
210 210
211 211 def mergecopies(repo, c1, c2, ca):
212 212 """
213 213 Find moves and copies between context c1 and c2 that are relevant
214 214 for merging.
215 215
216 216 Returns four dicts: "copy", "movewithdir", "diverge", and
217 217 "renamedelete".
218 218
219 219 "copy" is a mapping from destination name -> source name,
220 220 where source is in c1 and destination is in c2 or vice-versa.
221 221
222 222 "movewithdir" is a mapping from source name -> destination name,
223 223 where the file at source present in one context but not the other
224 224 needs to be moved to destination by the merge process, because the
225 225 other context moved the directory it is in.
226 226
227 227 "diverge" is a mapping of source name -> list of destination names
228 228 for divergent renames.
229 229
230 230 "renamedelete" is a mapping of source name -> list of destination
231 231 names for files deleted in c1 that were renamed in c2 or vice-versa.
232 232 """
233 233 # avoid silly behavior for update from empty dir
234 234 if not c1 or not c2 or c1 == c2:
235 235 return {}, {}, {}, {}
236 236
237 237 # avoid silly behavior for parent -> working dir
238 238 if c2.node() is None and c1.node() == repo.dirstate.p1():
239 239 return repo.dirstate.copies(), {}, {}, {}
240 240
241 241 limit = _findlimit(repo, c1.rev(), c2.rev())
242 242 if limit is None:
243 243 # no common ancestor, no copies
244 244 return {}, {}, {}, {}
245 245 m1 = c1.manifest()
246 246 m2 = c2.manifest()
247 247 ma = ca.manifest()
248 248
249
250 def setupctx(ctx):
251 """return a 'makectx' function suitable for checkcopies usage from ctx
252
253 We have to re-setup the function building 'filectx' for each
254 'checkcopies' to ensure the linkrev adjustement is properly setup for
255 each. Linkrev adjustment is important to avoid bug in rename
256 detection. Moreover, having a proper '_ancestrycontext' setup ensures
257 the performance impact of this adjustment is kept limited. Without it,
258 each file could do a full dag traversal making the time complexity of
259 the operation explode (see issue4537).
260
261 This function exists here mostly to limit the impact on stable. Feel
262 free to refactor on default.
263 """
264 rev = ctx.rev()
265 ac = getattr(ctx, '_ancestrycontext', None)
266 if ac is None:
267 revs = [rev]
268 if rev is None:
269 revs = [p.rev() for p in ctx.parents()]
270 ac = ctx._repo.changelog.ancestors(revs, inclusive=True)
271 ctx._ancestrycontext = ac
249 272 def makectx(f, n):
250 273 if len(n) != 20: # in a working context?
251 274 if c1.rev() is None:
252 275 return c1.filectx(f)
253 276 return c2.filectx(f)
254 return repo.filectx(f, fileid=n)
277 fctx = repo.filectx(f, fileid=n)
278 # setup only needed for filectx not create from a changectx
279 fctx._ancestrycontext = ac
280 fctx._descendantrev = rev
281 return fctx
282 return util.lrucachefunc(makectx)
255 283
256 ctx = util.lrucachefunc(makectx)
257 284 copy = {}
258 285 movewithdir = {}
259 286 fullcopy = {}
260 287 diverge = {}
261 288
262 289 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
263 290
264 291 u1 = _nonoverlap(m1, m2, ma)
265 292 u2 = _nonoverlap(m2, m1, ma)
266 293
267 294 if u1:
268 295 repo.ui.debug(" unmatched files in local:\n %s\n"
269 296 % "\n ".join(u1))
270 297 if u2:
271 298 repo.ui.debug(" unmatched files in other:\n %s\n"
272 299 % "\n ".join(u2))
273 300
274 301 for f in u1:
302 ctx = setupctx(c1)
275 303 checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
276 304
277 305 for f in u2:
306 ctx = setupctx(c2)
278 307 checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
279 308
280 309 renamedelete = {}
281 310 renamedelete2 = set()
282 311 diverge2 = set()
283 312 for of, fl in diverge.items():
284 313 if len(fl) == 1 or of in c1 or of in c2:
285 314 del diverge[of] # not actually divergent, or not a rename
286 315 if of not in c1 and of not in c2:
287 316 # renamed on one side, deleted on the other side, but filter
288 317 # out files that have been renamed and then deleted
289 318 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
290 319 renamedelete2.update(fl) # reverse map for below
291 320 else:
292 321 diverge2.update(fl) # reverse map for below
293 322
294 323 bothnew = sorted([d for d in m1 if d in m2 and d not in ma])
295 324 if bothnew:
296 325 repo.ui.debug(" unmatched files new in both:\n %s\n"
297 326 % "\n ".join(bothnew))
298 327 bothdiverge, _copy, _fullcopy = {}, {}, {}
299 328 for f in bothnew:
329 ctx = setupctx(c1)
300 330 checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
331 ctx = setupctx(c2)
301 332 checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
302 333 for of, fl in bothdiverge.items():
303 334 if len(fl) == 2 and fl[0] == fl[1]:
304 335 copy[fl[0]] = of # not actually divergent, just matching renames
305 336
306 337 if fullcopy and repo.ui.debugflag:
307 338 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
308 339 "% = renamed and deleted):\n")
309 340 for f in sorted(fullcopy):
310 341 note = ""
311 342 if f in copy:
312 343 note += "*"
313 344 if f in diverge2:
314 345 note += "!"
315 346 if f in renamedelete2:
316 347 note += "%"
317 348 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
318 349 note))
319 350 del diverge2
320 351
321 352 if not fullcopy:
322 353 return copy, movewithdir, diverge, renamedelete
323 354
324 355 repo.ui.debug(" checking for directory renames\n")
325 356
326 357 # generate a directory move map
327 358 d1, d2 = c1.dirs(), c2.dirs()
328 359 d1.addpath('/')
329 360 d2.addpath('/')
330 361 invalid = set()
331 362 dirmove = {}
332 363
333 364 # examine each file copy for a potential directory move, which is
334 365 # when all the files in a directory are moved to a new directory
335 366 for dst, src in fullcopy.iteritems():
336 367 dsrc, ddst = _dirname(src), _dirname(dst)
337 368 if dsrc in invalid:
338 369 # already seen to be uninteresting
339 370 continue
340 371 elif dsrc in d1 and ddst in d1:
341 372 # directory wasn't entirely moved locally
342 373 invalid.add(dsrc)
343 374 elif dsrc in d2 and ddst in d2:
344 375 # directory wasn't entirely moved remotely
345 376 invalid.add(dsrc)
346 377 elif dsrc in dirmove and dirmove[dsrc] != ddst:
347 378 # files from the same directory moved to two different places
348 379 invalid.add(dsrc)
349 380 else:
350 381 # looks good so far
351 382 dirmove[dsrc + "/"] = ddst + "/"
352 383
353 384 for i in invalid:
354 385 if i in dirmove:
355 386 del dirmove[i]
356 387 del d1, d2, invalid
357 388
358 389 if not dirmove:
359 390 return copy, movewithdir, diverge, renamedelete
360 391
361 392 for d in dirmove:
362 393 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
363 394 (d, dirmove[d]))
364 395
365 396 # check unaccounted nonoverlapping files against directory moves
366 397 for f in u1 + u2:
367 398 if f not in fullcopy:
368 399 for d in dirmove:
369 400 if f.startswith(d):
370 401 # new file added in a directory that was moved, move it
371 402 df = dirmove[d] + f[len(d):]
372 403 if df not in copy:
373 404 movewithdir[f] = df
374 405 repo.ui.debug((" pending file src: '%s' -> "
375 406 "dst: '%s'\n") % (f, df))
376 407 break
377 408
378 409 return copy, movewithdir, diverge, renamedelete
379 410
380 411 def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
381 412 """
382 413 check possible copies of f from m1 to m2
383 414
384 415 ctx = function accepting (filename, node) that returns a filectx.
385 416 f = the filename to check
386 417 m1 = the source manifest
387 418 m2 = the destination manifest
388 419 ca = the changectx of the common ancestor
389 420 limit = the rev number to not search beyond
390 421 diverge = record all diverges in this dict
391 422 copy = record all non-divergent copies in this dict
392 423 fullcopy = record all copies in this dict
393 424 """
394 425
395 426 ma = ca.manifest()
396 427
397 428 def _related(f1, f2, limit):
398 429 # Walk back to common ancestor to see if the two files originate
399 430 # from the same file. Since workingfilectx's rev() is None it messes
400 431 # up the integer comparison logic, hence the pre-step check for
401 432 # None (f1 and f2 can only be workingfilectx's initially).
402 433
403 434 if f1 == f2:
404 435 return f1 # a match
405 436
406 437 g1, g2 = f1.ancestors(), f2.ancestors()
407 438 try:
408 439 f1r, f2r = f1.rev(), f2.rev()
409 440
410 441 if f1r is None:
411 442 f1 = g1.next()
412 443 if f2r is None:
413 444 f2 = g2.next()
414 445
415 446 while True:
416 447 f1r, f2r = f1.rev(), f2.rev()
417 448 if f1r > f2r:
418 449 f1 = g1.next()
419 450 elif f2r > f1r:
420 451 f2 = g2.next()
421 452 elif f1 == f2:
422 453 return f1 # a match
423 454 elif f1r == f2r or f1r < limit or f2r < limit:
424 455 return False # copy no longer relevant
425 456 except StopIteration:
426 457 return False
427 458
428 459 of = None
429 460 seen = set([f])
430 461 for oc in ctx(f, m1[f]).ancestors():
431 462 ocr = oc.rev()
432 463 of = oc.path()
433 464 if of in seen:
434 465 # check limit late - grab last rename before
435 466 if ocr < limit:
436 467 break
437 468 continue
438 469 seen.add(of)
439 470
440 471 fullcopy[f] = of # remember for dir rename detection
441 472 if of not in m2:
442 473 continue # no match, keep looking
443 474 if m2[of] == ma.get(of):
444 475 break # no merge needed, quit early
445 476 c2 = ctx(of, m2[of])
446 477 cr = _related(oc, c2, ca.rev())
447 478 if cr and (of == f or of == c2.path()): # non-divergent
448 479 copy[f] = of
449 480 of = None
450 481 break
451 482
452 483 if of in ma:
453 484 diverge.setdefault(of, []).append(f)
454 485
455 486 def duplicatecopies(repo, rev, fromrev, skiprev=None):
456 487 '''reproduce copies from fromrev to rev in the dirstate
457 488
458 489 If skiprev is specified, it's a revision that should be used to
459 490 filter copy records. Any copies that occur between fromrev and
460 491 skiprev will not be duplicated, even if they appear in the set of
461 492 copies between fromrev and rev.
462 493 '''
463 494 exclude = {}
464 495 if skiprev is not None:
465 496 exclude = pathcopies(repo[fromrev], repo[skiprev])
466 497 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
467 498 # copies.pathcopies returns backward renames, so dst might not
468 499 # actually be in the dirstate
469 500 if dst in exclude:
470 501 continue
471 502 if repo.dirstate[dst] in "nma":
472 503 repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now