##// END OF EJS Templates
copies: pass changectx instead of manifest to _computenonoverlap...
Durham Goode -
r24625:2cebf17c default
parent child Browse files
Show More
@@ -1,517 +1,517
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import util
9 9 import heapq
10 10
11 11 def _dirname(f):
12 12 s = f.rfind("/")
13 13 if s == -1:
14 14 return ""
15 15 return f[:s]
16 16
17 17 def _findlimit(repo, a, b):
18 18 """
19 19 Find the last revision that needs to be checked to ensure that a full
20 20 transitive closure for file copies can be properly calculated.
21 21 Generally, this means finding the earliest revision number that's an
22 22 ancestor of a or b but not both, except when a or b is a direct descendent
23 23 of the other, in which case we can return the minimum revnum of a and b.
24 24 None if no such revision exists.
25 25 """
26 26
27 27 # basic idea:
28 28 # - mark a and b with different sides
29 29 # - if a parent's children are all on the same side, the parent is
30 30 # on that side, otherwise it is on no side
31 31 # - walk the graph in topological order with the help of a heap;
32 32 # - add unseen parents to side map
33 33 # - clear side of any parent that has children on different sides
34 34 # - track number of interesting revs that might still be on a side
35 35 # - track the lowest interesting rev seen
36 36 # - quit when interesting revs is zero
37 37
38 38 cl = repo.changelog
39 39 working = len(cl) # pseudo rev for the working directory
40 40 if a is None:
41 41 a = working
42 42 if b is None:
43 43 b = working
44 44
45 45 side = {a: -1, b: 1}
46 46 visit = [-a, -b]
47 47 heapq.heapify(visit)
48 48 interesting = len(visit)
49 49 hascommonancestor = False
50 50 limit = working
51 51
52 52 while interesting:
53 53 r = -heapq.heappop(visit)
54 54 if r == working:
55 55 parents = [cl.rev(p) for p in repo.dirstate.parents()]
56 56 else:
57 57 parents = cl.parentrevs(r)
58 58 for p in parents:
59 59 if p < 0:
60 60 continue
61 61 if p not in side:
62 62 # first time we see p; add it to visit
63 63 side[p] = side[r]
64 64 if side[p]:
65 65 interesting += 1
66 66 heapq.heappush(visit, -p)
67 67 elif side[p] and side[p] != side[r]:
68 68 # p was interesting but now we know better
69 69 side[p] = 0
70 70 interesting -= 1
71 71 hascommonancestor = True
72 72 if side[r]:
73 73 limit = r # lowest rev visited
74 74 interesting -= 1
75 75
76 76 if not hascommonancestor:
77 77 return None
78 78
79 79 # Consider the following flow (see test-commit-amend.t under issue4405):
80 80 # 1/ File 'a0' committed
81 81 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
82 82 # 3/ Move back to first commit
83 83 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
84 84 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
85 85 #
86 86 # During the amend in step five, we will be in this state:
87 87 #
88 88 # @ 3 temporary amend commit for a1-amend
89 89 # |
90 90 # o 2 a1-amend
91 91 # |
92 92 # | o 1 a1
93 93 # |/
94 94 # o 0 a0
95 95 #
96 96 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
97 97 # yet the filelog has the copy information in rev 1 and we will not look
98 98 # back far enough unless we also look at the a and b as candidates.
99 99 # This only occurs when a is a descendent of b or visa-versa.
100 100 return min(limit, a, b)
101 101
102 102 def _chain(src, dst, a, b):
103 103 '''chain two sets of copies a->b'''
104 104 t = a.copy()
105 105 for k, v in b.iteritems():
106 106 if v in t:
107 107 # found a chain
108 108 if t[v] != k:
109 109 # file wasn't renamed back to itself
110 110 t[k] = t[v]
111 111 if v not in dst:
112 112 # chain was a rename, not a copy
113 113 del t[v]
114 114 if v in src:
115 115 # file is a copy of an existing file
116 116 t[k] = v
117 117
118 118 # remove criss-crossed copies
119 119 for k, v in t.items():
120 120 if k in src and v in dst:
121 121 del t[k]
122 122
123 123 return t
124 124
125 125 def _tracefile(fctx, am, limit=-1):
126 126 '''return file context that is the ancestor of fctx present in ancestor
127 127 manifest am, stopping after the first ancestor lower than limit'''
128 128
129 129 for f in fctx.ancestors():
130 130 if am.get(f.path(), None) == f.filenode():
131 131 return f
132 132 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
133 133 return None
134 134
135 135 def _dirstatecopies(d):
136 136 ds = d._repo.dirstate
137 137 c = ds.copies().copy()
138 138 for k in c.keys():
139 139 if ds[k] not in 'anm':
140 140 del c[k]
141 141 return c
142 142
143 143 def _computeforwardmissing(a, b):
144 144 """Computes which files are in b but not a.
145 145 This is its own function so extensions can easily wrap this call to see what
146 146 files _forwardcopies is about to process.
147 147 """
148 148 return b.manifest().filesnotin(a.manifest())
149 149
150 150 def _forwardcopies(a, b):
151 151 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
152 152
153 153 # check for working copy
154 154 w = None
155 155 if b.rev() is None:
156 156 w = b
157 157 b = w.p1()
158 158 if a == b:
159 159 # short-circuit to avoid issues with merge states
160 160 return _dirstatecopies(w)
161 161
162 162 # files might have to be traced back to the fctx parent of the last
163 163 # one-side-only changeset, but not further back than that
164 164 limit = _findlimit(a._repo, a.rev(), b.rev())
165 165 if limit is None:
166 166 limit = -1
167 167 am = a.manifest()
168 168
169 169 # find where new files came from
170 170 # we currently don't try to find where old files went, too expensive
171 171 # this means we can miss a case like 'hg rm b; hg cp a b'
172 172 cm = {}
173 173 missing = _computeforwardmissing(a, b)
174 174 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
175 175 for f in missing:
176 176 fctx = b[f]
177 177 fctx._ancestrycontext = ancestrycontext
178 178 ofctx = _tracefile(fctx, am, limit)
179 179 if ofctx:
180 180 cm[f] = ofctx.path()
181 181
182 182 # combine copies from dirstate if necessary
183 183 if w is not None:
184 184 cm = _chain(a, w, cm, _dirstatecopies(w))
185 185
186 186 return cm
187 187
188 188 def _backwardrenames(a, b):
189 189 # Even though we're not taking copies into account, 1:n rename situations
190 190 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
191 191 # arbitrarily pick one of the renames.
192 192 f = _forwardcopies(b, a)
193 193 r = {}
194 194 for k, v in sorted(f.iteritems()):
195 195 # remove copies
196 196 if v in a:
197 197 continue
198 198 r[v] = k
199 199 return r
200 200
201 201 def pathcopies(x, y):
202 202 '''find {dst@y: src@x} copy mapping for directed compare'''
203 203 if x == y or not x or not y:
204 204 return {}
205 205 a = y.ancestor(x)
206 206 if a == x:
207 207 return _forwardcopies(x, y)
208 208 if a == y:
209 209 return _backwardrenames(x, y)
210 210 return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
211 211
212 def _computenonoverlap(repo, m1, m2, addedinm1, addedinm2):
213 """Computes, based on addedinm1 and addedinm2, the files exclusive to m1
214 and m2. This is its own function so extensions can easily wrap this call
212 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
213 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
214 and c2. This is its own function so extensions can easily wrap this call
215 215 to see what files mergecopies is about to process.
216 216
217 Even though m1 and m2 are not used in this function, they are useful in
217 Even though c1 and c2 are not used in this function, they are useful in
218 218 other extensions for being able to read the file nodes of the changed files.
219 219 """
220 220 u1 = sorted(addedinm1 - addedinm2)
221 221 u2 = sorted(addedinm2 - addedinm1)
222 222
223 223 if u1:
224 224 repo.ui.debug(" unmatched files in local:\n %s\n"
225 225 % "\n ".join(u1))
226 226 if u2:
227 227 repo.ui.debug(" unmatched files in other:\n %s\n"
228 228 % "\n ".join(u2))
229 229 return u1, u2
230 230
231 231 def mergecopies(repo, c1, c2, ca):
232 232 """
233 233 Find moves and copies between context c1 and c2 that are relevant
234 234 for merging.
235 235
236 236 Returns four dicts: "copy", "movewithdir", "diverge", and
237 237 "renamedelete".
238 238
239 239 "copy" is a mapping from destination name -> source name,
240 240 where source is in c1 and destination is in c2 or vice-versa.
241 241
242 242 "movewithdir" is a mapping from source name -> destination name,
243 243 where the file at source present in one context but not the other
244 244 needs to be moved to destination by the merge process, because the
245 245 other context moved the directory it is in.
246 246
247 247 "diverge" is a mapping of source name -> list of destination names
248 248 for divergent renames.
249 249
250 250 "renamedelete" is a mapping of source name -> list of destination
251 251 names for files deleted in c1 that were renamed in c2 or vice-versa.
252 252 """
253 253 # avoid silly behavior for update from empty dir
254 254 if not c1 or not c2 or c1 == c2:
255 255 return {}, {}, {}, {}
256 256
257 257 # avoid silly behavior for parent -> working dir
258 258 if c2.node() is None and c1.node() == repo.dirstate.p1():
259 259 return repo.dirstate.copies(), {}, {}, {}
260 260
261 261 limit = _findlimit(repo, c1.rev(), c2.rev())
262 262 if limit is None:
263 263 # no common ancestor, no copies
264 264 return {}, {}, {}, {}
265 265 m1 = c1.manifest()
266 266 m2 = c2.manifest()
267 267 ma = ca.manifest()
268 268
269 269
270 270 def setupctx(ctx):
271 271 """return a 'makectx' function suitable for checkcopies usage from ctx
272 272
273 273 We have to re-setup the function building 'filectx' for each
274 274 'checkcopies' to ensure the linkrev adjustement is properly setup for
275 275 each. Linkrev adjustment is important to avoid bug in rename
276 276 detection. Moreover, having a proper '_ancestrycontext' setup ensures
277 277 the performance impact of this adjustment is kept limited. Without it,
278 278 each file could do a full dag traversal making the time complexity of
279 279 the operation explode (see issue4537).
280 280
281 281 This function exists here mostly to limit the impact on stable. Feel
282 282 free to refactor on default.
283 283 """
284 284 rev = ctx.rev()
285 285 ac = getattr(ctx, '_ancestrycontext', None)
286 286 if ac is None:
287 287 revs = [rev]
288 288 if rev is None:
289 289 revs = [p.rev() for p in ctx.parents()]
290 290 ac = ctx._repo.changelog.ancestors(revs, inclusive=True)
291 291 ctx._ancestrycontext = ac
292 292 def makectx(f, n):
293 293 if len(n) != 20: # in a working context?
294 294 if c1.rev() is None:
295 295 return c1.filectx(f)
296 296 return c2.filectx(f)
297 297 fctx = repo.filectx(f, fileid=n)
298 298 # setup only needed for filectx not create from a changectx
299 299 fctx._ancestrycontext = ac
300 300 fctx._descendantrev = rev
301 301 return fctx
302 302 return util.lrucachefunc(makectx)
303 303
304 304 copy = {}
305 305 movewithdir = {}
306 306 fullcopy = {}
307 307 diverge = {}
308 308
309 309 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
310 310
311 311 addedinm1 = m1.filesnotin(ma)
312 312 addedinm2 = m2.filesnotin(ma)
313 u1, u2 = _computenonoverlap(repo, m1, m2, addedinm1, addedinm2)
313 u1, u2 = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
314 314
315 315 for f in u1:
316 316 ctx = setupctx(c1)
317 317 checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
318 318
319 319 for f in u2:
320 320 ctx = setupctx(c2)
321 321 checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
322 322
323 323 renamedelete = {}
324 324 renamedelete2 = set()
325 325 diverge2 = set()
326 326 for of, fl in diverge.items():
327 327 if len(fl) == 1 or of in c1 or of in c2:
328 328 del diverge[of] # not actually divergent, or not a rename
329 329 if of not in c1 and of not in c2:
330 330 # renamed on one side, deleted on the other side, but filter
331 331 # out files that have been renamed and then deleted
332 332 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
333 333 renamedelete2.update(fl) # reverse map for below
334 334 else:
335 335 diverge2.update(fl) # reverse map for below
336 336
337 337 bothnew = sorted(addedinm1 & addedinm2)
338 338 if bothnew:
339 339 repo.ui.debug(" unmatched files new in both:\n %s\n"
340 340 % "\n ".join(bothnew))
341 341 bothdiverge, _copy, _fullcopy = {}, {}, {}
342 342 for f in bothnew:
343 343 ctx = setupctx(c1)
344 344 checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
345 345 ctx = setupctx(c2)
346 346 checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
347 347 for of, fl in bothdiverge.items():
348 348 if len(fl) == 2 and fl[0] == fl[1]:
349 349 copy[fl[0]] = of # not actually divergent, just matching renames
350 350
351 351 if fullcopy and repo.ui.debugflag:
352 352 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
353 353 "% = renamed and deleted):\n")
354 354 for f in sorted(fullcopy):
355 355 note = ""
356 356 if f in copy:
357 357 note += "*"
358 358 if f in diverge2:
359 359 note += "!"
360 360 if f in renamedelete2:
361 361 note += "%"
362 362 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
363 363 note))
364 364 del diverge2
365 365
366 366 if not fullcopy:
367 367 return copy, movewithdir, diverge, renamedelete
368 368
369 369 repo.ui.debug(" checking for directory renames\n")
370 370
371 371 # generate a directory move map
372 372 d1, d2 = c1.dirs(), c2.dirs()
373 373 d1.addpath('/')
374 374 d2.addpath('/')
375 375 invalid = set()
376 376 dirmove = {}
377 377
378 378 # examine each file copy for a potential directory move, which is
379 379 # when all the files in a directory are moved to a new directory
380 380 for dst, src in fullcopy.iteritems():
381 381 dsrc, ddst = _dirname(src), _dirname(dst)
382 382 if dsrc in invalid:
383 383 # already seen to be uninteresting
384 384 continue
385 385 elif dsrc in d1 and ddst in d1:
386 386 # directory wasn't entirely moved locally
387 387 invalid.add(dsrc)
388 388 elif dsrc in d2 and ddst in d2:
389 389 # directory wasn't entirely moved remotely
390 390 invalid.add(dsrc)
391 391 elif dsrc in dirmove and dirmove[dsrc] != ddst:
392 392 # files from the same directory moved to two different places
393 393 invalid.add(dsrc)
394 394 else:
395 395 # looks good so far
396 396 dirmove[dsrc + "/"] = ddst + "/"
397 397
398 398 for i in invalid:
399 399 if i in dirmove:
400 400 del dirmove[i]
401 401 del d1, d2, invalid
402 402
403 403 if not dirmove:
404 404 return copy, movewithdir, diverge, renamedelete
405 405
406 406 for d in dirmove:
407 407 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
408 408 (d, dirmove[d]))
409 409
410 410 # check unaccounted nonoverlapping files against directory moves
411 411 for f in u1 + u2:
412 412 if f not in fullcopy:
413 413 for d in dirmove:
414 414 if f.startswith(d):
415 415 # new file added in a directory that was moved, move it
416 416 df = dirmove[d] + f[len(d):]
417 417 if df not in copy:
418 418 movewithdir[f] = df
419 419 repo.ui.debug((" pending file src: '%s' -> "
420 420 "dst: '%s'\n") % (f, df))
421 421 break
422 422
423 423 return copy, movewithdir, diverge, renamedelete
424 424
425 425 def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
426 426 """
427 427 check possible copies of f from m1 to m2
428 428
429 429 ctx = function accepting (filename, node) that returns a filectx.
430 430 f = the filename to check
431 431 m1 = the source manifest
432 432 m2 = the destination manifest
433 433 ca = the changectx of the common ancestor
434 434 limit = the rev number to not search beyond
435 435 diverge = record all diverges in this dict
436 436 copy = record all non-divergent copies in this dict
437 437 fullcopy = record all copies in this dict
438 438 """
439 439
440 440 ma = ca.manifest()
441 441
442 442 def _related(f1, f2, limit):
443 443 # Walk back to common ancestor to see if the two files originate
444 444 # from the same file. Since workingfilectx's rev() is None it messes
445 445 # up the integer comparison logic, hence the pre-step check for
446 446 # None (f1 and f2 can only be workingfilectx's initially).
447 447
448 448 if f1 == f2:
449 449 return f1 # a match
450 450
451 451 g1, g2 = f1.ancestors(), f2.ancestors()
452 452 try:
453 453 f1r, f2r = f1.rev(), f2.rev()
454 454
455 455 if f1r is None:
456 456 f1 = g1.next()
457 457 if f2r is None:
458 458 f2 = g2.next()
459 459
460 460 while True:
461 461 f1r, f2r = f1.rev(), f2.rev()
462 462 if f1r > f2r:
463 463 f1 = g1.next()
464 464 elif f2r > f1r:
465 465 f2 = g2.next()
466 466 elif f1 == f2:
467 467 return f1 # a match
468 468 elif f1r == f2r or f1r < limit or f2r < limit:
469 469 return False # copy no longer relevant
470 470 except StopIteration:
471 471 return False
472 472
473 473 of = None
474 474 seen = set([f])
475 475 for oc in ctx(f, m1[f]).ancestors():
476 476 ocr = oc.rev()
477 477 of = oc.path()
478 478 if of in seen:
479 479 # check limit late - grab last rename before
480 480 if ocr < limit:
481 481 break
482 482 continue
483 483 seen.add(of)
484 484
485 485 fullcopy[f] = of # remember for dir rename detection
486 486 if of not in m2:
487 487 continue # no match, keep looking
488 488 if m2[of] == ma.get(of):
489 489 break # no merge needed, quit early
490 490 c2 = ctx(of, m2[of])
491 491 cr = _related(oc, c2, ca.rev())
492 492 if cr and (of == f or of == c2.path()): # non-divergent
493 493 copy[f] = of
494 494 of = None
495 495 break
496 496
497 497 if of in ma:
498 498 diverge.setdefault(of, []).append(f)
499 499
500 500 def duplicatecopies(repo, rev, fromrev, skiprev=None):
501 501 '''reproduce copies from fromrev to rev in the dirstate
502 502
503 503 If skiprev is specified, it's a revision that should be used to
504 504 filter copy records. Any copies that occur between fromrev and
505 505 skiprev will not be duplicated, even if they appear in the set of
506 506 copies between fromrev and rev.
507 507 '''
508 508 exclude = {}
509 509 if skiprev is not None:
510 510 exclude = pathcopies(repo[fromrev], repo[skiprev])
511 511 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
512 512 # copies.pathcopies returns backward renames, so dst might not
513 513 # actually be in the dirstate
514 514 if dst in exclude:
515 515 continue
516 516 if repo.dirstate[dst] in "nma":
517 517 repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now