##// END OF EJS Templates
copies: factor out setupctx into _makegetfctx...
Matt Mackall -
r26656:3e3d783b default
parent child Browse files
Show More
@@ -1,545 +1,544
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11
12 12 from . import (
13 13 pathutil,
14 14 util,
15 15 )
16 16
17 17 def _findlimit(repo, a, b):
18 18 """
19 19 Find the last revision that needs to be checked to ensure that a full
20 20 transitive closure for file copies can be properly calculated.
21 21 Generally, this means finding the earliest revision number that's an
22 22 ancestor of a or b but not both, except when a or b is a direct descendent
23 23 of the other, in which case we can return the minimum revnum of a and b.
24 24 None if no such revision exists.
25 25 """
26 26
27 27 # basic idea:
28 28 # - mark a and b with different sides
29 29 # - if a parent's children are all on the same side, the parent is
30 30 # on that side, otherwise it is on no side
31 31 # - walk the graph in topological order with the help of a heap;
32 32 # - add unseen parents to side map
33 33 # - clear side of any parent that has children on different sides
34 34 # - track number of interesting revs that might still be on a side
35 35 # - track the lowest interesting rev seen
36 36 # - quit when interesting revs is zero
37 37
38 38 cl = repo.changelog
39 39 working = len(cl) # pseudo rev for the working directory
40 40 if a is None:
41 41 a = working
42 42 if b is None:
43 43 b = working
44 44
45 45 side = {a: -1, b: 1}
46 46 visit = [-a, -b]
47 47 heapq.heapify(visit)
48 48 interesting = len(visit)
49 49 hascommonancestor = False
50 50 limit = working
51 51
52 52 while interesting:
53 53 r = -heapq.heappop(visit)
54 54 if r == working:
55 55 parents = [cl.rev(p) for p in repo.dirstate.parents()]
56 56 else:
57 57 parents = cl.parentrevs(r)
58 58 for p in parents:
59 59 if p < 0:
60 60 continue
61 61 if p not in side:
62 62 # first time we see p; add it to visit
63 63 side[p] = side[r]
64 64 if side[p]:
65 65 interesting += 1
66 66 heapq.heappush(visit, -p)
67 67 elif side[p] and side[p] != side[r]:
68 68 # p was interesting but now we know better
69 69 side[p] = 0
70 70 interesting -= 1
71 71 hascommonancestor = True
72 72 if side[r]:
73 73 limit = r # lowest rev visited
74 74 interesting -= 1
75 75
76 76 if not hascommonancestor:
77 77 return None
78 78
79 79 # Consider the following flow (see test-commit-amend.t under issue4405):
80 80 # 1/ File 'a0' committed
81 81 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
82 82 # 3/ Move back to first commit
83 83 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
84 84 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
85 85 #
86 86 # During the amend in step five, we will be in this state:
87 87 #
88 88 # @ 3 temporary amend commit for a1-amend
89 89 # |
90 90 # o 2 a1-amend
91 91 # |
92 92 # | o 1 a1
93 93 # |/
94 94 # o 0 a0
95 95 #
96 96 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
97 97 # yet the filelog has the copy information in rev 1 and we will not look
98 98 # back far enough unless we also look at the a and b as candidates.
99 99 # This only occurs when a is a descendent of b or visa-versa.
100 100 return min(limit, a, b)
101 101
102 102 def _chain(src, dst, a, b):
103 103 '''chain two sets of copies a->b'''
104 104 t = a.copy()
105 105 for k, v in b.iteritems():
106 106 if v in t:
107 107 # found a chain
108 108 if t[v] != k:
109 109 # file wasn't renamed back to itself
110 110 t[k] = t[v]
111 111 if v not in dst:
112 112 # chain was a rename, not a copy
113 113 del t[v]
114 114 if v in src:
115 115 # file is a copy of an existing file
116 116 t[k] = v
117 117
118 118 # remove criss-crossed copies
119 119 for k, v in t.items():
120 120 if k in src and v in dst:
121 121 del t[k]
122 122
123 123 return t
124 124
125 125 def _tracefile(fctx, am, limit=-1):
126 126 '''return file context that is the ancestor of fctx present in ancestor
127 127 manifest am, stopping after the first ancestor lower than limit'''
128 128
129 129 for f in fctx.ancestors():
130 130 if am.get(f.path(), None) == f.filenode():
131 131 return f
132 132 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
133 133 return None
134 134
135 135 def _dirstatecopies(d):
136 136 ds = d._repo.dirstate
137 137 c = ds.copies().copy()
138 138 for k in c.keys():
139 139 if ds[k] not in 'anm':
140 140 del c[k]
141 141 return c
142 142
143 143 def _computeforwardmissing(a, b, match=None):
144 144 """Computes which files are in b but not a.
145 145 This is its own function so extensions can easily wrap this call to see what
146 146 files _forwardcopies is about to process.
147 147 """
148 148 ma = a.manifest()
149 149 mb = b.manifest()
150 150 if match:
151 151 ma = ma.matches(match)
152 152 mb = mb.matches(match)
153 153 return mb.filesnotin(ma)
154 154
155 155 def _forwardcopies(a, b, match=None):
156 156 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
157 157
158 158 # check for working copy
159 159 w = None
160 160 if b.rev() is None:
161 161 w = b
162 162 b = w.p1()
163 163 if a == b:
164 164 # short-circuit to avoid issues with merge states
165 165 return _dirstatecopies(w)
166 166
167 167 # files might have to be traced back to the fctx parent of the last
168 168 # one-side-only changeset, but not further back than that
169 169 limit = _findlimit(a._repo, a.rev(), b.rev())
170 170 if limit is None:
171 171 limit = -1
172 172 am = a.manifest()
173 173
174 174 # find where new files came from
175 175 # we currently don't try to find where old files went, too expensive
176 176 # this means we can miss a case like 'hg rm b; hg cp a b'
177 177 cm = {}
178 178 missing = _computeforwardmissing(a, b, match=match)
179 179 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
180 180 for f in missing:
181 181 fctx = b[f]
182 182 fctx._ancestrycontext = ancestrycontext
183 183 ofctx = _tracefile(fctx, am, limit)
184 184 if ofctx:
185 185 cm[f] = ofctx.path()
186 186
187 187 # combine copies from dirstate if necessary
188 188 if w is not None:
189 189 cm = _chain(a, w, cm, _dirstatecopies(w))
190 190
191 191 return cm
192 192
193 193 def _backwardrenames(a, b):
194 194 if a._repo.ui.configbool('experimental', 'disablecopytrace'):
195 195 return {}
196 196
197 197 # Even though we're not taking copies into account, 1:n rename situations
198 198 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
199 199 # arbitrarily pick one of the renames.
200 200 f = _forwardcopies(b, a)
201 201 r = {}
202 202 for k, v in sorted(f.iteritems()):
203 203 # remove copies
204 204 if v in a:
205 205 continue
206 206 r[v] = k
207 207 return r
208 208
209 209 def pathcopies(x, y, match=None):
210 210 '''find {dst@y: src@x} copy mapping for directed compare'''
211 211 if x == y or not x or not y:
212 212 return {}
213 213 a = y.ancestor(x)
214 214 if a == x:
215 215 return _forwardcopies(x, y, match=match)
216 216 if a == y:
217 217 return _backwardrenames(x, y)
218 218 return _chain(x, y, _backwardrenames(x, a),
219 219 _forwardcopies(a, y, match=match))
220 220
221 221 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
222 222 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
223 223 and c2. This is its own function so extensions can easily wrap this call
224 224 to see what files mergecopies is about to process.
225 225
226 226 Even though c1 and c2 are not used in this function, they are useful in
227 227 other extensions for being able to read the file nodes of the changed files.
228 228 """
229 229 u1 = sorted(addedinm1 - addedinm2)
230 230 u2 = sorted(addedinm2 - addedinm1)
231 231
232 232 if u1:
233 233 repo.ui.debug(" unmatched files in local:\n %s\n"
234 234 % "\n ".join(u1))
235 235 if u2:
236 236 repo.ui.debug(" unmatched files in other:\n %s\n"
237 237 % "\n ".join(u2))
238 238 return u1, u2
239 239
240 def _makegetfctx(ctx):
241 """return a 'getfctx' function suitable for checkcopies usage
242
243 We have to re-setup the function building 'filectx' for each
244 'checkcopies' to ensure the linkrev adjustement is properly setup for
245 each. Linkrev adjustment is important to avoid bug in rename
246 detection. Moreover, having a proper '_ancestrycontext' setup ensures
247 the performance impact of this adjustment is kept limited. Without it,
248 each file could do a full dag traversal making the time complexity of
249 the operation explode (see issue4537).
250
251 This function exists here mostly to limit the impact on stable. Feel
252 free to refactor on default.
253 """
254 rev = ctx.rev()
255 repo = ctx._repo
256 ac = getattr(ctx, '_ancestrycontext', None)
257 if ac is None:
258 revs = [rev]
259 if rev is None:
260 revs = [p.rev() for p in ctx.parents()]
261 ac = repo.changelog.ancestors(revs, inclusive=True)
262 ctx._ancestrycontext = ac
263 def makectx(f, n):
264 if len(n) != 20: # in a working context?
265 if ctx.rev() is None:
266 return ctx.filectx(f)
267 return repo[None][f]
268 fctx = repo.filectx(f, fileid=n)
269 # setup only needed for filectx not create from a changectx
270 fctx._ancestrycontext = ac
271 fctx._descendantrev = rev
272 return fctx
273 return util.lrucachefunc(makectx)
274
240 275 def mergecopies(repo, c1, c2, ca):
241 276 """
242 277 Find moves and copies between context c1 and c2 that are relevant
243 278 for merging.
244 279
245 280 Returns four dicts: "copy", "movewithdir", "diverge", and
246 281 "renamedelete".
247 282
248 283 "copy" is a mapping from destination name -> source name,
249 284 where source is in c1 and destination is in c2 or vice-versa.
250 285
251 286 "movewithdir" is a mapping from source name -> destination name,
252 287 where the file at source present in one context but not the other
253 288 needs to be moved to destination by the merge process, because the
254 289 other context moved the directory it is in.
255 290
256 291 "diverge" is a mapping of source name -> list of destination names
257 292 for divergent renames.
258 293
259 294 "renamedelete" is a mapping of source name -> list of destination
260 295 names for files deleted in c1 that were renamed in c2 or vice-versa.
261 296 """
262 297 # avoid silly behavior for update from empty dir
263 298 if not c1 or not c2 or c1 == c2:
264 299 return {}, {}, {}, {}
265 300
266 301 # avoid silly behavior for parent -> working dir
267 302 if c2.node() is None and c1.node() == repo.dirstate.p1():
268 303 return repo.dirstate.copies(), {}, {}, {}
269 304
270 305 # Copy trace disabling is explicitly below the node == p1 logic above
271 306 # because the logic above is required for a simple copy to be kept across a
272 307 # rebase.
273 308 if repo.ui.configbool('experimental', 'disablecopytrace'):
274 309 return {}, {}, {}, {}
275 310
276 311 limit = _findlimit(repo, c1.rev(), c2.rev())
277 312 if limit is None:
278 313 # no common ancestor, no copies
279 314 return {}, {}, {}, {}
280 315 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
281 316
282 317 m1 = c1.manifest()
283 318 m2 = c2.manifest()
284 319 ma = ca.manifest()
285 320
286
287 def setupctx(ctx):
288 """return a 'getfctx' function suitable for checkcopies usage
289
290 We have to re-setup the function building 'filectx' for each
291 'checkcopies' to ensure the linkrev adjustement is properly setup for
292 each. Linkrev adjustment is important to avoid bug in rename
293 detection. Moreover, having a proper '_ancestrycontext' setup ensures
294 the performance impact of this adjustment is kept limited. Without it,
295 each file could do a full dag traversal making the time complexity of
296 the operation explode (see issue4537).
297
298 This function exists here mostly to limit the impact on stable. Feel
299 free to refactor on default.
300 """
301 rev = ctx.rev()
302 ac = getattr(ctx, '_ancestrycontext', None)
303 repo = ctx._repo
304 if ac is None:
305 revs = [rev]
306 if rev is None:
307 revs = [p.rev() for p in ctx.parents()]
308 ac = ctx._repo.changelog.ancestors(revs, inclusive=True)
309 ctx._ancestrycontext = ac
310 def makectx(f, n):
311 if len(n) != 20: # in a working context?
312 if ctx.rev() is None:
313 return ctx.filectx(f)
314 return repo[None][f]
315 fctx = repo.filectx(f, fileid=n)
316 # setup only needed for filectx not create from a changectx
317 fctx._ancestrycontext = ac
318 fctx._descendantrev = rev
319 return fctx
320 return util.lrucachefunc(makectx)
321
322 321 copy1, copy2, = {}, {}
323 322 movewithdir1, movewithdir2 = {}, {}
324 323 fullcopy1, fullcopy2 = {}, {}
325 324 diverge = {}
326 325
327 326 addedinm1 = m1.filesnotin(ma)
328 327 addedinm2 = m2.filesnotin(ma)
329 328 u1, u2 = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
330 329
331 330 for f in u1:
332 getfctx = setupctx(c1)
331 getfctx = _makegetfctx(c1)
333 332 checkcopies(getfctx, f, m1, m2, ca, limit, diverge, copy1, fullcopy1)
334 333
335 334 for f in u2:
336 getfctx = setupctx(c2)
335 getfctx = _makegetfctx(c2)
337 336 checkcopies(getfctx, f, m2, m1, ca, limit, diverge, copy2, fullcopy2)
338 337
339 338 copy = dict(copy1.items() + copy2.items())
340 339 movewithdir = dict(movewithdir1.items() + movewithdir2.items())
341 340 fullcopy = dict(fullcopy1.items() + fullcopy2.items())
342 341
343 342 renamedelete = {}
344 343 renamedelete2 = set()
345 344 divergeset = set()
346 345 for of, fl in diverge.items():
347 346 if len(fl) == 1 or of in c1 or of in c2:
348 347 del diverge[of] # not actually divergent, or not a rename
349 348 if of not in c1 and of not in c2:
350 349 # renamed on one side, deleted on the other side, but filter
351 350 # out files that have been renamed and then deleted
352 351 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
353 352 renamedelete2.update(fl) # reverse map for below
354 353 else:
355 354 divergeset.update(fl) # reverse map for below
356 355
357 356 bothnew = sorted(addedinm1 & addedinm2)
358 357 if bothnew:
359 358 repo.ui.debug(" unmatched files new in both:\n %s\n"
360 359 % "\n ".join(bothnew))
361 360 bothdiverge, _copy, _fullcopy = {}, {}, {}
362 361 for f in bothnew:
363 getfctx = setupctx(c1)
362 getfctx = _makegetfctx(c1)
364 363 checkcopies(getfctx, f, m1, m2, ca, limit, bothdiverge,
365 364 _copy, _fullcopy)
366 getfctx = setupctx(c2)
365 getfctx = _makegetfctx(c2)
367 366 checkcopies(getfctx, f, m2, m1, ca, limit, bothdiverge,
368 367 _copy, _fullcopy)
369 368 for of, fl in bothdiverge.items():
370 369 if len(fl) == 2 and fl[0] == fl[1]:
371 370 copy[fl[0]] = of # not actually divergent, just matching renames
372 371
373 372 if fullcopy and repo.ui.debugflag:
374 373 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
375 374 "% = renamed and deleted):\n")
376 375 for f in sorted(fullcopy):
377 376 note = ""
378 377 if f in copy:
379 378 note += "*"
380 379 if f in divergeset:
381 380 note += "!"
382 381 if f in renamedelete2:
383 382 note += "%"
384 383 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
385 384 note))
386 385 del divergeset
387 386
388 387 if not fullcopy:
389 388 return copy, movewithdir, diverge, renamedelete
390 389
391 390 repo.ui.debug(" checking for directory renames\n")
392 391
393 392 # generate a directory move map
394 393 d1, d2 = c1.dirs(), c2.dirs()
395 394 # Hack for adding '', which is not otherwise added, to d1 and d2
396 395 d1.addpath('/')
397 396 d2.addpath('/')
398 397 invalid = set()
399 398 dirmove = {}
400 399
401 400 # examine each file copy for a potential directory move, which is
402 401 # when all the files in a directory are moved to a new directory
403 402 for dst, src in fullcopy.iteritems():
404 403 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
405 404 if dsrc in invalid:
406 405 # already seen to be uninteresting
407 406 continue
408 407 elif dsrc in d1 and ddst in d1:
409 408 # directory wasn't entirely moved locally
410 409 invalid.add(dsrc)
411 410 elif dsrc in d2 and ddst in d2:
412 411 # directory wasn't entirely moved remotely
413 412 invalid.add(dsrc)
414 413 elif dsrc in dirmove and dirmove[dsrc] != ddst:
415 414 # files from the same directory moved to two different places
416 415 invalid.add(dsrc)
417 416 else:
418 417 # looks good so far
419 418 dirmove[dsrc + "/"] = ddst + "/"
420 419
421 420 for i in invalid:
422 421 if i in dirmove:
423 422 del dirmove[i]
424 423 del d1, d2, invalid
425 424
426 425 if not dirmove:
427 426 return copy, movewithdir, diverge, renamedelete
428 427
429 428 for d in dirmove:
430 429 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
431 430 (d, dirmove[d]))
432 431
433 432 # check unaccounted nonoverlapping files against directory moves
434 433 for f in u1 + u2:
435 434 if f not in fullcopy:
436 435 for d in dirmove:
437 436 if f.startswith(d):
438 437 # new file added in a directory that was moved, move it
439 438 df = dirmove[d] + f[len(d):]
440 439 if df not in copy:
441 440 movewithdir[f] = df
442 441 repo.ui.debug((" pending file src: '%s' -> "
443 442 "dst: '%s'\n") % (f, df))
444 443 break
445 444
446 445 return copy, movewithdir, diverge, renamedelete
447 446
448 447 def checkcopies(getfctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
449 448 """
450 449 check possible copies of f from m1 to m2
451 450
452 451 getfctx = function accepting (filename, node) that returns a filectx.
453 452 f = the filename to check
454 453 m1 = the source manifest
455 454 m2 = the destination manifest
456 455 ca = the changectx of the common ancestor
457 456 limit = the rev number to not search beyond
458 457 diverge = record all diverges in this dict
459 458 copy = record all non-divergent copies in this dict
460 459 fullcopy = record all copies in this dict
461 460 """
462 461
463 462 ma = ca.manifest()
464 463
465 464 def _related(f1, f2, limit):
466 465 # Walk back to common ancestor to see if the two files originate
467 466 # from the same file. Since workingfilectx's rev() is None it messes
468 467 # up the integer comparison logic, hence the pre-step check for
469 468 # None (f1 and f2 can only be workingfilectx's initially).
470 469
471 470 if f1 == f2:
472 471 return f1 # a match
473 472
474 473 g1, g2 = f1.ancestors(), f2.ancestors()
475 474 try:
476 475 f1r, f2r = f1.linkrev(), f2.linkrev()
477 476
478 477 if f1r is None:
479 478 f1 = g1.next()
480 479 if f2r is None:
481 480 f2 = g2.next()
482 481
483 482 while True:
484 483 f1r, f2r = f1.linkrev(), f2.linkrev()
485 484 if f1r > f2r:
486 485 f1 = g1.next()
487 486 elif f2r > f1r:
488 487 f2 = g2.next()
489 488 elif f1 == f2:
490 489 return f1 # a match
491 490 elif f1r == f2r or f1r < limit or f2r < limit:
492 491 return False # copy no longer relevant
493 492 except StopIteration:
494 493 return False
495 494
496 495 of = None
497 496 seen = set([f])
498 497 for oc in getfctx(f, m1[f]).ancestors():
499 498 ocr = oc.linkrev()
500 499 of = oc.path()
501 500 if of in seen:
502 501 # check limit late - grab last rename before
503 502 if ocr < limit:
504 503 break
505 504 continue
506 505 seen.add(of)
507 506
508 507 fullcopy[f] = of # remember for dir rename detection
509 508 if of not in m2:
510 509 continue # no match, keep looking
511 510 if m2[of] == ma.get(of):
512 511 break # no merge needed, quit early
513 512 c2 = getfctx(of, m2[of])
514 513 cr = _related(oc, c2, ca.rev())
515 514 if cr and (of == f or of == c2.path()): # non-divergent
516 515 copy[f] = of
517 516 of = None
518 517 break
519 518
520 519 if of in ma:
521 520 diverge.setdefault(of, []).append(f)
522 521
523 522 def duplicatecopies(repo, rev, fromrev, skiprev=None):
524 523 '''reproduce copies from fromrev to rev in the dirstate
525 524
526 525 If skiprev is specified, it's a revision that should be used to
527 526 filter copy records. Any copies that occur between fromrev and
528 527 skiprev will not be duplicated, even if they appear in the set of
529 528 copies between fromrev and rev.
530 529 '''
531 530 exclude = {}
532 531 if (skiprev is not None and
533 532 not repo.ui.configbool('experimental', 'disablecopytrace')):
534 533 # disablecopytrace skips this line, but not the entire function because
535 534 # the line below is O(size of the repo) during a rebase, while the rest
536 535 # of the function is much faster (and is required for carrying copy
537 536 # metadata across the rebase anyway).
538 537 exclude = pathcopies(repo[fromrev], repo[skiprev])
539 538 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
540 539 # copies.pathcopies returns backward renames, so dst might not
541 540 # actually be in the dirstate
542 541 if dst in exclude:
543 542 continue
544 543 if repo.dirstate[dst] in "nma":
545 544 repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now