##// END OF EJS Templates
copies: rename ctx() to getfctx() for clarity
Matt Mackall -
r26315:3c6902ed default
parent child Browse files
Show More
@@ -1,538 +1,540 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11
12 12 from . import (
13 13 pathutil,
14 14 util,
15 15 )
16 16
17 17 def _findlimit(repo, a, b):
18 18 """
19 19 Find the last revision that needs to be checked to ensure that a full
20 20 transitive closure for file copies can be properly calculated.
21 21 Generally, this means finding the earliest revision number that's an
22 22 ancestor of a or b but not both, except when a or b is a direct descendent
23 23 of the other, in which case we can return the minimum revnum of a and b.
24 24 None if no such revision exists.
25 25 """
26 26
27 27 # basic idea:
28 28 # - mark a and b with different sides
29 29 # - if a parent's children are all on the same side, the parent is
30 30 # on that side, otherwise it is on no side
31 31 # - walk the graph in topological order with the help of a heap;
32 32 # - add unseen parents to side map
33 33 # - clear side of any parent that has children on different sides
34 34 # - track number of interesting revs that might still be on a side
35 35 # - track the lowest interesting rev seen
36 36 # - quit when interesting revs is zero
37 37
38 38 cl = repo.changelog
39 39 working = len(cl) # pseudo rev for the working directory
40 40 if a is None:
41 41 a = working
42 42 if b is None:
43 43 b = working
44 44
45 45 side = {a: -1, b: 1}
46 46 visit = [-a, -b]
47 47 heapq.heapify(visit)
48 48 interesting = len(visit)
49 49 hascommonancestor = False
50 50 limit = working
51 51
52 52 while interesting:
53 53 r = -heapq.heappop(visit)
54 54 if r == working:
55 55 parents = [cl.rev(p) for p in repo.dirstate.parents()]
56 56 else:
57 57 parents = cl.parentrevs(r)
58 58 for p in parents:
59 59 if p < 0:
60 60 continue
61 61 if p not in side:
62 62 # first time we see p; add it to visit
63 63 side[p] = side[r]
64 64 if side[p]:
65 65 interesting += 1
66 66 heapq.heappush(visit, -p)
67 67 elif side[p] and side[p] != side[r]:
68 68 # p was interesting but now we know better
69 69 side[p] = 0
70 70 interesting -= 1
71 71 hascommonancestor = True
72 72 if side[r]:
73 73 limit = r # lowest rev visited
74 74 interesting -= 1
75 75
76 76 if not hascommonancestor:
77 77 return None
78 78
79 79 # Consider the following flow (see test-commit-amend.t under issue4405):
80 80 # 1/ File 'a0' committed
81 81 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
82 82 # 3/ Move back to first commit
83 83 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
84 84 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
85 85 #
86 86 # During the amend in step five, we will be in this state:
87 87 #
88 88 # @ 3 temporary amend commit for a1-amend
89 89 # |
90 90 # o 2 a1-amend
91 91 # |
92 92 # | o 1 a1
93 93 # |/
94 94 # o 0 a0
95 95 #
96 96 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
97 97 # yet the filelog has the copy information in rev 1 and we will not look
98 98 # back far enough unless we also look at the a and b as candidates.
99 99 # This only occurs when a is a descendent of b or visa-versa.
100 100 return min(limit, a, b)
101 101
102 102 def _chain(src, dst, a, b):
103 103 '''chain two sets of copies a->b'''
104 104 t = a.copy()
105 105 for k, v in b.iteritems():
106 106 if v in t:
107 107 # found a chain
108 108 if t[v] != k:
109 109 # file wasn't renamed back to itself
110 110 t[k] = t[v]
111 111 if v not in dst:
112 112 # chain was a rename, not a copy
113 113 del t[v]
114 114 if v in src:
115 115 # file is a copy of an existing file
116 116 t[k] = v
117 117
118 118 # remove criss-crossed copies
119 119 for k, v in t.items():
120 120 if k in src and v in dst:
121 121 del t[k]
122 122
123 123 return t
124 124
125 125 def _tracefile(fctx, am, limit=-1):
126 126 '''return file context that is the ancestor of fctx present in ancestor
127 127 manifest am, stopping after the first ancestor lower than limit'''
128 128
129 129 for f in fctx.ancestors():
130 130 if am.get(f.path(), None) == f.filenode():
131 131 return f
132 132 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
133 133 return None
134 134
135 135 def _dirstatecopies(d):
136 136 ds = d._repo.dirstate
137 137 c = ds.copies().copy()
138 138 for k in c.keys():
139 139 if ds[k] not in 'anm':
140 140 del c[k]
141 141 return c
142 142
143 143 def _computeforwardmissing(a, b, match=None):
144 144 """Computes which files are in b but not a.
145 145 This is its own function so extensions can easily wrap this call to see what
146 146 files _forwardcopies is about to process.
147 147 """
148 148 ma = a.manifest()
149 149 mb = b.manifest()
150 150 if match:
151 151 ma = ma.matches(match)
152 152 mb = mb.matches(match)
153 153 return mb.filesnotin(ma)
154 154
155 155 def _forwardcopies(a, b, match=None):
156 156 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
157 157
158 158 # check for working copy
159 159 w = None
160 160 if b.rev() is None:
161 161 w = b
162 162 b = w.p1()
163 163 if a == b:
164 164 # short-circuit to avoid issues with merge states
165 165 return _dirstatecopies(w)
166 166
167 167 # files might have to be traced back to the fctx parent of the last
168 168 # one-side-only changeset, but not further back than that
169 169 limit = _findlimit(a._repo, a.rev(), b.rev())
170 170 if limit is None:
171 171 limit = -1
172 172 am = a.manifest()
173 173
174 174 # find where new files came from
175 175 # we currently don't try to find where old files went, too expensive
176 176 # this means we can miss a case like 'hg rm b; hg cp a b'
177 177 cm = {}
178 178 missing = _computeforwardmissing(a, b, match=match)
179 179 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
180 180 for f in missing:
181 181 fctx = b[f]
182 182 fctx._ancestrycontext = ancestrycontext
183 183 ofctx = _tracefile(fctx, am, limit)
184 184 if ofctx:
185 185 cm[f] = ofctx.path()
186 186
187 187 # combine copies from dirstate if necessary
188 188 if w is not None:
189 189 cm = _chain(a, w, cm, _dirstatecopies(w))
190 190
191 191 return cm
192 192
193 193 def _backwardrenames(a, b):
194 194 if a._repo.ui.configbool('experimental', 'disablecopytrace'):
195 195 return {}
196 196
197 197 # Even though we're not taking copies into account, 1:n rename situations
198 198 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
199 199 # arbitrarily pick one of the renames.
200 200 f = _forwardcopies(b, a)
201 201 r = {}
202 202 for k, v in sorted(f.iteritems()):
203 203 # remove copies
204 204 if v in a:
205 205 continue
206 206 r[v] = k
207 207 return r
208 208
209 209 def pathcopies(x, y, match=None):
210 210 '''find {dst@y: src@x} copy mapping for directed compare'''
211 211 if x == y or not x or not y:
212 212 return {}
213 213 a = y.ancestor(x)
214 214 if a == x:
215 215 return _forwardcopies(x, y, match=match)
216 216 if a == y:
217 217 return _backwardrenames(x, y)
218 218 return _chain(x, y, _backwardrenames(x, a),
219 219 _forwardcopies(a, y, match=match))
220 220
221 221 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
222 222 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
223 223 and c2. This is its own function so extensions can easily wrap this call
224 224 to see what files mergecopies is about to process.
225 225
226 226 Even though c1 and c2 are not used in this function, they are useful in
227 227 other extensions for being able to read the file nodes of the changed files.
228 228 """
229 229 u1 = sorted(addedinm1 - addedinm2)
230 230 u2 = sorted(addedinm2 - addedinm1)
231 231
232 232 if u1:
233 233 repo.ui.debug(" unmatched files in local:\n %s\n"
234 234 % "\n ".join(u1))
235 235 if u2:
236 236 repo.ui.debug(" unmatched files in other:\n %s\n"
237 237 % "\n ".join(u2))
238 238 return u1, u2
239 239
240 240 def mergecopies(repo, c1, c2, ca):
241 241 """
242 242 Find moves and copies between context c1 and c2 that are relevant
243 243 for merging.
244 244
245 245 Returns four dicts: "copy", "movewithdir", "diverge", and
246 246 "renamedelete".
247 247
248 248 "copy" is a mapping from destination name -> source name,
249 249 where source is in c1 and destination is in c2 or vice-versa.
250 250
251 251 "movewithdir" is a mapping from source name -> destination name,
252 252 where the file at source present in one context but not the other
253 253 needs to be moved to destination by the merge process, because the
254 254 other context moved the directory it is in.
255 255
256 256 "diverge" is a mapping of source name -> list of destination names
257 257 for divergent renames.
258 258
259 259 "renamedelete" is a mapping of source name -> list of destination
260 260 names for files deleted in c1 that were renamed in c2 or vice-versa.
261 261 """
262 262 # avoid silly behavior for update from empty dir
263 263 if not c1 or not c2 or c1 == c2:
264 264 return {}, {}, {}, {}
265 265
266 266 # avoid silly behavior for parent -> working dir
267 267 if c2.node() is None and c1.node() == repo.dirstate.p1():
268 268 return repo.dirstate.copies(), {}, {}, {}
269 269
270 270 # Copy trace disabling is explicitly below the node == p1 logic above
271 271 # because the logic above is required for a simple copy to be kept across a
272 272 # rebase.
273 273 if repo.ui.configbool('experimental', 'disablecopytrace'):
274 274 return {}, {}, {}, {}
275 275
276 276 limit = _findlimit(repo, c1.rev(), c2.rev())
277 277 if limit is None:
278 278 # no common ancestor, no copies
279 279 return {}, {}, {}, {}
280 280 m1 = c1.manifest()
281 281 m2 = c2.manifest()
282 282 ma = ca.manifest()
283 283
284 284
285 285 def setupctx(ctx):
286 """return a 'makectx' function suitable for checkcopies usage from ctx
286 """return a 'getfctx' function suitable for checkcopies usage
287 287
288 288 We have to re-setup the function building 'filectx' for each
289 289 'checkcopies' to ensure the linkrev adjustement is properly setup for
290 290 each. Linkrev adjustment is important to avoid bug in rename
291 291 detection. Moreover, having a proper '_ancestrycontext' setup ensures
292 292 the performance impact of this adjustment is kept limited. Without it,
293 293 each file could do a full dag traversal making the time complexity of
294 294 the operation explode (see issue4537).
295 295
296 296 This function exists here mostly to limit the impact on stable. Feel
297 297 free to refactor on default.
298 298 """
299 299 rev = ctx.rev()
300 300 ac = getattr(ctx, '_ancestrycontext', None)
301 301 if ac is None:
302 302 revs = [rev]
303 303 if rev is None:
304 304 revs = [p.rev() for p in ctx.parents()]
305 305 ac = ctx._repo.changelog.ancestors(revs, inclusive=True)
306 306 ctx._ancestrycontext = ac
307 307 def makectx(f, n):
308 308 if len(n) != 20: # in a working context?
309 309 if c1.rev() is None:
310 310 return c1.filectx(f)
311 311 return c2.filectx(f)
312 312 fctx = repo.filectx(f, fileid=n)
313 313 # setup only needed for filectx not create from a changectx
314 314 fctx._ancestrycontext = ac
315 315 fctx._descendantrev = rev
316 316 return fctx
317 317 return util.lrucachefunc(makectx)
318 318
319 319 copy = {}
320 320 movewithdir = {}
321 321 fullcopy = {}
322 322 diverge = {}
323 323
324 324 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
325 325
326 326 addedinm1 = m1.filesnotin(ma)
327 327 addedinm2 = m2.filesnotin(ma)
328 328 u1, u2 = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
329 329
330 330 for f in u1:
331 ctx = setupctx(c1)
332 checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
331 getfctx = setupctx(c1)
332 checkcopies(getfctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
333 333
334 334 for f in u2:
335 ctx = setupctx(c2)
336 checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
335 getfctx = setupctx(c2)
336 checkcopies(getfctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
337 337
338 338 renamedelete = {}
339 339 renamedelete2 = set()
340 340 diverge2 = set()
341 341 for of, fl in diverge.items():
342 342 if len(fl) == 1 or of in c1 or of in c2:
343 343 del diverge[of] # not actually divergent, or not a rename
344 344 if of not in c1 and of not in c2:
345 345 # renamed on one side, deleted on the other side, but filter
346 346 # out files that have been renamed and then deleted
347 347 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
348 348 renamedelete2.update(fl) # reverse map for below
349 349 else:
350 350 diverge2.update(fl) # reverse map for below
351 351
352 352 bothnew = sorted(addedinm1 & addedinm2)
353 353 if bothnew:
354 354 repo.ui.debug(" unmatched files new in both:\n %s\n"
355 355 % "\n ".join(bothnew))
356 356 bothdiverge, _copy, _fullcopy = {}, {}, {}
357 357 for f in bothnew:
358 ctx = setupctx(c1)
359 checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
360 ctx = setupctx(c2)
361 checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
358 getfctx = setupctx(c1)
359 checkcopies(getfctx, f, m1, m2, ca, limit, bothdiverge,
360 _copy, _fullcopy)
361 getfctx = setupctx(c2)
362 checkcopies(getfctx, f, m2, m1, ca, limit, bothdiverge,
363 _copy, _fullcopy)
362 364 for of, fl in bothdiverge.items():
363 365 if len(fl) == 2 and fl[0] == fl[1]:
364 366 copy[fl[0]] = of # not actually divergent, just matching renames
365 367
366 368 if fullcopy and repo.ui.debugflag:
367 369 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
368 370 "% = renamed and deleted):\n")
369 371 for f in sorted(fullcopy):
370 372 note = ""
371 373 if f in copy:
372 374 note += "*"
373 375 if f in diverge2:
374 376 note += "!"
375 377 if f in renamedelete2:
376 378 note += "%"
377 379 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
378 380 note))
379 381 del diverge2
380 382
381 383 if not fullcopy:
382 384 return copy, movewithdir, diverge, renamedelete
383 385
384 386 repo.ui.debug(" checking for directory renames\n")
385 387
386 388 # generate a directory move map
387 389 d1, d2 = c1.dirs(), c2.dirs()
388 390 # Hack for adding '', which is not otherwise added, to d1 and d2
389 391 d1.addpath('/')
390 392 d2.addpath('/')
391 393 invalid = set()
392 394 dirmove = {}
393 395
394 396 # examine each file copy for a potential directory move, which is
395 397 # when all the files in a directory are moved to a new directory
396 398 for dst, src in fullcopy.iteritems():
397 399 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
398 400 if dsrc in invalid:
399 401 # already seen to be uninteresting
400 402 continue
401 403 elif dsrc in d1 and ddst in d1:
402 404 # directory wasn't entirely moved locally
403 405 invalid.add(dsrc)
404 406 elif dsrc in d2 and ddst in d2:
405 407 # directory wasn't entirely moved remotely
406 408 invalid.add(dsrc)
407 409 elif dsrc in dirmove and dirmove[dsrc] != ddst:
408 410 # files from the same directory moved to two different places
409 411 invalid.add(dsrc)
410 412 else:
411 413 # looks good so far
412 414 dirmove[dsrc + "/"] = ddst + "/"
413 415
414 416 for i in invalid:
415 417 if i in dirmove:
416 418 del dirmove[i]
417 419 del d1, d2, invalid
418 420
419 421 if not dirmove:
420 422 return copy, movewithdir, diverge, renamedelete
421 423
422 424 for d in dirmove:
423 425 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
424 426 (d, dirmove[d]))
425 427
426 428 # check unaccounted nonoverlapping files against directory moves
427 429 for f in u1 + u2:
428 430 if f not in fullcopy:
429 431 for d in dirmove:
430 432 if f.startswith(d):
431 433 # new file added in a directory that was moved, move it
432 434 df = dirmove[d] + f[len(d):]
433 435 if df not in copy:
434 436 movewithdir[f] = df
435 437 repo.ui.debug((" pending file src: '%s' -> "
436 438 "dst: '%s'\n") % (f, df))
437 439 break
438 440
439 441 return copy, movewithdir, diverge, renamedelete
440 442
441 def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
443 def checkcopies(getfctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
442 444 """
443 445 check possible copies of f from m1 to m2
444 446
445 ctx = function accepting (filename, node) that returns a filectx.
447 getfctx = function accepting (filename, node) that returns a filectx.
446 448 f = the filename to check
447 449 m1 = the source manifest
448 450 m2 = the destination manifest
449 451 ca = the changectx of the common ancestor
450 452 limit = the rev number to not search beyond
451 453 diverge = record all diverges in this dict
452 454 copy = record all non-divergent copies in this dict
453 455 fullcopy = record all copies in this dict
454 456 """
455 457
456 458 ma = ca.manifest()
457 459
458 460 def _related(f1, f2, limit):
459 461 # Walk back to common ancestor to see if the two files originate
460 462 # from the same file. Since workingfilectx's rev() is None it messes
461 463 # up the integer comparison logic, hence the pre-step check for
462 464 # None (f1 and f2 can only be workingfilectx's initially).
463 465
464 466 if f1 == f2:
465 467 return f1 # a match
466 468
467 469 g1, g2 = f1.ancestors(), f2.ancestors()
468 470 try:
469 471 f1r, f2r = f1.linkrev(), f2.linkrev()
470 472
471 473 if f1r is None:
472 474 f1 = g1.next()
473 475 if f2r is None:
474 476 f2 = g2.next()
475 477
476 478 while True:
477 479 f1r, f2r = f1.linkrev(), f2.linkrev()
478 480 if f1r > f2r:
479 481 f1 = g1.next()
480 482 elif f2r > f1r:
481 483 f2 = g2.next()
482 484 elif f1 == f2:
483 485 return f1 # a match
484 486 elif f1r == f2r or f1r < limit or f2r < limit:
485 487 return False # copy no longer relevant
486 488 except StopIteration:
487 489 return False
488 490
489 491 of = None
490 492 seen = set([f])
491 for oc in ctx(f, m1[f]).ancestors():
493 for oc in getfctx(f, m1[f]).ancestors():
492 494 ocr = oc.linkrev()
493 495 of = oc.path()
494 496 if of in seen:
495 497 # check limit late - grab last rename before
496 498 if ocr < limit:
497 499 break
498 500 continue
499 501 seen.add(of)
500 502
501 503 fullcopy[f] = of # remember for dir rename detection
502 504 if of not in m2:
503 505 continue # no match, keep looking
504 506 if m2[of] == ma.get(of):
505 507 break # no merge needed, quit early
506 c2 = ctx(of, m2[of])
508 c2 = getfctx(of, m2[of])
507 509 cr = _related(oc, c2, ca.rev())
508 510 if cr and (of == f or of == c2.path()): # non-divergent
509 511 copy[f] = of
510 512 of = None
511 513 break
512 514
513 515 if of in ma:
514 516 diverge.setdefault(of, []).append(f)
515 517
516 518 def duplicatecopies(repo, rev, fromrev, skiprev=None):
517 519 '''reproduce copies from fromrev to rev in the dirstate
518 520
519 521 If skiprev is specified, it's a revision that should be used to
520 522 filter copy records. Any copies that occur between fromrev and
521 523 skiprev will not be duplicated, even if they appear in the set of
522 524 copies between fromrev and rev.
523 525 '''
524 526 exclude = {}
525 527 if (skiprev is not None and
526 528 not repo.ui.configbool('experimental', 'disablecopytrace')):
527 529 # disablecopytrace skips this line, but not the entire function because
528 530 # the line below is O(size of the repo) during a rebase, while the rest
529 531 # of the function is much faster (and is required for carrying copy
530 532 # metadata across the rebase anyway).
531 533 exclude = pathcopies(repo[fromrev], repo[skiprev])
532 534 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
533 535 # copies.pathcopies returns backward renames, so dst might not
534 536 # actually be in the dirstate
535 537 if dst in exclude:
536 538 continue
537 539 if repo.dirstate[dst] in "nma":
538 540 repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now