##// END OF EJS Templates
copy: move mergecopies file logic to a function...
Durham Goode -
r24010:a63c2b15 default
parent child Browse files
Show More
@@ -1,472 +1,480 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import util
9 9 import heapq
10 10
11 11 def _nonoverlap(d1, d2, d3):
12 12 "Return list of elements in d1 not in d2 or d3"
13 13 return sorted([d for d in d1 if d not in d3 and d not in d2])
14 14
15 15 def _dirname(f):
16 16 s = f.rfind("/")
17 17 if s == -1:
18 18 return ""
19 19 return f[:s]
20 20
21 21 def _findlimit(repo, a, b):
22 22 """
23 23 Find the last revision that needs to be checked to ensure that a full
24 24 transitive closure for file copies can be properly calculated.
25 25 Generally, this means finding the earliest revision number that's an
26 26 ancestor of a or b but not both, except when a or b is a direct descendent
27 27 of the other, in which case we can return the minimum revnum of a and b.
28 28 None if no such revision exists.
29 29 """
30 30
31 31 # basic idea:
32 32 # - mark a and b with different sides
33 33 # - if a parent's children are all on the same side, the parent is
34 34 # on that side, otherwise it is on no side
35 35 # - walk the graph in topological order with the help of a heap;
36 36 # - add unseen parents to side map
37 37 # - clear side of any parent that has children on different sides
38 38 # - track number of interesting revs that might still be on a side
39 39 # - track the lowest interesting rev seen
40 40 # - quit when interesting revs is zero
41 41
42 42 cl = repo.changelog
43 43 working = len(cl) # pseudo rev for the working directory
44 44 if a is None:
45 45 a = working
46 46 if b is None:
47 47 b = working
48 48
49 49 side = {a: -1, b: 1}
50 50 visit = [-a, -b]
51 51 heapq.heapify(visit)
52 52 interesting = len(visit)
53 53 hascommonancestor = False
54 54 limit = working
55 55
56 56 while interesting:
57 57 r = -heapq.heappop(visit)
58 58 if r == working:
59 59 parents = [cl.rev(p) for p in repo.dirstate.parents()]
60 60 else:
61 61 parents = cl.parentrevs(r)
62 62 for p in parents:
63 63 if p < 0:
64 64 continue
65 65 if p not in side:
66 66 # first time we see p; add it to visit
67 67 side[p] = side[r]
68 68 if side[p]:
69 69 interesting += 1
70 70 heapq.heappush(visit, -p)
71 71 elif side[p] and side[p] != side[r]:
72 72 # p was interesting but now we know better
73 73 side[p] = 0
74 74 interesting -= 1
75 75 hascommonancestor = True
76 76 if side[r]:
77 77 limit = r # lowest rev visited
78 78 interesting -= 1
79 79
80 80 if not hascommonancestor:
81 81 return None
82 82
83 83 # Consider the following flow (see test-commit-amend.t under issue4405):
84 84 # 1/ File 'a0' committed
85 85 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
86 86 # 3/ Move back to first commit
87 87 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
88 88 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
89 89 #
90 90 # During the amend in step five, we will be in this state:
91 91 #
92 92 # @ 3 temporary amend commit for a1-amend
93 93 # |
94 94 # o 2 a1-amend
95 95 # |
96 96 # | o 1 a1
97 97 # |/
98 98 # o 0 a0
99 99 #
100 100 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
101 101 # yet the filelog has the copy information in rev 1 and we will not look
102 102 # back far enough unless we also look at the a and b as candidates.
103 103 # This only occurs when a is a descendent of b or visa-versa.
104 104 return min(limit, a, b)
105 105
106 106 def _chain(src, dst, a, b):
107 107 '''chain two sets of copies a->b'''
108 108 t = a.copy()
109 109 for k, v in b.iteritems():
110 110 if v in t:
111 111 # found a chain
112 112 if t[v] != k:
113 113 # file wasn't renamed back to itself
114 114 t[k] = t[v]
115 115 if v not in dst:
116 116 # chain was a rename, not a copy
117 117 del t[v]
118 118 if v in src:
119 119 # file is a copy of an existing file
120 120 t[k] = v
121 121
122 122 # remove criss-crossed copies
123 123 for k, v in t.items():
124 124 if k in src and v in dst:
125 125 del t[k]
126 126
127 127 return t
128 128
129 129 def _tracefile(fctx, am, limit=-1):
130 130 '''return file context that is the ancestor of fctx present in ancestor
131 131 manifest am, stopping after the first ancestor lower than limit'''
132 132
133 133 for f in fctx.ancestors():
134 134 if am.get(f.path(), None) == f.filenode():
135 135 return f
136 136 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
137 137 return None
138 138
139 139 def _dirstatecopies(d):
140 140 ds = d._repo.dirstate
141 141 c = ds.copies().copy()
142 142 for k in c.keys():
143 143 if ds[k] not in 'anm':
144 144 del c[k]
145 145 return c
146 146
147 147 def _forwardcopies(a, b):
148 148 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
149 149
150 150 # check for working copy
151 151 w = None
152 152 if b.rev() is None:
153 153 w = b
154 154 b = w.p1()
155 155 if a == b:
156 156 # short-circuit to avoid issues with merge states
157 157 return _dirstatecopies(w)
158 158
159 159 # files might have to be traced back to the fctx parent of the last
160 160 # one-side-only changeset, but not further back than that
161 161 limit = _findlimit(a._repo, a.rev(), b.rev())
162 162 if limit is None:
163 163 limit = -1
164 164 am = a.manifest()
165 165
166 166 # find where new files came from
167 167 # we currently don't try to find where old files went, too expensive
168 168 # this means we can miss a case like 'hg rm b; hg cp a b'
169 169 cm = {}
170 170 missing = set(b.manifest().iterkeys())
171 171 missing.difference_update(a.manifest().iterkeys())
172 172
173 173 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
174 174 for f in missing:
175 175 fctx = b[f]
176 176 fctx._ancestrycontext = ancestrycontext
177 177 ofctx = _tracefile(fctx, am, limit)
178 178 if ofctx:
179 179 cm[f] = ofctx.path()
180 180
181 181 # combine copies from dirstate if necessary
182 182 if w is not None:
183 183 cm = _chain(a, w, cm, _dirstatecopies(w))
184 184
185 185 return cm
186 186
187 187 def _backwardrenames(a, b):
188 188 # Even though we're not taking copies into account, 1:n rename situations
189 189 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
190 190 # arbitrarily pick one of the renames.
191 191 f = _forwardcopies(b, a)
192 192 r = {}
193 193 for k, v in sorted(f.iteritems()):
194 194 # remove copies
195 195 if v in a:
196 196 continue
197 197 r[v] = k
198 198 return r
199 199
200 200 def pathcopies(x, y):
201 201 '''find {dst@y: src@x} copy mapping for directed compare'''
202 202 if x == y or not x or not y:
203 203 return {}
204 204 a = y.ancestor(x)
205 205 if a == x:
206 206 return _forwardcopies(x, y)
207 207 if a == y:
208 208 return _backwardrenames(x, y)
209 209 return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
210 210
211 def _computenonoverlap(repo, m1, m2, ma):
212 """Computes the files exclusive to m1 and m2.
213 This is its own function so extensions can easily wrap this call to see what
214 files mergecopies is about to process.
215 """
216 u1 = _nonoverlap(m1, m2, ma)
217 u2 = _nonoverlap(m2, m1, ma)
218
219 if u1:
220 repo.ui.debug(" unmatched files in local:\n %s\n"
221 % "\n ".join(u1))
222 if u2:
223 repo.ui.debug(" unmatched files in other:\n %s\n"
224 % "\n ".join(u2))
225 return u1, u2
226
211 227 def mergecopies(repo, c1, c2, ca):
212 228 """
213 229 Find moves and copies between context c1 and c2 that are relevant
214 230 for merging.
215 231
216 232 Returns four dicts: "copy", "movewithdir", "diverge", and
217 233 "renamedelete".
218 234
219 235 "copy" is a mapping from destination name -> source name,
220 236 where source is in c1 and destination is in c2 or vice-versa.
221 237
222 238 "movewithdir" is a mapping from source name -> destination name,
223 239 where the file at source present in one context but not the other
224 240 needs to be moved to destination by the merge process, because the
225 241 other context moved the directory it is in.
226 242
227 243 "diverge" is a mapping of source name -> list of destination names
228 244 for divergent renames.
229 245
230 246 "renamedelete" is a mapping of source name -> list of destination
231 247 names for files deleted in c1 that were renamed in c2 or vice-versa.
232 248 """
233 249 # avoid silly behavior for update from empty dir
234 250 if not c1 or not c2 or c1 == c2:
235 251 return {}, {}, {}, {}
236 252
237 253 # avoid silly behavior for parent -> working dir
238 254 if c2.node() is None and c1.node() == repo.dirstate.p1():
239 255 return repo.dirstate.copies(), {}, {}, {}
240 256
241 257 limit = _findlimit(repo, c1.rev(), c2.rev())
242 258 if limit is None:
243 259 # no common ancestor, no copies
244 260 return {}, {}, {}, {}
245 261 m1 = c1.manifest()
246 262 m2 = c2.manifest()
247 263 ma = ca.manifest()
248 264
249 265 def makectx(f, n):
250 266 if len(n) != 20: # in a working context?
251 267 if c1.rev() is None:
252 268 return c1.filectx(f)
253 269 return c2.filectx(f)
254 270 return repo.filectx(f, fileid=n)
255 271
256 272 ctx = util.lrucachefunc(makectx)
257 273 copy = {}
258 274 movewithdir = {}
259 275 fullcopy = {}
260 276 diverge = {}
261 277
262 278 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
263 279
264 u1 = _nonoverlap(m1, m2, ma)
265 u2 = _nonoverlap(m2, m1, ma)
266
267 if u1:
268 repo.ui.debug(" unmatched files in local:\n %s\n"
269 % "\n ".join(u1))
270 if u2:
271 repo.ui.debug(" unmatched files in other:\n %s\n"
272 % "\n ".join(u2))
280 u1, u2 = _computenonoverlap(repo, m1, m2, ma)
273 281
274 282 for f in u1:
275 283 checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
276 284
277 285 for f in u2:
278 286 checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
279 287
280 288 renamedelete = {}
281 289 renamedelete2 = set()
282 290 diverge2 = set()
283 291 for of, fl in diverge.items():
284 292 if len(fl) == 1 or of in c1 or of in c2:
285 293 del diverge[of] # not actually divergent, or not a rename
286 294 if of not in c1 and of not in c2:
287 295 # renamed on one side, deleted on the other side, but filter
288 296 # out files that have been renamed and then deleted
289 297 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
290 298 renamedelete2.update(fl) # reverse map for below
291 299 else:
292 300 diverge2.update(fl) # reverse map for below
293 301
294 302 bothnew = sorted([d for d in m1 if d in m2 and d not in ma])
295 303 if bothnew:
296 304 repo.ui.debug(" unmatched files new in both:\n %s\n"
297 305 % "\n ".join(bothnew))
298 306 bothdiverge, _copy, _fullcopy = {}, {}, {}
299 307 for f in bothnew:
300 308 checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
301 309 checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
302 310 for of, fl in bothdiverge.items():
303 311 if len(fl) == 2 and fl[0] == fl[1]:
304 312 copy[fl[0]] = of # not actually divergent, just matching renames
305 313
306 314 if fullcopy and repo.ui.debugflag:
307 315 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
308 316 "% = renamed and deleted):\n")
309 317 for f in sorted(fullcopy):
310 318 note = ""
311 319 if f in copy:
312 320 note += "*"
313 321 if f in diverge2:
314 322 note += "!"
315 323 if f in renamedelete2:
316 324 note += "%"
317 325 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
318 326 note))
319 327 del diverge2
320 328
321 329 if not fullcopy:
322 330 return copy, movewithdir, diverge, renamedelete
323 331
324 332 repo.ui.debug(" checking for directory renames\n")
325 333
326 334 # generate a directory move map
327 335 d1, d2 = c1.dirs(), c2.dirs()
328 336 d1.addpath('/')
329 337 d2.addpath('/')
330 338 invalid = set()
331 339 dirmove = {}
332 340
333 341 # examine each file copy for a potential directory move, which is
334 342 # when all the files in a directory are moved to a new directory
335 343 for dst, src in fullcopy.iteritems():
336 344 dsrc, ddst = _dirname(src), _dirname(dst)
337 345 if dsrc in invalid:
338 346 # already seen to be uninteresting
339 347 continue
340 348 elif dsrc in d1 and ddst in d1:
341 349 # directory wasn't entirely moved locally
342 350 invalid.add(dsrc)
343 351 elif dsrc in d2 and ddst in d2:
344 352 # directory wasn't entirely moved remotely
345 353 invalid.add(dsrc)
346 354 elif dsrc in dirmove and dirmove[dsrc] != ddst:
347 355 # files from the same directory moved to two different places
348 356 invalid.add(dsrc)
349 357 else:
350 358 # looks good so far
351 359 dirmove[dsrc + "/"] = ddst + "/"
352 360
353 361 for i in invalid:
354 362 if i in dirmove:
355 363 del dirmove[i]
356 364 del d1, d2, invalid
357 365
358 366 if not dirmove:
359 367 return copy, movewithdir, diverge, renamedelete
360 368
361 369 for d in dirmove:
362 370 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
363 371 (d, dirmove[d]))
364 372
365 373 # check unaccounted nonoverlapping files against directory moves
366 374 for f in u1 + u2:
367 375 if f not in fullcopy:
368 376 for d in dirmove:
369 377 if f.startswith(d):
370 378 # new file added in a directory that was moved, move it
371 379 df = dirmove[d] + f[len(d):]
372 380 if df not in copy:
373 381 movewithdir[f] = df
374 382 repo.ui.debug((" pending file src: '%s' -> "
375 383 "dst: '%s'\n") % (f, df))
376 384 break
377 385
378 386 return copy, movewithdir, diverge, renamedelete
379 387
380 388 def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
381 389 """
382 390 check possible copies of f from m1 to m2
383 391
384 392 ctx = function accepting (filename, node) that returns a filectx.
385 393 f = the filename to check
386 394 m1 = the source manifest
387 395 m2 = the destination manifest
388 396 ca = the changectx of the common ancestor
389 397 limit = the rev number to not search beyond
390 398 diverge = record all diverges in this dict
391 399 copy = record all non-divergent copies in this dict
392 400 fullcopy = record all copies in this dict
393 401 """
394 402
395 403 ma = ca.manifest()
396 404
397 405 def _related(f1, f2, limit):
398 406 # Walk back to common ancestor to see if the two files originate
399 407 # from the same file. Since workingfilectx's rev() is None it messes
400 408 # up the integer comparison logic, hence the pre-step check for
401 409 # None (f1 and f2 can only be workingfilectx's initially).
402 410
403 411 if f1 == f2:
404 412 return f1 # a match
405 413
406 414 g1, g2 = f1.ancestors(), f2.ancestors()
407 415 try:
408 416 f1r, f2r = f1.rev(), f2.rev()
409 417
410 418 if f1r is None:
411 419 f1 = g1.next()
412 420 if f2r is None:
413 421 f2 = g2.next()
414 422
415 423 while True:
416 424 f1r, f2r = f1.rev(), f2.rev()
417 425 if f1r > f2r:
418 426 f1 = g1.next()
419 427 elif f2r > f1r:
420 428 f2 = g2.next()
421 429 elif f1 == f2:
422 430 return f1 # a match
423 431 elif f1r == f2r or f1r < limit or f2r < limit:
424 432 return False # copy no longer relevant
425 433 except StopIteration:
426 434 return False
427 435
428 436 of = None
429 437 seen = set([f])
430 438 for oc in ctx(f, m1[f]).ancestors():
431 439 ocr = oc.rev()
432 440 of = oc.path()
433 441 if of in seen:
434 442 # check limit late - grab last rename before
435 443 if ocr < limit:
436 444 break
437 445 continue
438 446 seen.add(of)
439 447
440 448 fullcopy[f] = of # remember for dir rename detection
441 449 if of not in m2:
442 450 continue # no match, keep looking
443 451 if m2[of] == ma.get(of):
444 452 break # no merge needed, quit early
445 453 c2 = ctx(of, m2[of])
446 454 cr = _related(oc, c2, ca.rev())
447 455 if cr and (of == f or of == c2.path()): # non-divergent
448 456 copy[f] = of
449 457 of = None
450 458 break
451 459
452 460 if of in ma:
453 461 diverge.setdefault(of, []).append(f)
454 462
455 463 def duplicatecopies(repo, rev, fromrev, skiprev=None):
456 464 '''reproduce copies from fromrev to rev in the dirstate
457 465
458 466 If skiprev is specified, it's a revision that should be used to
459 467 filter copy records. Any copies that occur between fromrev and
460 468 skiprev will not be duplicated, even if they appear in the set of
461 469 copies between fromrev and rev.
462 470 '''
463 471 exclude = {}
464 472 if skiprev is not None:
465 473 exclude = pathcopies(repo[fromrev], repo[skiprev])
466 474 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
467 475 # copies.pathcopies returns backward renames, so dst might not
468 476 # actually be in the dirstate
469 477 if dst in exclude:
470 478 continue
471 479 if repo.dirstate[dst] in "nma":
472 480 repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now