##// END OF EJS Templates
copies: only calculate 'addedinm[12]' sets once...
Martin von Zweigbergk -
r24187:30219bd4 default
parent child Browse files
Show More
@@ -1,485 +1,483 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import util
9 9 import heapq
10 10
11 11 def _dirname(f):
12 12 s = f.rfind("/")
13 13 if s == -1:
14 14 return ""
15 15 return f[:s]
16 16
17 17 def _findlimit(repo, a, b):
18 18 """
19 19 Find the last revision that needs to be checked to ensure that a full
20 20 transitive closure for file copies can be properly calculated.
21 21 Generally, this means finding the earliest revision number that's an
22 22 ancestor of a or b but not both, except when a or b is a direct descendent
23 23 of the other, in which case we can return the minimum revnum of a and b.
24 24 None if no such revision exists.
25 25 """
26 26
27 27 # basic idea:
28 28 # - mark a and b with different sides
29 29 # - if a parent's children are all on the same side, the parent is
30 30 # on that side, otherwise it is on no side
31 31 # - walk the graph in topological order with the help of a heap;
32 32 # - add unseen parents to side map
33 33 # - clear side of any parent that has children on different sides
34 34 # - track number of interesting revs that might still be on a side
35 35 # - track the lowest interesting rev seen
36 36 # - quit when interesting revs is zero
37 37
38 38 cl = repo.changelog
39 39 working = len(cl) # pseudo rev for the working directory
40 40 if a is None:
41 41 a = working
42 42 if b is None:
43 43 b = working
44 44
45 45 side = {a: -1, b: 1}
46 46 visit = [-a, -b]
47 47 heapq.heapify(visit)
48 48 interesting = len(visit)
49 49 hascommonancestor = False
50 50 limit = working
51 51
52 52 while interesting:
53 53 r = -heapq.heappop(visit)
54 54 if r == working:
55 55 parents = [cl.rev(p) for p in repo.dirstate.parents()]
56 56 else:
57 57 parents = cl.parentrevs(r)
58 58 for p in parents:
59 59 if p < 0:
60 60 continue
61 61 if p not in side:
62 62 # first time we see p; add it to visit
63 63 side[p] = side[r]
64 64 if side[p]:
65 65 interesting += 1
66 66 heapq.heappush(visit, -p)
67 67 elif side[p] and side[p] != side[r]:
68 68 # p was interesting but now we know better
69 69 side[p] = 0
70 70 interesting -= 1
71 71 hascommonancestor = True
72 72 if side[r]:
73 73 limit = r # lowest rev visited
74 74 interesting -= 1
75 75
76 76 if not hascommonancestor:
77 77 return None
78 78
79 79 # Consider the following flow (see test-commit-amend.t under issue4405):
80 80 # 1/ File 'a0' committed
81 81 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
82 82 # 3/ Move back to first commit
83 83 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
84 84 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
85 85 #
86 86 # During the amend in step five, we will be in this state:
87 87 #
88 88 # @ 3 temporary amend commit for a1-amend
89 89 # |
90 90 # o 2 a1-amend
91 91 # |
92 92 # | o 1 a1
93 93 # |/
94 94 # o 0 a0
95 95 #
96 96 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
97 97 # yet the filelog has the copy information in rev 1 and we will not look
98 98 # back far enough unless we also look at the a and b as candidates.
99 99 # This only occurs when a is a descendent of b or visa-versa.
100 100 return min(limit, a, b)
101 101
102 102 def _chain(src, dst, a, b):
103 103 '''chain two sets of copies a->b'''
104 104 t = a.copy()
105 105 for k, v in b.iteritems():
106 106 if v in t:
107 107 # found a chain
108 108 if t[v] != k:
109 109 # file wasn't renamed back to itself
110 110 t[k] = t[v]
111 111 if v not in dst:
112 112 # chain was a rename, not a copy
113 113 del t[v]
114 114 if v in src:
115 115 # file is a copy of an existing file
116 116 t[k] = v
117 117
118 118 # remove criss-crossed copies
119 119 for k, v in t.items():
120 120 if k in src and v in dst:
121 121 del t[k]
122 122
123 123 return t
124 124
125 125 def _tracefile(fctx, am, limit=-1):
126 126 '''return file context that is the ancestor of fctx present in ancestor
127 127 manifest am, stopping after the first ancestor lower than limit'''
128 128
129 129 for f in fctx.ancestors():
130 130 if am.get(f.path(), None) == f.filenode():
131 131 return f
132 132 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
133 133 return None
134 134
135 135 def _dirstatecopies(d):
136 136 ds = d._repo.dirstate
137 137 c = ds.copies().copy()
138 138 for k in c.keys():
139 139 if ds[k] not in 'anm':
140 140 del c[k]
141 141 return c
142 142
143 143 def _computeforwardmissing(a, b):
144 144 """Computes which files are in b but not a.
145 145 This is its own function so extensions can easily wrap this call to see what
146 146 files _forwardcopies is about to process.
147 147 """
148 148 return b.manifest().filesnotin(a.manifest())
149 149
150 150 def _forwardcopies(a, b):
151 151 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
152 152
153 153 # check for working copy
154 154 w = None
155 155 if b.rev() is None:
156 156 w = b
157 157 b = w.p1()
158 158 if a == b:
159 159 # short-circuit to avoid issues with merge states
160 160 return _dirstatecopies(w)
161 161
162 162 # files might have to be traced back to the fctx parent of the last
163 163 # one-side-only changeset, but not further back than that
164 164 limit = _findlimit(a._repo, a.rev(), b.rev())
165 165 if limit is None:
166 166 limit = -1
167 167 am = a.manifest()
168 168
169 169 # find where new files came from
170 170 # we currently don't try to find where old files went, too expensive
171 171 # this means we can miss a case like 'hg rm b; hg cp a b'
172 172 cm = {}
173 173 missing = _computeforwardmissing(a, b)
174 174 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
175 175 for f in missing:
176 176 fctx = b[f]
177 177 fctx._ancestrycontext = ancestrycontext
178 178 ofctx = _tracefile(fctx, am, limit)
179 179 if ofctx:
180 180 cm[f] = ofctx.path()
181 181
182 182 # combine copies from dirstate if necessary
183 183 if w is not None:
184 184 cm = _chain(a, w, cm, _dirstatecopies(w))
185 185
186 186 return cm
187 187
188 188 def _backwardrenames(a, b):
189 189 # Even though we're not taking copies into account, 1:n rename situations
190 190 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
191 191 # arbitrarily pick one of the renames.
192 192 f = _forwardcopies(b, a)
193 193 r = {}
194 194 for k, v in sorted(f.iteritems()):
195 195 # remove copies
196 196 if v in a:
197 197 continue
198 198 r[v] = k
199 199 return r
200 200
201 201 def pathcopies(x, y):
202 202 '''find {dst@y: src@x} copy mapping for directed compare'''
203 203 if x == y or not x or not y:
204 204 return {}
205 205 a = y.ancestor(x)
206 206 if a == x:
207 207 return _forwardcopies(x, y)
208 208 if a == y:
209 209 return _backwardrenames(x, y)
210 210 return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
211 211
212 def _computenonoverlap(repo, m1, m2, ma):
213 """Computes the files exclusive to m1 and m2.
214 This is its own function so extensions can easily wrap this call to see what
215 files mergecopies is about to process.
212 def _computenonoverlap(repo, addedinm1, addedinm2):
213 """Computes, based on addedinm1 and addedinm2, the files exclusive to m1
214 and m2. This is its own function so extensions can easily wrap this call
215 to see what files mergecopies is about to process.
216 216 """
217 addedinm1 = m1.filesnotin(ma)
218 addedinm2 = m2.filesnotin(ma)
219 217 u1 = sorted(addedinm1 - addedinm2)
220 218 u2 = sorted(addedinm2 - addedinm1)
221 219
222 220 if u1:
223 221 repo.ui.debug(" unmatched files in local:\n %s\n"
224 222 % "\n ".join(u1))
225 223 if u2:
226 224 repo.ui.debug(" unmatched files in other:\n %s\n"
227 225 % "\n ".join(u2))
228 226 return u1, u2
229 227
230 228 def mergecopies(repo, c1, c2, ca):
231 229 """
232 230 Find moves and copies between context c1 and c2 that are relevant
233 231 for merging.
234 232
235 233 Returns four dicts: "copy", "movewithdir", "diverge", and
236 234 "renamedelete".
237 235
238 236 "copy" is a mapping from destination name -> source name,
239 237 where source is in c1 and destination is in c2 or vice-versa.
240 238
241 239 "movewithdir" is a mapping from source name -> destination name,
242 240 where the file at source present in one context but not the other
243 241 needs to be moved to destination by the merge process, because the
244 242 other context moved the directory it is in.
245 243
246 244 "diverge" is a mapping of source name -> list of destination names
247 245 for divergent renames.
248 246
249 247 "renamedelete" is a mapping of source name -> list of destination
250 248 names for files deleted in c1 that were renamed in c2 or vice-versa.
251 249 """
252 250 # avoid silly behavior for update from empty dir
253 251 if not c1 or not c2 or c1 == c2:
254 252 return {}, {}, {}, {}
255 253
256 254 # avoid silly behavior for parent -> working dir
257 255 if c2.node() is None and c1.node() == repo.dirstate.p1():
258 256 return repo.dirstate.copies(), {}, {}, {}
259 257
260 258 limit = _findlimit(repo, c1.rev(), c2.rev())
261 259 if limit is None:
262 260 # no common ancestor, no copies
263 261 return {}, {}, {}, {}
264 262 m1 = c1.manifest()
265 263 m2 = c2.manifest()
266 264 ma = ca.manifest()
267 265
268 266 def makectx(f, n):
269 267 if len(n) != 20: # in a working context?
270 268 if c1.rev() is None:
271 269 return c1.filectx(f)
272 270 return c2.filectx(f)
273 271 return repo.filectx(f, fileid=n)
274 272
275 273 ctx = util.lrucachefunc(makectx)
276 274 copy = {}
277 275 movewithdir = {}
278 276 fullcopy = {}
279 277 diverge = {}
280 278
281 279 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
282 280
283 u1, u2 = _computenonoverlap(repo, m1, m2, ma)
281 addedinm1 = m1.filesnotin(ma)
282 addedinm2 = m2.filesnotin(ma)
283 u1, u2 = _computenonoverlap(repo, addedinm1, addedinm2)
284 284
285 285 for f in u1:
286 286 checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
287 287
288 288 for f in u2:
289 289 checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
290 290
291 291 renamedelete = {}
292 292 renamedelete2 = set()
293 293 diverge2 = set()
294 294 for of, fl in diverge.items():
295 295 if len(fl) == 1 or of in c1 or of in c2:
296 296 del diverge[of] # not actually divergent, or not a rename
297 297 if of not in c1 and of not in c2:
298 298 # renamed on one side, deleted on the other side, but filter
299 299 # out files that have been renamed and then deleted
300 300 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
301 301 renamedelete2.update(fl) # reverse map for below
302 302 else:
303 303 diverge2.update(fl) # reverse map for below
304 304
305 addedinm1 = m1.filesnotin(ma)
306 addedinm2 = m2.filesnotin(ma)
307 305 bothnew = sorted(addedinm1 & addedinm2)
308 306 if bothnew:
309 307 repo.ui.debug(" unmatched files new in both:\n %s\n"
310 308 % "\n ".join(bothnew))
311 309 bothdiverge, _copy, _fullcopy = {}, {}, {}
312 310 for f in bothnew:
313 311 checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
314 312 checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
315 313 for of, fl in bothdiverge.items():
316 314 if len(fl) == 2 and fl[0] == fl[1]:
317 315 copy[fl[0]] = of # not actually divergent, just matching renames
318 316
319 317 if fullcopy and repo.ui.debugflag:
320 318 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
321 319 "% = renamed and deleted):\n")
322 320 for f in sorted(fullcopy):
323 321 note = ""
324 322 if f in copy:
325 323 note += "*"
326 324 if f in diverge2:
327 325 note += "!"
328 326 if f in renamedelete2:
329 327 note += "%"
330 328 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
331 329 note))
332 330 del diverge2
333 331
334 332 if not fullcopy:
335 333 return copy, movewithdir, diverge, renamedelete
336 334
337 335 repo.ui.debug(" checking for directory renames\n")
338 336
339 337 # generate a directory move map
340 338 d1, d2 = c1.dirs(), c2.dirs()
341 339 d1.addpath('/')
342 340 d2.addpath('/')
343 341 invalid = set()
344 342 dirmove = {}
345 343
346 344 # examine each file copy for a potential directory move, which is
347 345 # when all the files in a directory are moved to a new directory
348 346 for dst, src in fullcopy.iteritems():
349 347 dsrc, ddst = _dirname(src), _dirname(dst)
350 348 if dsrc in invalid:
351 349 # already seen to be uninteresting
352 350 continue
353 351 elif dsrc in d1 and ddst in d1:
354 352 # directory wasn't entirely moved locally
355 353 invalid.add(dsrc)
356 354 elif dsrc in d2 and ddst in d2:
357 355 # directory wasn't entirely moved remotely
358 356 invalid.add(dsrc)
359 357 elif dsrc in dirmove and dirmove[dsrc] != ddst:
360 358 # files from the same directory moved to two different places
361 359 invalid.add(dsrc)
362 360 else:
363 361 # looks good so far
364 362 dirmove[dsrc + "/"] = ddst + "/"
365 363
366 364 for i in invalid:
367 365 if i in dirmove:
368 366 del dirmove[i]
369 367 del d1, d2, invalid
370 368
371 369 if not dirmove:
372 370 return copy, movewithdir, diverge, renamedelete
373 371
374 372 for d in dirmove:
375 373 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
376 374 (d, dirmove[d]))
377 375
378 376 # check unaccounted nonoverlapping files against directory moves
379 377 for f in u1 + u2:
380 378 if f not in fullcopy:
381 379 for d in dirmove:
382 380 if f.startswith(d):
383 381 # new file added in a directory that was moved, move it
384 382 df = dirmove[d] + f[len(d):]
385 383 if df not in copy:
386 384 movewithdir[f] = df
387 385 repo.ui.debug((" pending file src: '%s' -> "
388 386 "dst: '%s'\n") % (f, df))
389 387 break
390 388
391 389 return copy, movewithdir, diverge, renamedelete
392 390
393 391 def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
394 392 """
395 393 check possible copies of f from m1 to m2
396 394
397 395 ctx = function accepting (filename, node) that returns a filectx.
398 396 f = the filename to check
399 397 m1 = the source manifest
400 398 m2 = the destination manifest
401 399 ca = the changectx of the common ancestor
402 400 limit = the rev number to not search beyond
403 401 diverge = record all diverges in this dict
404 402 copy = record all non-divergent copies in this dict
405 403 fullcopy = record all copies in this dict
406 404 """
407 405
408 406 ma = ca.manifest()
409 407
410 408 def _related(f1, f2, limit):
411 409 # Walk back to common ancestor to see if the two files originate
412 410 # from the same file. Since workingfilectx's rev() is None it messes
413 411 # up the integer comparison logic, hence the pre-step check for
414 412 # None (f1 and f2 can only be workingfilectx's initially).
415 413
416 414 if f1 == f2:
417 415 return f1 # a match
418 416
419 417 g1, g2 = f1.ancestors(), f2.ancestors()
420 418 try:
421 419 f1r, f2r = f1.rev(), f2.rev()
422 420
423 421 if f1r is None:
424 422 f1 = g1.next()
425 423 if f2r is None:
426 424 f2 = g2.next()
427 425
428 426 while True:
429 427 f1r, f2r = f1.rev(), f2.rev()
430 428 if f1r > f2r:
431 429 f1 = g1.next()
432 430 elif f2r > f1r:
433 431 f2 = g2.next()
434 432 elif f1 == f2:
435 433 return f1 # a match
436 434 elif f1r == f2r or f1r < limit or f2r < limit:
437 435 return False # copy no longer relevant
438 436 except StopIteration:
439 437 return False
440 438
441 439 of = None
442 440 seen = set([f])
443 441 for oc in ctx(f, m1[f]).ancestors():
444 442 ocr = oc.rev()
445 443 of = oc.path()
446 444 if of in seen:
447 445 # check limit late - grab last rename before
448 446 if ocr < limit:
449 447 break
450 448 continue
451 449 seen.add(of)
452 450
453 451 fullcopy[f] = of # remember for dir rename detection
454 452 if of not in m2:
455 453 continue # no match, keep looking
456 454 if m2[of] == ma.get(of):
457 455 break # no merge needed, quit early
458 456 c2 = ctx(of, m2[of])
459 457 cr = _related(oc, c2, ca.rev())
460 458 if cr and (of == f or of == c2.path()): # non-divergent
461 459 copy[f] = of
462 460 of = None
463 461 break
464 462
465 463 if of in ma:
466 464 diverge.setdefault(of, []).append(f)
467 465
468 466 def duplicatecopies(repo, rev, fromrev, skiprev=None):
469 467 '''reproduce copies from fromrev to rev in the dirstate
470 468
471 469 If skiprev is specified, it's a revision that should be used to
472 470 filter copy records. Any copies that occur between fromrev and
473 471 skiprev will not be duplicated, even if they appear in the set of
474 472 copies between fromrev and rev.
475 473 '''
476 474 exclude = {}
477 475 if skiprev is not None:
478 476 exclude = pathcopies(repo[fromrev], repo[skiprev])
479 477 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
480 478 # copies.pathcopies returns backward renames, so dst might not
481 479 # actually be in the dirstate
482 480 if dst in exclude:
483 481 continue
484 482 if repo.dirstate[dst] in "nma":
485 483 repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now