##// END OF EJS Templates
copies: calculate 'bothnew' from manifestdict.filesnotin()...
Martin von Zweigbergk -
r24186:61aadba2 default
parent child Browse files
Show More
@@ -1,483 +1,485 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import util
9 9 import heapq
10 10
11 11 def _dirname(f):
12 12 s = f.rfind("/")
13 13 if s == -1:
14 14 return ""
15 15 return f[:s]
16 16
17 17 def _findlimit(repo, a, b):
18 18 """
19 19 Find the last revision that needs to be checked to ensure that a full
20 20 transitive closure for file copies can be properly calculated.
21 21 Generally, this means finding the earliest revision number that's an
22 22 ancestor of a or b but not both, except when a or b is a direct descendent
23 23 of the other, in which case we can return the minimum revnum of a and b.
24 24 None if no such revision exists.
25 25 """
26 26
27 27 # basic idea:
28 28 # - mark a and b with different sides
29 29 # - if a parent's children are all on the same side, the parent is
30 30 # on that side, otherwise it is on no side
31 31 # - walk the graph in topological order with the help of a heap;
32 32 # - add unseen parents to side map
33 33 # - clear side of any parent that has children on different sides
34 34 # - track number of interesting revs that might still be on a side
35 35 # - track the lowest interesting rev seen
36 36 # - quit when interesting revs is zero
37 37
38 38 cl = repo.changelog
39 39 working = len(cl) # pseudo rev for the working directory
40 40 if a is None:
41 41 a = working
42 42 if b is None:
43 43 b = working
44 44
45 45 side = {a: -1, b: 1}
46 46 visit = [-a, -b]
47 47 heapq.heapify(visit)
48 48 interesting = len(visit)
49 49 hascommonancestor = False
50 50 limit = working
51 51
52 52 while interesting:
53 53 r = -heapq.heappop(visit)
54 54 if r == working:
55 55 parents = [cl.rev(p) for p in repo.dirstate.parents()]
56 56 else:
57 57 parents = cl.parentrevs(r)
58 58 for p in parents:
59 59 if p < 0:
60 60 continue
61 61 if p not in side:
62 62 # first time we see p; add it to visit
63 63 side[p] = side[r]
64 64 if side[p]:
65 65 interesting += 1
66 66 heapq.heappush(visit, -p)
67 67 elif side[p] and side[p] != side[r]:
68 68 # p was interesting but now we know better
69 69 side[p] = 0
70 70 interesting -= 1
71 71 hascommonancestor = True
72 72 if side[r]:
73 73 limit = r # lowest rev visited
74 74 interesting -= 1
75 75
76 76 if not hascommonancestor:
77 77 return None
78 78
79 79 # Consider the following flow (see test-commit-amend.t under issue4405):
80 80 # 1/ File 'a0' committed
81 81 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
82 82 # 3/ Move back to first commit
83 83 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
84 84 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
85 85 #
86 86 # During the amend in step five, we will be in this state:
87 87 #
88 88 # @ 3 temporary amend commit for a1-amend
89 89 # |
90 90 # o 2 a1-amend
91 91 # |
92 92 # | o 1 a1
93 93 # |/
94 94 # o 0 a0
95 95 #
96 96 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
97 97 # yet the filelog has the copy information in rev 1 and we will not look
98 98 # back far enough unless we also look at the a and b as candidates.
99 99 # This only occurs when a is a descendent of b or visa-versa.
100 100 return min(limit, a, b)
101 101
102 102 def _chain(src, dst, a, b):
103 103 '''chain two sets of copies a->b'''
104 104 t = a.copy()
105 105 for k, v in b.iteritems():
106 106 if v in t:
107 107 # found a chain
108 108 if t[v] != k:
109 109 # file wasn't renamed back to itself
110 110 t[k] = t[v]
111 111 if v not in dst:
112 112 # chain was a rename, not a copy
113 113 del t[v]
114 114 if v in src:
115 115 # file is a copy of an existing file
116 116 t[k] = v
117 117
118 118 # remove criss-crossed copies
119 119 for k, v in t.items():
120 120 if k in src and v in dst:
121 121 del t[k]
122 122
123 123 return t
124 124
125 125 def _tracefile(fctx, am, limit=-1):
126 126 '''return file context that is the ancestor of fctx present in ancestor
127 127 manifest am, stopping after the first ancestor lower than limit'''
128 128
129 129 for f in fctx.ancestors():
130 130 if am.get(f.path(), None) == f.filenode():
131 131 return f
132 132 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
133 133 return None
134 134
135 135 def _dirstatecopies(d):
136 136 ds = d._repo.dirstate
137 137 c = ds.copies().copy()
138 138 for k in c.keys():
139 139 if ds[k] not in 'anm':
140 140 del c[k]
141 141 return c
142 142
143 143 def _computeforwardmissing(a, b):
144 144 """Computes which files are in b but not a.
145 145 This is its own function so extensions can easily wrap this call to see what
146 146 files _forwardcopies is about to process.
147 147 """
148 148 return b.manifest().filesnotin(a.manifest())
149 149
150 150 def _forwardcopies(a, b):
151 151 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
152 152
153 153 # check for working copy
154 154 w = None
155 155 if b.rev() is None:
156 156 w = b
157 157 b = w.p1()
158 158 if a == b:
159 159 # short-circuit to avoid issues with merge states
160 160 return _dirstatecopies(w)
161 161
162 162 # files might have to be traced back to the fctx parent of the last
163 163 # one-side-only changeset, but not further back than that
164 164 limit = _findlimit(a._repo, a.rev(), b.rev())
165 165 if limit is None:
166 166 limit = -1
167 167 am = a.manifest()
168 168
169 169 # find where new files came from
170 170 # we currently don't try to find where old files went, too expensive
171 171 # this means we can miss a case like 'hg rm b; hg cp a b'
172 172 cm = {}
173 173 missing = _computeforwardmissing(a, b)
174 174 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
175 175 for f in missing:
176 176 fctx = b[f]
177 177 fctx._ancestrycontext = ancestrycontext
178 178 ofctx = _tracefile(fctx, am, limit)
179 179 if ofctx:
180 180 cm[f] = ofctx.path()
181 181
182 182 # combine copies from dirstate if necessary
183 183 if w is not None:
184 184 cm = _chain(a, w, cm, _dirstatecopies(w))
185 185
186 186 return cm
187 187
188 188 def _backwardrenames(a, b):
189 189 # Even though we're not taking copies into account, 1:n rename situations
190 190 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
191 191 # arbitrarily pick one of the renames.
192 192 f = _forwardcopies(b, a)
193 193 r = {}
194 194 for k, v in sorted(f.iteritems()):
195 195 # remove copies
196 196 if v in a:
197 197 continue
198 198 r[v] = k
199 199 return r
200 200
201 201 def pathcopies(x, y):
202 202 '''find {dst@y: src@x} copy mapping for directed compare'''
203 203 if x == y or not x or not y:
204 204 return {}
205 205 a = y.ancestor(x)
206 206 if a == x:
207 207 return _forwardcopies(x, y)
208 208 if a == y:
209 209 return _backwardrenames(x, y)
210 210 return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
211 211
212 212 def _computenonoverlap(repo, m1, m2, ma):
213 213 """Computes the files exclusive to m1 and m2.
214 214 This is its own function so extensions can easily wrap this call to see what
215 215 files mergecopies is about to process.
216 216 """
217 217 addedinm1 = m1.filesnotin(ma)
218 218 addedinm2 = m2.filesnotin(ma)
219 219 u1 = sorted(addedinm1 - addedinm2)
220 220 u2 = sorted(addedinm2 - addedinm1)
221 221
222 222 if u1:
223 223 repo.ui.debug(" unmatched files in local:\n %s\n"
224 224 % "\n ".join(u1))
225 225 if u2:
226 226 repo.ui.debug(" unmatched files in other:\n %s\n"
227 227 % "\n ".join(u2))
228 228 return u1, u2
229 229
230 230 def mergecopies(repo, c1, c2, ca):
231 231 """
232 232 Find moves and copies between context c1 and c2 that are relevant
233 233 for merging.
234 234
235 235 Returns four dicts: "copy", "movewithdir", "diverge", and
236 236 "renamedelete".
237 237
238 238 "copy" is a mapping from destination name -> source name,
239 239 where source is in c1 and destination is in c2 or vice-versa.
240 240
241 241 "movewithdir" is a mapping from source name -> destination name,
242 242 where the file at source present in one context but not the other
243 243 needs to be moved to destination by the merge process, because the
244 244 other context moved the directory it is in.
245 245
246 246 "diverge" is a mapping of source name -> list of destination names
247 247 for divergent renames.
248 248
249 249 "renamedelete" is a mapping of source name -> list of destination
250 250 names for files deleted in c1 that were renamed in c2 or vice-versa.
251 251 """
252 252 # avoid silly behavior for update from empty dir
253 253 if not c1 or not c2 or c1 == c2:
254 254 return {}, {}, {}, {}
255 255
256 256 # avoid silly behavior for parent -> working dir
257 257 if c2.node() is None and c1.node() == repo.dirstate.p1():
258 258 return repo.dirstate.copies(), {}, {}, {}
259 259
260 260 limit = _findlimit(repo, c1.rev(), c2.rev())
261 261 if limit is None:
262 262 # no common ancestor, no copies
263 263 return {}, {}, {}, {}
264 264 m1 = c1.manifest()
265 265 m2 = c2.manifest()
266 266 ma = ca.manifest()
267 267
268 268 def makectx(f, n):
269 269 if len(n) != 20: # in a working context?
270 270 if c1.rev() is None:
271 271 return c1.filectx(f)
272 272 return c2.filectx(f)
273 273 return repo.filectx(f, fileid=n)
274 274
275 275 ctx = util.lrucachefunc(makectx)
276 276 copy = {}
277 277 movewithdir = {}
278 278 fullcopy = {}
279 279 diverge = {}
280 280
281 281 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
282 282
283 283 u1, u2 = _computenonoverlap(repo, m1, m2, ma)
284 284
285 285 for f in u1:
286 286 checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
287 287
288 288 for f in u2:
289 289 checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
290 290
291 291 renamedelete = {}
292 292 renamedelete2 = set()
293 293 diverge2 = set()
294 294 for of, fl in diverge.items():
295 295 if len(fl) == 1 or of in c1 or of in c2:
296 296 del diverge[of] # not actually divergent, or not a rename
297 297 if of not in c1 and of not in c2:
298 298 # renamed on one side, deleted on the other side, but filter
299 299 # out files that have been renamed and then deleted
300 300 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
301 301 renamedelete2.update(fl) # reverse map for below
302 302 else:
303 303 diverge2.update(fl) # reverse map for below
304 304
305 bothnew = sorted([d for d in m1 if d in m2 and d not in ma])
305 addedinm1 = m1.filesnotin(ma)
306 addedinm2 = m2.filesnotin(ma)
307 bothnew = sorted(addedinm1 & addedinm2)
306 308 if bothnew:
307 309 repo.ui.debug(" unmatched files new in both:\n %s\n"
308 310 % "\n ".join(bothnew))
309 311 bothdiverge, _copy, _fullcopy = {}, {}, {}
310 312 for f in bothnew:
311 313 checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
312 314 checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
313 315 for of, fl in bothdiverge.items():
314 316 if len(fl) == 2 and fl[0] == fl[1]:
315 317 copy[fl[0]] = of # not actually divergent, just matching renames
316 318
317 319 if fullcopy and repo.ui.debugflag:
318 320 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
319 321 "% = renamed and deleted):\n")
320 322 for f in sorted(fullcopy):
321 323 note = ""
322 324 if f in copy:
323 325 note += "*"
324 326 if f in diverge2:
325 327 note += "!"
326 328 if f in renamedelete2:
327 329 note += "%"
328 330 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
329 331 note))
330 332 del diverge2
331 333
332 334 if not fullcopy:
333 335 return copy, movewithdir, diverge, renamedelete
334 336
335 337 repo.ui.debug(" checking for directory renames\n")
336 338
337 339 # generate a directory move map
338 340 d1, d2 = c1.dirs(), c2.dirs()
339 341 d1.addpath('/')
340 342 d2.addpath('/')
341 343 invalid = set()
342 344 dirmove = {}
343 345
344 346 # examine each file copy for a potential directory move, which is
345 347 # when all the files in a directory are moved to a new directory
346 348 for dst, src in fullcopy.iteritems():
347 349 dsrc, ddst = _dirname(src), _dirname(dst)
348 350 if dsrc in invalid:
349 351 # already seen to be uninteresting
350 352 continue
351 353 elif dsrc in d1 and ddst in d1:
352 354 # directory wasn't entirely moved locally
353 355 invalid.add(dsrc)
354 356 elif dsrc in d2 and ddst in d2:
355 357 # directory wasn't entirely moved remotely
356 358 invalid.add(dsrc)
357 359 elif dsrc in dirmove and dirmove[dsrc] != ddst:
358 360 # files from the same directory moved to two different places
359 361 invalid.add(dsrc)
360 362 else:
361 363 # looks good so far
362 364 dirmove[dsrc + "/"] = ddst + "/"
363 365
364 366 for i in invalid:
365 367 if i in dirmove:
366 368 del dirmove[i]
367 369 del d1, d2, invalid
368 370
369 371 if not dirmove:
370 372 return copy, movewithdir, diverge, renamedelete
371 373
372 374 for d in dirmove:
373 375 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
374 376 (d, dirmove[d]))
375 377
376 378 # check unaccounted nonoverlapping files against directory moves
377 379 for f in u1 + u2:
378 380 if f not in fullcopy:
379 381 for d in dirmove:
380 382 if f.startswith(d):
381 383 # new file added in a directory that was moved, move it
382 384 df = dirmove[d] + f[len(d):]
383 385 if df not in copy:
384 386 movewithdir[f] = df
385 387 repo.ui.debug((" pending file src: '%s' -> "
386 388 "dst: '%s'\n") % (f, df))
387 389 break
388 390
389 391 return copy, movewithdir, diverge, renamedelete
390 392
391 393 def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
392 394 """
393 395 check possible copies of f from m1 to m2
394 396
395 397 ctx = function accepting (filename, node) that returns a filectx.
396 398 f = the filename to check
397 399 m1 = the source manifest
398 400 m2 = the destination manifest
399 401 ca = the changectx of the common ancestor
400 402 limit = the rev number to not search beyond
401 403 diverge = record all diverges in this dict
402 404 copy = record all non-divergent copies in this dict
403 405 fullcopy = record all copies in this dict
404 406 """
405 407
406 408 ma = ca.manifest()
407 409
408 410 def _related(f1, f2, limit):
409 411 # Walk back to common ancestor to see if the two files originate
410 412 # from the same file. Since workingfilectx's rev() is None it messes
411 413 # up the integer comparison logic, hence the pre-step check for
412 414 # None (f1 and f2 can only be workingfilectx's initially).
413 415
414 416 if f1 == f2:
415 417 return f1 # a match
416 418
417 419 g1, g2 = f1.ancestors(), f2.ancestors()
418 420 try:
419 421 f1r, f2r = f1.rev(), f2.rev()
420 422
421 423 if f1r is None:
422 424 f1 = g1.next()
423 425 if f2r is None:
424 426 f2 = g2.next()
425 427
426 428 while True:
427 429 f1r, f2r = f1.rev(), f2.rev()
428 430 if f1r > f2r:
429 431 f1 = g1.next()
430 432 elif f2r > f1r:
431 433 f2 = g2.next()
432 434 elif f1 == f2:
433 435 return f1 # a match
434 436 elif f1r == f2r or f1r < limit or f2r < limit:
435 437 return False # copy no longer relevant
436 438 except StopIteration:
437 439 return False
438 440
439 441 of = None
440 442 seen = set([f])
441 443 for oc in ctx(f, m1[f]).ancestors():
442 444 ocr = oc.rev()
443 445 of = oc.path()
444 446 if of in seen:
445 447 # check limit late - grab last rename before
446 448 if ocr < limit:
447 449 break
448 450 continue
449 451 seen.add(of)
450 452
451 453 fullcopy[f] = of # remember for dir rename detection
452 454 if of not in m2:
453 455 continue # no match, keep looking
454 456 if m2[of] == ma.get(of):
455 457 break # no merge needed, quit early
456 458 c2 = ctx(of, m2[of])
457 459 cr = _related(oc, c2, ca.rev())
458 460 if cr and (of == f or of == c2.path()): # non-divergent
459 461 copy[f] = of
460 462 of = None
461 463 break
462 464
463 465 if of in ma:
464 466 diverge.setdefault(of, []).append(f)
465 467
466 468 def duplicatecopies(repo, rev, fromrev, skiprev=None):
467 469 '''reproduce copies from fromrev to rev in the dirstate
468 470
469 471 If skiprev is specified, it's a revision that should be used to
470 472 filter copy records. Any copies that occur between fromrev and
471 473 skiprev will not be duplicated, even if they appear in the set of
472 474 copies between fromrev and rev.
473 475 '''
474 476 exclude = {}
475 477 if skiprev is not None:
476 478 exclude = pathcopies(repo[fromrev], repo[skiprev])
477 479 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
478 480 # copies.pathcopies returns backward renames, so dst might not
479 481 # actually be in the dirstate
480 482 if dst in exclude:
481 483 continue
482 484 if repo.dirstate[dst] in "nma":
483 485 repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now