##// END OF EJS Templates
copies: use absolute_import
Gregory Szorc -
r25924:cfc24c22 default
parent child Browse files
Show More
@@ -1,518 +1,524
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 import util, pathutil
8 from __future__ import absolute_import
9
9 10 import heapq
10 11
12 from . import (
13 pathutil,
14 util,
15 )
16
11 17 def _findlimit(repo, a, b):
12 18 """
13 19 Find the last revision that needs to be checked to ensure that a full
14 20 transitive closure for file copies can be properly calculated.
15 21 Generally, this means finding the earliest revision number that's an
16 22 ancestor of a or b but not both, except when a or b is a direct descendent
17 23 of the other, in which case we can return the minimum revnum of a and b.
18 24 None if no such revision exists.
19 25 """
20 26
21 27 # basic idea:
22 28 # - mark a and b with different sides
23 29 # - if a parent's children are all on the same side, the parent is
24 30 # on that side, otherwise it is on no side
25 31 # - walk the graph in topological order with the help of a heap;
26 32 # - add unseen parents to side map
27 33 # - clear side of any parent that has children on different sides
28 34 # - track number of interesting revs that might still be on a side
29 35 # - track the lowest interesting rev seen
30 36 # - quit when interesting revs is zero
31 37
32 38 cl = repo.changelog
33 39 working = len(cl) # pseudo rev for the working directory
34 40 if a is None:
35 41 a = working
36 42 if b is None:
37 43 b = working
38 44
39 45 side = {a: -1, b: 1}
40 46 visit = [-a, -b]
41 47 heapq.heapify(visit)
42 48 interesting = len(visit)
43 49 hascommonancestor = False
44 50 limit = working
45 51
46 52 while interesting:
47 53 r = -heapq.heappop(visit)
48 54 if r == working:
49 55 parents = [cl.rev(p) for p in repo.dirstate.parents()]
50 56 else:
51 57 parents = cl.parentrevs(r)
52 58 for p in parents:
53 59 if p < 0:
54 60 continue
55 61 if p not in side:
56 62 # first time we see p; add it to visit
57 63 side[p] = side[r]
58 64 if side[p]:
59 65 interesting += 1
60 66 heapq.heappush(visit, -p)
61 67 elif side[p] and side[p] != side[r]:
62 68 # p was interesting but now we know better
63 69 side[p] = 0
64 70 interesting -= 1
65 71 hascommonancestor = True
66 72 if side[r]:
67 73 limit = r # lowest rev visited
68 74 interesting -= 1
69 75
70 76 if not hascommonancestor:
71 77 return None
72 78
73 79 # Consider the following flow (see test-commit-amend.t under issue4405):
74 80 # 1/ File 'a0' committed
75 81 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
76 82 # 3/ Move back to first commit
77 83 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
78 84 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
79 85 #
80 86 # During the amend in step five, we will be in this state:
81 87 #
82 88 # @ 3 temporary amend commit for a1-amend
83 89 # |
84 90 # o 2 a1-amend
85 91 # |
86 92 # | o 1 a1
87 93 # |/
88 94 # o 0 a0
89 95 #
90 96 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
91 97 # yet the filelog has the copy information in rev 1 and we will not look
92 98 # back far enough unless we also look at the a and b as candidates.
93 99 # This only occurs when a is a descendent of b or visa-versa.
94 100 return min(limit, a, b)
95 101
96 102 def _chain(src, dst, a, b):
97 103 '''chain two sets of copies a->b'''
98 104 t = a.copy()
99 105 for k, v in b.iteritems():
100 106 if v in t:
101 107 # found a chain
102 108 if t[v] != k:
103 109 # file wasn't renamed back to itself
104 110 t[k] = t[v]
105 111 if v not in dst:
106 112 # chain was a rename, not a copy
107 113 del t[v]
108 114 if v in src:
109 115 # file is a copy of an existing file
110 116 t[k] = v
111 117
112 118 # remove criss-crossed copies
113 119 for k, v in t.items():
114 120 if k in src and v in dst:
115 121 del t[k]
116 122
117 123 return t
118 124
119 125 def _tracefile(fctx, am, limit=-1):
120 126 '''return file context that is the ancestor of fctx present in ancestor
121 127 manifest am, stopping after the first ancestor lower than limit'''
122 128
123 129 for f in fctx.ancestors():
124 130 if am.get(f.path(), None) == f.filenode():
125 131 return f
126 132 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
127 133 return None
128 134
129 135 def _dirstatecopies(d):
130 136 ds = d._repo.dirstate
131 137 c = ds.copies().copy()
132 138 for k in c.keys():
133 139 if ds[k] not in 'anm':
134 140 del c[k]
135 141 return c
136 142
137 143 def _computeforwardmissing(a, b, match=None):
138 144 """Computes which files are in b but not a.
139 145 This is its own function so extensions can easily wrap this call to see what
140 146 files _forwardcopies is about to process.
141 147 """
142 148 ma = a.manifest()
143 149 mb = b.manifest()
144 150 if match:
145 151 ma = ma.matches(match)
146 152 mb = mb.matches(match)
147 153 return mb.filesnotin(ma)
148 154
149 155 def _forwardcopies(a, b, match=None):
150 156 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
151 157
152 158 # check for working copy
153 159 w = None
154 160 if b.rev() is None:
155 161 w = b
156 162 b = w.p1()
157 163 if a == b:
158 164 # short-circuit to avoid issues with merge states
159 165 return _dirstatecopies(w)
160 166
161 167 # files might have to be traced back to the fctx parent of the last
162 168 # one-side-only changeset, but not further back than that
163 169 limit = _findlimit(a._repo, a.rev(), b.rev())
164 170 if limit is None:
165 171 limit = -1
166 172 am = a.manifest()
167 173
168 174 # find where new files came from
169 175 # we currently don't try to find where old files went, too expensive
170 176 # this means we can miss a case like 'hg rm b; hg cp a b'
171 177 cm = {}
172 178 missing = _computeforwardmissing(a, b, match=match)
173 179 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
174 180 for f in missing:
175 181 fctx = b[f]
176 182 fctx._ancestrycontext = ancestrycontext
177 183 ofctx = _tracefile(fctx, am, limit)
178 184 if ofctx:
179 185 cm[f] = ofctx.path()
180 186
181 187 # combine copies from dirstate if necessary
182 188 if w is not None:
183 189 cm = _chain(a, w, cm, _dirstatecopies(w))
184 190
185 191 return cm
186 192
187 193 def _backwardrenames(a, b):
188 194 # Even though we're not taking copies into account, 1:n rename situations
189 195 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
190 196 # arbitrarily pick one of the renames.
191 197 f = _forwardcopies(b, a)
192 198 r = {}
193 199 for k, v in sorted(f.iteritems()):
194 200 # remove copies
195 201 if v in a:
196 202 continue
197 203 r[v] = k
198 204 return r
199 205
200 206 def pathcopies(x, y, match=None):
201 207 '''find {dst@y: src@x} copy mapping for directed compare'''
202 208 if x == y or not x or not y:
203 209 return {}
204 210 a = y.ancestor(x)
205 211 if a == x:
206 212 return _forwardcopies(x, y, match=match)
207 213 if a == y:
208 214 return _backwardrenames(x, y)
209 215 return _chain(x, y, _backwardrenames(x, a),
210 216 _forwardcopies(a, y, match=match))
211 217
212 218 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
213 219 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
214 220 and c2. This is its own function so extensions can easily wrap this call
215 221 to see what files mergecopies is about to process.
216 222
217 223 Even though c1 and c2 are not used in this function, they are useful in
218 224 other extensions for being able to read the file nodes of the changed files.
219 225 """
220 226 u1 = sorted(addedinm1 - addedinm2)
221 227 u2 = sorted(addedinm2 - addedinm1)
222 228
223 229 if u1:
224 230 repo.ui.debug(" unmatched files in local:\n %s\n"
225 231 % "\n ".join(u1))
226 232 if u2:
227 233 repo.ui.debug(" unmatched files in other:\n %s\n"
228 234 % "\n ".join(u2))
229 235 return u1, u2
230 236
231 237 def mergecopies(repo, c1, c2, ca):
232 238 """
233 239 Find moves and copies between context c1 and c2 that are relevant
234 240 for merging.
235 241
236 242 Returns four dicts: "copy", "movewithdir", "diverge", and
237 243 "renamedelete".
238 244
239 245 "copy" is a mapping from destination name -> source name,
240 246 where source is in c1 and destination is in c2 or vice-versa.
241 247
242 248 "movewithdir" is a mapping from source name -> destination name,
243 249 where the file at source present in one context but not the other
244 250 needs to be moved to destination by the merge process, because the
245 251 other context moved the directory it is in.
246 252
247 253 "diverge" is a mapping of source name -> list of destination names
248 254 for divergent renames.
249 255
250 256 "renamedelete" is a mapping of source name -> list of destination
251 257 names for files deleted in c1 that were renamed in c2 or vice-versa.
252 258 """
253 259 # avoid silly behavior for update from empty dir
254 260 if not c1 or not c2 or c1 == c2:
255 261 return {}, {}, {}, {}
256 262
257 263 # avoid silly behavior for parent -> working dir
258 264 if c2.node() is None and c1.node() == repo.dirstate.p1():
259 265 return repo.dirstate.copies(), {}, {}, {}
260 266
261 267 limit = _findlimit(repo, c1.rev(), c2.rev())
262 268 if limit is None:
263 269 # no common ancestor, no copies
264 270 return {}, {}, {}, {}
265 271 m1 = c1.manifest()
266 272 m2 = c2.manifest()
267 273 ma = ca.manifest()
268 274
269 275
270 276 def setupctx(ctx):
271 277 """return a 'makectx' function suitable for checkcopies usage from ctx
272 278
273 279 We have to re-setup the function building 'filectx' for each
274 280 'checkcopies' to ensure the linkrev adjustement is properly setup for
275 281 each. Linkrev adjustment is important to avoid bug in rename
276 282 detection. Moreover, having a proper '_ancestrycontext' setup ensures
277 283 the performance impact of this adjustment is kept limited. Without it,
278 284 each file could do a full dag traversal making the time complexity of
279 285 the operation explode (see issue4537).
280 286
281 287 This function exists here mostly to limit the impact on stable. Feel
282 288 free to refactor on default.
283 289 """
284 290 rev = ctx.rev()
285 291 ac = getattr(ctx, '_ancestrycontext', None)
286 292 if ac is None:
287 293 revs = [rev]
288 294 if rev is None:
289 295 revs = [p.rev() for p in ctx.parents()]
290 296 ac = ctx._repo.changelog.ancestors(revs, inclusive=True)
291 297 ctx._ancestrycontext = ac
292 298 def makectx(f, n):
293 299 if len(n) != 20: # in a working context?
294 300 if c1.rev() is None:
295 301 return c1.filectx(f)
296 302 return c2.filectx(f)
297 303 fctx = repo.filectx(f, fileid=n)
298 304 # setup only needed for filectx not create from a changectx
299 305 fctx._ancestrycontext = ac
300 306 fctx._descendantrev = rev
301 307 return fctx
302 308 return util.lrucachefunc(makectx)
303 309
304 310 copy = {}
305 311 movewithdir = {}
306 312 fullcopy = {}
307 313 diverge = {}
308 314
309 315 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
310 316
311 317 addedinm1 = m1.filesnotin(ma)
312 318 addedinm2 = m2.filesnotin(ma)
313 319 u1, u2 = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
314 320
315 321 for f in u1:
316 322 ctx = setupctx(c1)
317 323 checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
318 324
319 325 for f in u2:
320 326 ctx = setupctx(c2)
321 327 checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
322 328
323 329 renamedelete = {}
324 330 renamedelete2 = set()
325 331 diverge2 = set()
326 332 for of, fl in diverge.items():
327 333 if len(fl) == 1 or of in c1 or of in c2:
328 334 del diverge[of] # not actually divergent, or not a rename
329 335 if of not in c1 and of not in c2:
330 336 # renamed on one side, deleted on the other side, but filter
331 337 # out files that have been renamed and then deleted
332 338 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
333 339 renamedelete2.update(fl) # reverse map for below
334 340 else:
335 341 diverge2.update(fl) # reverse map for below
336 342
337 343 bothnew = sorted(addedinm1 & addedinm2)
338 344 if bothnew:
339 345 repo.ui.debug(" unmatched files new in both:\n %s\n"
340 346 % "\n ".join(bothnew))
341 347 bothdiverge, _copy, _fullcopy = {}, {}, {}
342 348 for f in bothnew:
343 349 ctx = setupctx(c1)
344 350 checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
345 351 ctx = setupctx(c2)
346 352 checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
347 353 for of, fl in bothdiverge.items():
348 354 if len(fl) == 2 and fl[0] == fl[1]:
349 355 copy[fl[0]] = of # not actually divergent, just matching renames
350 356
351 357 if fullcopy and repo.ui.debugflag:
352 358 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
353 359 "% = renamed and deleted):\n")
354 360 for f in sorted(fullcopy):
355 361 note = ""
356 362 if f in copy:
357 363 note += "*"
358 364 if f in diverge2:
359 365 note += "!"
360 366 if f in renamedelete2:
361 367 note += "%"
362 368 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
363 369 note))
364 370 del diverge2
365 371
366 372 if not fullcopy:
367 373 return copy, movewithdir, diverge, renamedelete
368 374
369 375 repo.ui.debug(" checking for directory renames\n")
370 376
371 377 # generate a directory move map
372 378 d1, d2 = c1.dirs(), c2.dirs()
373 379 # Hack for adding '', which is not otherwise added, to d1 and d2
374 380 d1.addpath('/')
375 381 d2.addpath('/')
376 382 invalid = set()
377 383 dirmove = {}
378 384
379 385 # examine each file copy for a potential directory move, which is
380 386 # when all the files in a directory are moved to a new directory
381 387 for dst, src in fullcopy.iteritems():
382 388 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
383 389 if dsrc in invalid:
384 390 # already seen to be uninteresting
385 391 continue
386 392 elif dsrc in d1 and ddst in d1:
387 393 # directory wasn't entirely moved locally
388 394 invalid.add(dsrc)
389 395 elif dsrc in d2 and ddst in d2:
390 396 # directory wasn't entirely moved remotely
391 397 invalid.add(dsrc)
392 398 elif dsrc in dirmove and dirmove[dsrc] != ddst:
393 399 # files from the same directory moved to two different places
394 400 invalid.add(dsrc)
395 401 else:
396 402 # looks good so far
397 403 dirmove[dsrc + "/"] = ddst + "/"
398 404
399 405 for i in invalid:
400 406 if i in dirmove:
401 407 del dirmove[i]
402 408 del d1, d2, invalid
403 409
404 410 if not dirmove:
405 411 return copy, movewithdir, diverge, renamedelete
406 412
407 413 for d in dirmove:
408 414 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
409 415 (d, dirmove[d]))
410 416
411 417 # check unaccounted nonoverlapping files against directory moves
412 418 for f in u1 + u2:
413 419 if f not in fullcopy:
414 420 for d in dirmove:
415 421 if f.startswith(d):
416 422 # new file added in a directory that was moved, move it
417 423 df = dirmove[d] + f[len(d):]
418 424 if df not in copy:
419 425 movewithdir[f] = df
420 426 repo.ui.debug((" pending file src: '%s' -> "
421 427 "dst: '%s'\n") % (f, df))
422 428 break
423 429
424 430 return copy, movewithdir, diverge, renamedelete
425 431
426 432 def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
427 433 """
428 434 check possible copies of f from m1 to m2
429 435
430 436 ctx = function accepting (filename, node) that returns a filectx.
431 437 f = the filename to check
432 438 m1 = the source manifest
433 439 m2 = the destination manifest
434 440 ca = the changectx of the common ancestor
435 441 limit = the rev number to not search beyond
436 442 diverge = record all diverges in this dict
437 443 copy = record all non-divergent copies in this dict
438 444 fullcopy = record all copies in this dict
439 445 """
440 446
441 447 ma = ca.manifest()
442 448
443 449 def _related(f1, f2, limit):
444 450 # Walk back to common ancestor to see if the two files originate
445 451 # from the same file. Since workingfilectx's rev() is None it messes
446 452 # up the integer comparison logic, hence the pre-step check for
447 453 # None (f1 and f2 can only be workingfilectx's initially).
448 454
449 455 if f1 == f2:
450 456 return f1 # a match
451 457
452 458 g1, g2 = f1.ancestors(), f2.ancestors()
453 459 try:
454 460 f1r, f2r = f1.linkrev(), f2.linkrev()
455 461
456 462 if f1r is None:
457 463 f1 = g1.next()
458 464 if f2r is None:
459 465 f2 = g2.next()
460 466
461 467 while True:
462 468 f1r, f2r = f1.linkrev(), f2.linkrev()
463 469 if f1r > f2r:
464 470 f1 = g1.next()
465 471 elif f2r > f1r:
466 472 f2 = g2.next()
467 473 elif f1 == f2:
468 474 return f1 # a match
469 475 elif f1r == f2r or f1r < limit or f2r < limit:
470 476 return False # copy no longer relevant
471 477 except StopIteration:
472 478 return False
473 479
474 480 of = None
475 481 seen = set([f])
476 482 for oc in ctx(f, m1[f]).ancestors():
477 483 ocr = oc.linkrev()
478 484 of = oc.path()
479 485 if of in seen:
480 486 # check limit late - grab last rename before
481 487 if ocr < limit:
482 488 break
483 489 continue
484 490 seen.add(of)
485 491
486 492 fullcopy[f] = of # remember for dir rename detection
487 493 if of not in m2:
488 494 continue # no match, keep looking
489 495 if m2[of] == ma.get(of):
490 496 break # no merge needed, quit early
491 497 c2 = ctx(of, m2[of])
492 498 cr = _related(oc, c2, ca.rev())
493 499 if cr and (of == f or of == c2.path()): # non-divergent
494 500 copy[f] = of
495 501 of = None
496 502 break
497 503
498 504 if of in ma:
499 505 diverge.setdefault(of, []).append(f)
500 506
501 507 def duplicatecopies(repo, rev, fromrev, skiprev=None):
502 508 '''reproduce copies from fromrev to rev in the dirstate
503 509
504 510 If skiprev is specified, it's a revision that should be used to
505 511 filter copy records. Any copies that occur between fromrev and
506 512 skiprev will not be duplicated, even if they appear in the set of
507 513 copies between fromrev and rev.
508 514 '''
509 515 exclude = {}
510 516 if skiprev is not None:
511 517 exclude = pathcopies(repo[fromrev], repo[skiprev])
512 518 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
513 519 # copies.pathcopies returns backward renames, so dst might not
514 520 # actually be in the dirstate
515 521 if dst in exclude:
516 522 continue
517 523 if repo.dirstate[dst] in "nma":
518 524 repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now