##// END OF EJS Templates
copies._forwardcopies: use set operations to find missing files...
Siddharth Agarwal -
r18878:3cfaace0 default
parent child Browse files
Show More
@@ -1,386 +1,388 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import util
8 import util
9 import heapq
9 import heapq
10
10
11 def _nonoverlap(d1, d2, d3):
11 def _nonoverlap(d1, d2, d3):
12 "Return list of elements in d1 not in d2 or d3"
12 "Return list of elements in d1 not in d2 or d3"
13 return sorted([d for d in d1 if d not in d3 and d not in d2])
13 return sorted([d for d in d1 if d not in d3 and d not in d2])
14
14
15 def _dirname(f):
15 def _dirname(f):
16 s = f.rfind("/")
16 s = f.rfind("/")
17 if s == -1:
17 if s == -1:
18 return ""
18 return ""
19 return f[:s]
19 return f[:s]
20
20
21 def _findlimit(repo, a, b):
21 def _findlimit(repo, a, b):
22 """Find the earliest revision that's an ancestor of a or b but not both,
22 """Find the earliest revision that's an ancestor of a or b but not both,
23 None if no such revision exists.
23 None if no such revision exists.
24 """
24 """
25 # basic idea:
25 # basic idea:
26 # - mark a and b with different sides
26 # - mark a and b with different sides
27 # - if a parent's children are all on the same side, the parent is
27 # - if a parent's children are all on the same side, the parent is
28 # on that side, otherwise it is on no side
28 # on that side, otherwise it is on no side
29 # - walk the graph in topological order with the help of a heap;
29 # - walk the graph in topological order with the help of a heap;
30 # - add unseen parents to side map
30 # - add unseen parents to side map
31 # - clear side of any parent that has children on different sides
31 # - clear side of any parent that has children on different sides
32 # - track number of interesting revs that might still be on a side
32 # - track number of interesting revs that might still be on a side
33 # - track the lowest interesting rev seen
33 # - track the lowest interesting rev seen
34 # - quit when interesting revs is zero
34 # - quit when interesting revs is zero
35
35
36 cl = repo.changelog
36 cl = repo.changelog
37 working = len(cl) # pseudo rev for the working directory
37 working = len(cl) # pseudo rev for the working directory
38 if a is None:
38 if a is None:
39 a = working
39 a = working
40 if b is None:
40 if b is None:
41 b = working
41 b = working
42
42
43 side = {a: -1, b: 1}
43 side = {a: -1, b: 1}
44 visit = [-a, -b]
44 visit = [-a, -b]
45 heapq.heapify(visit)
45 heapq.heapify(visit)
46 interesting = len(visit)
46 interesting = len(visit)
47 hascommonancestor = False
47 hascommonancestor = False
48 limit = working
48 limit = working
49
49
50 while interesting:
50 while interesting:
51 r = -heapq.heappop(visit)
51 r = -heapq.heappop(visit)
52 if r == working:
52 if r == working:
53 parents = [cl.rev(p) for p in repo.dirstate.parents()]
53 parents = [cl.rev(p) for p in repo.dirstate.parents()]
54 else:
54 else:
55 parents = cl.parentrevs(r)
55 parents = cl.parentrevs(r)
56 for p in parents:
56 for p in parents:
57 if p < 0:
57 if p < 0:
58 continue
58 continue
59 if p not in side:
59 if p not in side:
60 # first time we see p; add it to visit
60 # first time we see p; add it to visit
61 side[p] = side[r]
61 side[p] = side[r]
62 if side[p]:
62 if side[p]:
63 interesting += 1
63 interesting += 1
64 heapq.heappush(visit, -p)
64 heapq.heappush(visit, -p)
65 elif side[p] and side[p] != side[r]:
65 elif side[p] and side[p] != side[r]:
66 # p was interesting but now we know better
66 # p was interesting but now we know better
67 side[p] = 0
67 side[p] = 0
68 interesting -= 1
68 interesting -= 1
69 hascommonancestor = True
69 hascommonancestor = True
70 if side[r]:
70 if side[r]:
71 limit = r # lowest rev visited
71 limit = r # lowest rev visited
72 interesting -= 1
72 interesting -= 1
73
73
74 if not hascommonancestor:
74 if not hascommonancestor:
75 return None
75 return None
76 return limit
76 return limit
77
77
78 def _chain(src, dst, a, b):
78 def _chain(src, dst, a, b):
79 '''chain two sets of copies a->b'''
79 '''chain two sets of copies a->b'''
80 t = a.copy()
80 t = a.copy()
81 for k, v in b.iteritems():
81 for k, v in b.iteritems():
82 if v in t:
82 if v in t:
83 # found a chain
83 # found a chain
84 if t[v] != k:
84 if t[v] != k:
85 # file wasn't renamed back to itself
85 # file wasn't renamed back to itself
86 t[k] = t[v]
86 t[k] = t[v]
87 if v not in dst:
87 if v not in dst:
88 # chain was a rename, not a copy
88 # chain was a rename, not a copy
89 del t[v]
89 del t[v]
90 if v in src:
90 if v in src:
91 # file is a copy of an existing file
91 # file is a copy of an existing file
92 t[k] = v
92 t[k] = v
93
93
94 # remove criss-crossed copies
94 # remove criss-crossed copies
95 for k, v in t.items():
95 for k, v in t.items():
96 if k in src and v in dst:
96 if k in src and v in dst:
97 del t[k]
97 del t[k]
98
98
99 return t
99 return t
100
100
101 def _tracefile(fctx, actx):
101 def _tracefile(fctx, actx):
102 '''return file context that is the ancestor of fctx present in actx'''
102 '''return file context that is the ancestor of fctx present in actx'''
103 stop = actx.rev()
103 stop = actx.rev()
104 am = actx.manifest()
104 am = actx.manifest()
105
105
106 for f in fctx.ancestors():
106 for f in fctx.ancestors():
107 if am.get(f.path(), None) == f.filenode():
107 if am.get(f.path(), None) == f.filenode():
108 return f
108 return f
109 if f.rev() < stop:
109 if f.rev() < stop:
110 return None
110 return None
111
111
112 def _dirstatecopies(d):
112 def _dirstatecopies(d):
113 ds = d._repo.dirstate
113 ds = d._repo.dirstate
114 c = ds.copies().copy()
114 c = ds.copies().copy()
115 for k in c.keys():
115 for k in c.keys():
116 if ds[k] not in 'anm':
116 if ds[k] not in 'anm':
117 del c[k]
117 del c[k]
118 return c
118 return c
119
119
120 def _forwardcopies(a, b):
120 def _forwardcopies(a, b):
121 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
121 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
122
122
123 # check for working copy
123 # check for working copy
124 w = None
124 w = None
125 if b.rev() is None:
125 if b.rev() is None:
126 w = b
126 w = b
127 b = w.p1()
127 b = w.p1()
128 if a == b:
128 if a == b:
129 # short-circuit to avoid issues with merge states
129 # short-circuit to avoid issues with merge states
130 return _dirstatecopies(w)
130 return _dirstatecopies(w)
131
131
132 # find where new files came from
132 # find where new files came from
133 # we currently don't try to find where old files went, too expensive
133 # we currently don't try to find where old files went, too expensive
134 # this means we can miss a case like 'hg rm b; hg cp a b'
134 # this means we can miss a case like 'hg rm b; hg cp a b'
135 cm = {}
135 cm = {}
136 for f in b:
136 missing = set(b.manifest().iterkeys())
137 if f not in a:
137 missing.difference_update(a.manifest().iterkeys())
138
139 for f in missing:
138 ofctx = _tracefile(b[f], a)
140 ofctx = _tracefile(b[f], a)
139 if ofctx:
141 if ofctx:
140 cm[f] = ofctx.path()
142 cm[f] = ofctx.path()
141
143
142 # combine copies from dirstate if necessary
144 # combine copies from dirstate if necessary
143 if w is not None:
145 if w is not None:
144 cm = _chain(a, w, cm, _dirstatecopies(w))
146 cm = _chain(a, w, cm, _dirstatecopies(w))
145
147
146 return cm
148 return cm
147
149
148 def _backwardrenames(a, b):
150 def _backwardrenames(a, b):
149 # Even though we're not taking copies into account, 1:n rename situations
151 # Even though we're not taking copies into account, 1:n rename situations
150 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
152 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
151 # arbitrarily pick one of the renames.
153 # arbitrarily pick one of the renames.
152 f = _forwardcopies(b, a)
154 f = _forwardcopies(b, a)
153 r = {}
155 r = {}
154 for k, v in sorted(f.iteritems()):
156 for k, v in sorted(f.iteritems()):
155 # remove copies
157 # remove copies
156 if v in a:
158 if v in a:
157 continue
159 continue
158 r[v] = k
160 r[v] = k
159 return r
161 return r
160
162
161 def pathcopies(x, y):
163 def pathcopies(x, y):
162 '''find {dst@y: src@x} copy mapping for directed compare'''
164 '''find {dst@y: src@x} copy mapping for directed compare'''
163 if x == y or not x or not y:
165 if x == y or not x or not y:
164 return {}
166 return {}
165 a = y.ancestor(x)
167 a = y.ancestor(x)
166 if a == x:
168 if a == x:
167 return _forwardcopies(x, y)
169 return _forwardcopies(x, y)
168 if a == y:
170 if a == y:
169 return _backwardrenames(x, y)
171 return _backwardrenames(x, y)
170 return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
172 return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
171
173
172 def mergecopies(repo, c1, c2, ca):
174 def mergecopies(repo, c1, c2, ca):
173 """
175 """
174 Find moves and copies between context c1 and c2 that are relevant
176 Find moves and copies between context c1 and c2 that are relevant
175 for merging.
177 for merging.
176
178
177 Returns four dicts: "copy", "movewithdir", "diverge", and
179 Returns four dicts: "copy", "movewithdir", "diverge", and
178 "renamedelete".
180 "renamedelete".
179
181
180 "copy" is a mapping from destination name -> source name,
182 "copy" is a mapping from destination name -> source name,
181 where source is in c1 and destination is in c2 or vice-versa.
183 where source is in c1 and destination is in c2 or vice-versa.
182
184
183 "movewithdir" is a mapping from source name -> destination name,
185 "movewithdir" is a mapping from source name -> destination name,
184 where the file at source present in one context but not the other
186 where the file at source present in one context but not the other
185 needs to be moved to destination by the merge process, because the
187 needs to be moved to destination by the merge process, because the
186 other context moved the directory it is in.
188 other context moved the directory it is in.
187
189
188 "diverge" is a mapping of source name -> list of destination names
190 "diverge" is a mapping of source name -> list of destination names
189 for divergent renames.
191 for divergent renames.
190
192
191 "renamedelete" is a mapping of source name -> list of destination
193 "renamedelete" is a mapping of source name -> list of destination
192 names for files deleted in c1 that were renamed in c2 or vice-versa.
194 names for files deleted in c1 that were renamed in c2 or vice-versa.
193 """
195 """
194 # avoid silly behavior for update from empty dir
196 # avoid silly behavior for update from empty dir
195 if not c1 or not c2 or c1 == c2:
197 if not c1 or not c2 or c1 == c2:
196 return {}, {}, {}, {}
198 return {}, {}, {}, {}
197
199
198 # avoid silly behavior for parent -> working dir
200 # avoid silly behavior for parent -> working dir
199 if c2.node() is None and c1.node() == repo.dirstate.p1():
201 if c2.node() is None and c1.node() == repo.dirstate.p1():
200 return repo.dirstate.copies(), {}, {}, {}
202 return repo.dirstate.copies(), {}, {}, {}
201
203
202 limit = _findlimit(repo, c1.rev(), c2.rev())
204 limit = _findlimit(repo, c1.rev(), c2.rev())
203 if limit is None:
205 if limit is None:
204 # no common ancestor, no copies
206 # no common ancestor, no copies
205 return {}, {}, {}, {}
207 return {}, {}, {}, {}
206 m1 = c1.manifest()
208 m1 = c1.manifest()
207 m2 = c2.manifest()
209 m2 = c2.manifest()
208 ma = ca.manifest()
210 ma = ca.manifest()
209
211
210 def makectx(f, n):
212 def makectx(f, n):
211 if len(n) != 20: # in a working context?
213 if len(n) != 20: # in a working context?
212 if c1.rev() is None:
214 if c1.rev() is None:
213 return c1.filectx(f)
215 return c1.filectx(f)
214 return c2.filectx(f)
216 return c2.filectx(f)
215 return repo.filectx(f, fileid=n)
217 return repo.filectx(f, fileid=n)
216
218
217 ctx = util.lrucachefunc(makectx)
219 ctx = util.lrucachefunc(makectx)
218 copy = {}
220 copy = {}
219 movewithdir = {}
221 movewithdir = {}
220 fullcopy = {}
222 fullcopy = {}
221 diverge = {}
223 diverge = {}
222
224
223 def related(f1, f2, limit):
225 def related(f1, f2, limit):
224 # Walk back to common ancestor to see if the two files originate
226 # Walk back to common ancestor to see if the two files originate
225 # from the same file. Since workingfilectx's rev() is None it messes
227 # from the same file. Since workingfilectx's rev() is None it messes
226 # up the integer comparison logic, hence the pre-step check for
228 # up the integer comparison logic, hence the pre-step check for
227 # None (f1 and f2 can only be workingfilectx's initially).
229 # None (f1 and f2 can only be workingfilectx's initially).
228
230
229 if f1 == f2:
231 if f1 == f2:
230 return f1 # a match
232 return f1 # a match
231
233
232 g1, g2 = f1.ancestors(), f2.ancestors()
234 g1, g2 = f1.ancestors(), f2.ancestors()
233 try:
235 try:
234 f1r, f2r = f1.rev(), f2.rev()
236 f1r, f2r = f1.rev(), f2.rev()
235
237
236 if f1r is None:
238 if f1r is None:
237 f1 = g1.next()
239 f1 = g1.next()
238 if f2r is None:
240 if f2r is None:
239 f2 = g2.next()
241 f2 = g2.next()
240
242
241 while True:
243 while True:
242 f1r, f2r = f1.rev(), f2.rev()
244 f1r, f2r = f1.rev(), f2.rev()
243 if f1r > f2r:
245 if f1r > f2r:
244 f1 = g1.next()
246 f1 = g1.next()
245 elif f2r > f1r:
247 elif f2r > f1r:
246 f2 = g2.next()
248 f2 = g2.next()
247 elif f1 == f2:
249 elif f1 == f2:
248 return f1 # a match
250 return f1 # a match
249 elif f1r == f2r or f1r < limit or f2r < limit:
251 elif f1r == f2r or f1r < limit or f2r < limit:
250 return False # copy no longer relevant
252 return False # copy no longer relevant
251 except StopIteration:
253 except StopIteration:
252 return False
254 return False
253
255
254 def checkcopies(f, m1, m2):
256 def checkcopies(f, m1, m2):
255 '''check possible copies of f from m1 to m2'''
257 '''check possible copies of f from m1 to m2'''
256 of = None
258 of = None
257 seen = set([f])
259 seen = set([f])
258 for oc in ctx(f, m1[f]).ancestors():
260 for oc in ctx(f, m1[f]).ancestors():
259 ocr = oc.rev()
261 ocr = oc.rev()
260 of = oc.path()
262 of = oc.path()
261 if of in seen:
263 if of in seen:
262 # check limit late - grab last rename before
264 # check limit late - grab last rename before
263 if ocr < limit:
265 if ocr < limit:
264 break
266 break
265 continue
267 continue
266 seen.add(of)
268 seen.add(of)
267
269
268 fullcopy[f] = of # remember for dir rename detection
270 fullcopy[f] = of # remember for dir rename detection
269 if of not in m2:
271 if of not in m2:
270 continue # no match, keep looking
272 continue # no match, keep looking
271 if m2[of] == ma.get(of):
273 if m2[of] == ma.get(of):
272 break # no merge needed, quit early
274 break # no merge needed, quit early
273 c2 = ctx(of, m2[of])
275 c2 = ctx(of, m2[of])
274 cr = related(oc, c2, ca.rev())
276 cr = related(oc, c2, ca.rev())
275 if cr and (of == f or of == c2.path()): # non-divergent
277 if cr and (of == f or of == c2.path()): # non-divergent
276 copy[f] = of
278 copy[f] = of
277 of = None
279 of = None
278 break
280 break
279
281
280 if of in ma:
282 if of in ma:
281 diverge.setdefault(of, []).append(f)
283 diverge.setdefault(of, []).append(f)
282
284
283 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
285 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
284
286
285 u1 = _nonoverlap(m1, m2, ma)
287 u1 = _nonoverlap(m1, m2, ma)
286 u2 = _nonoverlap(m2, m1, ma)
288 u2 = _nonoverlap(m2, m1, ma)
287
289
288 if u1:
290 if u1:
289 repo.ui.debug(" unmatched files in local:\n %s\n"
291 repo.ui.debug(" unmatched files in local:\n %s\n"
290 % "\n ".join(u1))
292 % "\n ".join(u1))
291 if u2:
293 if u2:
292 repo.ui.debug(" unmatched files in other:\n %s\n"
294 repo.ui.debug(" unmatched files in other:\n %s\n"
293 % "\n ".join(u2))
295 % "\n ".join(u2))
294
296
295 for f in u1:
297 for f in u1:
296 checkcopies(f, m1, m2)
298 checkcopies(f, m1, m2)
297 for f in u2:
299 for f in u2:
298 checkcopies(f, m2, m1)
300 checkcopies(f, m2, m1)
299
301
300 renamedelete = {}
302 renamedelete = {}
301 renamedelete2 = set()
303 renamedelete2 = set()
302 diverge2 = set()
304 diverge2 = set()
303 for of, fl in diverge.items():
305 for of, fl in diverge.items():
304 if len(fl) == 1 or of in c1 or of in c2:
306 if len(fl) == 1 or of in c1 or of in c2:
305 del diverge[of] # not actually divergent, or not a rename
307 del diverge[of] # not actually divergent, or not a rename
306 if of not in c1 and of not in c2:
308 if of not in c1 and of not in c2:
307 # renamed on one side, deleted on the other side, but filter
309 # renamed on one side, deleted on the other side, but filter
308 # out files that have been renamed and then deleted
310 # out files that have been renamed and then deleted
309 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
311 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
310 renamedelete2.update(fl) # reverse map for below
312 renamedelete2.update(fl) # reverse map for below
311 else:
313 else:
312 diverge2.update(fl) # reverse map for below
314 diverge2.update(fl) # reverse map for below
313
315
314 if fullcopy:
316 if fullcopy:
315 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
317 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
316 "% = renamed and deleted):\n")
318 "% = renamed and deleted):\n")
317 for f in sorted(fullcopy):
319 for f in sorted(fullcopy):
318 note = ""
320 note = ""
319 if f in copy:
321 if f in copy:
320 note += "*"
322 note += "*"
321 if f in diverge2:
323 if f in diverge2:
322 note += "!"
324 note += "!"
323 if f in renamedelete2:
325 if f in renamedelete2:
324 note += "%"
326 note += "%"
325 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
327 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
326 note))
328 note))
327 del diverge2
329 del diverge2
328
330
329 if not fullcopy:
331 if not fullcopy:
330 return copy, movewithdir, diverge, renamedelete
332 return copy, movewithdir, diverge, renamedelete
331
333
332 repo.ui.debug(" checking for directory renames\n")
334 repo.ui.debug(" checking for directory renames\n")
333
335
334 # generate a directory move map
336 # generate a directory move map
335 d1, d2 = c1.dirs(), c2.dirs()
337 d1, d2 = c1.dirs(), c2.dirs()
336 d1.add('')
338 d1.add('')
337 d2.add('')
339 d2.add('')
338 invalid = set()
340 invalid = set()
339 dirmove = {}
341 dirmove = {}
340
342
341 # examine each file copy for a potential directory move, which is
343 # examine each file copy for a potential directory move, which is
342 # when all the files in a directory are moved to a new directory
344 # when all the files in a directory are moved to a new directory
343 for dst, src in fullcopy.iteritems():
345 for dst, src in fullcopy.iteritems():
344 dsrc, ddst = _dirname(src), _dirname(dst)
346 dsrc, ddst = _dirname(src), _dirname(dst)
345 if dsrc in invalid:
347 if dsrc in invalid:
346 # already seen to be uninteresting
348 # already seen to be uninteresting
347 continue
349 continue
348 elif dsrc in d1 and ddst in d1:
350 elif dsrc in d1 and ddst in d1:
349 # directory wasn't entirely moved locally
351 # directory wasn't entirely moved locally
350 invalid.add(dsrc)
352 invalid.add(dsrc)
351 elif dsrc in d2 and ddst in d2:
353 elif dsrc in d2 and ddst in d2:
352 # directory wasn't entirely moved remotely
354 # directory wasn't entirely moved remotely
353 invalid.add(dsrc)
355 invalid.add(dsrc)
354 elif dsrc in dirmove and dirmove[dsrc] != ddst:
356 elif dsrc in dirmove and dirmove[dsrc] != ddst:
355 # files from the same directory moved to two different places
357 # files from the same directory moved to two different places
356 invalid.add(dsrc)
358 invalid.add(dsrc)
357 else:
359 else:
358 # looks good so far
360 # looks good so far
359 dirmove[dsrc + "/"] = ddst + "/"
361 dirmove[dsrc + "/"] = ddst + "/"
360
362
361 for i in invalid:
363 for i in invalid:
362 if i in dirmove:
364 if i in dirmove:
363 del dirmove[i]
365 del dirmove[i]
364 del d1, d2, invalid
366 del d1, d2, invalid
365
367
366 if not dirmove:
368 if not dirmove:
367 return copy, movewithdir, diverge, renamedelete
369 return copy, movewithdir, diverge, renamedelete
368
370
369 for d in dirmove:
371 for d in dirmove:
370 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
372 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
371 (d, dirmove[d]))
373 (d, dirmove[d]))
372
374
373 # check unaccounted nonoverlapping files against directory moves
375 # check unaccounted nonoverlapping files against directory moves
374 for f in u1 + u2:
376 for f in u1 + u2:
375 if f not in fullcopy:
377 if f not in fullcopy:
376 for d in dirmove:
378 for d in dirmove:
377 if f.startswith(d):
379 if f.startswith(d):
378 # new file added in a directory that was moved, move it
380 # new file added in a directory that was moved, move it
379 df = dirmove[d] + f[len(d):]
381 df = dirmove[d] + f[len(d):]
380 if df not in copy:
382 if df not in copy:
381 movewithdir[f] = df
383 movewithdir[f] = df
382 repo.ui.debug((" pending file src: '%s' -> "
384 repo.ui.debug((" pending file src: '%s' -> "
383 "dst: '%s'\n") % (f, df))
385 "dst: '%s'\n") % (f, df))
384 break
386 break
385
387
386 return copy, movewithdir, diverge, renamedelete
388 return copy, movewithdir, diverge, renamedelete
General Comments 0
You need to be logged in to leave comments. Login now