##// END OF EJS Templates
copies: use ctx.dirs() for directory rename detection
Matt Mackall -
r16178:828fe2ca default
parent child Browse files
Show More
@@ -1,366 +1,357 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import util
8 import util
9 import heapq
9 import heapq
10
10
11 def _nonoverlap(d1, d2, d3):
11 def _nonoverlap(d1, d2, d3):
12 "Return list of elements in d1 not in d2 or d3"
12 "Return list of elements in d1 not in d2 or d3"
13 return sorted([d for d in d1 if d not in d3 and d not in d2])
13 return sorted([d for d in d1 if d not in d3 and d not in d2])
14
14
15 def _dirname(f):
15 def _dirname(f):
16 s = f.rfind("/")
16 s = f.rfind("/")
17 if s == -1:
17 if s == -1:
18 return ""
18 return ""
19 return f[:s]
19 return f[:s]
20
20
21 def _dirs(files):
22 d = set()
23 for f in files:
24 f = _dirname(f)
25 while f not in d:
26 d.add(f)
27 f = _dirname(f)
28 return d
29
30 def _findlimit(repo, a, b):
21 def _findlimit(repo, a, b):
31 """Find the earliest revision that's an ancestor of a or b but not both,
22 """Find the earliest revision that's an ancestor of a or b but not both,
32 None if no such revision exists.
23 None if no such revision exists.
33 """
24 """
34 # basic idea:
25 # basic idea:
35 # - mark a and b with different sides
26 # - mark a and b with different sides
36 # - if a parent's children are all on the same side, the parent is
27 # - if a parent's children are all on the same side, the parent is
37 # on that side, otherwise it is on no side
28 # on that side, otherwise it is on no side
38 # - walk the graph in topological order with the help of a heap;
29 # - walk the graph in topological order with the help of a heap;
39 # - add unseen parents to side map
30 # - add unseen parents to side map
40 # - clear side of any parent that has children on different sides
31 # - clear side of any parent that has children on different sides
41 # - track number of interesting revs that might still be on a side
32 # - track number of interesting revs that might still be on a side
42 # - track the lowest interesting rev seen
33 # - track the lowest interesting rev seen
43 # - quit when interesting revs is zero
34 # - quit when interesting revs is zero
44
35
45 cl = repo.changelog
36 cl = repo.changelog
46 working = len(cl) # pseudo rev for the working directory
37 working = len(cl) # pseudo rev for the working directory
47 if a is None:
38 if a is None:
48 a = working
39 a = working
49 if b is None:
40 if b is None:
50 b = working
41 b = working
51
42
52 side = {a: -1, b: 1}
43 side = {a: -1, b: 1}
53 visit = [-a, -b]
44 visit = [-a, -b]
54 heapq.heapify(visit)
45 heapq.heapify(visit)
55 interesting = len(visit)
46 interesting = len(visit)
56 hascommonancestor = False
47 hascommonancestor = False
57 limit = working
48 limit = working
58
49
59 while interesting:
50 while interesting:
60 r = -heapq.heappop(visit)
51 r = -heapq.heappop(visit)
61 if r == working:
52 if r == working:
62 parents = [cl.rev(p) for p in repo.dirstate.parents()]
53 parents = [cl.rev(p) for p in repo.dirstate.parents()]
63 else:
54 else:
64 parents = cl.parentrevs(r)
55 parents = cl.parentrevs(r)
65 for p in parents:
56 for p in parents:
66 if p < 0:
57 if p < 0:
67 continue
58 continue
68 if p not in side:
59 if p not in side:
69 # first time we see p; add it to visit
60 # first time we see p; add it to visit
70 side[p] = side[r]
61 side[p] = side[r]
71 if side[p]:
62 if side[p]:
72 interesting += 1
63 interesting += 1
73 heapq.heappush(visit, -p)
64 heapq.heappush(visit, -p)
74 elif side[p] and side[p] != side[r]:
65 elif side[p] and side[p] != side[r]:
75 # p was interesting but now we know better
66 # p was interesting but now we know better
76 side[p] = 0
67 side[p] = 0
77 interesting -= 1
68 interesting -= 1
78 hascommonancestor = True
69 hascommonancestor = True
79 if side[r]:
70 if side[r]:
80 limit = r # lowest rev visited
71 limit = r # lowest rev visited
81 interesting -= 1
72 interesting -= 1
82
73
83 if not hascommonancestor:
74 if not hascommonancestor:
84 return None
75 return None
85 return limit
76 return limit
86
77
87 def _chain(src, dst, a, b):
78 def _chain(src, dst, a, b):
88 '''chain two sets of copies a->b'''
79 '''chain two sets of copies a->b'''
89 t = a.copy()
80 t = a.copy()
90 for k, v in b.iteritems():
81 for k, v in b.iteritems():
91 if v in t:
82 if v in t:
92 # found a chain
83 # found a chain
93 if t[v] != k:
84 if t[v] != k:
94 # file wasn't renamed back to itself
85 # file wasn't renamed back to itself
95 t[k] = t[v]
86 t[k] = t[v]
96 if v not in dst:
87 if v not in dst:
97 # chain was a rename, not a copy
88 # chain was a rename, not a copy
98 del t[v]
89 del t[v]
99 if v in src:
90 if v in src:
100 # file is a copy of an existing file
91 # file is a copy of an existing file
101 t[k] = v
92 t[k] = v
102
93
103 # remove criss-crossed copies
94 # remove criss-crossed copies
104 for k, v in t.items():
95 for k, v in t.items():
105 if k in src and v in dst:
96 if k in src and v in dst:
106 del t[k]
97 del t[k]
107
98
108 return t
99 return t
109
100
110 def _tracefile(fctx, actx):
101 def _tracefile(fctx, actx):
111 '''return file context that is the ancestor of fctx present in actx'''
102 '''return file context that is the ancestor of fctx present in actx'''
112 stop = actx.rev()
103 stop = actx.rev()
113 am = actx.manifest()
104 am = actx.manifest()
114
105
115 for f in fctx.ancestors():
106 for f in fctx.ancestors():
116 if am.get(f.path(), None) == f.filenode():
107 if am.get(f.path(), None) == f.filenode():
117 return f
108 return f
118 if f.rev() < stop:
109 if f.rev() < stop:
119 return None
110 return None
120
111
121 def _dirstatecopies(d):
112 def _dirstatecopies(d):
122 ds = d._repo.dirstate
113 ds = d._repo.dirstate
123 c = ds.copies().copy()
114 c = ds.copies().copy()
124 for k in c.keys():
115 for k in c.keys():
125 if ds[k] not in 'anm':
116 if ds[k] not in 'anm':
126 del c[k]
117 del c[k]
127 return c
118 return c
128
119
129 def _forwardcopies(a, b):
120 def _forwardcopies(a, b):
130 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
121 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
131
122
132 # check for working copy
123 # check for working copy
133 w = None
124 w = None
134 if b.rev() is None:
125 if b.rev() is None:
135 w = b
126 w = b
136 b = w.p1()
127 b = w.p1()
137 if a == b:
128 if a == b:
138 # short-circuit to avoid issues with merge states
129 # short-circuit to avoid issues with merge states
139 return _dirstatecopies(w)
130 return _dirstatecopies(w)
140
131
141 # find where new files came from
132 # find where new files came from
142 # we currently don't try to find where old files went, too expensive
133 # we currently don't try to find where old files went, too expensive
143 # this means we can miss a case like 'hg rm b; hg cp a b'
134 # this means we can miss a case like 'hg rm b; hg cp a b'
144 cm = {}
135 cm = {}
145 for f in b:
136 for f in b:
146 if f not in a:
137 if f not in a:
147 ofctx = _tracefile(b[f], a)
138 ofctx = _tracefile(b[f], a)
148 if ofctx:
139 if ofctx:
149 cm[f] = ofctx.path()
140 cm[f] = ofctx.path()
150
141
151 # combine copies from dirstate if necessary
142 # combine copies from dirstate if necessary
152 if w is not None:
143 if w is not None:
153 cm = _chain(a, w, cm, _dirstatecopies(w))
144 cm = _chain(a, w, cm, _dirstatecopies(w))
154
145
155 return cm
146 return cm
156
147
157 def _backwardcopies(a, b):
148 def _backwardcopies(a, b):
158 # because the forward mapping is 1:n, we can lose renames here
149 # because the forward mapping is 1:n, we can lose renames here
159 # in particular, we find renames better than copies
150 # in particular, we find renames better than copies
160 f = _forwardcopies(b, a)
151 f = _forwardcopies(b, a)
161 r = {}
152 r = {}
162 for k, v in f.iteritems():
153 for k, v in f.iteritems():
163 r[v] = k
154 r[v] = k
164 return r
155 return r
165
156
166 def pathcopies(x, y):
157 def pathcopies(x, y):
167 '''find {dst@y: src@x} copy mapping for directed compare'''
158 '''find {dst@y: src@x} copy mapping for directed compare'''
168 if x == y or not x or not y:
159 if x == y or not x or not y:
169 return {}
160 return {}
170 a = y.ancestor(x)
161 a = y.ancestor(x)
171 if a == x:
162 if a == x:
172 return _forwardcopies(x, y)
163 return _forwardcopies(x, y)
173 if a == y:
164 if a == y:
174 return _backwardcopies(x, y)
165 return _backwardcopies(x, y)
175 return _chain(x, y, _backwardcopies(x, a), _forwardcopies(a, y))
166 return _chain(x, y, _backwardcopies(x, a), _forwardcopies(a, y))
176
167
177 def mergecopies(repo, c1, c2, ca):
168 def mergecopies(repo, c1, c2, ca):
178 """
169 """
179 Find moves and copies between context c1 and c2 that are relevant
170 Find moves and copies between context c1 and c2 that are relevant
180 for merging.
171 for merging.
181
172
182 Returns two dicts, "copy" and "diverge".
173 Returns two dicts, "copy" and "diverge".
183
174
184 "copy" is a mapping from destination name -> source name,
175 "copy" is a mapping from destination name -> source name,
185 where source is in c1 and destination is in c2 or vice-versa.
176 where source is in c1 and destination is in c2 or vice-versa.
186
177
187 "diverge" is a mapping of source name -> list of destination names
178 "diverge" is a mapping of source name -> list of destination names
188 for divergent renames.
179 for divergent renames.
189 """
180 """
190 # avoid silly behavior for update from empty dir
181 # avoid silly behavior for update from empty dir
191 if not c1 or not c2 or c1 == c2:
182 if not c1 or not c2 or c1 == c2:
192 return {}, {}
183 return {}, {}
193
184
194 # avoid silly behavior for parent -> working dir
185 # avoid silly behavior for parent -> working dir
195 if c2.node() is None and c1.node() == repo.dirstate.p1():
186 if c2.node() is None and c1.node() == repo.dirstate.p1():
196 return repo.dirstate.copies(), {}
187 return repo.dirstate.copies(), {}
197
188
198 limit = _findlimit(repo, c1.rev(), c2.rev())
189 limit = _findlimit(repo, c1.rev(), c2.rev())
199 if limit is None:
190 if limit is None:
200 # no common ancestor, no copies
191 # no common ancestor, no copies
201 return {}, {}
192 return {}, {}
202 m1 = c1.manifest()
193 m1 = c1.manifest()
203 m2 = c2.manifest()
194 m2 = c2.manifest()
204 ma = ca.manifest()
195 ma = ca.manifest()
205
196
206 def makectx(f, n):
197 def makectx(f, n):
207 if len(n) != 20: # in a working context?
198 if len(n) != 20: # in a working context?
208 if c1.rev() is None:
199 if c1.rev() is None:
209 return c1.filectx(f)
200 return c1.filectx(f)
210 return c2.filectx(f)
201 return c2.filectx(f)
211 return repo.filectx(f, fileid=n)
202 return repo.filectx(f, fileid=n)
212
203
213 ctx = util.lrucachefunc(makectx)
204 ctx = util.lrucachefunc(makectx)
214 copy = {}
205 copy = {}
215 fullcopy = {}
206 fullcopy = {}
216 diverge = {}
207 diverge = {}
217
208
218 def related(f1, f2, limit):
209 def related(f1, f2, limit):
219 # Walk back to common ancestor to see if the two files originate
210 # Walk back to common ancestor to see if the two files originate
220 # from the same file. Since workingfilectx's rev() is None it messes
211 # from the same file. Since workingfilectx's rev() is None it messes
221 # up the integer comparison logic, hence the pre-step check for
212 # up the integer comparison logic, hence the pre-step check for
222 # None (f1 and f2 can only be workingfilectx's initially).
213 # None (f1 and f2 can only be workingfilectx's initially).
223
214
224 if f1 == f2:
215 if f1 == f2:
225 return f1 # a match
216 return f1 # a match
226
217
227 g1, g2 = f1.ancestors(), f2.ancestors()
218 g1, g2 = f1.ancestors(), f2.ancestors()
228 try:
219 try:
229 f1r, f2r = f1.rev(), f2.rev()
220 f1r, f2r = f1.rev(), f2.rev()
230
221
231 if f1r is None:
222 if f1r is None:
232 f1 = g1.next()
223 f1 = g1.next()
233 if f2r is None:
224 if f2r is None:
234 f2 = g2.next()
225 f2 = g2.next()
235
226
236 while True:
227 while True:
237 f1r, f2r = f1.rev(), f2.rev()
228 f1r, f2r = f1.rev(), f2.rev()
238 if f1r > f2r:
229 if f1r > f2r:
239 f1 = g1.next()
230 f1 = g1.next()
240 elif f2r > f1r:
231 elif f2r > f1r:
241 f2 = g2.next()
232 f2 = g2.next()
242 elif f1 == f2:
233 elif f1 == f2:
243 return f1 # a match
234 return f1 # a match
244 elif f1r == f2r or f1r < limit or f2r < limit:
235 elif f1r == f2r or f1r < limit or f2r < limit:
245 return False # copy no longer relevant
236 return False # copy no longer relevant
246 except StopIteration:
237 except StopIteration:
247 return False
238 return False
248
239
249 def checkcopies(f, m1, m2):
240 def checkcopies(f, m1, m2):
250 '''check possible copies of f from m1 to m2'''
241 '''check possible copies of f from m1 to m2'''
251 of = None
242 of = None
252 seen = set([f])
243 seen = set([f])
253 for oc in ctx(f, m1[f]).ancestors():
244 for oc in ctx(f, m1[f]).ancestors():
254 ocr = oc.rev()
245 ocr = oc.rev()
255 of = oc.path()
246 of = oc.path()
256 if of in seen:
247 if of in seen:
257 # check limit late - grab last rename before
248 # check limit late - grab last rename before
258 if ocr < limit:
249 if ocr < limit:
259 break
250 break
260 continue
251 continue
261 seen.add(of)
252 seen.add(of)
262
253
263 fullcopy[f] = of # remember for dir rename detection
254 fullcopy[f] = of # remember for dir rename detection
264 if of not in m2:
255 if of not in m2:
265 continue # no match, keep looking
256 continue # no match, keep looking
266 if m2[of] == ma.get(of):
257 if m2[of] == ma.get(of):
267 break # no merge needed, quit early
258 break # no merge needed, quit early
268 c2 = ctx(of, m2[of])
259 c2 = ctx(of, m2[of])
269 cr = related(oc, c2, ca.rev())
260 cr = related(oc, c2, ca.rev())
270 if cr and (of == f or of == c2.path()): # non-divergent
261 if cr and (of == f or of == c2.path()): # non-divergent
271 copy[f] = of
262 copy[f] = of
272 of = None
263 of = None
273 break
264 break
274
265
275 if of in ma:
266 if of in ma:
276 diverge.setdefault(of, []).append(f)
267 diverge.setdefault(of, []).append(f)
277
268
278 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
269 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
279
270
280 u1 = _nonoverlap(m1, m2, ma)
271 u1 = _nonoverlap(m1, m2, ma)
281 u2 = _nonoverlap(m2, m1, ma)
272 u2 = _nonoverlap(m2, m1, ma)
282
273
283 if u1:
274 if u1:
284 repo.ui.debug(" unmatched files in local:\n %s\n"
275 repo.ui.debug(" unmatched files in local:\n %s\n"
285 % "\n ".join(u1))
276 % "\n ".join(u1))
286 if u2:
277 if u2:
287 repo.ui.debug(" unmatched files in other:\n %s\n"
278 repo.ui.debug(" unmatched files in other:\n %s\n"
288 % "\n ".join(u2))
279 % "\n ".join(u2))
289
280
290 for f in u1:
281 for f in u1:
291 checkcopies(f, m1, m2)
282 checkcopies(f, m1, m2)
292 for f in u2:
283 for f in u2:
293 checkcopies(f, m2, m1)
284 checkcopies(f, m2, m1)
294
285
295 diverge2 = set()
286 diverge2 = set()
296 for of, fl in diverge.items():
287 for of, fl in diverge.items():
297 if len(fl) == 1 or of in c2:
288 if len(fl) == 1 or of in c2:
298 del diverge[of] # not actually divergent, or not a rename
289 del diverge[of] # not actually divergent, or not a rename
299 else:
290 else:
300 diverge2.update(fl) # reverse map for below
291 diverge2.update(fl) # reverse map for below
301
292
302 if fullcopy:
293 if fullcopy:
303 repo.ui.debug(" all copies found (* = to merge, ! = divergent):\n")
294 repo.ui.debug(" all copies found (* = to merge, ! = divergent):\n")
304 for f in fullcopy:
295 for f in fullcopy:
305 note = ""
296 note = ""
306 if f in copy:
297 if f in copy:
307 note += "*"
298 note += "*"
308 if f in diverge2:
299 if f in diverge2:
309 note += "!"
300 note += "!"
310 repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))
301 repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))
311 del diverge2
302 del diverge2
312
303
313 if not fullcopy:
304 if not fullcopy:
314 return copy, diverge
305 return copy, diverge
315
306
316 repo.ui.debug(" checking for directory renames\n")
307 repo.ui.debug(" checking for directory renames\n")
317
308
318 # generate a directory move map
309 # generate a directory move map
319 d1, d2 = _dirs(m1), _dirs(m2)
310 d1, d2 = c1.dirs(), c2.dirs()
320 invalid = set()
311 invalid = set([""])
321 dirmove = {}
312 dirmove = {}
322
313
323 # examine each file copy for a potential directory move, which is
314 # examine each file copy for a potential directory move, which is
324 # when all the files in a directory are moved to a new directory
315 # when all the files in a directory are moved to a new directory
325 for dst, src in fullcopy.iteritems():
316 for dst, src in fullcopy.iteritems():
326 dsrc, ddst = _dirname(src), _dirname(dst)
317 dsrc, ddst = _dirname(src), _dirname(dst)
327 if dsrc in invalid:
318 if dsrc in invalid:
328 # already seen to be uninteresting
319 # already seen to be uninteresting
329 continue
320 continue
330 elif dsrc in d1 and ddst in d1:
321 elif dsrc in d1 and ddst in d1:
331 # directory wasn't entirely moved locally
322 # directory wasn't entirely moved locally
332 invalid.add(dsrc)
323 invalid.add(dsrc)
333 elif dsrc in d2 and ddst in d2:
324 elif dsrc in d2 and ddst in d2:
334 # directory wasn't entirely moved remotely
325 # directory wasn't entirely moved remotely
335 invalid.add(dsrc)
326 invalid.add(dsrc)
336 elif dsrc in dirmove and dirmove[dsrc] != ddst:
327 elif dsrc in dirmove and dirmove[dsrc] != ddst:
337 # files from the same directory moved to two different places
328 # files from the same directory moved to two different places
338 invalid.add(dsrc)
329 invalid.add(dsrc)
339 else:
330 else:
340 # looks good so far
331 # looks good so far
341 dirmove[dsrc + "/"] = ddst + "/"
332 dirmove[dsrc + "/"] = ddst + "/"
342
333
343 for i in invalid:
334 for i in invalid:
344 if i in dirmove:
335 if i in dirmove:
345 del dirmove[i]
336 del dirmove[i]
346 del d1, d2, invalid
337 del d1, d2, invalid
347
338
348 if not dirmove:
339 if not dirmove:
349 return copy, diverge
340 return copy, diverge
350
341
351 for d in dirmove:
342 for d in dirmove:
352 repo.ui.debug(" dir %s -> %s\n" % (d, dirmove[d]))
343 repo.ui.debug(" dir %s -> %s\n" % (d, dirmove[d]))
353
344
354 # check unaccounted nonoverlapping files against directory moves
345 # check unaccounted nonoverlapping files against directory moves
355 for f in u1 + u2:
346 for f in u1 + u2:
356 if f not in fullcopy:
347 if f not in fullcopy:
357 for d in dirmove:
348 for d in dirmove:
358 if f.startswith(d):
349 if f.startswith(d):
359 # new file added in a directory that was moved, move it
350 # new file added in a directory that was moved, move it
360 df = dirmove[d] + f[len(d):]
351 df = dirmove[d] + f[len(d):]
361 if df not in copy:
352 if df not in copy:
362 copy[f] = df
353 copy[f] = df
363 repo.ui.debug(" file %s -> %s\n" % (f, copy[f]))
354 repo.ui.debug(" file %s -> %s\n" % (f, copy[f]))
364 break
355 break
365
356
366 return copy, diverge
357 return copy, diverge
General Comments 0
You need to be logged in to leave comments. Login now