##// END OF EJS Templates
copies: add docstring for mergecopies
Matt Mackall -
r16168:7bbabfe2 default
parent child Browse files
Show More
@@ -1,357 +1,366 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import util
8 import util
9 import heapq
9 import heapq
10
10
11 def _nonoverlap(d1, d2, d3):
11 def _nonoverlap(d1, d2, d3):
12 "Return list of elements in d1 not in d2 or d3"
12 "Return list of elements in d1 not in d2 or d3"
13 return sorted([d for d in d1 if d not in d3 and d not in d2])
13 return sorted([d for d in d1 if d not in d3 and d not in d2])
14
14
15 def _dirname(f):
15 def _dirname(f):
16 s = f.rfind("/")
16 s = f.rfind("/")
17 if s == -1:
17 if s == -1:
18 return ""
18 return ""
19 return f[:s]
19 return f[:s]
20
20
21 def _dirs(files):
21 def _dirs(files):
22 d = set()
22 d = set()
23 for f in files:
23 for f in files:
24 f = _dirname(f)
24 f = _dirname(f)
25 while f not in d:
25 while f not in d:
26 d.add(f)
26 d.add(f)
27 f = _dirname(f)
27 f = _dirname(f)
28 return d
28 return d
29
29
30 def _findlimit(repo, a, b):
30 def _findlimit(repo, a, b):
31 """Find the earliest revision that's an ancestor of a or b but not both,
31 """Find the earliest revision that's an ancestor of a or b but not both,
32 None if no such revision exists.
32 None if no such revision exists.
33 """
33 """
34 # basic idea:
34 # basic idea:
35 # - mark a and b with different sides
35 # - mark a and b with different sides
36 # - if a parent's children are all on the same side, the parent is
36 # - if a parent's children are all on the same side, the parent is
37 # on that side, otherwise it is on no side
37 # on that side, otherwise it is on no side
38 # - walk the graph in topological order with the help of a heap;
38 # - walk the graph in topological order with the help of a heap;
39 # - add unseen parents to side map
39 # - add unseen parents to side map
40 # - clear side of any parent that has children on different sides
40 # - clear side of any parent that has children on different sides
41 # - track number of interesting revs that might still be on a side
41 # - track number of interesting revs that might still be on a side
42 # - track the lowest interesting rev seen
42 # - track the lowest interesting rev seen
43 # - quit when interesting revs is zero
43 # - quit when interesting revs is zero
44
44
45 cl = repo.changelog
45 cl = repo.changelog
46 working = len(cl) # pseudo rev for the working directory
46 working = len(cl) # pseudo rev for the working directory
47 if a is None:
47 if a is None:
48 a = working
48 a = working
49 if b is None:
49 if b is None:
50 b = working
50 b = working
51
51
52 side = {a: -1, b: 1}
52 side = {a: -1, b: 1}
53 visit = [-a, -b]
53 visit = [-a, -b]
54 heapq.heapify(visit)
54 heapq.heapify(visit)
55 interesting = len(visit)
55 interesting = len(visit)
56 hascommonancestor = False
56 hascommonancestor = False
57 limit = working
57 limit = working
58
58
59 while interesting:
59 while interesting:
60 r = -heapq.heappop(visit)
60 r = -heapq.heappop(visit)
61 if r == working:
61 if r == working:
62 parents = [cl.rev(p) for p in repo.dirstate.parents()]
62 parents = [cl.rev(p) for p in repo.dirstate.parents()]
63 else:
63 else:
64 parents = cl.parentrevs(r)
64 parents = cl.parentrevs(r)
65 for p in parents:
65 for p in parents:
66 if p < 0:
66 if p < 0:
67 continue
67 continue
68 if p not in side:
68 if p not in side:
69 # first time we see p; add it to visit
69 # first time we see p; add it to visit
70 side[p] = side[r]
70 side[p] = side[r]
71 if side[p]:
71 if side[p]:
72 interesting += 1
72 interesting += 1
73 heapq.heappush(visit, -p)
73 heapq.heappush(visit, -p)
74 elif side[p] and side[p] != side[r]:
74 elif side[p] and side[p] != side[r]:
75 # p was interesting but now we know better
75 # p was interesting but now we know better
76 side[p] = 0
76 side[p] = 0
77 interesting -= 1
77 interesting -= 1
78 hascommonancestor = True
78 hascommonancestor = True
79 if side[r]:
79 if side[r]:
80 limit = r # lowest rev visited
80 limit = r # lowest rev visited
81 interesting -= 1
81 interesting -= 1
82
82
83 if not hascommonancestor:
83 if not hascommonancestor:
84 return None
84 return None
85 return limit
85 return limit
86
86
87 def _chain(src, dst, a, b):
87 def _chain(src, dst, a, b):
88 '''chain two sets of copies a->b'''
88 '''chain two sets of copies a->b'''
89 t = a.copy()
89 t = a.copy()
90 for k, v in b.iteritems():
90 for k, v in b.iteritems():
91 if v in t:
91 if v in t:
92 # found a chain
92 # found a chain
93 if t[v] != k:
93 if t[v] != k:
94 # file wasn't renamed back to itself
94 # file wasn't renamed back to itself
95 t[k] = t[v]
95 t[k] = t[v]
96 if v not in dst:
96 if v not in dst:
97 # chain was a rename, not a copy
97 # chain was a rename, not a copy
98 del t[v]
98 del t[v]
99 if v in src:
99 if v in src:
100 # file is a copy of an existing file
100 # file is a copy of an existing file
101 t[k] = v
101 t[k] = v
102
102
103 # remove criss-crossed copies
103 # remove criss-crossed copies
104 for k, v in t.items():
104 for k, v in t.items():
105 if k in src and v in dst:
105 if k in src and v in dst:
106 del t[k]
106 del t[k]
107
107
108 return t
108 return t
109
109
110 def _tracefile(fctx, actx):
110 def _tracefile(fctx, actx):
111 '''return file context that is the ancestor of fctx present in actx'''
111 '''return file context that is the ancestor of fctx present in actx'''
112 stop = actx.rev()
112 stop = actx.rev()
113 am = actx.manifest()
113 am = actx.manifest()
114
114
115 for f in fctx.ancestors():
115 for f in fctx.ancestors():
116 if am.get(f.path(), None) == f.filenode():
116 if am.get(f.path(), None) == f.filenode():
117 return f
117 return f
118 if f.rev() < stop:
118 if f.rev() < stop:
119 return None
119 return None
120
120
121 def _dirstatecopies(d):
121 def _dirstatecopies(d):
122 ds = d._repo.dirstate
122 ds = d._repo.dirstate
123 c = ds.copies().copy()
123 c = ds.copies().copy()
124 for k in c.keys():
124 for k in c.keys():
125 if ds[k] not in 'anm':
125 if ds[k] not in 'anm':
126 del c[k]
126 del c[k]
127 return c
127 return c
128
128
129 def _forwardcopies(a, b):
129 def _forwardcopies(a, b):
130 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
130 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
131
131
132 # check for working copy
132 # check for working copy
133 w = None
133 w = None
134 if b.rev() is None:
134 if b.rev() is None:
135 w = b
135 w = b
136 b = w.p1()
136 b = w.p1()
137 if a == b:
137 if a == b:
138 # short-circuit to avoid issues with merge states
138 # short-circuit to avoid issues with merge states
139 return _dirstatecopies(w)
139 return _dirstatecopies(w)
140
140
141 # find where new files came from
141 # find where new files came from
142 # we currently don't try to find where old files went, too expensive
142 # we currently don't try to find where old files went, too expensive
143 # this means we can miss a case like 'hg rm b; hg cp a b'
143 # this means we can miss a case like 'hg rm b; hg cp a b'
144 cm = {}
144 cm = {}
145 for f in b:
145 for f in b:
146 if f not in a:
146 if f not in a:
147 ofctx = _tracefile(b[f], a)
147 ofctx = _tracefile(b[f], a)
148 if ofctx:
148 if ofctx:
149 cm[f] = ofctx.path()
149 cm[f] = ofctx.path()
150
150
151 # combine copies from dirstate if necessary
151 # combine copies from dirstate if necessary
152 if w is not None:
152 if w is not None:
153 cm = _chain(a, w, cm, _dirstatecopies(w))
153 cm = _chain(a, w, cm, _dirstatecopies(w))
154
154
155 return cm
155 return cm
156
156
157 def _backwardcopies(a, b):
157 def _backwardcopies(a, b):
158 # because the forward mapping is 1:n, we can lose renames here
158 # because the forward mapping is 1:n, we can lose renames here
159 # in particular, we find renames better than copies
159 # in particular, we find renames better than copies
160 f = _forwardcopies(b, a)
160 f = _forwardcopies(b, a)
161 r = {}
161 r = {}
162 for k, v in f.iteritems():
162 for k, v in f.iteritems():
163 r[v] = k
163 r[v] = k
164 return r
164 return r
165
165
166 def pathcopies(x, y):
166 def pathcopies(x, y):
167 '''find {dst@y: src@x} copy mapping for directed compare'''
167 '''find {dst@y: src@x} copy mapping for directed compare'''
168 if x == y or not x or not y:
168 if x == y or not x or not y:
169 return {}
169 return {}
170 a = y.ancestor(x)
170 a = y.ancestor(x)
171 if a == x:
171 if a == x:
172 return _forwardcopies(x, y)
172 return _forwardcopies(x, y)
173 if a == y:
173 if a == y:
174 return _backwardcopies(x, y)
174 return _backwardcopies(x, y)
175 return _chain(x, y, _backwardcopies(x, a), _forwardcopies(a, y))
175 return _chain(x, y, _backwardcopies(x, a), _forwardcopies(a, y))
176
176
177 def mergecopies(repo, c1, c2, ca, checkdirs=True):
177 def mergecopies(repo, c1, c2, ca, checkdirs=True):
178 """
178 """
179 Find moves and copies between context c1 and c2
179 Find moves and copies between context c1 and c2 that are relevant
180 for merging.
181
182 Returns two dicts, "copy" and "diverge".
183
184 "copy" is a mapping from source name -> destination name,
185 where source is in c1 and destination is in c2 or vice-versa.
186
187 "diverge" is a mapping of source name -> list of destination names
188 for divergent renames.
180 """
189 """
181 # avoid silly behavior for update from empty dir
190 # avoid silly behavior for update from empty dir
182 if not c1 or not c2 or c1 == c2:
191 if not c1 or not c2 or c1 == c2:
183 return {}, {}
192 return {}, {}
184
193
185 # avoid silly behavior for parent -> working dir
194 # avoid silly behavior for parent -> working dir
186 if c2.node() is None and c1.node() == repo.dirstate.p1():
195 if c2.node() is None and c1.node() == repo.dirstate.p1():
187 return repo.dirstate.copies(), {}
196 return repo.dirstate.copies(), {}
188
197
189 limit = _findlimit(repo, c1.rev(), c2.rev())
198 limit = _findlimit(repo, c1.rev(), c2.rev())
190 if limit is None:
199 if limit is None:
191 # no common ancestor, no copies
200 # no common ancestor, no copies
192 return {}, {}
201 return {}, {}
193 m1 = c1.manifest()
202 m1 = c1.manifest()
194 m2 = c2.manifest()
203 m2 = c2.manifest()
195 ma = ca.manifest()
204 ma = ca.manifest()
196
205
197 def makectx(f, n):
206 def makectx(f, n):
198 if len(n) != 20: # in a working context?
207 if len(n) != 20: # in a working context?
199 if c1.rev() is None:
208 if c1.rev() is None:
200 return c1.filectx(f)
209 return c1.filectx(f)
201 return c2.filectx(f)
210 return c2.filectx(f)
202 return repo.filectx(f, fileid=n)
211 return repo.filectx(f, fileid=n)
203
212
204 ctx = util.lrucachefunc(makectx)
213 ctx = util.lrucachefunc(makectx)
205 copy = {}
214 copy = {}
206 fullcopy = {}
215 fullcopy = {}
207 diverge = {}
216 diverge = {}
208
217
209 def related(f1, f2, limit):
218 def related(f1, f2, limit):
210 # Walk back to common ancestor to see if the two files originate
219 # Walk back to common ancestor to see if the two files originate
211 # from the same file. Since workingfilectx's rev() is None it messes
220 # from the same file. Since workingfilectx's rev() is None it messes
212 # up the integer comparison logic, hence the pre-step check for
221 # up the integer comparison logic, hence the pre-step check for
213 # None (f1 and f2 can only be workingfilectx's initially).
222 # None (f1 and f2 can only be workingfilectx's initially).
214
223
215 if f1 == f2:
224 if f1 == f2:
216 return f1 # a match
225 return f1 # a match
217
226
218 g1, g2 = f1.ancestors(), f2.ancestors()
227 g1, g2 = f1.ancestors(), f2.ancestors()
219 try:
228 try:
220 f1r, f2r = f1.rev(), f2.rev()
229 f1r, f2r = f1.rev(), f2.rev()
221
230
222 if f1r is None:
231 if f1r is None:
223 f1 = g1.next()
232 f1 = g1.next()
224 if f2r is None:
233 if f2r is None:
225 f2 = g2.next()
234 f2 = g2.next()
226
235
227 while True:
236 while True:
228 f1r, f2r = f1.rev(), f2.rev()
237 f1r, f2r = f1.rev(), f2.rev()
229 if f1r > f2r:
238 if f1r > f2r:
230 f1 = g1.next()
239 f1 = g1.next()
231 elif f2r > f1r:
240 elif f2r > f1r:
232 f2 = g2.next()
241 f2 = g2.next()
233 elif f1 == f2:
242 elif f1 == f2:
234 return f1 # a match
243 return f1 # a match
235 elif f1r == f2r or f1r < limit or f2r < limit:
244 elif f1r == f2r or f1r < limit or f2r < limit:
236 return False # copy no longer relevant
245 return False # copy no longer relevant
237 except StopIteration:
246 except StopIteration:
238 return False
247 return False
239
248
240 def checkcopies(f, m1, m2):
249 def checkcopies(f, m1, m2):
241 '''check possible copies of f from m1 to m2'''
250 '''check possible copies of f from m1 to m2'''
242 of = None
251 of = None
243 seen = set([f])
252 seen = set([f])
244 for oc in ctx(f, m1[f]).ancestors():
253 for oc in ctx(f, m1[f]).ancestors():
245 ocr = oc.rev()
254 ocr = oc.rev()
246 of = oc.path()
255 of = oc.path()
247 if of in seen:
256 if of in seen:
248 # check limit late - grab last rename before
257 # check limit late - grab last rename before
249 if ocr < limit:
258 if ocr < limit:
250 break
259 break
251 continue
260 continue
252 seen.add(of)
261 seen.add(of)
253
262
254 fullcopy[f] = of # remember for dir rename detection
263 fullcopy[f] = of # remember for dir rename detection
255 if of not in m2:
264 if of not in m2:
256 continue # no match, keep looking
265 continue # no match, keep looking
257 if m2[of] == ma.get(of):
266 if m2[of] == ma.get(of):
258 break # no merge needed, quit early
267 break # no merge needed, quit early
259 c2 = ctx(of, m2[of])
268 c2 = ctx(of, m2[of])
260 cr = related(oc, c2, ca.rev())
269 cr = related(oc, c2, ca.rev())
261 if cr and (of == f or of == c2.path()): # non-divergent
270 if cr and (of == f or of == c2.path()): # non-divergent
262 copy[f] = of
271 copy[f] = of
263 of = None
272 of = None
264 break
273 break
265
274
266 if of in ma:
275 if of in ma:
267 diverge.setdefault(of, []).append(f)
276 diverge.setdefault(of, []).append(f)
268
277
269 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
278 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
270
279
271 u1 = _nonoverlap(m1, m2, ma)
280 u1 = _nonoverlap(m1, m2, ma)
272 u2 = _nonoverlap(m2, m1, ma)
281 u2 = _nonoverlap(m2, m1, ma)
273
282
274 if u1:
283 if u1:
275 repo.ui.debug(" unmatched files in local:\n %s\n"
284 repo.ui.debug(" unmatched files in local:\n %s\n"
276 % "\n ".join(u1))
285 % "\n ".join(u1))
277 if u2:
286 if u2:
278 repo.ui.debug(" unmatched files in other:\n %s\n"
287 repo.ui.debug(" unmatched files in other:\n %s\n"
279 % "\n ".join(u2))
288 % "\n ".join(u2))
280
289
281 for f in u1:
290 for f in u1:
282 checkcopies(f, m1, m2)
291 checkcopies(f, m1, m2)
283 for f in u2:
292 for f in u2:
284 checkcopies(f, m2, m1)
293 checkcopies(f, m2, m1)
285
294
286 diverge2 = set()
295 diverge2 = set()
287 for of, fl in diverge.items():
296 for of, fl in diverge.items():
288 if len(fl) == 1 or of in c2:
297 if len(fl) == 1 or of in c2:
289 del diverge[of] # not actually divergent, or not a rename
298 del diverge[of] # not actually divergent, or not a rename
290 else:
299 else:
291 diverge2.update(fl) # reverse map for below
300 diverge2.update(fl) # reverse map for below
292
301
293 if fullcopy:
302 if fullcopy:
294 repo.ui.debug(" all copies found (* = to merge, ! = divergent):\n")
303 repo.ui.debug(" all copies found (* = to merge, ! = divergent):\n")
295 for f in fullcopy:
304 for f in fullcopy:
296 note = ""
305 note = ""
297 if f in copy:
306 if f in copy:
298 note += "*"
307 note += "*"
299 if f in diverge2:
308 if f in diverge2:
300 note += "!"
309 note += "!"
301 repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))
310 repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))
302 del diverge2
311 del diverge2
303
312
304 if not fullcopy or not checkdirs:
313 if not fullcopy or not checkdirs:
305 return copy, diverge
314 return copy, diverge
306
315
307 repo.ui.debug(" checking for directory renames\n")
316 repo.ui.debug(" checking for directory renames\n")
308
317
309 # generate a directory move map
318 # generate a directory move map
310 d1, d2 = _dirs(m1), _dirs(m2)
319 d1, d2 = _dirs(m1), _dirs(m2)
311 invalid = set()
320 invalid = set()
312 dirmove = {}
321 dirmove = {}
313
322
314 # examine each file copy for a potential directory move, which is
323 # examine each file copy for a potential directory move, which is
315 # when all the files in a directory are moved to a new directory
324 # when all the files in a directory are moved to a new directory
316 for dst, src in fullcopy.iteritems():
325 for dst, src in fullcopy.iteritems():
317 dsrc, ddst = _dirname(src), _dirname(dst)
326 dsrc, ddst = _dirname(src), _dirname(dst)
318 if dsrc in invalid:
327 if dsrc in invalid:
319 # already seen to be uninteresting
328 # already seen to be uninteresting
320 continue
329 continue
321 elif dsrc in d1 and ddst in d1:
330 elif dsrc in d1 and ddst in d1:
322 # directory wasn't entirely moved locally
331 # directory wasn't entirely moved locally
323 invalid.add(dsrc)
332 invalid.add(dsrc)
324 elif dsrc in d2 and ddst in d2:
333 elif dsrc in d2 and ddst in d2:
325 # directory wasn't entirely moved remotely
334 # directory wasn't entirely moved remotely
326 invalid.add(dsrc)
335 invalid.add(dsrc)
327 elif dsrc in dirmove and dirmove[dsrc] != ddst:
336 elif dsrc in dirmove and dirmove[dsrc] != ddst:
328 # files from the same directory moved to two different places
337 # files from the same directory moved to two different places
329 invalid.add(dsrc)
338 invalid.add(dsrc)
330 else:
339 else:
331 # looks good so far
340 # looks good so far
332 dirmove[dsrc + "/"] = ddst + "/"
341 dirmove[dsrc + "/"] = ddst + "/"
333
342
334 for i in invalid:
343 for i in invalid:
335 if i in dirmove:
344 if i in dirmove:
336 del dirmove[i]
345 del dirmove[i]
337 del d1, d2, invalid
346 del d1, d2, invalid
338
347
339 if not dirmove:
348 if not dirmove:
340 return copy, diverge
349 return copy, diverge
341
350
342 for d in dirmove:
351 for d in dirmove:
343 repo.ui.debug(" dir %s -> %s\n" % (d, dirmove[d]))
352 repo.ui.debug(" dir %s -> %s\n" % (d, dirmove[d]))
344
353
345 # check unaccounted nonoverlapping files against directory moves
354 # check unaccounted nonoverlapping files against directory moves
346 for f in u1 + u2:
355 for f in u1 + u2:
347 if f not in fullcopy:
356 if f not in fullcopy:
348 for d in dirmove:
357 for d in dirmove:
349 if f.startswith(d):
358 if f.startswith(d):
350 # new file added in a directory that was moved, move it
359 # new file added in a directory that was moved, move it
351 df = dirmove[d] + f[len(d):]
360 df = dirmove[d] + f[len(d):]
352 if df not in copy:
361 if df not in copy:
353 copy[f] = df
362 copy[f] = df
354 repo.ui.debug(" file %s -> %s\n" % (f, copy[f]))
363 repo.ui.debug(" file %s -> %s\n" % (f, copy[f]))
355 break
364 break
356
365
357 return copy, diverge
366 return copy, diverge
General Comments 0
You need to be logged in to leave comments. Login now