##// END OF EJS Templates
copies: remove stray print
Matt Mackall -
r15994:3c2ce5c2 stable
parent child Browse files
Show More
@@ -1,358 +1,357 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import util
8 import util
9 import heapq
9 import heapq
10
10
def _nonoverlap(d1, d2, d3):
    """Return a sorted list of the elements of d1 found in neither d2 nor d3.

    d1 is iterated; d2 and d3 only need to support membership tests.
    """
    return sorted(d for d in d1 if d not in d3 and d not in d2)
14
14
def _dirname(f):
    """Return the directory part of the '/'-separated path f.

    Everything before the last '/' is returned, without a trailing slash.
    A path containing no '/' yields the empty string.
    """
    s = f.rfind("/")
    if s == -1:
        return ""
    return f[:s]
20
20
def _dirs(files):
    """Return the set of every directory that contains one of files.

    All ancestor directories of each path are included. When files is
    non-empty the top level shows up as the empty string.
    """
    d = set()
    for f in files:
        f = _dirname(f)
        # climb towards the root until we hit a directory that is already
        # recorded; "" is its own parent, so the climb always terminates
        while f not in d:
            d.add(f)
            f = _dirname(f)
    return d
29
29
def _findlimit(repo, a, b):
    """Find the earliest revision that's an ancestor of a or b but not both,
    None if no such revision exists.

    a and b are revision numbers; None stands for the working directory,
    which is treated as a pseudo revision numbered len(repo.changelog).
    """
    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    # heapq is a min-heap; revs are stored negated so the highest rev is
    # popped first
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            # the pseudo rev has no changelog entry; ask the dirstate
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                # negative revs (missing parents) are skipped
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None
    return limit
86
86
def _chain(src, dst, a, b):
    '''chain two sets of copies a->b

    a and b are {destination: source} dicts for two consecutive steps.
    src and dst are the start and end points of the combined step and
    only need to support membership tests on file names. A new dict is
    returned; a and b are left unmodified.
    '''
    t = a.copy()
    for k, v in b.items():
        if v in t:
            # found a chain
            if t[v] != k:
                # file wasn't renamed back to itself
                t[k] = t[v]
            if v not in dst:
                # chain was a rename, not a copy
                del t[v]
        if v in src:
            # file is a copy of an existing file
            t[k] = v

    # remove criss-crossed copies
    # (iterate over a snapshot: entries are deleted while looping)
    for k, v in list(t.items()):
        if k in src and v in dst:
            del t[k]

    return t
110
109
def _tracefile(fctx, actx):
    '''return file context that is the ancestor of fctx present in actx

    Walks fctx.ancestors() and returns the first ancestor whose file node
    equals the one recorded for its path in actx's manifest. Returns None
    as soon as an ancestor with a revision lower than actx.rev() is
    reached, or when the ancestors run out.
    '''
    stop = actx.rev()
    am = actx.manifest()

    for f in fctx.ancestors():
        if am.get(f.path(), None) == f.filenode():
            return f
        if f.rev() < stop:
            # walked past actx without finding a match
            return None
    return None
121
120
def _dirstatecopies(d):
    '''return the dirstate copy records relevant to context d

    Works on a copy of d._repo.dirstate.copies() and keeps only the
    entries whose dirstate state is one of the characters in 'anm'
    (presumably added / normal / merged -- confirm against dirstate).
    '''
    ds = d._repo.dirstate
    c = ds.copies().copy()
    # iterate over a snapshot of the keys: entries are deleted in the loop
    for k in list(c.keys()):
        if ds[k] not in 'anm':
            del c[k]
    return c
129
128
def _forwardcopies(a, b):
    '''find {dst@b: src@a} copy mapping where a is an ancestor of b

    b may be a working context (b.rev() is None). In that case the search
    runs against its first parent and the copies recorded in the dirstate
    are chained on top of the result.
    '''

    # check for working copy
    w = None
    if b.rev() is None:
        w = b
        b = w.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(w)

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}
    for f in b:
        if f not in a:
            ofctx = _tracefile(b[f], a)
            if ofctx:
                cm[f] = ofctx.path()

    # combine copies from dirstate if necessary
    if w is not None:
        cm = _chain(a, w, cm, _dirstatecopies(w))

    return cm
157
156
def _backwardcopies(a, b):
    '''find copies going backwards from a to b by inverting the forward
    mapping computed from b to a

    When several destinations share one source, only one of them
    survives the inversion.
    '''
    # because the forward mapping is 1:n, we can lose renames here
    # in particular, we find renames better than copies
    f = _forwardcopies(b, a)
    r = {}
    for k, v in f.iteritems():
        r[v] = k
    return r
166
165
def pathcopies(x, y):
    '''find {dst@y: src@x} copy mapping for directed compare

    Returns an empty dict when x and y are equal or either one is false.
    Otherwise the common ancestor of the two decides the strategy:
    forward only (x is the ancestor), backward only (y is the ancestor),
    or backward from x to the ancestor chained with forward to y.
    '''
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        return _forwardcopies(x, y)
    if a == y:
        return _backwardcopies(x, y)
    return _chain(x, y, _backwardcopies(x, a), _forwardcopies(a, y))
177
176
def mergecopies(repo, c1, c2, ca, checkdirs=True):
    """
    Find moves and copies between context c1 and c2

    ca is the context whose manifest is used as the merge ancestor.
    Returns a (copy, diverge) tuple of dicts:

    - copy maps a file name to the name it should be merged with
    - diverge maps a name from the ancestor to the list of names it
      ended up under when those differ

    Directory rename detection is skipped when checkdirs is false.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}
    m1 = c1.manifest()
    m2 = c2.manifest()
    ma = ca.manifest()

    def makectx(f, n):
        if len(n) != 20: # in a working context?
            if c1.rev() is None:
                return c1.filectx(f)
            return c2.filectx(f)
        return repo.filectx(f, fileid=n)

    ctx = util.lrucachefunc(makectx)
    copy = {}
    fullcopy = {}
    diverge = {}

    def related(f1, f2, limit):
        # Walk back to common ancestor to see if the two files originate
        # from the same file. Since workingfilectx's rev() is None it messes
        # up the integer comparison logic, hence the pre-step check for
        # None (f1 and f2 can only be workingfilectx's initially).

        if f1 == f2:
            return f1 # a match

        g1, g2 = f1.ancestors(), f2.ancestors()
        try:
            f1r, f2r = f1.rev(), f2.rev()

            if f1r is None:
                f1 = g1.next()
            if f2r is None:
                f2 = g2.next()

            while True:
                f1r, f2r = f1.rev(), f2.rev()
                if f1r > f2r:
                    f1 = g1.next()
                elif f2r > f1r:
                    f2 = g2.next()
                elif f1 == f2:
                    return f1 # a match
                elif f1r == f2r or f1r < limit or f2r < limit:
                    return False # copy no longer relevant
        except StopIteration:
            # one of the histories ran out before a match was found
            return False

    def checkcopies(f, m1, m2):
        '''check possible copies of f from m1 to m2'''
        of = None
        seen = set([f])
        for oc in ctx(f, m1[f]).ancestors():
            ocr = oc.rev()
            of = oc.path()
            if of in seen:
                # check limit late - grab last rename before
                if ocr < limit:
                    break
                continue
            seen.add(of)

            fullcopy[f] = of # remember for dir rename detection
            if of not in m2:
                continue # no match, keep looking
            if m2[of] == ma.get(of):
                break # no merge needed, quit early
            c2 = ctx(of, m2[of])
            cr = related(oc, c2, ca.rev())
            if cr and (of == f or of == c2.path()): # non-divergent
                copy[f] = of
                of = None
                break

        if of in ma:
            diverge.setdefault(of, []).append(f)

    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    u1 = _nonoverlap(m1, m2, ma)
    u2 = _nonoverlap(m2, m1, ma)

    if u1:
        repo.ui.debug(" unmatched files in local:\n %s\n"
                      % "\n ".join(u1))
    if u2:
        repo.ui.debug(" unmatched files in other:\n %s\n"
                      % "\n ".join(u2))

    for f in u1:
        checkcopies(f, m1, m2)
    for f in u2:
        checkcopies(f, m2, m1)

    diverge2 = set()
    # iterate over a snapshot: entries are deleted while looping
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
        else:
            diverge2.update(fl) # reverse map for below

    if fullcopy:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent):\n")
        for f in fullcopy:
            note = ""
            if f in copy:
                note += "*"
            if f in diverge2:
                note += "!"
            repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))
    del diverge2

    if not fullcopy or not checkdirs:
        return copy, diverge

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = _dirs(m1), _dirs(m2)
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    #
    # invalid holds bare directory names while dirmove is keyed (and
    # valued) by names with a trailing "/", so lookups into dirmove must
    # add the slash
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = _dirname(src), _dirname(dst)
        srckey, dstval = dsrc + "/", ddst + "/"
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif srckey in dirmove and dirmove[srckey] != dstval:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[srckey] = dstval

    # drop moves recorded before their directory turned out to be invalid
    for i in invalid:
        dirmove.pop(i + "/", None)
    del d1, d2, invalid

    if not dirmove:
        return copy, diverge

    for d in dirmove:
        repo.ui.debug(" dir %s -> %s\n" % (d, dirmove[d]))

    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        copy[f] = df
                        repo.ui.debug(" file %s -> %s\n" % (f, copy[f]))
                    break

    return copy, diverge
General Comments 0
You need to be logged in to leave comments. Login now