##// END OF EJS Templates
copies: refactor symmetricdifference as _findlimit...
Matt Mackall -
r6431:a42d8d3e default
parent child Browse files
Show More
@@ -1,230 +1,233 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
6 # of the GNU General Public License, incorporated herein by reference.
7
7
8 from node import nullid, nullrev
8 from node import nullid, nullrev
9 from i18n import _
9 from i18n import _
10 import util, heapq
10 import util, heapq
11
11
12 def _nonoverlap(d1, d2, d3):
12 def _nonoverlap(d1, d2, d3):
13 "Return list of elements in d1 not in d2 or d3"
13 "Return list of elements in d1 not in d2 or d3"
14 l = [d for d in d1 if d not in d3 and d not in d2]
14 l = [d for d in d1 if d not in d3 and d not in d2]
15 l.sort()
15 l.sort()
16 return l
16 return l
17
17
18 def _dirname(f):
18 def _dirname(f):
19 s = f.rfind("/")
19 s = f.rfind("/")
20 if s == -1:
20 if s == -1:
21 return ""
21 return ""
22 return f[:s]
22 return f[:s]
23
23
24 def _dirs(files):
24 def _dirs(files):
25 d = {}
25 d = {}
26 for f in files:
26 for f in files:
27 f = _dirname(f)
27 f = _dirname(f)
28 while f not in d:
28 while f not in d:
29 d[f] = True
29 d[f] = True
30 f = _dirname(f)
30 f = _dirname(f)
31 return d
31 return d
32
32
33 def _findoldnames(fctx, limit):
33 def _findoldnames(fctx, limit):
34 "find files that path was copied from, back to linkrev limit"
34 "find files that path was copied from, back to linkrev limit"
35 old = {}
35 old = {}
36 seen = {}
36 seen = {}
37 orig = fctx.path()
37 orig = fctx.path()
38 visit = [(fctx, 0)]
38 visit = [(fctx, 0)]
39 while visit:
39 while visit:
40 fc, depth = visit.pop()
40 fc, depth = visit.pop()
41 s = str(fc)
41 s = str(fc)
42 if s in seen:
42 if s in seen:
43 continue
43 continue
44 seen[s] = 1
44 seen[s] = 1
45 if fc.path() != orig and fc.path() not in old:
45 if fc.path() != orig and fc.path() not in old:
46 old[fc.path()] = (depth, fc.path()) # remember depth
46 old[fc.path()] = (depth, fc.path()) # remember depth
47 if fc.rev() < limit and fc.rev() is not None:
47 if fc.rev() < limit and fc.rev() is not None:
48 continue
48 continue
49 visit += [(p, depth - 1) for p in fc.parents()]
49 visit += [(p, depth - 1) for p in fc.parents()]
50
50
51 # return old names sorted by depth
51 # return old names sorted by depth
52 old = old.values()
52 old = old.values()
53 old.sort()
53 old.sort()
54 return [o[1] for o in old]
54 return [o[1] for o in old]
55
55
56 def _symmetricdifference(repo, a, b):
56 def _findlimit(repo, a, b):
57 """find revisions that are ancestors of a or b, but not both"""
57 "find the earliest revision that's an ancestor of a or b but not both"
58 # basic idea:
58 # basic idea:
59 # - mark a and b with different sides
59 # - mark a and b with different sides
60 # - if a parent's children are all on the same side, the parent is
60 # - if a parent's children are all on the same side, the parent is
61 # on that side, otherwise it is on no side
61 # on that side, otherwise it is on no side
62 # - walk the graph in topological order with the help of a heap;
62 # - walk the graph in topological order with the help of a heap;
63 # - add unseen parents to side map
63 # - add unseen parents to side map
64 # - clear side of any parent that has children on different sides
64 # - clear side of any parent that has children on different sides
65 # - track number of unvisited nodes that might still be on a side
65 # - track number of interesting revs that might still be on a side
66 # - quit when interesting nodes is zero
66 # - track the lowest interesting rev seen
67 # - quit when interesting revs is zero
67
68
68 cl = repo.changelog
69 cl = repo.changelog
69 working = cl.count()
70 working = cl.count() # pseudo rev for the working directory
70 if a is None:
71 if a is None:
71 a = working
72 a = working
72 if b is None:
73 if b is None:
73 b = working
74 b = working
74
75
75 side = {a: -1, b: 1}
76 side = {a: -1, b: 1}
76 visit = [-a, -b]
77 visit = [-a, -b]
77 heapq.heapify(visit)
78 heapq.heapify(visit)
78 interesting = len(visit)
79 interesting = len(visit)
80 limit = working
79
81
80 while interesting:
82 while interesting:
81 r = -heapq.heappop(visit)
83 r = -heapq.heappop(visit)
82 if r == working:
84 if r == working:
83 parents = [cl.rev(p) for p in repo.dirstate.parents()]
85 parents = [cl.rev(p) for p in repo.dirstate.parents()]
84 else:
86 else:
85 parents = cl.parentrevs(r)
87 parents = cl.parentrevs(r)
86 for p in parents:
88 for p in parents:
87 if p not in side:
89 if p not in side:
88 # first time we see p; add it to visit
90 # first time we see p; add it to visit
89 side[p] = side[r]
91 side[p] = side[r]
90 if side[p]:
92 if side[p]:
91 interesting += 1
93 interesting += 1
92 heapq.heappush(visit, -p)
94 heapq.heappush(visit, -p)
93 elif side[p] and side[p] != side[r]:
95 elif side[p] and side[p] != side[r]:
94 # p was interesting but now we know better
96 # p was interesting but now we know better
95 side[p] = 0
97 side[p] = 0
96 interesting -= 1
98 interesting -= 1
97 if side[r]:
99 if side[r]:
100 limit = r # lowest rev visited
98 interesting -= 1
101 interesting -= 1
99 yield r
102 return limit
100
103
101 def copies(repo, c1, c2, ca, checkdirs=False):
104 def copies(repo, c1, c2, ca, checkdirs=False):
102 """
105 """
103 Find moves and copies between context c1 and c2
106 Find moves and copies between context c1 and c2
104 """
107 """
105 # avoid silly behavior for update from empty dir
108 # avoid silly behavior for update from empty dir
106 if not c1 or not c2 or c1 == c2:
109 if not c1 or not c2 or c1 == c2:
107 return {}, {}
110 return {}, {}
108
111
109 limit = min(_symmetricdifference(repo, c1.rev(), c2.rev()))
112 limit = _findlimit(repo, c1.rev(), c2.rev())
110 m1 = c1.manifest()
113 m1 = c1.manifest()
111 m2 = c2.manifest()
114 m2 = c2.manifest()
112 ma = ca.manifest()
115 ma = ca.manifest()
113
116
114 def makectx(f, n):
117 def makectx(f, n):
115 if len(n) != 20: # in a working context?
118 if len(n) != 20: # in a working context?
116 if c1.rev() is None:
119 if c1.rev() is None:
117 return c1.filectx(f)
120 return c1.filectx(f)
118 return c2.filectx(f)
121 return c2.filectx(f)
119 return repo.filectx(f, fileid=n)
122 return repo.filectx(f, fileid=n)
120 ctx = util.cachefunc(makectx)
123 ctx = util.cachefunc(makectx)
121
124
122 copy = {}
125 copy = {}
123 fullcopy = {}
126 fullcopy = {}
124 diverge = {}
127 diverge = {}
125
128
126 def checkcopies(f, m1, m2):
129 def checkcopies(f, m1, m2):
127 '''check possible copies of f from m1 to m2'''
130 '''check possible copies of f from m1 to m2'''
128 c1 = ctx(f, m1[f])
131 c1 = ctx(f, m1[f])
129 for of in _findoldnames(c1, limit):
132 for of in _findoldnames(c1, limit):
130 fullcopy[f] = of # remember for dir rename detection
133 fullcopy[f] = of # remember for dir rename detection
131 if of in m2: # original file not in other manifest?
134 if of in m2: # original file not in other manifest?
132 # if the original file is unchanged on the other branch,
135 # if the original file is unchanged on the other branch,
133 # no merge needed
136 # no merge needed
134 if m2[of] != ma.get(of):
137 if m2[of] != ma.get(of):
135 c2 = ctx(of, m2[of])
138 c2 = ctx(of, m2[of])
136 ca = c1.ancestor(c2)
139 ca = c1.ancestor(c2)
137 # related and named changed on only one side?
140 # related and named changed on only one side?
138 if ca and (ca.path() == f or ca.path() == c2.path()):
141 if ca and (ca.path() == f or ca.path() == c2.path()):
139 if c1 != ca or c2 != ca: # merge needed?
142 if c1 != ca or c2 != ca: # merge needed?
140 copy[f] = of
143 copy[f] = of
141 elif of in ma:
144 elif of in ma:
142 diverge.setdefault(of, []).append(f)
145 diverge.setdefault(of, []).append(f)
143
146
144 repo.ui.debug(_(" searching for copies back to rev %d\n") % limit)
147 repo.ui.debug(_(" searching for copies back to rev %d\n") % limit)
145
148
146 u1 = _nonoverlap(m1, m2, ma)
149 u1 = _nonoverlap(m1, m2, ma)
147 u2 = _nonoverlap(m2, m1, ma)
150 u2 = _nonoverlap(m2, m1, ma)
148
151
149 if u1:
152 if u1:
150 repo.ui.debug(_(" unmatched files in local:\n %s\n")
153 repo.ui.debug(_(" unmatched files in local:\n %s\n")
151 % "\n ".join(u1))
154 % "\n ".join(u1))
152 if u2:
155 if u2:
153 repo.ui.debug(_(" unmatched files in other:\n %s\n")
156 repo.ui.debug(_(" unmatched files in other:\n %s\n")
154 % "\n ".join(u2))
157 % "\n ".join(u2))
155
158
156 for f in u1:
159 for f in u1:
157 checkcopies(f, m1, m2)
160 checkcopies(f, m1, m2)
158 for f in u2:
161 for f in u2:
159 checkcopies(f, m2, m1)
162 checkcopies(f, m2, m1)
160
163
161 diverge2 = {}
164 diverge2 = {}
162 for of, fl in diverge.items():
165 for of, fl in diverge.items():
163 if len(fl) == 1:
166 if len(fl) == 1:
164 del diverge[of] # not actually divergent
167 del diverge[of] # not actually divergent
165 else:
168 else:
166 diverge2.update(dict.fromkeys(fl)) # reverse map for below
169 diverge2.update(dict.fromkeys(fl)) # reverse map for below
167
170
168 if fullcopy:
171 if fullcopy:
169 repo.ui.debug(_(" all copies found (* = to merge, ! = divergent):\n"))
172 repo.ui.debug(_(" all copies found (* = to merge, ! = divergent):\n"))
170 for f in fullcopy:
173 for f in fullcopy:
171 note = ""
174 note = ""
172 if f in copy: note += "*"
175 if f in copy: note += "*"
173 if f in diverge2: note += "!"
176 if f in diverge2: note += "!"
174 repo.ui.debug(_(" %s -> %s %s\n") % (f, fullcopy[f], note))
177 repo.ui.debug(_(" %s -> %s %s\n") % (f, fullcopy[f], note))
175 del diverge2
178 del diverge2
176
179
177 if not fullcopy or not checkdirs:
180 if not fullcopy or not checkdirs:
178 return copy, diverge
181 return copy, diverge
179
182
180 repo.ui.debug(_(" checking for directory renames\n"))
183 repo.ui.debug(_(" checking for directory renames\n"))
181
184
182 # generate a directory move map
185 # generate a directory move map
183 d1, d2 = _dirs(m1), _dirs(m2)
186 d1, d2 = _dirs(m1), _dirs(m2)
184 invalid = {}
187 invalid = {}
185 dirmove = {}
188 dirmove = {}
186
189
187 # examine each file copy for a potential directory move, which is
190 # examine each file copy for a potential directory move, which is
188 # when all the files in a directory are moved to a new directory
191 # when all the files in a directory are moved to a new directory
189 for dst, src in fullcopy.items():
192 for dst, src in fullcopy.items():
190 dsrc, ddst = _dirname(src), _dirname(dst)
193 dsrc, ddst = _dirname(src), _dirname(dst)
191 if dsrc in invalid:
194 if dsrc in invalid:
192 # already seen to be uninteresting
195 # already seen to be uninteresting
193 continue
196 continue
194 elif dsrc in d1 and ddst in d1:
197 elif dsrc in d1 and ddst in d1:
195 # directory wasn't entirely moved locally
198 # directory wasn't entirely moved locally
196 invalid[dsrc] = True
199 invalid[dsrc] = True
197 elif dsrc in d2 and ddst in d2:
200 elif dsrc in d2 and ddst in d2:
198 # directory wasn't entirely moved remotely
201 # directory wasn't entirely moved remotely
199 invalid[dsrc] = True
202 invalid[dsrc] = True
200 elif dsrc in dirmove and dirmove[dsrc] != ddst:
203 elif dsrc in dirmove and dirmove[dsrc] != ddst:
201 # files from the same directory moved to two different places
204 # files from the same directory moved to two different places
202 invalid[dsrc] = True
205 invalid[dsrc] = True
203 else:
206 else:
204 # looks good so far
207 # looks good so far
205 dirmove[dsrc + "/"] = ddst + "/"
208 dirmove[dsrc + "/"] = ddst + "/"
206
209
207 for i in invalid:
210 for i in invalid:
208 if i in dirmove:
211 if i in dirmove:
209 del dirmove[i]
212 del dirmove[i]
210 del d1, d2, invalid
213 del d1, d2, invalid
211
214
212 if not dirmove:
215 if not dirmove:
213 return copy, diverge
216 return copy, diverge
214
217
215 for d in dirmove:
218 for d in dirmove:
216 repo.ui.debug(_(" dir %s -> %s\n") % (d, dirmove[d]))
219 repo.ui.debug(_(" dir %s -> %s\n") % (d, dirmove[d]))
217
220
218 # check unaccounted nonoverlapping files against directory moves
221 # check unaccounted nonoverlapping files against directory moves
219 for f in u1 + u2:
222 for f in u1 + u2:
220 if f not in fullcopy:
223 if f not in fullcopy:
221 for d in dirmove:
224 for d in dirmove:
222 if f.startswith(d):
225 if f.startswith(d):
223 # new file added in a directory that was moved, move it
226 # new file added in a directory that was moved, move it
224 df = dirmove[d] + f[len(d):]
227 df = dirmove[d] + f[len(d):]
225 if df not in copy:
228 if df not in copy:
226 copy[f] = df
229 copy[f] = df
227 repo.ui.debug(_(" file %s -> %s\n") % (f, copy[f]))
230 repo.ui.debug(_(" file %s -> %s\n") % (f, copy[f]))
228 break
231 break
229
232
230 return copy, diverge
233 return copy, diverge
General Comments 0
You need to be logged in to leave comments. Login now