##// END OF EJS Templates
copies: refactor symmetricdifference as _findlimit...
Matt Mackall -
r6431:a42d8d3e default
parent child Browse files
Show More
@@ -1,230 +1,233 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 8 from node import nullid, nullrev
9 9 from i18n import _
10 10 import util, heapq
11 11
12 12 def _nonoverlap(d1, d2, d3):
13 13 "Return list of elements in d1 not in d2 or d3"
14 14 l = [d for d in d1 if d not in d3 and d not in d2]
15 15 l.sort()
16 16 return l
17 17
18 18 def _dirname(f):
19 19 s = f.rfind("/")
20 20 if s == -1:
21 21 return ""
22 22 return f[:s]
23 23
24 24 def _dirs(files):
25 25 d = {}
26 26 for f in files:
27 27 f = _dirname(f)
28 28 while f not in d:
29 29 d[f] = True
30 30 f = _dirname(f)
31 31 return d
32 32
33 33 def _findoldnames(fctx, limit):
34 34 "find files that path was copied from, back to linkrev limit"
35 35 old = {}
36 36 seen = {}
37 37 orig = fctx.path()
38 38 visit = [(fctx, 0)]
39 39 while visit:
40 40 fc, depth = visit.pop()
41 41 s = str(fc)
42 42 if s in seen:
43 43 continue
44 44 seen[s] = 1
45 45 if fc.path() != orig and fc.path() not in old:
46 46 old[fc.path()] = (depth, fc.path()) # remember depth
47 47 if fc.rev() < limit and fc.rev() is not None:
48 48 continue
49 49 visit += [(p, depth - 1) for p in fc.parents()]
50 50
51 51 # return old names sorted by depth
52 52 old = old.values()
53 53 old.sort()
54 54 return [o[1] for o in old]
55 55
56 def _symmetricdifference(repo, a, b):
57 """find revisions that are ancestors of a or b, but not both"""
56 def _findlimit(repo, a, b):
57 "find the earliest revision that's an ancestor of a or b but not both"
58 58 # basic idea:
59 59 # - mark a and b with different sides
60 60 # - if a parent's children are all on the same side, the parent is
61 61 # on that side, otherwise it is on no side
62 62 # - walk the graph in topological order with the help of a heap;
63 63 # - add unseen parents to side map
64 64 # - clear side of any parent that has children on different sides
65 # - track number of unvisited nodes that might still be on a side
66 # - quit when interesting nodes is zero
65 # - track number of interesting revs that might still be on a side
66 # - track the lowest interesting rev seen
67 # - quit when interesting revs is zero
67 68
68 69 cl = repo.changelog
69 working = cl.count()
70 working = cl.count() # pseudo rev for the working directory
70 71 if a is None:
71 72 a = working
72 73 if b is None:
73 74 b = working
74 75
75 76 side = {a: -1, b: 1}
76 77 visit = [-a, -b]
77 78 heapq.heapify(visit)
78 79 interesting = len(visit)
80 limit = working
79 81
80 82 while interesting:
81 83 r = -heapq.heappop(visit)
82 84 if r == working:
83 85 parents = [cl.rev(p) for p in repo.dirstate.parents()]
84 86 else:
85 87 parents = cl.parentrevs(r)
86 88 for p in parents:
87 89 if p not in side:
88 90 # first time we see p; add it to visit
89 91 side[p] = side[r]
90 92 if side[p]:
91 93 interesting += 1
92 94 heapq.heappush(visit, -p)
93 95 elif side[p] and side[p] != side[r]:
94 96 # p was interesting but now we know better
95 97 side[p] = 0
96 98 interesting -= 1
97 99 if side[r]:
100 limit = r # lowest rev visited
98 101 interesting -= 1
99 yield r
102 return limit
100 103
101 104 def copies(repo, c1, c2, ca, checkdirs=False):
102 105 """
103 106 Find moves and copies between context c1 and c2
104 107 """
105 108 # avoid silly behavior for update from empty dir
106 109 if not c1 or not c2 or c1 == c2:
107 110 return {}, {}
108 111
109 limit = min(_symmetricdifference(repo, c1.rev(), c2.rev()))
112 limit = _findlimit(repo, c1.rev(), c2.rev())
110 113 m1 = c1.manifest()
111 114 m2 = c2.manifest()
112 115 ma = ca.manifest()
113 116
114 117 def makectx(f, n):
115 118 if len(n) != 20: # in a working context?
116 119 if c1.rev() is None:
117 120 return c1.filectx(f)
118 121 return c2.filectx(f)
119 122 return repo.filectx(f, fileid=n)
120 123 ctx = util.cachefunc(makectx)
121 124
122 125 copy = {}
123 126 fullcopy = {}
124 127 diverge = {}
125 128
126 129 def checkcopies(f, m1, m2):
127 130 '''check possible copies of f from m1 to m2'''
128 131 c1 = ctx(f, m1[f])
129 132 for of in _findoldnames(c1, limit):
130 133 fullcopy[f] = of # remember for dir rename detection
131 134 if of in m2: # original file not in other manifest?
132 135 # if the original file is unchanged on the other branch,
133 136 # no merge needed
134 137 if m2[of] != ma.get(of):
135 138 c2 = ctx(of, m2[of])
136 139 ca = c1.ancestor(c2)
137 140 # related and named changed on only one side?
138 141 if ca and (ca.path() == f or ca.path() == c2.path()):
139 142 if c1 != ca or c2 != ca: # merge needed?
140 143 copy[f] = of
141 144 elif of in ma:
142 145 diverge.setdefault(of, []).append(f)
143 146
144 147 repo.ui.debug(_(" searching for copies back to rev %d\n") % limit)
145 148
146 149 u1 = _nonoverlap(m1, m2, ma)
147 150 u2 = _nonoverlap(m2, m1, ma)
148 151
149 152 if u1:
150 153 repo.ui.debug(_(" unmatched files in local:\n %s\n")
151 154 % "\n ".join(u1))
152 155 if u2:
153 156 repo.ui.debug(_(" unmatched files in other:\n %s\n")
154 157 % "\n ".join(u2))
155 158
156 159 for f in u1:
157 160 checkcopies(f, m1, m2)
158 161 for f in u2:
159 162 checkcopies(f, m2, m1)
160 163
161 164 diverge2 = {}
162 165 for of, fl in diverge.items():
163 166 if len(fl) == 1:
164 167 del diverge[of] # not actually divergent
165 168 else:
166 169 diverge2.update(dict.fromkeys(fl)) # reverse map for below
167 170
168 171 if fullcopy:
169 172 repo.ui.debug(_(" all copies found (* = to merge, ! = divergent):\n"))
170 173 for f in fullcopy:
171 174 note = ""
172 175 if f in copy: note += "*"
173 176 if f in diverge2: note += "!"
174 177 repo.ui.debug(_(" %s -> %s %s\n") % (f, fullcopy[f], note))
175 178 del diverge2
176 179
177 180 if not fullcopy or not checkdirs:
178 181 return copy, diverge
179 182
180 183 repo.ui.debug(_(" checking for directory renames\n"))
181 184
182 185 # generate a directory move map
183 186 d1, d2 = _dirs(m1), _dirs(m2)
184 187 invalid = {}
185 188 dirmove = {}
186 189
187 190 # examine each file copy for a potential directory move, which is
188 191 # when all the files in a directory are moved to a new directory
189 192 for dst, src in fullcopy.items():
190 193 dsrc, ddst = _dirname(src), _dirname(dst)
191 194 if dsrc in invalid:
192 195 # already seen to be uninteresting
193 196 continue
194 197 elif dsrc in d1 and ddst in d1:
195 198 # directory wasn't entirely moved locally
196 199 invalid[dsrc] = True
197 200 elif dsrc in d2 and ddst in d2:
198 201 # directory wasn't entirely moved remotely
199 202 invalid[dsrc] = True
200 203 elif dsrc in dirmove and dirmove[dsrc] != ddst:
201 204 # files from the same directory moved to two different places
202 205 invalid[dsrc] = True
203 206 else:
204 207 # looks good so far
205 208 dirmove[dsrc + "/"] = ddst + "/"
206 209
207 210 for i in invalid:
208 211 if i in dirmove:
209 212 del dirmove[i]
210 213 del d1, d2, invalid
211 214
212 215 if not dirmove:
213 216 return copy, diverge
214 217
215 218 for d in dirmove:
216 219 repo.ui.debug(_(" dir %s -> %s\n") % (d, dirmove[d]))
217 220
218 221 # check unaccounted nonoverlapping files against directory moves
219 222 for f in u1 + u2:
220 223 if f not in fullcopy:
221 224 for d in dirmove:
222 225 if f.startswith(d):
223 226 # new file added in a directory that was moved, move it
224 227 df = dirmove[d] + f[len(d):]
225 228 if df not in copy:
226 229 copy[f] = df
227 230 repo.ui.debug(_(" file %s -> %s\n") % (f, copy[f]))
228 231 break
229 232
230 233 return copy, diverge
General Comments 0
You need to be logged in to leave comments. Login now