##// END OF EJS Templates
copies: use set instead of dict
Benoit Boissinot -
r8468:b35d11d1 default
parent child Browse files
Show More
@@ -1,233 +1,233 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
6 # GNU General Public License version 2, incorporated herein by reference.
7
7
8 from i18n import _
8 from i18n import _
9 import util
9 import util
10 import heapq
10 import heapq
11
11
12 def _nonoverlap(d1, d2, d3):
12 def _nonoverlap(d1, d2, d3):
13 "Return list of elements in d1 not in d2 or d3"
13 "Return list of elements in d1 not in d2 or d3"
14 return sorted([d for d in d1 if d not in d3 and d not in d2])
14 return sorted([d for d in d1 if d not in d3 and d not in d2])
15
15
16 def _dirname(f):
16 def _dirname(f):
17 s = f.rfind("/")
17 s = f.rfind("/")
18 if s == -1:
18 if s == -1:
19 return ""
19 return ""
20 return f[:s]
20 return f[:s]
21
21
22 def _dirs(files):
22 def _dirs(files):
23 d = {}
23 d = set()
24 for f in files:
24 for f in files:
25 f = _dirname(f)
25 f = _dirname(f)
26 while f not in d:
26 while f not in d:
27 d[f] = True
27 d.add(f)
28 f = _dirname(f)
28 f = _dirname(f)
29 return d
29 return d
30
30
31 def _findoldnames(fctx, limit):
31 def _findoldnames(fctx, limit):
32 "find files that path was copied from, back to linkrev limit"
32 "find files that path was copied from, back to linkrev limit"
33 old = {}
33 old = {}
34 seen = {}
34 seen = set()
35 orig = fctx.path()
35 orig = fctx.path()
36 visit = [(fctx, 0)]
36 visit = [(fctx, 0)]
37 while visit:
37 while visit:
38 fc, depth = visit.pop()
38 fc, depth = visit.pop()
39 s = str(fc)
39 s = str(fc)
40 if s in seen:
40 if s in seen:
41 continue
41 continue
42 seen[s] = 1
42 seen.add(s)
43 if fc.path() != orig and fc.path() not in old:
43 if fc.path() != orig and fc.path() not in old:
44 old[fc.path()] = (depth, fc.path()) # remember depth
44 old[fc.path()] = (depth, fc.path()) # remember depth
45 if fc.rev() < limit and fc.rev() is not None:
45 if fc.rev() < limit and fc.rev() is not None:
46 continue
46 continue
47 visit += [(p, depth - 1) for p in fc.parents()]
47 visit += [(p, depth - 1) for p in fc.parents()]
48
48
49 # return old names sorted by depth
49 # return old names sorted by depth
50 return [o[1] for o in sorted(old.values())]
50 return [o[1] for o in sorted(old.values())]
51
51
52 def _findlimit(repo, a, b):
52 def _findlimit(repo, a, b):
53 "find the earliest revision that's an ancestor of a or b but not both"
53 "find the earliest revision that's an ancestor of a or b but not both"
54 # basic idea:
54 # basic idea:
55 # - mark a and b with different sides
55 # - mark a and b with different sides
56 # - if a parent's children are all on the same side, the parent is
56 # - if a parent's children are all on the same side, the parent is
57 # on that side, otherwise it is on no side
57 # on that side, otherwise it is on no side
58 # - walk the graph in topological order with the help of a heap;
58 # - walk the graph in topological order with the help of a heap;
59 # - add unseen parents to side map
59 # - add unseen parents to side map
60 # - clear side of any parent that has children on different sides
60 # - clear side of any parent that has children on different sides
61 # - track number of interesting revs that might still be on a side
61 # - track number of interesting revs that might still be on a side
62 # - track the lowest interesting rev seen
62 # - track the lowest interesting rev seen
63 # - quit when interesting revs is zero
63 # - quit when interesting revs is zero
64
64
65 cl = repo.changelog
65 cl = repo.changelog
66 working = len(cl) # pseudo rev for the working directory
66 working = len(cl) # pseudo rev for the working directory
67 if a is None:
67 if a is None:
68 a = working
68 a = working
69 if b is None:
69 if b is None:
70 b = working
70 b = working
71
71
72 side = {a: -1, b: 1}
72 side = {a: -1, b: 1}
73 visit = [-a, -b]
73 visit = [-a, -b]
74 heapq.heapify(visit)
74 heapq.heapify(visit)
75 interesting = len(visit)
75 interesting = len(visit)
76 limit = working
76 limit = working
77
77
78 while interesting:
78 while interesting:
79 r = -heapq.heappop(visit)
79 r = -heapq.heappop(visit)
80 if r == working:
80 if r == working:
81 parents = [cl.rev(p) for p in repo.dirstate.parents()]
81 parents = [cl.rev(p) for p in repo.dirstate.parents()]
82 else:
82 else:
83 parents = cl.parentrevs(r)
83 parents = cl.parentrevs(r)
84 for p in parents:
84 for p in parents:
85 if p not in side:
85 if p not in side:
86 # first time we see p; add it to visit
86 # first time we see p; add it to visit
87 side[p] = side[r]
87 side[p] = side[r]
88 if side[p]:
88 if side[p]:
89 interesting += 1
89 interesting += 1
90 heapq.heappush(visit, -p)
90 heapq.heappush(visit, -p)
91 elif side[p] and side[p] != side[r]:
91 elif side[p] and side[p] != side[r]:
92 # p was interesting but now we know better
92 # p was interesting but now we know better
93 side[p] = 0
93 side[p] = 0
94 interesting -= 1
94 interesting -= 1
95 if side[r]:
95 if side[r]:
96 limit = r # lowest rev visited
96 limit = r # lowest rev visited
97 interesting -= 1
97 interesting -= 1
98 return limit
98 return limit
99
99
100 def copies(repo, c1, c2, ca, checkdirs=False):
100 def copies(repo, c1, c2, ca, checkdirs=False):
101 """
101 """
102 Find moves and copies between context c1 and c2
102 Find moves and copies between context c1 and c2
103 """
103 """
104 # avoid silly behavior for update from empty dir
104 # avoid silly behavior for update from empty dir
105 if not c1 or not c2 or c1 == c2:
105 if not c1 or not c2 or c1 == c2:
106 return {}, {}
106 return {}, {}
107
107
108 # avoid silly behavior for parent -> working dir
108 # avoid silly behavior for parent -> working dir
109 if c2.node() == None and c1.node() == repo.dirstate.parents()[0]:
109 if c2.node() == None and c1.node() == repo.dirstate.parents()[0]:
110 return repo.dirstate.copies(), {}
110 return repo.dirstate.copies(), {}
111
111
112 limit = _findlimit(repo, c1.rev(), c2.rev())
112 limit = _findlimit(repo, c1.rev(), c2.rev())
113 m1 = c1.manifest()
113 m1 = c1.manifest()
114 m2 = c2.manifest()
114 m2 = c2.manifest()
115 ma = ca.manifest()
115 ma = ca.manifest()
116
116
117 def makectx(f, n):
117 def makectx(f, n):
118 if len(n) != 20: # in a working context?
118 if len(n) != 20: # in a working context?
119 if c1.rev() is None:
119 if c1.rev() is None:
120 return c1.filectx(f)
120 return c1.filectx(f)
121 return c2.filectx(f)
121 return c2.filectx(f)
122 return repo.filectx(f, fileid=n)
122 return repo.filectx(f, fileid=n)
123 ctx = util.cachefunc(makectx)
123 ctx = util.cachefunc(makectx)
124
124
125 copy = {}
125 copy = {}
126 fullcopy = {}
126 fullcopy = {}
127 diverge = {}
127 diverge = {}
128
128
129 def checkcopies(f, m1, m2):
129 def checkcopies(f, m1, m2):
130 '''check possible copies of f from m1 to m2'''
130 '''check possible copies of f from m1 to m2'''
131 c1 = ctx(f, m1[f])
131 c1 = ctx(f, m1[f])
132 for of in _findoldnames(c1, limit):
132 for of in _findoldnames(c1, limit):
133 fullcopy[f] = of # remember for dir rename detection
133 fullcopy[f] = of # remember for dir rename detection
134 if of in m2: # original file not in other manifest?
134 if of in m2: # original file not in other manifest?
135 # if the original file is unchanged on the other branch,
135 # if the original file is unchanged on the other branch,
136 # no merge needed
136 # no merge needed
137 if m2[of] != ma.get(of):
137 if m2[of] != ma.get(of):
138 c2 = ctx(of, m2[of])
138 c2 = ctx(of, m2[of])
139 ca = c1.ancestor(c2)
139 ca = c1.ancestor(c2)
140 # related and named changed on only one side?
140 # related and named changed on only one side?
141 if ca and (ca.path() == f or ca.path() == c2.path()):
141 if ca and (ca.path() == f or ca.path() == c2.path()):
142 if c1 != ca or c2 != ca: # merge needed?
142 if c1 != ca or c2 != ca: # merge needed?
143 copy[f] = of
143 copy[f] = of
144 elif of in ma:
144 elif of in ma:
145 diverge.setdefault(of, []).append(f)
145 diverge.setdefault(of, []).append(f)
146
146
147 repo.ui.debug(_(" searching for copies back to rev %d\n") % limit)
147 repo.ui.debug(_(" searching for copies back to rev %d\n") % limit)
148
148
149 u1 = _nonoverlap(m1, m2, ma)
149 u1 = _nonoverlap(m1, m2, ma)
150 u2 = _nonoverlap(m2, m1, ma)
150 u2 = _nonoverlap(m2, m1, ma)
151
151
152 if u1:
152 if u1:
153 repo.ui.debug(_(" unmatched files in local:\n %s\n")
153 repo.ui.debug(_(" unmatched files in local:\n %s\n")
154 % "\n ".join(u1))
154 % "\n ".join(u1))
155 if u2:
155 if u2:
156 repo.ui.debug(_(" unmatched files in other:\n %s\n")
156 repo.ui.debug(_(" unmatched files in other:\n %s\n")
157 % "\n ".join(u2))
157 % "\n ".join(u2))
158
158
159 for f in u1:
159 for f in u1:
160 checkcopies(f, m1, m2)
160 checkcopies(f, m1, m2)
161 for f in u2:
161 for f in u2:
162 checkcopies(f, m2, m1)
162 checkcopies(f, m2, m1)
163
163
164 diverge2 = set()
164 diverge2 = set()
165 for of, fl in diverge.items():
165 for of, fl in diverge.items():
166 if len(fl) == 1:
166 if len(fl) == 1:
167 del diverge[of] # not actually divergent
167 del diverge[of] # not actually divergent
168 else:
168 else:
169 diverge2.update(fl) # reverse map for below
169 diverge2.update(fl) # reverse map for below
170
170
171 if fullcopy:
171 if fullcopy:
172 repo.ui.debug(_(" all copies found (* = to merge, ! = divergent):\n"))
172 repo.ui.debug(_(" all copies found (* = to merge, ! = divergent):\n"))
173 for f in fullcopy:
173 for f in fullcopy:
174 note = ""
174 note = ""
175 if f in copy: note += "*"
175 if f in copy: note += "*"
176 if f in diverge2: note += "!"
176 if f in diverge2: note += "!"
177 repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))
177 repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))
178 del diverge2
178 del diverge2
179
179
180 if not fullcopy or not checkdirs:
180 if not fullcopy or not checkdirs:
181 return copy, diverge
181 return copy, diverge
182
182
183 repo.ui.debug(_(" checking for directory renames\n"))
183 repo.ui.debug(_(" checking for directory renames\n"))
184
184
185 # generate a directory move map
185 # generate a directory move map
186 d1, d2 = _dirs(m1), _dirs(m2)
186 d1, d2 = _dirs(m1), _dirs(m2)
187 invalid = {}
187 invalid = set()
188 dirmove = {}
188 dirmove = {}
189
189
190 # examine each file copy for a potential directory move, which is
190 # examine each file copy for a potential directory move, which is
191 # when all the files in a directory are moved to a new directory
191 # when all the files in a directory are moved to a new directory
192 for dst, src in fullcopy.iteritems():
192 for dst, src in fullcopy.iteritems():
193 dsrc, ddst = _dirname(src), _dirname(dst)
193 dsrc, ddst = _dirname(src), _dirname(dst)
194 if dsrc in invalid:
194 if dsrc in invalid:
195 # already seen to be uninteresting
195 # already seen to be uninteresting
196 continue
196 continue
197 elif dsrc in d1 and ddst in d1:
197 elif dsrc in d1 and ddst in d1:
198 # directory wasn't entirely moved locally
198 # directory wasn't entirely moved locally
199 invalid[dsrc] = True
199 invalid.add(dsrc)
200 elif dsrc in d2 and ddst in d2:
200 elif dsrc in d2 and ddst in d2:
201 # directory wasn't entirely moved remotely
201 # directory wasn't entirely moved remotely
202 invalid[dsrc] = True
202 invalid.add(dsrc)
203 elif dsrc in dirmove and dirmove[dsrc] != ddst:
203 elif dsrc in dirmove and dirmove[dsrc] != ddst:
204 # files from the same directory moved to two different places
204 # files from the same directory moved to two different places
205 invalid[dsrc] = True
205 invalid.add(dsrc)
206 else:
206 else:
207 # looks good so far
207 # looks good so far
208 dirmove[dsrc + "/"] = ddst + "/"
208 dirmove[dsrc + "/"] = ddst + "/"
209
209
210 for i in invalid:
210 for i in invalid:
211 if i in dirmove:
211 if i in dirmove:
212 del dirmove[i]
212 del dirmove[i]
213 del d1, d2, invalid
213 del d1, d2, invalid
214
214
215 if not dirmove:
215 if not dirmove:
216 return copy, diverge
216 return copy, diverge
217
217
218 for d in dirmove:
218 for d in dirmove:
219 repo.ui.debug(_(" dir %s -> %s\n") % (d, dirmove[d]))
219 repo.ui.debug(_(" dir %s -> %s\n") % (d, dirmove[d]))
220
220
221 # check unaccounted nonoverlapping files against directory moves
221 # check unaccounted nonoverlapping files against directory moves
222 for f in u1 + u2:
222 for f in u1 + u2:
223 if f not in fullcopy:
223 if f not in fullcopy:
224 for d in dirmove:
224 for d in dirmove:
225 if f.startswith(d):
225 if f.startswith(d):
226 # new file added in a directory that was moved, move it
226 # new file added in a directory that was moved, move it
227 df = dirmove[d] + f[len(d):]
227 df = dirmove[d] + f[len(d):]
228 if df not in copy:
228 if df not in copy:
229 copy[f] = df
229 copy[f] = df
230 repo.ui.debug(_(" file %s -> %s\n") % (f, copy[f]))
230 repo.ui.debug(_(" file %s -> %s\n") % (f, copy[f]))
231 break
231 break
232
232
233 return copy, diverge
233 return copy, diverge
General Comments 0
You need to be logged in to leave comments. Login now