##// END OF EJS Templates
ancestors: prefetch method outside of the loop...
Pierre-Yves David -
r25639:7125225a default
parent child Browse files
Show More
@@ -1,354 +1,358 b''
1 # ancestor.py - generic DAG ancestor algorithm for mercurial
1 # ancestor.py - generic DAG ancestor algorithm for mercurial
2 #
2 #
3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import collections
8 import collections
9 import heapq
9 import heapq
10 from node import nullrev
10 from node import nullrev
11
11
12 def commonancestorsheads(pfunc, *nodes):
12 def commonancestorsheads(pfunc, *nodes):
13 """Returns a set with the heads of all common ancestors of all nodes,
13 """Returns a set with the heads of all common ancestors of all nodes,
14 heads(::nodes[0] and ::nodes[1] and ...) .
14 heads(::nodes[0] and ::nodes[1] and ...) .
15
15
16 pfunc must return a list of parent vertices for a given vertex.
16 pfunc must return a list of parent vertices for a given vertex.
17 """
17 """
18 if not isinstance(nodes, set):
18 if not isinstance(nodes, set):
19 nodes = set(nodes)
19 nodes = set(nodes)
20 if nullrev in nodes:
20 if nullrev in nodes:
21 return set()
21 return set()
22 if len(nodes) <= 1:
22 if len(nodes) <= 1:
23 return nodes
23 return nodes
24
24
25 allseen = (1 << len(nodes)) - 1
25 allseen = (1 << len(nodes)) - 1
26 seen = [0] * (max(nodes) + 1)
26 seen = [0] * (max(nodes) + 1)
27 for i, n in enumerate(nodes):
27 for i, n in enumerate(nodes):
28 seen[n] = 1 << i
28 seen[n] = 1 << i
29 poison = 1 << (i + 1)
29 poison = 1 << (i + 1)
30
30
31 gca = set()
31 gca = set()
32 interesting = len(nodes)
32 interesting = len(nodes)
33 nv = len(seen) - 1
33 nv = len(seen) - 1
34 while nv >= 0 and interesting:
34 while nv >= 0 and interesting:
35 v = nv
35 v = nv
36 nv -= 1
36 nv -= 1
37 if not seen[v]:
37 if not seen[v]:
38 continue
38 continue
39 sv = seen[v]
39 sv = seen[v]
40 if sv < poison:
40 if sv < poison:
41 interesting -= 1
41 interesting -= 1
42 if sv == allseen:
42 if sv == allseen:
43 gca.add(v)
43 gca.add(v)
44 sv |= poison
44 sv |= poison
45 if v in nodes:
45 if v in nodes:
46 # history is linear
46 # history is linear
47 return set([v])
47 return set([v])
48 if sv < poison:
48 if sv < poison:
49 for p in pfunc(v):
49 for p in pfunc(v):
50 sp = seen[p]
50 sp = seen[p]
51 if p == nullrev:
51 if p == nullrev:
52 continue
52 continue
53 if sp == 0:
53 if sp == 0:
54 seen[p] = sv
54 seen[p] = sv
55 interesting += 1
55 interesting += 1
56 elif sp != sv:
56 elif sp != sv:
57 seen[p] |= sv
57 seen[p] |= sv
58 else:
58 else:
59 for p in pfunc(v):
59 for p in pfunc(v):
60 if p == nullrev:
60 if p == nullrev:
61 continue
61 continue
62 sp = seen[p]
62 sp = seen[p]
63 if sp and sp < poison:
63 if sp and sp < poison:
64 interesting -= 1
64 interesting -= 1
65 seen[p] = sv
65 seen[p] = sv
66 return gca
66 return gca
67
67
68 def ancestors(pfunc, *orignodes):
68 def ancestors(pfunc, *orignodes):
69 """
69 """
70 Returns the common ancestors of a and b that are furthest from a
70 Returns the common ancestors of a and b that are furthest from a
71 root (as measured by longest path).
71 root (as measured by longest path).
72
72
73 pfunc must return a list of parent vertices for a given vertex.
73 pfunc must return a list of parent vertices for a given vertex.
74 """
74 """
75 def deepest(nodes):
75 def deepest(nodes):
76 interesting = {}
76 interesting = {}
77 count = max(nodes) + 1
77 count = max(nodes) + 1
78 depth = [0] * count
78 depth = [0] * count
79 seen = [0] * count
79 seen = [0] * count
80 mapping = []
80 mapping = []
81 for (i, n) in enumerate(sorted(nodes)):
81 for (i, n) in enumerate(sorted(nodes)):
82 depth[n] = 1
82 depth[n] = 1
83 b = 1 << i
83 b = 1 << i
84 seen[n] = b
84 seen[n] = b
85 interesting[b] = 1
85 interesting[b] = 1
86 mapping.append((b, n))
86 mapping.append((b, n))
87 nv = count - 1
87 nv = count - 1
88 while nv >= 0 and len(interesting) > 1:
88 while nv >= 0 and len(interesting) > 1:
89 v = nv
89 v = nv
90 nv -= 1
90 nv -= 1
91 dv = depth[v]
91 dv = depth[v]
92 if dv == 0:
92 if dv == 0:
93 continue
93 continue
94 sv = seen[v]
94 sv = seen[v]
95 for p in pfunc(v):
95 for p in pfunc(v):
96 if p == nullrev:
96 if p == nullrev:
97 continue
97 continue
98 dp = depth[p]
98 dp = depth[p]
99 nsp = sp = seen[p]
99 nsp = sp = seen[p]
100 if dp <= dv:
100 if dp <= dv:
101 depth[p] = dv + 1
101 depth[p] = dv + 1
102 if sp != sv:
102 if sp != sv:
103 interesting[sv] += 1
103 interesting[sv] += 1
104 nsp = seen[p] = sv
104 nsp = seen[p] = sv
105 if sp:
105 if sp:
106 interesting[sp] -= 1
106 interesting[sp] -= 1
107 if interesting[sp] == 0:
107 if interesting[sp] == 0:
108 del interesting[sp]
108 del interesting[sp]
109 elif dv == dp - 1:
109 elif dv == dp - 1:
110 nsp = sp | sv
110 nsp = sp | sv
111 if nsp == sp:
111 if nsp == sp:
112 continue
112 continue
113 seen[p] = nsp
113 seen[p] = nsp
114 interesting.setdefault(nsp, 0)
114 interesting.setdefault(nsp, 0)
115 interesting[nsp] += 1
115 interesting[nsp] += 1
116 interesting[sp] -= 1
116 interesting[sp] -= 1
117 if interesting[sp] == 0:
117 if interesting[sp] == 0:
118 del interesting[sp]
118 del interesting[sp]
119 interesting[sv] -= 1
119 interesting[sv] -= 1
120 if interesting[sv] == 0:
120 if interesting[sv] == 0:
121 del interesting[sv]
121 del interesting[sv]
122
122
123 if len(interesting) != 1:
123 if len(interesting) != 1:
124 return []
124 return []
125
125
126 k = 0
126 k = 0
127 for i in interesting:
127 for i in interesting:
128 k |= i
128 k |= i
129 return set(n for (i, n) in mapping if k & i)
129 return set(n for (i, n) in mapping if k & i)
130
130
131 gca = commonancestorsheads(pfunc, *orignodes)
131 gca = commonancestorsheads(pfunc, *orignodes)
132
132
133 if len(gca) <= 1:
133 if len(gca) <= 1:
134 return gca
134 return gca
135 return deepest(gca)
135 return deepest(gca)
136
136
137 class incrementalmissingancestors(object):
137 class incrementalmissingancestors(object):
138 '''persistent state used to calculate missing ancestors incrementally
138 '''persistent state used to calculate missing ancestors incrementally
139
139
140 Although similar in spirit to lazyancestors below, this is a separate class
140 Although similar in spirit to lazyancestors below, this is a separate class
141 because trying to support contains and missingancestors operations with the
141 because trying to support contains and missingancestors operations with the
142 same internal data structures adds needless complexity.'''
142 same internal data structures adds needless complexity.'''
143 def __init__(self, pfunc, bases):
143 def __init__(self, pfunc, bases):
144 self.bases = set(bases)
144 self.bases = set(bases)
145 if not self.bases:
145 if not self.bases:
146 self.bases.add(nullrev)
146 self.bases.add(nullrev)
147 self.pfunc = pfunc
147 self.pfunc = pfunc
148
148
149 def hasbases(self):
149 def hasbases(self):
150 '''whether the common set has any non-trivial bases'''
150 '''whether the common set has any non-trivial bases'''
151 return self.bases and self.bases != set([nullrev])
151 return self.bases and self.bases != set([nullrev])
152
152
153 def addbases(self, newbases):
153 def addbases(self, newbases):
154 '''grow the ancestor set by adding new bases'''
154 '''grow the ancestor set by adding new bases'''
155 self.bases.update(newbases)
155 self.bases.update(newbases)
156
156
157 def removeancestorsfrom(self, revs):
157 def removeancestorsfrom(self, revs):
158 '''remove all ancestors of bases from the set revs (in place)'''
158 '''remove all ancestors of bases from the set revs (in place)'''
159 bases = self.bases
159 bases = self.bases
160 pfunc = self.pfunc
160 pfunc = self.pfunc
161 revs.difference_update(bases)
161 revs.difference_update(bases)
162 # nullrev is always an ancestor
162 # nullrev is always an ancestor
163 revs.discard(nullrev)
163 revs.discard(nullrev)
164 if not revs:
164 if not revs:
165 return
165 return
166 # anything in revs > start is definitely not an ancestor of bases
166 # anything in revs > start is definitely not an ancestor of bases
167 # revs <= start needs to be investigated
167 # revs <= start needs to be investigated
168 start = max(bases)
168 start = max(bases)
169 keepcount = sum(1 for r in revs if r > start)
169 keepcount = sum(1 for r in revs if r > start)
170 if len(revs) == keepcount:
170 if len(revs) == keepcount:
171 # no revs to consider
171 # no revs to consider
172 return
172 return
173
173
174 for curr in xrange(start, min(revs) - 1, -1):
174 for curr in xrange(start, min(revs) - 1, -1):
175 if curr not in bases:
175 if curr not in bases:
176 continue
176 continue
177 revs.discard(curr)
177 revs.discard(curr)
178 bases.update(pfunc(curr))
178 bases.update(pfunc(curr))
179 if len(revs) == keepcount:
179 if len(revs) == keepcount:
180 # no more potential revs to discard
180 # no more potential revs to discard
181 break
181 break
182
182
183 def missingancestors(self, revs):
183 def missingancestors(self, revs):
184 '''return all the ancestors of revs that are not ancestors of self.bases
184 '''return all the ancestors of revs that are not ancestors of self.bases
185
185
186 This may include elements from revs.
186 This may include elements from revs.
187
187
188 Equivalent to the revset (::revs - ::self.bases). Revs are returned in
188 Equivalent to the revset (::revs - ::self.bases). Revs are returned in
189 revision number order, which is a topological order.'''
189 revision number order, which is a topological order.'''
190 revsvisit = set(revs)
190 revsvisit = set(revs)
191 basesvisit = self.bases
191 basesvisit = self.bases
192 pfunc = self.pfunc
192 pfunc = self.pfunc
193 bothvisit = revsvisit.intersection(basesvisit)
193 bothvisit = revsvisit.intersection(basesvisit)
194 revsvisit.difference_update(bothvisit)
194 revsvisit.difference_update(bothvisit)
195 if not revsvisit:
195 if not revsvisit:
196 return []
196 return []
197
197
198 start = max(max(revsvisit), max(basesvisit))
198 start = max(max(revsvisit), max(basesvisit))
199 # At this point, we hold the invariants that:
199 # At this point, we hold the invariants that:
200 # - revsvisit is the set of nodes we know are an ancestor of at least
200 # - revsvisit is the set of nodes we know are an ancestor of at least
201 # one of the nodes in revs
201 # one of the nodes in revs
202 # - basesvisit is the same for bases
202 # - basesvisit is the same for bases
203 # - bothvisit is the set of nodes we know are ancestors of at least one
203 # - bothvisit is the set of nodes we know are ancestors of at least one
204 # of the nodes in revs and one of the nodes in bases. bothvisit and
204 # of the nodes in revs and one of the nodes in bases. bothvisit and
205 # revsvisit are mutually exclusive, but bothvisit is a subset of
205 # revsvisit are mutually exclusive, but bothvisit is a subset of
206 # basesvisit.
206 # basesvisit.
207 # Now we walk down in reverse topo order, adding parents of nodes
207 # Now we walk down in reverse topo order, adding parents of nodes
208 # already visited to the sets while maintaining the invariants. When a
208 # already visited to the sets while maintaining the invariants. When a
209 # node is found in both revsvisit and basesvisit, it is removed from
209 # node is found in both revsvisit and basesvisit, it is removed from
210 # revsvisit and added to bothvisit. When revsvisit becomes empty, there
210 # revsvisit and added to bothvisit. When revsvisit becomes empty, there
211 # are no more ancestors of revs that aren't also ancestors of bases, so
211 # are no more ancestors of revs that aren't also ancestors of bases, so
212 # exit.
212 # exit.
213
213
214 missing = []
214 missing = []
215 for curr in xrange(start, nullrev, -1):
215 for curr in xrange(start, nullrev, -1):
216 if not revsvisit:
216 if not revsvisit:
217 break
217 break
218
218
219 if curr in bothvisit:
219 if curr in bothvisit:
220 bothvisit.remove(curr)
220 bothvisit.remove(curr)
221 # curr's parents might have made it into revsvisit through
221 # curr's parents might have made it into revsvisit through
222 # another path
222 # another path
223 for p in pfunc(curr):
223 for p in pfunc(curr):
224 revsvisit.discard(p)
224 revsvisit.discard(p)
225 basesvisit.add(p)
225 basesvisit.add(p)
226 bothvisit.add(p)
226 bothvisit.add(p)
227 continue
227 continue
228
228
229 if curr in revsvisit:
229 if curr in revsvisit:
230 missing.append(curr)
230 missing.append(curr)
231 revsvisit.remove(curr)
231 revsvisit.remove(curr)
232 thisvisit = revsvisit
232 thisvisit = revsvisit
233 othervisit = basesvisit
233 othervisit = basesvisit
234 elif curr in basesvisit:
234 elif curr in basesvisit:
235 thisvisit = basesvisit
235 thisvisit = basesvisit
236 othervisit = revsvisit
236 othervisit = revsvisit
237 else:
237 else:
238 # not an ancestor of revs or bases: ignore
238 # not an ancestor of revs or bases: ignore
239 continue
239 continue
240
240
241 for p in pfunc(curr):
241 for p in pfunc(curr):
242 if p == nullrev:
242 if p == nullrev:
243 pass
243 pass
244 elif p in othervisit or p in bothvisit:
244 elif p in othervisit or p in bothvisit:
245 # p is implicitly in thisvisit. This means p is or should be
245 # p is implicitly in thisvisit. This means p is or should be
246 # in bothvisit
246 # in bothvisit
247 revsvisit.discard(p)
247 revsvisit.discard(p)
248 basesvisit.add(p)
248 basesvisit.add(p)
249 bothvisit.add(p)
249 bothvisit.add(p)
250 else:
250 else:
251 # visit later
251 # visit later
252 thisvisit.add(p)
252 thisvisit.add(p)
253
253
254 missing.reverse()
254 missing.reverse()
255 return missing
255 return missing
256
256
257 class lazyancestors(object):
257 class lazyancestors(object):
258 def __init__(self, pfunc, revs, stoprev=0, inclusive=False):
258 def __init__(self, pfunc, revs, stoprev=0, inclusive=False):
259 """Create a new object generating ancestors for the given revs. Does
259 """Create a new object generating ancestors for the given revs. Does
260 not generate revs lower than stoprev.
260 not generate revs lower than stoprev.
261
261
262 This is computed lazily starting from revs. The object supports
262 This is computed lazily starting from revs. The object supports
263 iteration and membership.
263 iteration and membership.
264
264
265 cl should be a changelog and revs should be an iterable. inclusive is
265 cl should be a changelog and revs should be an iterable. inclusive is
266 a boolean that indicates whether revs should be included. Revs lower
266 a boolean that indicates whether revs should be included. Revs lower
267 than stoprev will not be generated.
267 than stoprev will not be generated.
268
268
269 Result does not include the null revision."""
269 Result does not include the null revision."""
270 self._parentrevs = pfunc
270 self._parentrevs = pfunc
271 self._initrevs = revs
271 self._initrevs = revs
272 self._stoprev = stoprev
272 self._stoprev = stoprev
273 self._inclusive = inclusive
273 self._inclusive = inclusive
274
274
275 # Initialize data structures for __contains__.
275 # Initialize data structures for __contains__.
276 # For __contains__, we use a heap rather than a deque because
276 # For __contains__, we use a heap rather than a deque because
277 # (a) it minimizes the number of parentrevs calls made
277 # (a) it minimizes the number of parentrevs calls made
278 # (b) it makes the loop termination condition obvious
278 # (b) it makes the loop termination condition obvious
279 # Python's heap is a min-heap. Multiply all values by -1 to convert it
279 # Python's heap is a min-heap. Multiply all values by -1 to convert it
280 # into a max-heap.
280 # into a max-heap.
281 self._containsvisit = [-rev for rev in revs]
281 self._containsvisit = [-rev for rev in revs]
282 heapq.heapify(self._containsvisit)
282 heapq.heapify(self._containsvisit)
283 if inclusive:
283 if inclusive:
284 self._containsseen = set(revs)
284 self._containsseen = set(revs)
285 else:
285 else:
286 self._containsseen = set()
286 self._containsseen = set()
287
287
288 def __nonzero__(self):
288 def __nonzero__(self):
289 """False if the set is empty, True otherwise."""
289 """False if the set is empty, True otherwise."""
290 try:
290 try:
291 iter(self).next()
291 iter(self).next()
292 return True
292 return True
293 except StopIteration:
293 except StopIteration:
294 return False
294 return False
295
295
296 def __iter__(self):
296 def __iter__(self):
297 """Generate the ancestors of _initrevs in reverse topological order.
297 """Generate the ancestors of _initrevs in reverse topological order.
298
298
299 If inclusive is False, yield a sequence of revision numbers starting
299 If inclusive is False, yield a sequence of revision numbers starting
300 with the parents of each revision in revs, i.e., each revision is *not*
300 with the parents of each revision in revs, i.e., each revision is *not*
301 considered an ancestor of itself. Results are in breadth-first order:
301 considered an ancestor of itself. Results are in breadth-first order:
302 parents of each rev in revs, then parents of those, etc.
302 parents of each rev in revs, then parents of those, etc.
303
303
304 If inclusive is True, yield all the revs first (ignoring stoprev),
304 If inclusive is True, yield all the revs first (ignoring stoprev),
305 then yield all the ancestors of revs as when inclusive is False.
305 then yield all the ancestors of revs as when inclusive is False.
306 If an element in revs is an ancestor of a different rev it is not
306 If an element in revs is an ancestor of a different rev it is not
307 yielded again."""
307 yielded again."""
308 seen = set()
308 seen = set()
309 revs = self._initrevs
309 revs = self._initrevs
310 if self._inclusive:
310 if self._inclusive:
311 for rev in revs:
311 for rev in revs:
312 yield rev
312 yield rev
313 seen.update(revs)
313 seen.update(revs)
314
314
315 parentrevs = self._parentrevs
315 parentrevs = self._parentrevs
316 stoprev = self._stoprev
316 stoprev = self._stoprev
317 visit = collections.deque(revs)
317 visit = collections.deque(revs)
318
318
319 see = seen.add
320 schedule = visit.append
321
319 while visit:
322 while visit:
320 for parent in parentrevs(visit.popleft()):
323 for parent in parentrevs(visit.popleft()):
321 if parent >= stoprev and parent not in seen:
324 if parent >= stoprev and parent not in seen:
322 visit.append(parent)
325 schedule(parent)
323 seen.add(parent)
326 see(parent)
324 yield parent
327 yield parent
325
328
326 def __contains__(self, target):
329 def __contains__(self, target):
327 """Test whether target is an ancestor of self._initrevs."""
330 """Test whether target is an ancestor of self._initrevs."""
328 # Trying to do both __iter__ and __contains__ using the same visit
331 # Trying to do both __iter__ and __contains__ using the same visit
329 # heap and seen set is complex enough that it slows down both. Keep
332 # heap and seen set is complex enough that it slows down both. Keep
330 # them separate.
333 # them separate.
331 seen = self._containsseen
334 seen = self._containsseen
332 if target in seen:
335 if target in seen:
333 return True
336 return True
334
337
335 parentrevs = self._parentrevs
338 parentrevs = self._parentrevs
336 visit = self._containsvisit
339 visit = self._containsvisit
337 stoprev = self._stoprev
340 stoprev = self._stoprev
338 heappop = heapq.heappop
341 heappop = heapq.heappop
339 heappush = heapq.heappush
342 heappush = heapq.heappush
343 see = seen.add
340
344
341 targetseen = False
345 targetseen = False
342
346
343 while visit and -visit[0] > target and not targetseen:
347 while visit and -visit[0] > target and not targetseen:
344 for parent in parentrevs(-heappop(visit)):
348 for parent in parentrevs(-heappop(visit)):
345 if parent < stoprev or parent in seen:
349 if parent < stoprev or parent in seen:
346 continue
350 continue
347 # We need to make sure we push all parents into the heap so
351 # We need to make sure we push all parents into the heap so
348 # that we leave it in a consistent state for future calls.
352 # that we leave it in a consistent state for future calls.
349 heappush(visit, -parent)
353 heappush(visit, -parent)
350 seen.add(parent)
354 see(parent)
351 if parent == target:
355 if parent == target:
352 targetseen = True
356 targetseen = True
353
357
354 return targetseen
358 return targetseen
General Comments 0
You need to be logged in to leave comments. Login now