ancestor.missingancestors: turn into a state-keeping class...
Siddharth Agarwal
r23334:59e6e5dd default
@@ -1,312 +1,324 @@
1 1 # ancestor.py - generic DAG ancestor algorithm for mercurial
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import heapq
9 9 import util
10 10 from node import nullrev
11 11
12 12 def commonancestorsheads(pfunc, *nodes):
13 13 """Returns a set with the heads of all common ancestors of all nodes,
14 14 heads(::nodes[0] and ::nodes[1] and ...) .
15 15
16 16 pfunc must return a list of parent vertices for a given vertex.
17 17 """
18 18 if not isinstance(nodes, set):
19 19 nodes = set(nodes)
20 20 if nullrev in nodes:
21 21 return set()
22 22 if len(nodes) <= 1:
23 23 return nodes
24 24
25 25 allseen = (1 << len(nodes)) - 1
26 26 seen = [0] * (max(nodes) + 1)
27 27 for i, n in enumerate(nodes):
28 28 seen[n] = 1 << i
29 29 poison = 1 << (i + 1)
30 30
31 31 gca = set()
32 32 interesting = len(nodes)
33 33 nv = len(seen) - 1
34 34 while nv >= 0 and interesting:
35 35 v = nv
36 36 nv -= 1
37 37 if not seen[v]:
38 38 continue
39 39 sv = seen[v]
40 40 if sv < poison:
41 41 interesting -= 1
42 42 if sv == allseen:
43 43 gca.add(v)
44 44 sv |= poison
45 45 if v in nodes:
46 46 # history is linear
47 47 return set([v])
48 48 if sv < poison:
49 49 for p in pfunc(v):
50 50 sp = seen[p]
51 51 if p == nullrev:
52 52 continue
53 53 if sp == 0:
54 54 seen[p] = sv
55 55 interesting += 1
56 56 elif sp != sv:
57 57 seen[p] |= sv
58 58 else:
59 59 for p in pfunc(v):
60 60 if p == nullrev:
61 61 continue
62 62 sp = seen[p]
63 63 if sp and sp < poison:
64 64 interesting -= 1
65 65 seen[p] = sv
66 66 return gca
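
As an illustrative sketch (not part of this changeset), commonancestorsheads can be driven by a plain dict-backed parent function. The toy DAG and the expected result below are the editor's assumptions; the module is assumed to be importable as mercurial.ancestor:

    from mercurial import ancestor
    from mercurial.node import nullrev

    # Toy DAG: rev -> list of parent revs.  Revision 1 is the fork point
    # for 2 and 3, and 4 merges them back together.
    parents = {0: [nullrev], 1: [0], 2: [1], 3: [1], 4: [2, 3]}

    # heads(::2 and ::3) == {1}: revision 1 heads the common-ancestor set.
    assert ancestor.commonancestorsheads(parents.__getitem__, 2, 3) == set([1])
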
67 67
68 68 def ancestors(pfunc, *orignodes):
69 69 """
70 70 Returns the common ancestors of a and b that are furthest from a
71 71 root (as measured by longest path).
72 72
73 73 pfunc must return a list of parent vertices for a given vertex.
74 74 """
75 75 def deepest(nodes):
76 76 interesting = {}
77 77 count = max(nodes) + 1
78 78 depth = [0] * count
79 79 seen = [0] * count
80 80 mapping = []
81 81 for (i, n) in enumerate(sorted(nodes)):
82 82 depth[n] = 1
83 83 b = 1 << i
84 84 seen[n] = b
85 85 interesting[b] = 1
86 86 mapping.append((b, n))
87 87 nv = count - 1
88 88 while nv >= 0 and len(interesting) > 1:
89 89 v = nv
90 90 nv -= 1
91 91 dv = depth[v]
92 92 if dv == 0:
93 93 continue
94 94 sv = seen[v]
95 95 for p in pfunc(v):
96 96 if p == nullrev:
97 97 continue
98 98 dp = depth[p]
99 99 nsp = sp = seen[p]
100 100 if dp <= dv:
101 101 depth[p] = dv + 1
102 102 if sp != sv:
103 103 interesting[sv] += 1
104 104 nsp = seen[p] = sv
105 105 if sp:
106 106 interesting[sp] -= 1
107 107 if interesting[sp] == 0:
108 108 del interesting[sp]
109 109 elif dv == dp - 1:
110 110 nsp = sp | sv
111 111 if nsp == sp:
112 112 continue
113 113 seen[p] = nsp
114 114 interesting.setdefault(nsp, 0)
115 115 interesting[nsp] += 1
116 116 interesting[sp] -= 1
117 117 if interesting[sp] == 0:
118 118 del interesting[sp]
119 119 interesting[sv] -= 1
120 120 if interesting[sv] == 0:
121 121 del interesting[sv]
122 122
123 123 if len(interesting) != 1:
124 124 return []
125 125
126 126 k = 0
127 127 for i in interesting:
128 128 k |= i
129 129 return set(n for (i, n) in mapping if k & i)
130 130
131 131 gca = commonancestorsheads(pfunc, *orignodes)
132 132
133 133 if len(gca) <= 1:
134 134 return gca
135 135 return deepest(gca)
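
Under the same assumptions, an editor's sketch of how ancestors() differs from commonancestorsheads(): it only matters when several gca heads exist, in which case the heads furthest from a root are kept. The DAG and expected values here are hand-worked, not taken from the changeset:

    from mercurial import ancestor
    from mercurial.node import nullrev

    # 4 and 5 both merge 1 and 3; the common-ancestor heads are {1, 3},
    # and 3 is the deeper of the two (one hop further from the root than 1).
    parents = {0: [nullrev], 1: [0], 2: [0], 3: [2], 4: [1, 3], 5: [1, 3]}
    pfunc = parents.__getitem__

    assert ancestor.commonancestorsheads(pfunc, 4, 5) == set([1, 3])
    assert ancestor.ancestors(pfunc, 4, 5) == set([3])
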
136 136
137 def missingancestors(revs, bases, pfunc):
138 """Return all the ancestors of revs that are not ancestors of bases.
139
140 This may include elements from revs.
137 class incrementalmissingancestors(object):
138 '''persistent state used to calculate missing ancestors incrementally
141 139
142 Equivalent to the revset (::revs - ::bases). Revs are returned in
143 revision number order, which is a topological order.
144
145 revs and bases should both be iterables. pfunc must return a list of
146 parent revs for a given revs.
147 """
140 Although similar in spirit to lazyancestors below, this is a separate class
141 because trying to support contains and missingancestors operations with the
142 same internal data structures adds needless complexity.'''
143 def __init__(self, pfunc, bases):
144 self.bases = set(bases)
145 if not self.bases:
146 self.bases.add(nullrev)
147 self.pfunc = pfunc
148 148
149 revsvisit = set(revs)
150 basesvisit = set(bases)
151 if not basesvisit:
152 basesvisit.add(nullrev)
153 bothvisit = revsvisit.intersection(basesvisit)
154 revsvisit.difference_update(bothvisit)
155 if not revsvisit:
156 return []
157 start = max(max(revsvisit), max(basesvisit))
158 # At this point, we hold the invariants that:
159 # - revsvisit is the set of nodes we know are an ancestor of at least one
160 # of the nodes in revs
161 # - basesvisit is the same for bases
162 # - bothvisit is the set of nodes we know are ancestors of at least one of
163 # the nodes in revs and one of the nodes in bases, and that are smaller
164 # than curr. bothvisit and revsvisit are mutually exclusive, but bothvisit
165 # is a subset of basesvisit.
166 # Now we walk down in reverse topo order, adding parents of nodes already
167 # visited to the sets while maintaining the invariants. When a node is found
168 # in both revsvisit and basesvisit, it is removed from revsvisit and added
169 # to bothvisit. When revsvisit becomes empty, there are no more ancestors of
170 # revs that aren't also ancestors of bases, so exit.
149 def missingancestors(self, revs):
150 '''return all the ancestors of revs that are not ancestors of self.bases
151
152 This may include elements from revs.
171 153
172 missing = []
173 for curr in xrange(start, nullrev, -1):
154 Equivalent to the revset (::revs - ::self.bases). Revs are returned in
155 revision number order, which is a topological order.'''
156 revsvisit = set(revs)
157 basesvisit = self.bases
158 pfunc = self.pfunc
159 bothvisit = revsvisit.intersection(basesvisit)
160 revsvisit.difference_update(bothvisit)
174 161 if not revsvisit:
175 break
162 return []
176 163
177 if curr in bothvisit:
178 bothvisit.remove(curr)
179 # curr's parents might have made it into revsvisit through another
180 # path
181 for p in pfunc(curr):
182 revsvisit.discard(p)
183 basesvisit.add(p)
184 bothvisit.add(p)
185 continue
164 start = max(max(revsvisit), max(basesvisit))
165 # At this point, we hold the invariants that:
166 # - revsvisit is the set of nodes we know are an ancestor of at least
167 # one of the nodes in revs
168 # - basesvisit is the same for bases
169 # - bothvisit is the set of nodes we know are ancestors of at least one
170 # of the nodes in revs and one of the nodes in bases. bothvisit and
171 # revsvisit are mutually exclusive, but bothvisit is a subset of
172 # basesvisit.
173 # Now we walk down in reverse topo order, adding parents of nodes
174 # already visited to the sets while maintaining the invariants. When a
175 # node is found in both revsvisit and basesvisit, it is removed from
176 # revsvisit and added to bothvisit. When revsvisit becomes empty, there
177 # are no more ancestors of revs that aren't also ancestors of bases, so
178 # exit.
179
180 missing = []
181 for curr in xrange(start, nullrev, -1):
182 if not revsvisit:
183 break
186 184
187 if curr in revsvisit:
188 missing.append(curr)
189 revsvisit.remove(curr)
190 thisvisit = revsvisit
191 othervisit = basesvisit
192 elif curr in basesvisit:
193 thisvisit = basesvisit
194 othervisit = revsvisit
195 else:
196 # not an ancestor of revs or bases: ignore
197 continue
185 if curr in bothvisit:
186 bothvisit.remove(curr)
187 # curr's parents might have made it into revsvisit through
188 # another path
189 for p in pfunc(curr):
190 revsvisit.discard(p)
191 basesvisit.add(p)
192 bothvisit.add(p)
193 continue
198 194
199 for p in pfunc(curr):
200 if p == nullrev:
201 pass
202 elif p in othervisit or p in bothvisit:
203 # p is implicitly in thisvisit. This means p is or should be
204 # in bothvisit
205 revsvisit.discard(p)
206 basesvisit.add(p)
207 bothvisit.add(p)
195 if curr in revsvisit:
196 missing.append(curr)
197 revsvisit.remove(curr)
198 thisvisit = revsvisit
199 othervisit = basesvisit
200 elif curr in basesvisit:
201 thisvisit = basesvisit
202 othervisit = revsvisit
208 203 else:
209 # visit later
210 thisvisit.add(p)
204 # not an ancestor of revs or bases: ignore
205 continue
211 206
212 missing.reverse()
213 return missing
207 for p in pfunc(curr):
208 if p == nullrev:
209 pass
210 elif p in othervisit or p in bothvisit:
211 # p is implicitly in thisvisit. This means p is or should be
212 # in bothvisit
213 revsvisit.discard(p)
214 basesvisit.add(p)
215 bothvisit.add(p)
216 else:
217 # visit later
218 thisvisit.add(p)
219
220 missing.reverse()
221 return missing
222
223 def missingancestors(revs, bases, pfunc):
224 inc = incrementalmissingancestors(pfunc, bases)
225 return inc.missingancestors(revs)
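
A minimal usage sketch for the new class (editor's illustration; the toy graph and expected lists are invented and hand-checked). The point of keeping state is that the bases-side sets survive across calls, so repeated missingancestors() queries against the same bases do not re-walk history already classified as an ancestor of the bases:

    from mercurial import ancestor
    from mercurial.node import nullrev

    parents = {0: [nullrev], 1: [0], 2: [1], 3: [1], 4: [3]}
    pfunc = parents.__getitem__

    inc = ancestor.incrementalmissingancestors(pfunc, [2])
    assert inc.missingancestors([4]) == [3, 4]   # ::4 - ::2, in revision order
    assert inc.missingancestors([3]) == [3]      # reuses state from the call above

    # The module-level wrapper preserves the old one-shot API:
    assert ancestor.missingancestors([4], [2], pfunc) == [3, 4]
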
214 226
215 227 class lazyancestors(object):
216 228 def __init__(self, pfunc, revs, stoprev=0, inclusive=False):
217 229 """Create a new object generating ancestors for the given revs. Does
218 230 not generate revs lower than stoprev.
219 231
220 232 This is computed lazily starting from revs. The object supports
221 233 iteration and membership.
222 234
223 235 cl should be a changelog and revs should be an iterable. inclusive is
224 236 a boolean that indicates whether revs should be included. Revs lower
225 237 than stoprev will not be generated.
226 238
227 239 Result does not include the null revision."""
228 240 self._parentrevs = pfunc
229 241 self._initrevs = revs
230 242 self._stoprev = stoprev
231 243 self._inclusive = inclusive
232 244
233 245 # Initialize data structures for __contains__.
234 246 # For __contains__, we use a heap rather than a deque because
235 247 # (a) it minimizes the number of parentrevs calls made
236 248 # (b) it makes the loop termination condition obvious
237 249 # Python's heap is a min-heap. Multiply all values by -1 to convert it
238 250 # into a max-heap.
239 251 self._containsvisit = [-rev for rev in revs]
240 252 heapq.heapify(self._containsvisit)
241 253 if inclusive:
242 254 self._containsseen = set(revs)
243 255 else:
244 256 self._containsseen = set()
245 257
246 258 def __nonzero__(self):
247 259 """False if the set is empty, True otherwise."""
248 260 try:
249 261 iter(self).next()
250 262 return True
251 263 except StopIteration:
252 264 return False
253 265
254 266 def __iter__(self):
255 267 """Generate the ancestors of _initrevs in reverse topological order.
256 268
257 269 If inclusive is False, yield a sequence of revision numbers starting
258 270 with the parents of each revision in revs, i.e., each revision is *not*
259 271 considered an ancestor of itself. Results are in breadth-first order:
260 272 parents of each rev in revs, then parents of those, etc.
261 273
262 274 If inclusive is True, yield all the revs first (ignoring stoprev),
263 275 then yield all the ancestors of revs as when inclusive is False.
264 276 If an element in revs is an ancestor of a different rev it is not
265 277 yielded again."""
266 278 seen = set()
267 279 revs = self._initrevs
268 280 if self._inclusive:
269 281 for rev in revs:
270 282 yield rev
271 283 seen.update(revs)
272 284
273 285 parentrevs = self._parentrevs
274 286 stoprev = self._stoprev
275 287 visit = util.deque(revs)
276 288
277 289 while visit:
278 290 for parent in parentrevs(visit.popleft()):
279 291 if parent >= stoprev and parent not in seen:
280 292 visit.append(parent)
281 293 seen.add(parent)
282 294 yield parent
283 295
284 296 def __contains__(self, target):
285 297 """Test whether target is an ancestor of self._initrevs."""
286 298 # Trying to do both __iter__ and __contains__ using the same visit
287 299 # heap and seen set is complex enough that it slows down both. Keep
288 300 # them separate.
289 301 seen = self._containsseen
290 302 if target in seen:
291 303 return True
292 304
293 305 parentrevs = self._parentrevs
294 306 visit = self._containsvisit
295 307 stoprev = self._stoprev
296 308 heappop = heapq.heappop
297 309 heappush = heapq.heappush
298 310
299 311 targetseen = False
300 312
301 313 while visit and -visit[0] > target and not targetseen:
302 314 for parent in parentrevs(-heappop(visit)):
303 315 if parent < stoprev or parent in seen:
304 316 continue
305 317 # We need to make sure we push all parents into the heap so
306 318 # that we leave it in a consistent state for future calls.
307 319 heappush(visit, -parent)
308 320 seen.add(parent)
309 321 if parent == target:
310 322 targetseen = True
311 323
312 324 return targetseen
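
For completeness, an editor's sketch of the lazyancestors class that the new code sits beside (same assumptions as the earlier sketches; the toy DAG is invented). Iteration and membership deliberately keep separate visit state, as the comments in __contains__ explain:

    from mercurial import ancestor
    from mercurial.node import nullrev

    parents = {0: [nullrev], 1: [0], 2: [1], 3: [1], 4: [3]}
    la = ancestor.lazyancestors(parents.__getitem__, [4], inclusive=False)

    assert list(la) == [3, 1, 0]   # breadth-first from 4's parents, 4 excluded
    assert 2 not in la             # 2 is on a branch 4 never touches
    assert 1 in la                 # answered from the separate __contains__ heap

The second hunk below updates the companion test script (presumably tests/test-ancestor.py) to exercise the new class.
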
@@ -1,211 +1,212 @@
1 1 from mercurial import ancestor, commands, hg, ui, util
2 2 from mercurial.node import nullrev
3 3 import binascii, getopt, math, os, random, sys, time
4 4
5 5 def buildgraph(rng, nodes=100, rootprob=0.05, mergeprob=0.2, prevprob=0.7):
6 6 '''nodes: total number of nodes in the graph
7 7 rootprob: probability that a new node (not 0) will be a root
8 8 mergeprob: probability that, excluding a root a node will be a merge
9 9 prevprob: probability that p1 will be the previous node
10 10
11 11 return value is a graph represented as an adjacency list.
12 12 '''
13 13 graph = [None] * nodes
14 14 for i in xrange(nodes):
15 15 if i == 0 or rng.random() < rootprob:
16 16 graph[i] = [nullrev]
17 17 elif i == 1:
18 18 graph[i] = [0]
19 19 elif rng.random() < mergeprob:
20 20 if i == 2 or rng.random() < prevprob:
21 21 # p1 is prev
22 22 p1 = i - 1
23 23 else:
24 24 p1 = rng.randrange(i - 1)
25 25 p2 = rng.choice(range(0, p1) + range(p1 + 1, i))
26 26 graph[i] = [p1, p2]
27 27 elif rng.random() < prevprob:
28 28 graph[i] = [i - 1]
29 29 else:
30 30 graph[i] = [rng.randrange(i - 1)]
31 31
32 32 return graph
33 33
34 34 def buildancestorsets(graph):
35 35 ancs = [None] * len(graph)
36 36 for i in xrange(len(graph)):
37 37 ancs[i] = set([i])
38 38 if graph[i] == [nullrev]:
39 39 continue
40 40 for p in graph[i]:
41 41 ancs[i].update(ancs[p])
42 42 return ancs
43 43
44 44 def naivemissingancestors(ancs, revs, bases):
45 45 res = set()
46 46 for rev in revs:
47 47 if rev != nullrev:
48 48 res.update(ancs[rev])
49 49 for base in bases:
50 50 if base != nullrev:
51 51 res.difference_update(ancs[base])
52 52 return sorted(res)
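
To make the reference machinery concrete (an editor's hand-checked example, not part of the test, and assuming the definitions above are in scope): for a three-node chain, buildancestorsets() produces cumulative ancestor sets and naivemissingancestors() reduces to plain set subtraction:

    chain = [[nullrev], [0], [1]]       # 0 <- 1 <- 2
    ancs = buildancestorsets(chain)     # [{0}, {0, 1}, {0, 1, 2}]
    assert naivemissingancestors(ancs, [2], [0]) == [1, 2]
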
53 53
54 54 def test_missingancestors(seed, rng):
55 55 # empirically observed to take around 1 second
56 56 graphcount = 100
57 57 testcount = 100
58 58 nerrs = [0]
59 59 # the default mu and sigma give us a nice distribution of mostly
60 60 # single-digit counts (including 0) with some higher ones
61 61 def lognormrandom(mu, sigma):
62 62 return int(math.floor(rng.lognormvariate(mu, sigma)))
63 63
64 64 def samplerevs(nodes, mu=1.1, sigma=0.8):
65 65 count = min(lognormrandom(mu, sigma), len(nodes))
66 66 return rng.sample(nodes, count)
67 67
68 68 def err(seed, graph, bases, revs, output, expected):
69 69 if nerrs[0] == 0:
70 70 print >> sys.stderr, 'seed:', hex(seed)[:-1]
71 71 if gerrs[0] == 0:
72 72 print >> sys.stderr, 'graph:', graph
73 73 print >> sys.stderr, '* bases:', bases
74 74 print >> sys.stderr, '* revs: ', revs
75 75 print >> sys.stderr, '* output: ', output
76 76 print >> sys.stderr, '* expected:', expected
77 77 nerrs[0] += 1
78 78 gerrs[0] += 1
79 79
80 80 for g in xrange(graphcount):
81 81 graph = buildgraph(rng)
82 82 ancs = buildancestorsets(graph)
83 83 gerrs = [0]
84 84 for _ in xrange(testcount):
85 85 # start from nullrev to include it as a possibility
86 86 graphnodes = range(nullrev, len(graph))
87 87 bases = samplerevs(graphnodes)
88 88 revs = samplerevs(graphnodes)
89 89
90 90 # fast algorithm
91 h = ancestor.missingancestors(revs, bases, graph.__getitem__)
91 inc = ancestor.incrementalmissingancestors(graph.__getitem__, bases)
92 h = inc.missingancestors(revs)
92 93 # reference slow algorithm
93 94 r = naivemissingancestors(ancs, revs, bases)
94 95 if h != r:
95 96 err(seed, graph, bases, revs, h, r)
96 97
97 98 # graph is a dict of child->parent adjacency lists for this graph:
98 99 # o 13
99 100 # |
100 101 # | o 12
101 102 # | |
102 103 # | | o 11
103 104 # | | |\
104 105 # | | | | o 10
105 106 # | | | | |
106 107 # | o---+ | 9
107 108 # | | | | |
108 109 # o | | | | 8
109 110 # / / / /
110 111 # | | o | 7
111 112 # | | | |
112 113 # o---+ | 6
113 114 # / / /
114 115 # | | o 5
115 116 # | |/
116 117 # | o 4
117 118 # | |
118 119 # o | 3
119 120 # | |
120 121 # | o 2
121 122 # |/
122 123 # o 1
123 124 # |
124 125 # o 0
125 126
126 127 graph = {0: [-1], 1: [0], 2: [1], 3: [1], 4: [2], 5: [4], 6: [4],
127 128 7: [4], 8: [-1], 9: [6, 7], 10: [5], 11: [3, 7], 12: [9],
128 129 13: [8]}
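
A hand-worked query against this fixed graph (editor's illustration; the expected values follow directly from the adjacency list above and are not part of the test output):

    # ::11 is {0, 1, 2, 3, 4, 7, 11} and ::8 is just {8}, so:
    inc = ancestor.incrementalmissingancestors(graph.__getitem__, [8])
    assert inc.missingancestors([11]) == [0, 1, 2, 3, 4, 7, 11]

    # The deepest common ancestor of 10 and 12 is 4:
    assert ancestor.ancestors(graph.__getitem__, 10, 12) == set([4])
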
129 130
130 131 def genlazyancestors(revs, stoprev=0, inclusive=False):
131 132 print ("%% lazy ancestor set for %s, stoprev = %s, inclusive = %s" %
132 133 (revs, stoprev, inclusive))
133 134 return ancestor.lazyancestors(graph.get, revs, stoprev=stoprev,
134 135 inclusive=inclusive)
135 136
136 137 def printlazyancestors(s, l):
137 138 print 'membership: %r' % [n for n in l if n in s]
138 139 print 'iteration: %r' % list(s)
139 140
140 141 def test_lazyancestors():
141 142 # Empty revs
142 143 s = genlazyancestors([])
143 144 printlazyancestors(s, [3, 0, -1])
144 145
145 146 # Standard example
146 147 s = genlazyancestors([11, 13])
147 148 printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
148 149
149 150 # Standard with ancestry in the initial set (1 is ancestor of 3)
150 151 s = genlazyancestors([1, 3])
151 152 printlazyancestors(s, [1, -1, 0])
152 153
153 154 # Including revs
154 155 s = genlazyancestors([11, 13], inclusive=True)
155 156 printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
156 157
157 158 # Test with stoprev
158 159 s = genlazyancestors([11, 13], stoprev=6)
159 160 printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
160 161 s = genlazyancestors([11, 13], stoprev=6, inclusive=True)
161 162 printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
162 163
163 164
164 165 # The C gca algorithm requires a real repo. These are textual descriptions of
165 166 # DAGs that have been known to be problematic.
166 167 dagtests = [
167 168 '+2*2*2/*3/2',
168 169 '+3*3/*2*2/*4*4/*4/2*4/2*2',
169 170 ]
170 171 def test_gca():
171 172 u = ui.ui()
172 173 for i, dag in enumerate(dagtests):
173 174 repo = hg.repository(u, 'gca%d' % i, create=1)
174 175 cl = repo.changelog
175 176 if not util.safehasattr(cl.index, 'ancestors'):
176 177 # C version not available
177 178 return
178 179
179 180 commands.debugbuilddag(u, repo, dag)
180 181 # Compare the results of the Python and C versions. This does not
181 182 # include choosing a winner when more than one gca exists -- we make
182 183 # sure both return exactly the same set of gcas.
183 184 for a in cl:
184 185 for b in cl:
185 186 cgcas = sorted(cl.index.ancestors(a, b))
186 187 pygcas = sorted(ancestor.ancestors(cl.parentrevs, a, b))
187 188 if cgcas != pygcas:
188 189 print "test_gca: for dag %s, gcas for %d, %d:" % (dag, a, b)
189 190 print " C returned: %s" % cgcas
190 191 print " Python returned: %s" % pygcas
191 192
192 193 def main():
193 194 seed = None
194 195 opts, args = getopt.getopt(sys.argv[1:], 's:', ['seed='])
195 196 for o, a in opts:
196 197 if o in ('-s', '--seed'):
197 198 seed = long(a, base=0) # accepts base 10 or 16 strings
198 199
199 200 if seed is None:
200 201 try:
201 202 seed = long(binascii.hexlify(os.urandom(16)), 16)
202 203 except AttributeError:
203 204 seed = long(time.time() * 1000)
204 205
205 206 rng = random.Random(seed)
206 207 test_missingancestors(seed, rng)
207 208 test_lazyancestors()
208 209 test_gca()
209 210
210 211 if __name__ == '__main__':
211 212 main()
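
Usage note (editor's, based on the option parsing above): to reproduce a reported failure, the script can presumably be rerun standalone with -s/--seed set to the seed printed by err(); long(a, base=0) accepts either a base-10 or a 0x-prefixed base-16 value. Without a seed, a random 128-bit seed is drawn from os.urandom when available, falling back to the current time.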