##// END OF EJS Templates
ancestor: add a way to add to bases of a missing ancestor object...
Siddharth Agarwal -
r23341:bcc3012f default
parent child Browse files
Show More
@@ -1,324 +1,328
1 1 # ancestor.py - generic DAG ancestor algorithm for mercurial
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import heapq
9 9 import util
10 10 from node import nullrev
11 11
def commonancestorsheads(pfunc, *nodes):
    """Returns a set with the heads of all common ancestors of all nodes,
    heads(::nodes[0] and ::nodes[1] and ...) .

    pfunc must return a list of parent vertices for a given vertex.
    """
    # *nodes arrives as a tuple, so this conversion always runs; the
    # isinstance check is just a cheap guard.
    if not isinstance(nodes, set):
        nodes = set(nodes)
    if nullrev in nodes:
        return set()
    if len(nodes) <= 1:
        return nodes

    # Bitmask bookkeeping: bit i of seen[v] is set once v is known to be
    # an ancestor of the i-th input; allseen means "ancestor of them all".
    allseen = (1 << len(nodes)) - 1
    seen = [0] * (max(nodes) + 1)
    for i, n in enumerate(nodes):
        seen[n] = 1 << i
    # poison is one bit above every input bit; a vertex whose seen value
    # has it set lies at-or-below an already-found common ancestor head.
    poison = 1 << (i + 1)

    gca = set()
    # count of vertices whose seen[] value is live (not yet poisoned)
    interesting = len(nodes)
    # Parents always have lower revision numbers than children, so
    # scanning from the highest revision down is a reverse topo sweep.
    nv = len(seen) - 1
    while nv >= 0 and interesting:
        v = nv
        nv -= 1
        if not seen[v]:
            continue
        sv = seen[v]
        if sv < poison:
            interesting -= 1
            if sv == allseen:
                # v is an ancestor of every input: a gca head
                gca.add(v)
                sv |= poison
                if v in nodes:
                    # history is linear
                    return set([v])
        if sv < poison:
            # v still live: push its ancestry bits down to its parents
            for p in pfunc(v):
                sp = seen[p]
                if p == nullrev:
                    continue
                if sp == 0:
                    # first time p is reached
                    seen[p] = sv
                    interesting += 1
                elif sp != sv:
                    seen[p] |= sv
        else:
            # v poisoned: poison its parents too, retiring any that were
            # still counted as interesting
            for p in pfunc(v):
                if p == nullrev:
                    continue
                sp = seen[p]
                if sp and sp < poison:
                    interesting -= 1
                seen[p] = sv
    return gca
67 67
def ancestors(pfunc, *orignodes):
    """
    Returns the common ancestors of a and b that are furthest from a
    root (as measured by longest path).

    pfunc must return a list of parent vertices for a given vertex.
    """
    def deepest(nodes):
        # Of the gca heads in `nodes`, keep only those at maximal depth
        # (longest path from a root).
        # interesting maps a bitmask of input nodes -> how many vertices
        # currently carry exactly that mask in seen[].
        interesting = {}
        count = max(nodes) + 1
        depth = [0] * count
        seen = [0] * count
        mapping = []  # (bit, node) pairs, one bit per input node
        for (i, n) in enumerate(sorted(nodes)):
            depth[n] = 1
            b = 1 << i
            seen[n] = b
            interesting[b] = 1
            mapping.append((b, n))
        # reverse topological sweep; stop once a single mask remains
        nv = count - 1
        while nv >= 0 and len(interesting) > 1:
            v = nv
            nv -= 1
            dv = depth[v]
            if dv == 0:
                # v is not an ancestor of any input node
                continue
            sv = seen[v]
            for p in pfunc(v):
                if p == nullrev:
                    continue
                dp = depth[p]
                nsp = sp = seen[p]
                if dp <= dv:
                    # longer path to p found: p inherits v's depth + mask
                    depth[p] = dv + 1
                    if sp != sv:
                        interesting[sv] += 1
                        nsp = seen[p] = sv
                        if sp:
                            # p's old mask loses one carrier
                            interesting[sp] -= 1
                            if interesting[sp] == 0:
                                del interesting[sp]
                elif dv == dp - 1:
                    # equal-length path: merge masks instead of replacing
                    nsp = sp | sv
                    if nsp == sp:
                        continue
                    seen[p] = nsp
                    interesting.setdefault(nsp, 0)
                    interesting[nsp] += 1
                    interesting[sp] -= 1
                    if interesting[sp] == 0:
                        del interesting[sp]
            # v itself no longer carries sv
            interesting[sv] -= 1
            if interesting[sv] == 0:
                del interesting[sv]

        if len(interesting) != 1:
            return []

        # the surviving mask's bits identify the deepest input nodes
        k = 0
        for i in interesting:
            k |= i
        return set(n for (i, n) in mapping if k & i)

    gca = commonancestorsheads(pfunc, *orignodes)

    if len(gca) <= 1:
        return gca
    return deepest(gca)
136 136
class incrementalmissingancestors(object):
    '''persistent state used to calculate missing ancestors incrementally

    Although similar in spirit to lazyancestors below, this is a separate class
    because trying to support contains and missingancestors operations with the
    same internal data structures adds needless complexity.'''
    def __init__(self, pfunc, bases):
        # pfunc must return a list of parent revisions for a revision
        self.bases = set(bases)
        if not self.bases:
            # an empty base set behaves like "only the null revision"
            self.bases.add(nullrev)
        self.pfunc = pfunc

    def hasbases(self):
        '''whether the common set has any non-trivial bases'''
        return self.bases and self.bases != set([nullrev])

    def addbases(self, newbases):
        '''grow the ancestor set by adding new bases'''
        self.bases.update(newbases)

    def missingancestors(self, revs):
        '''return all the ancestors of revs that are not ancestors of self.bases

        This may include elements from revs.

        Equivalent to the revset (::revs - ::self.bases). Revs are returned in
        revision number order, which is a topological order.'''
        revsvisit = set(revs)
        # NOTE: basesvisit aliases self.bases and is mutated below -- that
        # mutation is what makes repeated calls incremental.
        basesvisit = self.bases
        pfunc = self.pfunc
        bothvisit = revsvisit.intersection(basesvisit)
        revsvisit.difference_update(bothvisit)
        if not revsvisit:
            return []

        start = max(max(revsvisit), max(basesvisit))
        # At this point, we hold the invariants that:
        # - revsvisit is the set of nodes we know are an ancestor of at least
        #   one of the nodes in revs
        # - basesvisit is the same for bases
        # - bothvisit is the set of nodes we know are ancestors of at least one
        #   of the nodes in revs and one of the nodes in bases. bothvisit and
        #   revsvisit are mutually exclusive, but bothvisit is a subset of
        #   basesvisit.
        # Now we walk down in reverse topo order, adding parents of nodes
        # already visited to the sets while maintaining the invariants. When a
        # node is found in both revsvisit and basesvisit, it is removed from
        # revsvisit and added to bothvisit. When revsvisit becomes empty, there
        # are no more ancestors of revs that aren't also ancestors of bases, so
        # exit.

        missing = []
        for curr in xrange(start, nullrev, -1):
            if not revsvisit:
                break

            if curr in bothvisit:
                bothvisit.remove(curr)
                # curr's parents might have made it into revsvisit through
                # another path
                for p in pfunc(curr):
                    revsvisit.discard(p)
                    basesvisit.add(p)
                    bothvisit.add(p)
                continue

            if curr in revsvisit:
                # ancestor of revs only: part of the answer
                missing.append(curr)
                revsvisit.remove(curr)
                thisvisit = revsvisit
                othervisit = basesvisit
            elif curr in basesvisit:
                thisvisit = basesvisit
                othervisit = revsvisit
            else:
                # not an ancestor of revs or bases: ignore
                continue

            for p in pfunc(curr):
                if p == nullrev:
                    pass
                elif p in othervisit or p in bothvisit:
                    # p is implicitly in thisvisit. This means p is or should be
                    # in bothvisit
                    revsvisit.discard(p)
                    basesvisit.add(p)
                    bothvisit.add(p)
                else:
                    # visit later
                    thisvisit.add(p)

        # missing was accumulated in descending order; callers get ascending
        missing.reverse()
        return missing
226 230
class lazyancestors(object):
    def __init__(self, pfunc, revs, stoprev=0, inclusive=False):
        """Create a new object generating ancestors for the given revs. Does
        not generate revs lower than stoprev.

        This is computed lazily starting from revs. The object supports
        iteration and membership.

        cl should be a changelog and revs should be an iterable. inclusive is
        a boolean that indicates whether revs should be included. Revs lower
        than stoprev will not be generated.

        Result does not include the null revision."""
        self._parentrevs = pfunc
        self._initrevs = revs
        self._stoprev = stoprev
        self._inclusive = inclusive

        # Initialize data structures for __contains__.
        # For __contains__, we use a heap rather than a deque because
        # (a) it minimizes the number of parentrevs calls made
        # (b) it makes the loop termination condition obvious
        # Python's heap is a min-heap. Multiply all values by -1 to convert it
        # into a max-heap.
        self._containsvisit = [-rev for rev in revs]
        heapq.heapify(self._containsvisit)
        if inclusive:
            self._containsseen = set(revs)
        else:
            self._containsseen = set()

    def __nonzero__(self):
        """False if the set is empty, True otherwise."""
        # Python 2 truth protocol; probes laziness by pulling one element.
        try:
            iter(self).next()
            return True
        except StopIteration:
            return False

    def __iter__(self):
        """Generate the ancestors of _initrevs in reverse topological order.

        If inclusive is False, yield a sequence of revision numbers starting
        with the parents of each revision in revs, i.e., each revision is *not*
        considered an ancestor of itself.  Results are in breadth-first order:
        parents of each rev in revs, then parents of those, etc.

        If inclusive is True, yield all the revs first (ignoring stoprev),
        then yield all the ancestors of revs as when inclusive is False.
        If an element in revs is an ancestor of a different rev it is not
        yielded again."""
        seen = set()
        revs = self._initrevs
        if self._inclusive:
            for rev in revs:
                yield rev
            seen.update(revs)

        parentrevs = self._parentrevs
        stoprev = self._stoprev
        # FIFO queue gives the documented breadth-first order
        visit = util.deque(revs)

        while visit:
            for parent in parentrevs(visit.popleft()):
                if parent >= stoprev and parent not in seen:
                    visit.append(parent)
                    seen.add(parent)
                    yield parent

    def __contains__(self, target):
        """Test whether target is an ancestor of self._initrevs."""
        # Trying to do both __iter__ and __contains__ using the same visit
        # heap and seen set is complex enough that it slows down both. Keep
        # them separate.
        seen = self._containsseen
        if target in seen:
            return True

        parentrevs = self._parentrevs
        # _containsvisit persists across calls, so each call resumes the
        # walk where the previous one stopped
        visit = self._containsvisit
        stoprev = self._stoprev
        heappop = heapq.heappop
        heappush = heapq.heappush

        targetseen = False

        # heap holds negated revs, so -visit[0] is the largest unvisited
        # rev; once it is <= target, target can no longer appear
        while visit and -visit[0] > target and not targetseen:
            for parent in parentrevs(-heappop(visit)):
                if parent < stoprev or parent in seen:
                    continue
                # We need to make sure we push all parents into the heap so
                # that we leave it in a consistent state for future calls.
                heappush(visit, -parent)
                seen.add(parent)
                if parent == target:
                    targetseen = True

        return targetseen
@@ -1,221 +1,228
1 1 from mercurial import ancestor, commands, hg, ui, util
2 2 from mercurial.node import nullrev
3 3 import binascii, getopt, math, os, random, sys, time
4 4
def buildgraph(rng, nodes=100, rootprob=0.05, mergeprob=0.2, prevprob=0.7):
    '''nodes: total number of nodes in the graph
    rootprob: probability that a new node (not 0) will be a root
    mergeprob: probability that, excluding a root a node will be a merge
    prevprob: probability that p1 will be the previous node

    return value is a graph represented as an adjacency list.
    '''
    # NOTE: the exact sequence of rng calls is part of this function's
    # contract -- the test harness replays graphs from a seed.
    graph = [None] * nodes
    for i in xrange(nodes):
        if i == 0 or rng.random() < rootprob:
            # roots have the null revision as their only parent
            graph[i] = [nullrev]
        elif i == 1:
            graph[i] = [0]
        elif rng.random() < mergeprob:
            if i == 2 or rng.random() < prevprob:
                # p1 is prev
                p1 = i - 1
            else:
                p1 = rng.randrange(i - 1)
            # p2 is any earlier node other than p1 (Python 2 list concat)
            p2 = rng.choice(range(0, p1) + range(p1 + 1, i))
            graph[i] = [p1, p2]
        elif rng.random() < prevprob:
            graph[i] = [i - 1]
        else:
            graph[i] = [rng.randrange(i - 1)]

    return graph
33 33
def buildancestorsets(graph):
    '''Return a list where entry i is the full (inclusive) ancestor set
    of node i.

    Works in a single forward pass: parents always have smaller indices
    than their children, so every parent's set is finished before it is
    consumed.'''
    ancs = [None] * len(graph)
    for node in xrange(len(graph)):
        collected = set([node])
        if graph[node] != [nullrev]:
            for parent in graph[node]:
                collected.update(ancs[parent])
        ancs[node] = collected
    return ancs
43 43
class naiveincrementalmissingancestors(object):
    '''brute-force reference for incrementalmissingancestors, backed by
    precomputed per-node ancestor sets'''
    def __init__(self, ancs, bases):
        self.ancs = ancs
        self.bases = set(bases)

    def addbases(self, newbases):
        '''grow the base set, mirroring the fast implementation'''
        self.bases.update(newbases)

    def missingancestors(self, revs):
        '''return sorted(::revs - ::bases), computed set-theoretically'''
        wanted = set()
        for rev in revs:
            if rev != nullrev:
                wanted |= self.ancs[rev]
        for base in self.bases:
            if base != nullrev:
                wanted -= self.ancs[base]
        return sorted(wanted)
57 59
def test_missingancestors(seed, rng):
    '''fuzz incrementalmissingancestors against the naive reference on
    random DAGs, reporting any divergence together with the seed'''
    # empirically observed to take around 1 second
    graphcount = 100
    testcount = 10
    inccount = 10
    # one-element list so err() below can mutate it from its closure
    nerrs = [0]
    # the default mu and sigma give us a nice distribution of mostly
    # single-digit counts (including 0) with some higher ones
    def lognormrandom(mu, sigma):
        return int(math.floor(rng.lognormvariate(mu, sigma)))

    def samplerevs(nodes, mu=1.1, sigma=0.8):
        count = min(lognormrandom(mu, sigma), len(nodes))
        return rng.sample(nodes, count)

    def err(seed, graph, bases, seq, output, expected):
        # dump enough state to replay the failure; seed/graph are printed
        # only once per run / per graph respectively
        if nerrs[0] == 0:
            print >> sys.stderr, 'seed:', hex(seed)[:-1]
        if gerrs[0] == 0:
            print >> sys.stderr, 'graph:', graph
        print >> sys.stderr, '* bases:', bases
        print >> sys.stderr, '* seq: ', seq
        print >> sys.stderr, '* output: ', output
        print >> sys.stderr, '* expected:', expected
        nerrs[0] += 1
        gerrs[0] += 1

    for g in xrange(graphcount):
        graph = buildgraph(rng)
        ancs = buildancestorsets(graph)
        # per-graph error counter; err() reads it via closure
        gerrs = [0]
        for _ in xrange(testcount):
            # start from nullrev to include it as a possibility
            graphnodes = range(nullrev, len(graph))
            bases = samplerevs(graphnodes)

            # fast algorithm
            inc = ancestor.incrementalmissingancestors(graph.__getitem__, bases)
            # reference slow algorithm
            naiveinc = naiveincrementalmissingancestors(ancs, bases)
            # seq records the operations performed, for error replay
            seq = []
            for _ in xrange(inccount):
                if rng.random() < 0.2:
                    # occasionally grow the base set on both implementations
                    newbases = samplerevs(graphnodes)
                    seq.append(('addbases', newbases))
                    inc.addbases(newbases)
                    naiveinc.addbases(newbases)
                revs = samplerevs(graphnodes)
                seq.append(('missingancestors', revs))
                h = inc.missingancestors(revs)
                r = naiveinc.missingancestors(revs)
                if h != r:
                    err(seed, graph, bases, seq, h, r)
106 113
# graph is a dict of child->parent adjacency lists for this graph:
# o  13
# |
# | o  12
# | |
# | | o    11
# | | |\
# | | | | o  10
# | | | | |
# | o---+ |  9
# | | | | |
# o | | | |  8
#  / / / /
# | | o |  7
# | | | |
# o---+ |  6
#  / / /
# | | o  5
# | |/
# | o  4
# | |
# o |  3
# | |
# | o  2
# |/
# o  1
# |
# o  0
# -1 denotes the null revision; 0 and 8 are the roots of the two
# disconnected components drawn above.
graph = {0: [-1], 1: [0], 2: [1], 3: [1], 4: [2], 5: [4], 6: [4],
         7: [4], 8: [-1], 9: [6, 7], 10: [5], 11: [3, 7], 12: [9],
         13: [8]}
139 146
140 147 def genlazyancestors(revs, stoprev=0, inclusive=False):
141 148 print ("%% lazy ancestor set for %s, stoprev = %s, inclusive = %s" %
142 149 (revs, stoprev, inclusive))
143 150 return ancestor.lazyancestors(graph.get, revs, stoprev=stoprev,
144 151 inclusive=inclusive)
145 152
146 153 def printlazyancestors(s, l):
147 154 print 'membership: %r' % [n for n in l if n in s]
148 155 print 'iteration: %r' % list(s)
149 156
def test_lazyancestors():
    '''drive lazyancestors through a fixed table of scenarios'''
    fullprobe = [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0]
    # (revs, keyword args, membership probe list)
    scenarios = [
        ([], {}, [3, 0, -1]),                 # empty revs
        ([11, 13], {}, fullprobe),            # standard example
        ([1, 3], {}, [1, -1, 0]),             # ancestry in the initial set
        ([11, 13], {'inclusive': True}, fullprobe),   # including revs
        ([11, 13], {'stoprev': 6}, fullprobe),        # with stoprev
        ([11, 13], {'stoprev': 6, 'inclusive': True}, fullprobe),
    ]
    for revs, kwargs, probe in scenarios:
        s = genlazyancestors(revs, **kwargs)
        printlazyancestors(s, probe)
172 179
173 180
174 181 # The C gca algorithm requires a real repo. These are textual descriptions of
175 182 # DAGs that have been known to be problematic.
176 183 dagtests = [
177 184 '+2*2*2/*3/2',
178 185 '+3*3/*2*2/*4*4/*4/2*4/2*2',
179 186 ]
180 187 def test_gca():
181 188 u = ui.ui()
182 189 for i, dag in enumerate(dagtests):
183 190 repo = hg.repository(u, 'gca%d' % i, create=1)
184 191 cl = repo.changelog
185 192 if not util.safehasattr(cl.index, 'ancestors'):
186 193 # C version not available
187 194 return
188 195
189 196 commands.debugbuilddag(u, repo, dag)
190 197 # Compare the results of the Python and C versions. This does not
191 198 # include choosing a winner when more than one gca exists -- we make
192 199 # sure both return exactly the same set of gcas.
193 200 for a in cl:
194 201 for b in cl:
195 202 cgcas = sorted(cl.index.ancestors(a, b))
196 203 pygcas = sorted(ancestor.ancestors(cl.parentrevs, a, b))
197 204 if cgcas != pygcas:
198 205 print "test_gca: for dag %s, gcas for %d, %d:" % (dag, a, b)
199 206 print " C returned: %s" % cgcas
200 207 print " Python returned: %s" % pygcas
201 208
def main():
    '''parse an optional seed from the command line, build a seeded RNG
    and run every test'''
    seed = None
    options, _args = getopt.getopt(sys.argv[1:], 's:', ['seed='])
    for opt, val in options:
        if opt in ('-s', '--seed'):
            # accepts base 10 or 16 strings
            seed = long(val, base=0)

    if seed is None:
        try:
            seed = long(binascii.hexlify(os.urandom(16)), 16)
        except AttributeError:
            # platform without os.urandom: fall back to the clock
            seed = long(time.time() * 1000)

    rng = random.Random(seed)
    test_missingancestors(seed, rng)
    test_lazyancestors()
    test_gca()

if __name__ == '__main__':
    main()
General Comments 0
You need to be logged in to leave comments. Login now