##// END OF EJS Templates
hbisect: use a defaultdict to avoid large allocations for a large changelogs...
David Soria Parra -
r35128:8287df8b default
parent child Browse files
Show More
@@ -1,300 +1,300
1 1 # changelog bisection for mercurial
2 2 #
3 3 # Copyright 2007 Matt Mackall
4 4 # Copyright 2005, 2006 Benoit Boissinot <benoit.boissinot@ens-lyon.org>
5 5 #
6 6 # Inspired by git bisect, extension skeleton taken from mq.py.
7 7 #
8 8 # This software may be used and distributed according to the terms of the
9 9 # GNU General Public License version 2 or any later version.
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import collections
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 hex,
18 18 short,
19 19 )
20 20 from . import (
21 21 error,
22 22 )
23 23
24 24 def bisect(repo, state):
25 25 """find the next node (if any) for testing during a bisect search.
26 26 returns a (nodes, number, good) tuple.
27 27
28 28 'nodes' is the final result of the bisect if 'number' is 0.
29 29 Otherwise 'number' indicates the remaining possible candidates for
30 30 the search and 'nodes' contains the next bisect target.
31 31 'good' is True if bisect is searching for a first good changeset, False
32 32 if searching for a first bad one.
33 33 """
34 34
35 35 changelog = repo.changelog
36 36 clparents = changelog.parentrevs
37 37 skip = set([changelog.rev(n) for n in state['skip']])
38 38
39 39 def buildancestors(bad, good):
40 40 badrev = min([changelog.rev(n) for n in bad])
41 ancestors = [None] * (len(changelog) + 1)
41 ancestors = collections.defaultdict(lambda: None)
42 42 for rev in repo.revs("descendants(%ln) - ancestors(%ln)", good, good):
43 43 ancestors[rev] = []
44 44 if ancestors[badrev] is None:
45 45 return badrev, None
46 46 return badrev, ancestors
47 47
48 48 good = False
49 49 badrev, ancestors = buildancestors(state['bad'], state['good'])
50 50 if not ancestors: # looking for bad to good transition?
51 51 good = True
52 52 badrev, ancestors = buildancestors(state['good'], state['bad'])
53 53 bad = changelog.node(badrev)
54 54 if not ancestors: # now we're confused
55 55 if (len(state['bad']) == 1 and len(state['good']) == 1 and
56 56 state['bad'] != state['good']):
57 57 raise error.Abort(_("starting revisions are not directly related"))
58 58 raise error.Abort(_("inconsistent state, %s:%s is good and bad")
59 59 % (badrev, short(bad)))
60 60
61 61 # build children dict
62 62 children = {}
63 63 visit = collections.deque([badrev])
64 64 candidates = []
65 65 while visit:
66 66 rev = visit.popleft()
67 67 if ancestors[rev] == []:
68 68 candidates.append(rev)
69 69 for prev in clparents(rev):
70 70 if prev != -1:
71 71 if prev in children:
72 72 children[prev].append(rev)
73 73 else:
74 74 children[prev] = [rev]
75 75 visit.append(prev)
76 76
77 77 candidates.sort()
78 78 # have we narrowed it down to one entry?
79 79 # or have all other possible candidates besides 'bad' have been skipped?
80 80 tot = len(candidates)
81 81 unskipped = [c for c in candidates if (c not in skip) and (c != badrev)]
82 82 if tot == 1 or not unskipped:
83 83 return ([changelog.node(c) for c in candidates], 0, good)
84 84 perfect = tot // 2
85 85
86 86 # find the best node to test
87 87 best_rev = None
88 88 best_len = -1
89 89 poison = set()
90 90 for rev in candidates:
91 91 if rev in poison:
92 92 # poison children
93 93 poison.update(children.get(rev, []))
94 94 continue
95 95
96 96 a = ancestors[rev] or [rev]
97 97 ancestors[rev] = None
98 98
99 99 x = len(a) # number of ancestors
100 100 y = tot - x # number of non-ancestors
101 101 value = min(x, y) # how good is this test?
102 102 if value > best_len and rev not in skip:
103 103 best_len = value
104 104 best_rev = rev
105 105 if value == perfect: # found a perfect candidate? quit early
106 106 break
107 107
108 108 if y < perfect and rev not in skip: # all downhill from here?
109 109 # poison children
110 110 poison.update(children.get(rev, []))
111 111 continue
112 112
113 113 for c in children.get(rev, []):
114 114 if ancestors[c]:
115 115 ancestors[c] = list(set(ancestors[c] + a))
116 116 else:
117 117 ancestors[c] = a + [c]
118 118
119 119 assert best_rev is not None
120 120 best_node = changelog.node(best_rev)
121 121
122 122 return ([best_node], tot, good)
123 123
124 124 def extendrange(repo, state, nodes, good):
125 125 # bisect is incomplete when it ends on a merge node and
126 126 # one of the parent was not checked.
127 127 parents = repo[nodes[0]].parents()
128 128 if len(parents) > 1:
129 129 if good:
130 130 side = state['bad']
131 131 else:
132 132 side = state['good']
133 133 num = len(set(i.node() for i in parents) & set(side))
134 134 if num == 1:
135 135 return parents[0].ancestor(parents[1])
136 136 return None
137 137
138 138 def load_state(repo):
139 139 state = {'current': [], 'good': [], 'bad': [], 'skip': []}
140 140 for l in repo.vfs.tryreadlines("bisect.state"):
141 141 kind, node = l[:-1].split()
142 142 node = repo.lookup(node)
143 143 if kind not in state:
144 144 raise error.Abort(_("unknown bisect kind %s") % kind)
145 145 state[kind].append(node)
146 146 return state
147 147
148 148
149 149 def save_state(repo, state):
150 150 f = repo.vfs("bisect.state", "w", atomictemp=True)
151 151 with repo.wlock():
152 152 for kind in sorted(state):
153 153 for node in state[kind]:
154 154 f.write("%s %s\n" % (kind, hex(node)))
155 155 f.close()
156 156
157 157 def resetstate(repo):
158 158 """remove any bisect state from the repository"""
159 159 if repo.vfs.exists("bisect.state"):
160 160 repo.vfs.unlink("bisect.state")
161 161
162 162 def checkstate(state):
163 163 """check we have both 'good' and 'bad' to define a range
164 164
165 165 Raise Abort exception otherwise."""
166 166 if state['good'] and state['bad']:
167 167 return True
168 168 if not state['good']:
169 169 raise error.Abort(_('cannot bisect (no known good revisions)'))
170 170 else:
171 171 raise error.Abort(_('cannot bisect (no known bad revisions)'))
172 172
173 173 def get(repo, status):
174 174 """
175 175 Return a list of revision(s) that match the given status:
176 176
177 177 - ``good``, ``bad``, ``skip``: csets explicitly marked as good/bad/skip
178 178 - ``goods``, ``bads`` : csets topologically good/bad
179 179 - ``range`` : csets taking part in the bisection
180 180 - ``pruned`` : csets that are goods, bads or skipped
181 181 - ``untested`` : csets whose fate is yet unknown
182 182 - ``ignored`` : csets ignored due to DAG topology
183 183 - ``current`` : the cset currently being bisected
184 184 """
185 185 state = load_state(repo)
186 186 if status in ('good', 'bad', 'skip', 'current'):
187 187 return map(repo.changelog.rev, state[status])
188 188 else:
189 189 # In the following sets, we do *not* call 'bisect()' with more
190 190 # than one level of recursion, because that can be very, very
191 191 # time consuming. Instead, we always develop the expression as
192 192 # much as possible.
193 193
194 194 # 'range' is all csets that make the bisection:
195 195 # - have a good ancestor and a bad descendant, or conversely
196 196 # that's because the bisection can go either way
197 197 range = '( bisect(bad)::bisect(good) | bisect(good)::bisect(bad) )'
198 198
199 199 _t = repo.revs('bisect(good)::bisect(bad)')
200 200 # The sets of topologically good or bad csets
201 201 if len(_t) == 0:
202 202 # Goods are topologically after bads
203 203 goods = 'bisect(good)::' # Pruned good csets
204 204 bads = '::bisect(bad)' # Pruned bad csets
205 205 else:
206 206 # Goods are topologically before bads
207 207 goods = '::bisect(good)' # Pruned good csets
208 208 bads = 'bisect(bad)::' # Pruned bad csets
209 209
210 210 # 'pruned' is all csets whose fate is already known: good, bad, skip
211 211 skips = 'bisect(skip)' # Pruned skipped csets
212 212 pruned = '( (%s) | (%s) | (%s) )' % (goods, bads, skips)
213 213
214 214 # 'untested' is all cset that are- in 'range', but not in 'pruned'
215 215 untested = '( (%s) - (%s) )' % (range, pruned)
216 216
217 217 # 'ignored' is all csets that were not used during the bisection
218 218 # due to DAG topology, but may however have had an impact.
219 219 # E.g., a branch merged between bads and goods, but whose branch-
220 220 # point is out-side of the range.
221 221 iba = '::bisect(bad) - ::bisect(good)' # Ignored bads' ancestors
222 222 iga = '::bisect(good) - ::bisect(bad)' # Ignored goods' ancestors
223 223 ignored = '( ( (%s) | (%s) ) - (%s) )' % (iba, iga, range)
224 224
225 225 if status == 'range':
226 226 return repo.revs(range)
227 227 elif status == 'pruned':
228 228 return repo.revs(pruned)
229 229 elif status == 'untested':
230 230 return repo.revs(untested)
231 231 elif status == 'ignored':
232 232 return repo.revs(ignored)
233 233 elif status == "goods":
234 234 return repo.revs(goods)
235 235 elif status == "bads":
236 236 return repo.revs(bads)
237 237 else:
238 238 raise error.ParseError(_('invalid bisect state'))
239 239
240 240 def label(repo, node):
241 241 rev = repo.changelog.rev(node)
242 242
243 243 # Try explicit sets
244 244 if rev in get(repo, 'good'):
245 245 # i18n: bisect changeset status
246 246 return _('good')
247 247 if rev in get(repo, 'bad'):
248 248 # i18n: bisect changeset status
249 249 return _('bad')
250 250 if rev in get(repo, 'skip'):
251 251 # i18n: bisect changeset status
252 252 return _('skipped')
253 253 if rev in get(repo, 'untested') or rev in get(repo, 'current'):
254 254 # i18n: bisect changeset status
255 255 return _('untested')
256 256 if rev in get(repo, 'ignored'):
257 257 # i18n: bisect changeset status
258 258 return _('ignored')
259 259
260 260 # Try implicit sets
261 261 if rev in get(repo, 'goods'):
262 262 # i18n: bisect changeset status
263 263 return _('good (implicit)')
264 264 if rev in get(repo, 'bads'):
265 265 # i18n: bisect changeset status
266 266 return _('bad (implicit)')
267 267
268 268 return None
269 269
270 270 def shortlabel(label):
271 271 if label:
272 272 return label[0].upper()
273 273
274 274 return None
275 275
276 276 def printresult(ui, repo, state, displayer, nodes, good):
277 277 if len(nodes) == 1:
278 278 # narrowed it down to a single revision
279 279 if good:
280 280 ui.write(_("The first good revision is:\n"))
281 281 else:
282 282 ui.write(_("The first bad revision is:\n"))
283 283 displayer.show(repo[nodes[0]])
284 284 extendnode = extendrange(repo, state, nodes, good)
285 285 if extendnode is not None:
286 286 ui.write(_('Not all ancestors of this changeset have been'
287 287 ' checked.\nUse bisect --extend to continue the '
288 288 'bisection from\nthe common ancestor, %s.\n')
289 289 % extendnode)
290 290 else:
291 291 # multiple possible revisions
292 292 if good:
293 293 ui.write(_("Due to skipped revisions, the first "
294 294 "good revision could be any of:\n"))
295 295 else:
296 296 ui.write(_("Due to skipped revisions, the first "
297 297 "bad revision could be any of:\n"))
298 298 for n in nodes:
299 299 displayer.show(repo[n])
300 300 displayer.close()
General Comments 0
You need to be logged in to leave comments. Login now