##// END OF EJS Templates
hbisect: use a defaultdict to avoid large allocations for a large changelogs...
David Soria Parra -
r35128:8287df8b default
parent child Browse files
Show More
@@ -1,300 +1,300
1 # changelog bisection for mercurial
1 # changelog bisection for mercurial
2 #
2 #
3 # Copyright 2007 Matt Mackall
3 # Copyright 2007 Matt Mackall
4 # Copyright 2005, 2006 Benoit Boissinot <benoit.boissinot@ens-lyon.org>
4 # Copyright 2005, 2006 Benoit Boissinot <benoit.boissinot@ens-lyon.org>
5 #
5 #
6 # Inspired by git bisect, extension skeleton taken from mq.py.
6 # Inspired by git bisect, extension skeleton taken from mq.py.
7 #
7 #
8 # This software may be used and distributed according to the terms of the
8 # This software may be used and distributed according to the terms of the
9 # GNU General Public License version 2 or any later version.
9 # GNU General Public License version 2 or any later version.
10
10
11 from __future__ import absolute_import
11 from __future__ import absolute_import
12
12
13 import collections
13 import collections
14
14
15 from .i18n import _
15 from .i18n import _
16 from .node import (
16 from .node import (
17 hex,
17 hex,
18 short,
18 short,
19 )
19 )
20 from . import (
20 from . import (
21 error,
21 error,
22 )
22 )
23
23
24 def bisect(repo, state):
24 def bisect(repo, state):
25 """find the next node (if any) for testing during a bisect search.
25 """find the next node (if any) for testing during a bisect search.
26 returns a (nodes, number, good) tuple.
26 returns a (nodes, number, good) tuple.
27
27
28 'nodes' is the final result of the bisect if 'number' is 0.
28 'nodes' is the final result of the bisect if 'number' is 0.
29 Otherwise 'number' indicates the remaining possible candidates for
29 Otherwise 'number' indicates the remaining possible candidates for
30 the search and 'nodes' contains the next bisect target.
30 the search and 'nodes' contains the next bisect target.
31 'good' is True if bisect is searching for a first good changeset, False
31 'good' is True if bisect is searching for a first good changeset, False
32 if searching for a first bad one.
32 if searching for a first bad one.
33 """
33 """
34
34
35 changelog = repo.changelog
35 changelog = repo.changelog
36 clparents = changelog.parentrevs
36 clparents = changelog.parentrevs
37 skip = set([changelog.rev(n) for n in state['skip']])
37 skip = set([changelog.rev(n) for n in state['skip']])
38
38
39 def buildancestors(bad, good):
39 def buildancestors(bad, good):
40 badrev = min([changelog.rev(n) for n in bad])
40 badrev = min([changelog.rev(n) for n in bad])
41 ancestors = [None] * (len(changelog) + 1)
41 ancestors = collections.defaultdict(lambda: None)
42 for rev in repo.revs("descendants(%ln) - ancestors(%ln)", good, good):
42 for rev in repo.revs("descendants(%ln) - ancestors(%ln)", good, good):
43 ancestors[rev] = []
43 ancestors[rev] = []
44 if ancestors[badrev] is None:
44 if ancestors[badrev] is None:
45 return badrev, None
45 return badrev, None
46 return badrev, ancestors
46 return badrev, ancestors
47
47
48 good = False
48 good = False
49 badrev, ancestors = buildancestors(state['bad'], state['good'])
49 badrev, ancestors = buildancestors(state['bad'], state['good'])
50 if not ancestors: # looking for bad to good transition?
50 if not ancestors: # looking for bad to good transition?
51 good = True
51 good = True
52 badrev, ancestors = buildancestors(state['good'], state['bad'])
52 badrev, ancestors = buildancestors(state['good'], state['bad'])
53 bad = changelog.node(badrev)
53 bad = changelog.node(badrev)
54 if not ancestors: # now we're confused
54 if not ancestors: # now we're confused
55 if (len(state['bad']) == 1 and len(state['good']) == 1 and
55 if (len(state['bad']) == 1 and len(state['good']) == 1 and
56 state['bad'] != state['good']):
56 state['bad'] != state['good']):
57 raise error.Abort(_("starting revisions are not directly related"))
57 raise error.Abort(_("starting revisions are not directly related"))
58 raise error.Abort(_("inconsistent state, %s:%s is good and bad")
58 raise error.Abort(_("inconsistent state, %s:%s is good and bad")
59 % (badrev, short(bad)))
59 % (badrev, short(bad)))
60
60
61 # build children dict
61 # build children dict
62 children = {}
62 children = {}
63 visit = collections.deque([badrev])
63 visit = collections.deque([badrev])
64 candidates = []
64 candidates = []
65 while visit:
65 while visit:
66 rev = visit.popleft()
66 rev = visit.popleft()
67 if ancestors[rev] == []:
67 if ancestors[rev] == []:
68 candidates.append(rev)
68 candidates.append(rev)
69 for prev in clparents(rev):
69 for prev in clparents(rev):
70 if prev != -1:
70 if prev != -1:
71 if prev in children:
71 if prev in children:
72 children[prev].append(rev)
72 children[prev].append(rev)
73 else:
73 else:
74 children[prev] = [rev]
74 children[prev] = [rev]
75 visit.append(prev)
75 visit.append(prev)
76
76
77 candidates.sort()
77 candidates.sort()
78 # have we narrowed it down to one entry?
78 # have we narrowed it down to one entry?
79 # or have all other possible candidates besides 'bad' have been skipped?
79 # or have all other possible candidates besides 'bad' have been skipped?
80 tot = len(candidates)
80 tot = len(candidates)
81 unskipped = [c for c in candidates if (c not in skip) and (c != badrev)]
81 unskipped = [c for c in candidates if (c not in skip) and (c != badrev)]
82 if tot == 1 or not unskipped:
82 if tot == 1 or not unskipped:
83 return ([changelog.node(c) for c in candidates], 0, good)
83 return ([changelog.node(c) for c in candidates], 0, good)
84 perfect = tot // 2
84 perfect = tot // 2
85
85
86 # find the best node to test
86 # find the best node to test
87 best_rev = None
87 best_rev = None
88 best_len = -1
88 best_len = -1
89 poison = set()
89 poison = set()
90 for rev in candidates:
90 for rev in candidates:
91 if rev in poison:
91 if rev in poison:
92 # poison children
92 # poison children
93 poison.update(children.get(rev, []))
93 poison.update(children.get(rev, []))
94 continue
94 continue
95
95
96 a = ancestors[rev] or [rev]
96 a = ancestors[rev] or [rev]
97 ancestors[rev] = None
97 ancestors[rev] = None
98
98
99 x = len(a) # number of ancestors
99 x = len(a) # number of ancestors
100 y = tot - x # number of non-ancestors
100 y = tot - x # number of non-ancestors
101 value = min(x, y) # how good is this test?
101 value = min(x, y) # how good is this test?
102 if value > best_len and rev not in skip:
102 if value > best_len and rev not in skip:
103 best_len = value
103 best_len = value
104 best_rev = rev
104 best_rev = rev
105 if value == perfect: # found a perfect candidate? quit early
105 if value == perfect: # found a perfect candidate? quit early
106 break
106 break
107
107
108 if y < perfect and rev not in skip: # all downhill from here?
108 if y < perfect and rev not in skip: # all downhill from here?
109 # poison children
109 # poison children
110 poison.update(children.get(rev, []))
110 poison.update(children.get(rev, []))
111 continue
111 continue
112
112
113 for c in children.get(rev, []):
113 for c in children.get(rev, []):
114 if ancestors[c]:
114 if ancestors[c]:
115 ancestors[c] = list(set(ancestors[c] + a))
115 ancestors[c] = list(set(ancestors[c] + a))
116 else:
116 else:
117 ancestors[c] = a + [c]
117 ancestors[c] = a + [c]
118
118
119 assert best_rev is not None
119 assert best_rev is not None
120 best_node = changelog.node(best_rev)
120 best_node = changelog.node(best_rev)
121
121
122 return ([best_node], tot, good)
122 return ([best_node], tot, good)
123
123
124 def extendrange(repo, state, nodes, good):
124 def extendrange(repo, state, nodes, good):
125 # bisect is incomplete when it ends on a merge node and
125 # bisect is incomplete when it ends on a merge node and
126 # one of the parent was not checked.
126 # one of the parent was not checked.
127 parents = repo[nodes[0]].parents()
127 parents = repo[nodes[0]].parents()
128 if len(parents) > 1:
128 if len(parents) > 1:
129 if good:
129 if good:
130 side = state['bad']
130 side = state['bad']
131 else:
131 else:
132 side = state['good']
132 side = state['good']
133 num = len(set(i.node() for i in parents) & set(side))
133 num = len(set(i.node() for i in parents) & set(side))
134 if num == 1:
134 if num == 1:
135 return parents[0].ancestor(parents[1])
135 return parents[0].ancestor(parents[1])
136 return None
136 return None
137
137
138 def load_state(repo):
138 def load_state(repo):
139 state = {'current': [], 'good': [], 'bad': [], 'skip': []}
139 state = {'current': [], 'good': [], 'bad': [], 'skip': []}
140 for l in repo.vfs.tryreadlines("bisect.state"):
140 for l in repo.vfs.tryreadlines("bisect.state"):
141 kind, node = l[:-1].split()
141 kind, node = l[:-1].split()
142 node = repo.lookup(node)
142 node = repo.lookup(node)
143 if kind not in state:
143 if kind not in state:
144 raise error.Abort(_("unknown bisect kind %s") % kind)
144 raise error.Abort(_("unknown bisect kind %s") % kind)
145 state[kind].append(node)
145 state[kind].append(node)
146 return state
146 return state
147
147
148
148
149 def save_state(repo, state):
149 def save_state(repo, state):
150 f = repo.vfs("bisect.state", "w", atomictemp=True)
150 f = repo.vfs("bisect.state", "w", atomictemp=True)
151 with repo.wlock():
151 with repo.wlock():
152 for kind in sorted(state):
152 for kind in sorted(state):
153 for node in state[kind]:
153 for node in state[kind]:
154 f.write("%s %s\n" % (kind, hex(node)))
154 f.write("%s %s\n" % (kind, hex(node)))
155 f.close()
155 f.close()
156
156
157 def resetstate(repo):
157 def resetstate(repo):
158 """remove any bisect state from the repository"""
158 """remove any bisect state from the repository"""
159 if repo.vfs.exists("bisect.state"):
159 if repo.vfs.exists("bisect.state"):
160 repo.vfs.unlink("bisect.state")
160 repo.vfs.unlink("bisect.state")
161
161
162 def checkstate(state):
162 def checkstate(state):
163 """check we have both 'good' and 'bad' to define a range
163 """check we have both 'good' and 'bad' to define a range
164
164
165 Raise Abort exception otherwise."""
165 Raise Abort exception otherwise."""
166 if state['good'] and state['bad']:
166 if state['good'] and state['bad']:
167 return True
167 return True
168 if not state['good']:
168 if not state['good']:
169 raise error.Abort(_('cannot bisect (no known good revisions)'))
169 raise error.Abort(_('cannot bisect (no known good revisions)'))
170 else:
170 else:
171 raise error.Abort(_('cannot bisect (no known bad revisions)'))
171 raise error.Abort(_('cannot bisect (no known bad revisions)'))
172
172
173 def get(repo, status):
173 def get(repo, status):
174 """
174 """
175 Return a list of revision(s) that match the given status:
175 Return a list of revision(s) that match the given status:
176
176
177 - ``good``, ``bad``, ``skip``: csets explicitly marked as good/bad/skip
177 - ``good``, ``bad``, ``skip``: csets explicitly marked as good/bad/skip
178 - ``goods``, ``bads`` : csets topologically good/bad
178 - ``goods``, ``bads`` : csets topologically good/bad
179 - ``range`` : csets taking part in the bisection
179 - ``range`` : csets taking part in the bisection
180 - ``pruned`` : csets that are goods, bads or skipped
180 - ``pruned`` : csets that are goods, bads or skipped
181 - ``untested`` : csets whose fate is yet unknown
181 - ``untested`` : csets whose fate is yet unknown
182 - ``ignored`` : csets ignored due to DAG topology
182 - ``ignored`` : csets ignored due to DAG topology
183 - ``current`` : the cset currently being bisected
183 - ``current`` : the cset currently being bisected
184 """
184 """
185 state = load_state(repo)
185 state = load_state(repo)
186 if status in ('good', 'bad', 'skip', 'current'):
186 if status in ('good', 'bad', 'skip', 'current'):
187 return map(repo.changelog.rev, state[status])
187 return map(repo.changelog.rev, state[status])
188 else:
188 else:
189 # In the following sets, we do *not* call 'bisect()' with more
189 # In the following sets, we do *not* call 'bisect()' with more
190 # than one level of recursion, because that can be very, very
190 # than one level of recursion, because that can be very, very
191 # time consuming. Instead, we always develop the expression as
191 # time consuming. Instead, we always develop the expression as
192 # much as possible.
192 # much as possible.
193
193
194 # 'range' is all csets that make the bisection:
194 # 'range' is all csets that make the bisection:
195 # - have a good ancestor and a bad descendant, or conversely
195 # - have a good ancestor and a bad descendant, or conversely
196 # that's because the bisection can go either way
196 # that's because the bisection can go either way
197 range = '( bisect(bad)::bisect(good) | bisect(good)::bisect(bad) )'
197 range = '( bisect(bad)::bisect(good) | bisect(good)::bisect(bad) )'
198
198
199 _t = repo.revs('bisect(good)::bisect(bad)')
199 _t = repo.revs('bisect(good)::bisect(bad)')
200 # The sets of topologically good or bad csets
200 # The sets of topologically good or bad csets
201 if len(_t) == 0:
201 if len(_t) == 0:
202 # Goods are topologically after bads
202 # Goods are topologically after bads
203 goods = 'bisect(good)::' # Pruned good csets
203 goods = 'bisect(good)::' # Pruned good csets
204 bads = '::bisect(bad)' # Pruned bad csets
204 bads = '::bisect(bad)' # Pruned bad csets
205 else:
205 else:
206 # Goods are topologically before bads
206 # Goods are topologically before bads
207 goods = '::bisect(good)' # Pruned good csets
207 goods = '::bisect(good)' # Pruned good csets
208 bads = 'bisect(bad)::' # Pruned bad csets
208 bads = 'bisect(bad)::' # Pruned bad csets
209
209
210 # 'pruned' is all csets whose fate is already known: good, bad, skip
210 # 'pruned' is all csets whose fate is already known: good, bad, skip
211 skips = 'bisect(skip)' # Pruned skipped csets
211 skips = 'bisect(skip)' # Pruned skipped csets
212 pruned = '( (%s) | (%s) | (%s) )' % (goods, bads, skips)
212 pruned = '( (%s) | (%s) | (%s) )' % (goods, bads, skips)
213
213
214 # 'untested' is all cset that are- in 'range', but not in 'pruned'
214 # 'untested' is all cset that are- in 'range', but not in 'pruned'
215 untested = '( (%s) - (%s) )' % (range, pruned)
215 untested = '( (%s) - (%s) )' % (range, pruned)
216
216
217 # 'ignored' is all csets that were not used during the bisection
217 # 'ignored' is all csets that were not used during the bisection
218 # due to DAG topology, but may however have had an impact.
218 # due to DAG topology, but may however have had an impact.
219 # E.g., a branch merged between bads and goods, but whose branch-
219 # E.g., a branch merged between bads and goods, but whose branch-
220 # point is out-side of the range.
220 # point is out-side of the range.
221 iba = '::bisect(bad) - ::bisect(good)' # Ignored bads' ancestors
221 iba = '::bisect(bad) - ::bisect(good)' # Ignored bads' ancestors
222 iga = '::bisect(good) - ::bisect(bad)' # Ignored goods' ancestors
222 iga = '::bisect(good) - ::bisect(bad)' # Ignored goods' ancestors
223 ignored = '( ( (%s) | (%s) ) - (%s) )' % (iba, iga, range)
223 ignored = '( ( (%s) | (%s) ) - (%s) )' % (iba, iga, range)
224
224
225 if status == 'range':
225 if status == 'range':
226 return repo.revs(range)
226 return repo.revs(range)
227 elif status == 'pruned':
227 elif status == 'pruned':
228 return repo.revs(pruned)
228 return repo.revs(pruned)
229 elif status == 'untested':
229 elif status == 'untested':
230 return repo.revs(untested)
230 return repo.revs(untested)
231 elif status == 'ignored':
231 elif status == 'ignored':
232 return repo.revs(ignored)
232 return repo.revs(ignored)
233 elif status == "goods":
233 elif status == "goods":
234 return repo.revs(goods)
234 return repo.revs(goods)
235 elif status == "bads":
235 elif status == "bads":
236 return repo.revs(bads)
236 return repo.revs(bads)
237 else:
237 else:
238 raise error.ParseError(_('invalid bisect state'))
238 raise error.ParseError(_('invalid bisect state'))
239
239
240 def label(repo, node):
240 def label(repo, node):
241 rev = repo.changelog.rev(node)
241 rev = repo.changelog.rev(node)
242
242
243 # Try explicit sets
243 # Try explicit sets
244 if rev in get(repo, 'good'):
244 if rev in get(repo, 'good'):
245 # i18n: bisect changeset status
245 # i18n: bisect changeset status
246 return _('good')
246 return _('good')
247 if rev in get(repo, 'bad'):
247 if rev in get(repo, 'bad'):
248 # i18n: bisect changeset status
248 # i18n: bisect changeset status
249 return _('bad')
249 return _('bad')
250 if rev in get(repo, 'skip'):
250 if rev in get(repo, 'skip'):
251 # i18n: bisect changeset status
251 # i18n: bisect changeset status
252 return _('skipped')
252 return _('skipped')
253 if rev in get(repo, 'untested') or rev in get(repo, 'current'):
253 if rev in get(repo, 'untested') or rev in get(repo, 'current'):
254 # i18n: bisect changeset status
254 # i18n: bisect changeset status
255 return _('untested')
255 return _('untested')
256 if rev in get(repo, 'ignored'):
256 if rev in get(repo, 'ignored'):
257 # i18n: bisect changeset status
257 # i18n: bisect changeset status
258 return _('ignored')
258 return _('ignored')
259
259
260 # Try implicit sets
260 # Try implicit sets
261 if rev in get(repo, 'goods'):
261 if rev in get(repo, 'goods'):
262 # i18n: bisect changeset status
262 # i18n: bisect changeset status
263 return _('good (implicit)')
263 return _('good (implicit)')
264 if rev in get(repo, 'bads'):
264 if rev in get(repo, 'bads'):
265 # i18n: bisect changeset status
265 # i18n: bisect changeset status
266 return _('bad (implicit)')
266 return _('bad (implicit)')
267
267
268 return None
268 return None
269
269
270 def shortlabel(label):
270 def shortlabel(label):
271 if label:
271 if label:
272 return label[0].upper()
272 return label[0].upper()
273
273
274 return None
274 return None
275
275
276 def printresult(ui, repo, state, displayer, nodes, good):
276 def printresult(ui, repo, state, displayer, nodes, good):
277 if len(nodes) == 1:
277 if len(nodes) == 1:
278 # narrowed it down to a single revision
278 # narrowed it down to a single revision
279 if good:
279 if good:
280 ui.write(_("The first good revision is:\n"))
280 ui.write(_("The first good revision is:\n"))
281 else:
281 else:
282 ui.write(_("The first bad revision is:\n"))
282 ui.write(_("The first bad revision is:\n"))
283 displayer.show(repo[nodes[0]])
283 displayer.show(repo[nodes[0]])
284 extendnode = extendrange(repo, state, nodes, good)
284 extendnode = extendrange(repo, state, nodes, good)
285 if extendnode is not None:
285 if extendnode is not None:
286 ui.write(_('Not all ancestors of this changeset have been'
286 ui.write(_('Not all ancestors of this changeset have been'
287 ' checked.\nUse bisect --extend to continue the '
287 ' checked.\nUse bisect --extend to continue the '
288 'bisection from\nthe common ancestor, %s.\n')
288 'bisection from\nthe common ancestor, %s.\n')
289 % extendnode)
289 % extendnode)
290 else:
290 else:
291 # multiple possible revisions
291 # multiple possible revisions
292 if good:
292 if good:
293 ui.write(_("Due to skipped revisions, the first "
293 ui.write(_("Due to skipped revisions, the first "
294 "good revision could be any of:\n"))
294 "good revision could be any of:\n"))
295 else:
295 else:
296 ui.write(_("Due to skipped revisions, the first "
296 ui.write(_("Due to skipped revisions, the first "
297 "bad revision could be any of:\n"))
297 "bad revision could be any of:\n"))
298 for n in nodes:
298 for n in nodes:
299 displayer.show(repo[n])
299 displayer.show(repo[n])
300 displayer.close()
300 displayer.close()
General Comments 0
You need to be logged in to leave comments. Login now