branchmap: pre-filter topological heads before ancestors based filtering...
Pierre-Yves David
r22357:9c3c3dc1 default
@@ -1,280 +1,287 @@
 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 from node import bin, hex, nullid, nullrev
 import encoding
 import util
 import time
 
 def _filename(repo):
     """name of a branchcache file for a given repo or repoview"""
     filename = "cache/branch2"
     if repo.filtername:
         filename = '%s-%s' % (filename, repo.filtername)
     return filename
 
 def read(repo):
     try:
         f = repo.opener(_filename(repo))
         lines = f.read().split('\n')
         f.close()
     except (IOError, OSError):
         return None
 
     try:
         cachekey = lines.pop(0).split(" ", 2)
         last, lrev = cachekey[:2]
         last, lrev = bin(last), int(lrev)
         filteredhash = None
         if len(cachekey) > 2:
             filteredhash = bin(cachekey[2])
         partial = branchcache(tipnode=last, tiprev=lrev,
                               filteredhash=filteredhash)
         if not partial.validfor(repo):
             # invalidate the cache
             raise ValueError('tip differs')
         for l in lines:
             if not l:
                 continue
             node, state, label = l.split(" ", 2)
             if state not in 'oc':
                 raise ValueError('invalid branch state')
             label = encoding.tolocal(label.strip())
             if not node in repo:
                 raise ValueError('node %s does not exist' % node)
             node = bin(node)
             partial.setdefault(label, []).append(node)
             if state == 'c':
                 partial._closednodes.add(node)
     except KeyboardInterrupt:
         raise
     except Exception, inst:
         if repo.ui.debugflag:
             msg = 'invalid branchheads cache'
             if repo.filtername is not None:
                 msg += ' (%s)' % repo.filtername
             msg += ': %s\n'
             repo.ui.debug(msg % inst)
         partial = None
     return partial
 
 ### Nearest subset relation
 # Nearest subset of filter X is a filter Y so that:
 # * Y is included in X,
 # * X - Y is as small as possible.
 # This create and ordering used for branchmap purpose.
 # the ordering may be partial
 subsettable = {None: 'visible',
                'visible': 'served',
                'served': 'immutable',
                'immutable': 'base'}
 
 def updatecache(repo):
     cl = repo.changelog
     filtername = repo.filtername
     partial = repo._branchcaches.get(filtername)
 
     revs = []
     if partial is None or not partial.validfor(repo):
         partial = read(repo)
         if partial is None:
             subsetname = subsettable.get(filtername)
             if subsetname is None:
                 partial = branchcache()
             else:
                 subset = repo.filtered(subsetname)
                 partial = subset.branchmap().copy()
                 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                 revs.extend(r for r in extrarevs if r <= partial.tiprev)
     revs.extend(cl.revs(start=partial.tiprev + 1))
     if revs:
         partial.update(repo, revs)
         partial.write(repo)
     assert partial.validfor(repo), filtername
     repo._branchcaches[repo.filtername] = partial
 
 class branchcache(dict):
     """A dict like object that hold branches heads cache.
 
     This cache is used to avoid costly computations to determine all the
     branch heads of a repo.
 
     The cache is serialized on disk in the following format:
 
     <tip hex node> <tip rev number> [optional filtered repo hex hash]
     <branch head hex node> <open/closed state> <branch name>
     <branch head hex node> <open/closed state> <branch name>
     ...
 
     The first line is used to check if the cache is still valid. If the
     branch cache is for a filtered repo view, an optional third hash is
     included that hashes the hashes of all filtered revisions.
 
     The open/closed state is represented by a single letter 'o' or 'c'.
     This field can be used to avoid changelog reads when determining if a
     branch head closes a branch or not.
     """
 
     def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                  filteredhash=None, closednodes=None):
         super(branchcache, self).__init__(entries)
         self.tipnode = tipnode
         self.tiprev = tiprev
         self.filteredhash = filteredhash
         # closednodes is a set of nodes that close their branch. If the branch
         # cache has been updated, it may contain nodes that are no longer
         # heads.
         if closednodes is None:
             self._closednodes = set()
         else:
             self._closednodes = closednodes
 
     def _hashfiltered(self, repo):
         """build hash of revision filtered in the current cache
 
         Tracking tipnode and tiprev is not enough to ensure validity of the
         cache as they do not help to distinct cache that ignored various
         revision bellow tiprev.
 
         To detect such difference, we build a cache of all ignored revisions.
         """
         cl = repo.changelog
         if not cl.filteredrevs:
             return None
         key = None
         revs = sorted(r for r in cl.filteredrevs if r <= self.tiprev)
         if revs:
             s = util.sha1()
             for rev in revs:
                 s.update('%s;' % rev)
             key = s.digest()
         return key
 
     def validfor(self, repo):
         """Is the cache content valid regarding a repo
 
         - False when cached tipnode is unknown or if we detect a strip.
         - True when cache is up to date or a subset of current repo."""
         try:
             return ((self.tipnode == repo.changelog.node(self.tiprev))
                     and (self.filteredhash == self._hashfiltered(repo)))
         except IndexError:
             return False
 
     def _branchtip(self, heads):
         '''Return tuple with last open head in heads and false,
         otherwise return last closed head and true.'''
         tip = heads[-1]
         closed = True
         for h in reversed(heads):
             if h not in self._closednodes:
                 tip = h
                 closed = False
                 break
         return tip, closed
 
     def branchtip(self, branch):
         '''Return the tipmost open head on branch head, otherwise return the
         tipmost closed head on branch.
         Raise KeyError for unknown branch.'''
         return self._branchtip(self[branch])[0]
 
     def branchheads(self, branch, closed=False):
         heads = self[branch]
         if not closed:
             heads = [h for h in heads if h not in self._closednodes]
         return heads
 
     def iterbranches(self):
         for bn, heads in self.iteritems():
             yield (bn, heads) + self._branchtip(heads)
 
     def copy(self):
         """return an deep copy of the branchcache object"""
         return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
                            self._closednodes)
 
     def write(self, repo):
         try:
             f = repo.opener(_filename(repo), "w", atomictemp=True)
             cachekey = [hex(self.tipnode), str(self.tiprev)]
             if self.filteredhash is not None:
                 cachekey.append(hex(self.filteredhash))
             f.write(" ".join(cachekey) + '\n')
             nodecount = 0
             for label, nodes in sorted(self.iteritems()):
                 for node in nodes:
                     nodecount += 1
                     if node in self._closednodes:
                         state = 'c'
                     else:
                         state = 'o'
                     f.write("%s %s %s\n" % (hex(node), state,
                                             encoding.fromlocal(label)))
             f.close()
             repo.ui.log('branchcache',
                         'wrote %s branch cache with %d labels and %d nodes\n',
                         repo.filtername, len(self), nodecount)
         except (IOError, OSError, util.Abort), inst:
             repo.ui.debug("couldn't write branch cache: %s\n" % inst)
             # Abort may be raise by read only opener
             pass
 
     def update(self, repo, revgen):
         """Given a branchhead cache, self, that may have extra nodes or be
         missing heads, and a generator of nodes that are strictly a superset of
         heads missing, this function updates self to be correct.
         """
         starttime = time.time()
         cl = repo.changelog
         # collect new branch entries
         newbranches = {}
         getbranchinfo = cl.branchinfo
         for r in revgen:
             branch, closesbranch = getbranchinfo(r)
             newbranches.setdefault(branch, []).append(r)
             if closesbranch:
                 self._closednodes.add(cl.node(r))
+
+        # fetch current topological heads to speed up filtering
+        topoheads = set(cl.headrevs())
+
         # if older branchheads are reachable from new ones, they aren't
         # really branchheads. Note checking parents is insufficient:
         # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
         for branch, newheadrevs in newbranches.iteritems():
             bheads = self.setdefault(branch, [])
             bheadset = set(cl.rev(node) for node in bheads)
 
             # This have been tested True on all internal usage of this function.
             # run it again in case of doubt
             # assert not (set(bheadrevs) & set(newheadrevs))
             newheadrevs.sort()
             bheadset.update(newheadrevs)
 
             # This prunes out two kinds of heads - heads that are superseded by
             # a head in newheadrevs, and newheadrevs that are not heads because
             # an existing head is their descendant.
-            ancestors = set(cl.ancestors(newheadrevs, min(bheadset)))
-            bheadset -= ancestors
+            uncertain = bheadset - topoheads
+            if uncertain:
+                floorrev = min(uncertain)
+                ancestors = set(cl.ancestors(newheadrevs, floorrev))
+                bheadset -= ancestors
             bheadrevs = sorted(bheadset)
             self[branch] = [cl.node(rev) for rev in bheadrevs]
             tiprev = bheadrevs[-1]
             if tiprev > self.tiprev:
                 self.tipnode = cl.node(tiprev)
                 self.tiprev = tiprev
 
         if not self.validfor(repo):
             # cache key are not valid anymore
             self.tipnode = nullid
             self.tiprev = nullrev
             for heads in self.values():
                 tiprev = max(cl.rev(node) for node in heads)
                 if tiprev > self.tiprev:
                     self.tipnode = cl.node(tiprev)
                     self.tiprev = tiprev
         self.filteredhash = self._hashfiltered(repo)
 
         duration = time.time() - starttime
         repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                     repo.filtername, duration)
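
The idea behind the change: a cached branch head that is still a topological head of the repository has no descendants at all, so it cannot have been superseded by any of the new revisions and never needs an ancestors() walk. Only the remaining "uncertain" heads are fed into cl.ancestors(), and when every old head is still a topological head the walk is skipped entirely. A minimal standalone sketch of that pruning step, using a toy DAG and helper names (ancestors_of, prune_heads) that are illustrative only, not Mercurial APIs:

    # Toy illustration of the pre-filtering trick; not Mercurial code.
    # dag maps each revision number to the list of its parent revisions.

    def ancestors_of(dag, revs, floor):
        """Ancestors of 'revs' (exclusive) with revision number >= floor,
        loosely mirroring changelog.ancestors(revs, stoprev)."""
        seen = set()
        stack = list(revs)
        while stack:
            for p in dag[stack.pop()]:
                if p >= floor and p not in seen:
                    seen.add(p)
                    stack.append(p)
        return seen

    def prune_heads(dag, bheadset, newheadrevs, topoheads):
        """Drop candidate heads that are ancestors of the new head revisions.

        Heads still present in 'topoheads' have no descendants, so they
        cannot be ancestors of anything and are excluded from the walk."""
        uncertain = bheadset - topoheads
        if uncertain:
            floorrev = min(uncertain)
            bheadset -= ancestors_of(dag, newheadrevs, floorrev)
        return bheadset

    # 0 <- 1 <- 2 <- 3, with a second head 4 branching off revision 1
    dag = {0: [], 1: [0], 2: [1], 3: [2], 4: [1]}
    topoheads = {3, 4}
    print(prune_heads(dag, {1, 3, 4}, [3], topoheads))  # {3, 4}: stale head 1 pruned
    print(prune_heads(dag, {3, 4}, [4], topoheads))     # {3, 4}: no ancestor walk needed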
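For reference, the on-disk layout described by the branchcache docstring and handled by read()/write() lives in .hg/cache/branch2 (with a '-<filtername>' suffix for filtered views, per _filename()). A hypothetical two-branch example and the matching parse are sketched below; the node hashes are shortened and made up for readability, where real entries are full 40-character hex nodes:

    # Hypothetical contents of .hg/cache/branch2 (shortened, made-up hashes):
    sample = (
        "e2f1a9c0 42 9dd3c1ff\n"   # cache key: tip node, tip rev, optional filtered hash
        "a1b2c3d4 o default\n"     # open head on branch 'default'
        "f00ba4e5 c stable\n"      # closed head on branch 'stable'
    )

    lines = sample.splitlines()
    cachekey = lines[0].split(" ", 2)
    tipnode, tiprev = cachekey[:2]
    filteredhash = cachekey[2] if len(cachekey) > 2 else None
    for entry in lines[1:]:
        node, state, label = entry.split(" ", 2)
        print(label, node, "closed" if state == "c" else "open")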
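updatecache() uses subsettable to seed a missing cache from the nearest smaller repo view rather than recomputing from scratch: the unfiltered view (None) falls back to 'visible', 'visible' to 'served', and so on, and the recursive subset.branchmap() call effectively walks that chain until a usable cache is found. A small sketch of the fallback order implied by the mapping; fallback_chain is a hypothetical helper, not part of the module:

    subsettable = {None: 'visible',
                   'visible': 'served',
                   'served': 'immutable',
                   'immutable': 'base'}

    def fallback_chain(filtername):
        """List the filters whose branchmap could seed 'filtername', nearest first."""
        chain = []
        name = subsettable.get(filtername)
        while name is not None:
            chain.append(name)
            name = subsettable.get(name)
        return chain

    print(fallback_chain(None))       # ['visible', 'served', 'immutable', 'base']
    print(fallback_chain('served'))   # ['immutable', 'base']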
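validfor() also compares filteredhash against a key recomputed by _hashfiltered(): tipnode and tiprev alone cannot tell two caches apart when different revisions below the tip are filtered, so the cache keys on a digest of every filtered revision at or below tiprev. A rough standalone equivalent of that key computation, using hashlib directly in place of util.sha1:

    import hashlib

    def filtered_key(filteredrevs, tiprev):
        """Digest of the filtered revisions at or below tiprev, or None if none."""
        revs = sorted(r for r in filteredrevs if r <= tiprev)
        if not revs:
            return None
        s = hashlib.sha1()
        for rev in revs:
            s.update(('%s;' % rev).encode())
        return s.digest()

    # revisions 3 and 7 are hidden below the cached tip (10); 12 is above and ignored
    print(filtered_key({3, 7, 12}, tiprev=10))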