##// END OF EJS Templates
branchcache: don't verify closed nodes in iteropen()...
Pulkit Goyal -
r42676:7c9d4cf2 default
parent child Browse files
Show More
@@ -1,670 +1,669 b''
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11
11
12 from .node import (
12 from .node import (
13 bin,
13 bin,
14 hex,
14 hex,
15 nullid,
15 nullid,
16 nullrev,
16 nullrev,
17 )
17 )
18 from . import (
18 from . import (
19 encoding,
19 encoding,
20 error,
20 error,
21 pycompat,
21 pycompat,
22 scmutil,
22 scmutil,
23 util,
23 util,
24 )
24 )
25 from .utils import (
25 from .utils import (
26 stringutil,
26 stringutil,
27 )
27 )
28
28
# Module-level aliases for the struct helpers. The revision branch cache
# code further down uses these per-record; binding them here avoids a
# repeated attribute lookup on the struct module for each access.
calcsize = struct.calcsize
pack_into = struct.pack_into
unpack_from = struct.unpack_from
32
32
33
33
### Nearest subset relation
# Nearest subset of filter X is a filter Y so that:
# * Y is included in X,
# * X - Y is as small as possible.
# This creates an ordering used for branchmap purposes.
# The ordering may be partial.
# Map each repo filter name to its nearest subset filter (see the relation
# described above); ``None`` stands for the unfiltered repository.
subsettable = {
    None: 'visible',
    'visible-hidden': 'visible',
    'visible': 'served',
    'served': 'immutable',
    'immutable': 'base',
}
45
45
46
46
class BranchMapCache(object):
    """mapping of filtered views of repo with their branchcache"""
    def __init__(self):
        # filtername (or None for unfiltered) -> branchcache instance
        self._per_filter = {}

    def __getitem__(self, repo):
        """Return an up-to-date branchcache for ``repo``'s filtered view."""
        self.updatecache(repo)
        return self._per_filter[repo.filtername]

    def updatecache(self, repo):
        """Update the cache for the given filtered view on a repository"""
        # This can trigger updates for the caches for subsets of the filtered
        # view, e.g. when there is no cache for this filtered view or the cache
        # is stale.

        cl = repo.changelog
        filtername = repo.filtername
        bcache = self._per_filter.get(filtername)
        if bcache is None or not bcache.validfor(repo):
            # cache object missing or cache object stale? Read from disk
            bcache = branchcache.fromfile(repo)

        revs = []
        if bcache is None:
            # no (fresh) cache available anymore, perhaps we can re-use
            # the cache for a subset, then extend that to add info on missing
            # revisions.
            subsetname = subsettable.get(filtername)
            if subsetname is not None:
                # NOTE: self[subset] recurses into updatecache() for the
                # subset view before we copy its result.
                subset = repo.filtered(subsetname)
                bcache = self[subset].copy()
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= bcache.tiprev)
            else:
                # nothing to fall back on, start empty.
                bcache = branchcache()

        revs.extend(cl.revs(start=bcache.tiprev + 1))
        if revs:
            bcache.update(repo, revs)

        assert bcache.validfor(repo), filtername
        self._per_filter[repo.filtername] = bcache

    def replace(self, repo, remotebranchmap):
        """Replace the branchmap cache for a repo with a branch mapping.

        This is likely only called during clone with a branch map from a
        remote.

        """
        cl = repo.changelog
        clrev = cl.rev
        clbranchinfo = cl.branchinfo
        rbheads = []
        closed = []
        for bheads in remotebranchmap.itervalues():
            rbheads += bheads
            for h in bheads:
                r = clrev(h)
                b, c = clbranchinfo(r)
                if c:
                    closed.append(h)

        if rbheads:
            rtiprev = max((int(clrev(node)) for node in rbheads))
            cache = branchcache(
                remotebranchmap, repo[rtiprev].node(), rtiprev,
                closednodes=closed)

            # Try to stick it as low as possible
            # filter above served are unlikely to be fetch from a clone
            for candidate in ('base', 'immutable', 'served'):
                rview = repo.filtered(candidate)
                if cache.validfor(rview):
                    self._per_filter[candidate] = cache
                    cache.write(rview)
                    return

    def clear(self):
        """Drop every cached branchcache for every filter level."""
        self._per_filter.clear()
128
128
def _unknownnode(node):
    """Raise ValueError for a node the branchcache holds but the changelog
    does not know about."""
    message = r'node %s does not exist' % pycompat.sysstr(hex(node))
    raise ValueError(message)
133
133
class branchcache(object):
    """A dict like object that hold branches heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                 filteredhash=None, closednodes=None, hasnode=None):
        """ hasnode is a function which can be used to verify whether changelog
        has a given node or not. If it's not provided, we assume that every node
        we have exists in changelog """
        self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closednodes is None:
            self._closednodes = set()
        else:
            self._closednodes = closednodes
        self._entries = dict(entries)
        # whether closed nodes are verified or not
        self._closedverified = False
        # branches for which nodes are verified
        self._verifiedbranches = set()
        self._hasnode = hasnode
        if self._hasnode is None:
            # default: trust every node to exist in the changelog
            self._hasnode = lambda x: True

    def _verifyclosed(self):
        """ verify the closed nodes we have """
        if self._closedverified:
            return
        for node in self._closednodes:
            if not self._hasnode(node):
                _unknownnode(node)

        self._closedverified = True

    def _verifybranch(self, branch):
        """ verify head nodes for the given branch. If branch is None, verify
        for all the branches """
        if branch not in self._entries or branch in self._verifiedbranches:
            return
        for n in self._entries[branch]:
            if not self._hasnode(n):
                _unknownnode(n)

        self._verifiedbranches.add(branch)

    def _verifyall(self):
        """ verifies nodes of all the branches """
        for b in self._entries:
            self._verifybranch(b)

    def __iter__(self):
        return iter(self._entries)

    def __setitem__(self, key, value):
        self._entries[key] = value

    def __getitem__(self, key):
        # lazily verify the branch's nodes before exposing them
        self._verifybranch(key)
        return self._entries[key]

    def __contains__(self, key):
        self._verifybranch(key)
        return key in self._entries

    def iteritems(self):
        self._verifyall()
        return self._entries.iteritems()

    def hasbranch(self, label):
        """ checks whether a branch of this name exists or not """
        self._verifybranch(label)
        return label in self._entries

    @classmethod
    def fromfile(cls, repo):
        # Returns a branchcache read from the on-disk cache file, or None if
        # the file is missing/unreadable or its content is stale/invalid.
        f = None
        try:
            f = repo.cachevfs(cls._filename(repo))
            lineiter = iter(f)
            # first line: "<tip hex> <tip rev> [filtered hash]"
            cachekey = next(lineiter).rstrip('\n').split(" ", 2)
            last, lrev = cachekey[:2]
            last, lrev = bin(last), int(lrev)
            filteredhash = None
            hasnode = repo.changelog.hasnode
            if len(cachekey) > 2:
                filteredhash = bin(cachekey[2])
            bcache = cls(tipnode=last, tiprev=lrev, filteredhash=filteredhash,
                         hasnode=hasnode)
            if not bcache.validfor(repo):
                # invalidate the cache
                raise ValueError(r'tip differs')
            bcache.load(repo, lineiter)
        except (IOError, OSError):
            return None

        except Exception as inst:
            if repo.ui.debugflag:
                msg = 'invalid branchheads cache'
                if repo.filtername is not None:
                    msg += ' (%s)' % repo.filtername
                msg += ': %s\n'
                repo.ui.debug(msg % pycompat.bytestr(inst))
            bcache = None

        finally:
            if f:
                f.close()

        return bcache

    def load(self, repo, lineiter):
        """ fully loads the branchcache by reading from the file using the line
        iterator passed"""
        for line in lineiter:
            line = line.rstrip('\n')
            if not line:
                continue
            node, state, label = line.split(" ", 2)
            if state not in 'oc':
                raise ValueError(r'invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            self._entries.setdefault(label, []).append(node)
            if state == 'c':
                self._closednodes.add(node)

    @staticmethod
    def _filename(repo):
        """name of a branchcache file for a given repo or repoview"""
        filename = "branch2"
        if repo.filtername:
            filename = '%s-%s' % (filename, repo.filtername)
        return filename

    def validfor(self, repo):
        """Is the cache content valid regarding a repo

        - False when cached tipnode is unknown or if we detect a strip.
        - True when cache is up to date or a subset of current repo."""
        try:
            return ((self.tipnode == repo.changelog.node(self.tiprev))
                    and (self.filteredhash ==
                         scmutil.filteredhash(repo, self.tiprev)))
        except IndexError:
            return False

    def _branchtip(self, heads):
        '''Return tuple with last open head in heads and false,
        otherwise return last closed head and true.'''
        tip = heads[-1]
        closed = True
        self._verifyclosed()
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        '''Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch.'''
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        # Filter out closed heads. Closed nodes are deliberately NOT
        # verified here (unlike _branchtip above): callers such as
        # branchheads() already verify the branch's nodes, and membership
        # in _closednodes does not require the nodes to exist.
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        # Return the heads of ``branch``; closed heads are included only
        # when ``closed`` is True.
        self._verifybranch(branch)
        heads = self._entries[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        # Yield (branchname, heads, branchtip, is_closed) tuples.
        for bn, heads in self.iteritems():
            yield (bn, heads) + self._branchtip(heads)

    def iterheads(self):
        """ returns all the heads """
        self._verifyall()
        return self._entries.itervalues()

    def copy(self):
        """return an deep copy of the branchcache object"""
        self._verifyall()
        return type(self)(
            self._entries, self.tipnode, self.tiprev, self.filteredhash,
            self._closednodes)

    def write(self, repo):
        # Persist the cache to disk; failures are logged and ignored since
        # the cache can always be recomputed.
        try:
            f = repo.cachevfs(self._filename(repo), "w", atomictemp=True)
            cachekey = [hex(self.tipnode), '%d' % self.tiprev]
            if self.filteredhash is not None:
                cachekey.append(hex(self.filteredhash))
            f.write(" ".join(cachekey) + '\n')
            nodecount = 0
            for label, nodes in sorted(self.iteritems()):
                label = encoding.fromlocal(label)
                for node in nodes:
                    nodecount += 1
                    if node in self._closednodes:
                        state = 'c'
                    else:
                        state = 'o'
                    f.write("%s %s %s\n" % (hex(node), state, label))
            f.close()
            repo.ui.log('branchcache',
                        'wrote %s branch cache with %d labels and %d nodes\n',
                        repo.filtername, len(self._entries), nodecount)
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug("couldn't write branch cache: %s\n" %
                          stringutil.forcebytestr(inst))

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        heads missing, this function updates self to be correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))

        # fetch current topological heads to speed up filtering
        topoheads = set(cl.headrevs())

        # if older branchheads are reachable from new ones, they aren't
        # really branchheads. Note checking parents is insufficient:
        # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
        for branch, newheadrevs in newbranches.iteritems():
            bheads = self._entries.setdefault(branch, [])
            bheadset = set(cl.rev(node) for node in bheads)

            # This have been tested True on all internal usage of this function.
            # run it again in case of doubt
            # assert not (set(bheadrevs) & set(newheadrevs))
            bheadset.update(newheadrevs)

            # This prunes out two kinds of heads - heads that are superseded by
            # a head in newheadrevs, and newheadrevs that are not heads because
            # an existing head is their descendant.
            uncertain = bheadset - topoheads
            if uncertain:
                floorrev = min(uncertain)
                ancestors = set(cl.ancestors(newheadrevs, floorrev))
                bheadset -= ancestors
            bheadrevs = sorted(bheadset)
            self[branch] = [cl.node(rev) for rev in bheadrevs]
            tiprev = bheadrevs[-1]
            if tiprev > self.tiprev:
                self.tipnode = cl.node(tiprev)
                self.tiprev = tiprev

        if not self.validfor(repo):
            # cache key are not valid anymore
            self.tipnode = nullid
            self.tiprev = nullrev
            for heads in self.iterheads():
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(repo, self.tiprev)

        duration = util.timer() - starttime
        repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                    repo.filtername or b'None', duration)

        self.write(repo)
436
435
437
436
class remotebranchcache(branchcache):
    """Branchmap info received over a remote connection.

    Such data must never be persisted to the local cache directory, so
    writing is disabled.
    """

    def write(self, repo):
        # Intentional no-op: see the class docstring.
        pass
442
441
443
442
# Revision branch info cache

_rbcversion = '-v1'
# cache file holding the '\0'-separated branch name table
_rbcnames = 'rbc-names' + _rbcversion
# cache file holding one fixed-size record per revision
_rbcrevs = 'rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = '>4sI'  # big-endian: 4-byte node prefix + unsigned 32-bit int
_rbcrecsize = calcsize(_rbcrecfmt)  # 8 bytes per record
_rbcnodelen = 4
# low 31 bits of the record's int: index into the branch name table
_rbcbranchidxmask = 0x7fffffff
# high bit of the record's int: set when the revision closes its branch
_rbccloseflag = 0x80000000
455
454
456 class revbranchcache(object):
455 class revbranchcache(object):
457 """Persistent cache, mapping from revision number to branch name and close.
456 """Persistent cache, mapping from revision number to branch name and close.
458 This is a low level cache, independent of filtering.
457 This is a low level cache, independent of filtering.
459
458
460 Branch names are stored in rbc-names in internal encoding separated by 0.
459 Branch names are stored in rbc-names in internal encoding separated by 0.
461 rbc-names is append-only, and each branch name is only stored once and will
460 rbc-names is append-only, and each branch name is only stored once and will
462 thus have a unique index.
461 thus have a unique index.
463
462
464 The branch info for each revision is stored in rbc-revs as constant size
463 The branch info for each revision is stored in rbc-revs as constant size
465 records. The whole file is read into memory, but it is only 'parsed' on
464 records. The whole file is read into memory, but it is only 'parsed' on
466 demand. The file is usually append-only but will be truncated if repo
465 demand. The file is usually append-only but will be truncated if repo
467 modification is detected.
466 modification is detected.
468 The record for each revision contains the first 4 bytes of the
467 The record for each revision contains the first 4 bytes of the
469 corresponding node hash, and the record is only used if it still matches.
468 corresponding node hash, and the record is only used if it still matches.
470 Even a completely trashed rbc-revs fill thus still give the right result
469 Even a completely trashed rbc-revs fill thus still give the right result
471 while converging towards full recovery ... assuming no incorrectly matching
470 while converging towards full recovery ... assuming no incorrectly matching
472 node hashes.
471 node hashes.
473 The record also contains 4 bytes where 31 bits contains the index of the
472 The record also contains 4 bytes where 31 bits contains the index of the
474 branch and the last bit indicate that it is a branch close commit.
473 branch and the last bit indicate that it is a branch close commit.
475 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
474 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
476 and will grow with it but be 1/8th of its size.
475 and will grow with it but be 1/8th of its size.
477 """
476 """
478
477
def __init__(self, repo, readonly=True):
    """Initialize the revision branch cache from its on-disk files.

    Reads the branch-name table (rbc-names) and the fixed-size
    per-revision record file (rbc-revs) from ``repo.cachevfs``.  Any
    read failure degrades gracefully: the cache starts empty and, in
    ``readonly`` mode, ``branchinfo`` lookups bypass the cache entirely
    by rebinding to the slow path.
    """
    # the cache is only maintained for the unfiltered repository view
    assert repo.filtername is None
    self._repo = repo
    self._names = [] # branch names in local encoding with static index
    self._rbcrevs = bytearray()
    self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
    try:
        bndata = repo.cachevfs.read(_rbcnames)
        self._rbcsnameslen = len(bndata) # for verification before writing
        if bndata:
            self._names = [encoding.tolocal(bn)
                           for bn in bndata.split('\0')]
    except (IOError, OSError):
        if readonly:
            # don't try to use cache - fall back to the slow path
            self.branchinfo = self._branchinfo

    # rbc-revs is only meaningful if the name table loaded successfully
    if self._names:
        try:
            data = repo.cachevfs.read(_rbcrevs)
            self._rbcrevs[:] = data
        except (IOError, OSError) as inst:
            repo.ui.debug("couldn't read revision branch cache: %s\n" %
                          stringutil.forcebytestr(inst))
    # remember number of good records on disk
    self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
                           len(repo.changelog))
    if self._rbcrevslen == 0:
        self._names = []
    self._rbcnamescount = len(self._names) # number of names read at
                                           # _rbcsnameslen
510
509
def _clear(self):
    """Reset the cache to a pristine, fully-unpopulated state.

    Drops all cached branch names and zero-fills the per-revision
    records so every lookup falls back to the changelog until the cache
    is repopulated.  Also invalidates the cached ``_namesreverse`` map,
    which is derived from ``_names``.
    """
    self._rbcsnameslen = 0
    del self._names[:]
    self._rbcnamescount = 0
    self._rbcrevslen = len(self._repo.changelog)
    # one zeroed record per revision; a null node marks "not cached"
    self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
    util.clearcachedproperty(self, '_namesreverse')
518
517
@util.propertycache
def _namesreverse(self):
    """Lazily-built reverse map: branch name -> index in ``_names``.

    Cached via ``util.propertycache``; ``_clear`` invalidates it when
    the name table is rebuilt.
    """
    return dict((b, r) for r, b in enumerate(self._names))
522
521
def branchinfo(self, rev):
    """Return branch name and close flag for rev, using and updating
    persistent cache.

    Fast path reads the fixed-size record for ``rev`` and trusts it only
    when the stored node prefix matches the changelog; otherwise it
    repairs or truncates the cache and falls back to the slow path
    (``_branchinfo``), which also refreshes the record.
    """
    changelog = self._repo.changelog
    rbcrevidx = rev * _rbcrecsize

    # avoid negative index, changelog.read(nullrev) is fast without cache
    if rev == nullrev:
        return changelog.branchinfo(rev)

    # if requested rev isn't allocated, grow and cache the rev info
    if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
        return self._branchinfo(rev)

    # fast path: extract data from cache, use it if node is matching
    reponode = changelog.node(rev)[:_rbcnodelen]
    cachenode, branchidx = unpack_from(
        _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
    # high bit of the index encodes the branch-close flag
    close = bool(branchidx & _rbccloseflag)
    if close:
        branchidx &= _rbcbranchidxmask
    if cachenode == '\0\0\0\0':
        # record never written - fall through to the slow path
        pass
    elif cachenode == reponode:
        try:
            return self._names[branchidx], close
        except IndexError:
            # recover from invalid reference to unknown branch
            self._repo.ui.debug("referenced branch names not found"
                " - rebuilding revision branch cache from scratch\n")
            self._clear()
    else:
        # rev/node map has changed, invalidate the cache from here up
        self._repo.ui.debug("history modification detected - truncating "
                            "revision branch cache to revision %d\n" % rev)
        truncate = rbcrevidx + _rbcrecsize
        del self._rbcrevs[truncate:]
        self._rbcrevslen = min(self._rbcrevslen, truncate)

    # fall back to slow path and make sure it will be written to disk
    return self._branchinfo(rev)
564
563
def _branchinfo(self, rev):
    """Retrieve branch info from changelog and update _rbcrevs.

    Slow path: reads (branch, close) straight from the changelog, makes
    sure the branch name is registered in the name table, and writes the
    refreshed record back into the in-memory cache.
    """
    changelog = self._repo.changelog
    b, close = changelog.branchinfo(rev)
    if b in self._namesreverse:
        branchidx = self._namesreverse[b]
    else:
        # first time this branch name is seen - append to the table
        branchidx = len(self._names)
        self._names.append(b)
        self._namesreverse[b] = branchidx
    reponode = changelog.node(rev)
    if close:
        # encode the close flag in the record's high bit
        branchidx |= _rbccloseflag
    self._setcachedata(rev, reponode, branchidx)
    return b, close
580
579
def setdata(self, branch, rev, node, close):
    """add new data information to the cache

    Registers ``branch`` in the name table if needed, encodes the close
    flag, and stores the record for ``rev``.  Also re-enables the cached
    fast path if ``__init__`` had bypassed it.
    """
    if branch in self._namesreverse:
        branchidx = self._namesreverse[branch]
    else:
        branchidx = len(self._names)
        self._names.append(branch)
        self._namesreverse[branch] = branchidx
    if close:
        branchidx |= _rbccloseflag
    self._setcachedata(rev, node, branchidx)
    # If no cache data were readable (non exists, bad permission, etc)
    # the cache was bypassing itself by setting:
    #
    #   self.branchinfo = self._branchinfo
    #
    # Since we now have data in the cache, we need to drop this bypassing.
    if r'branchinfo' in vars(self):
        del self.branchinfo
600
599
def _setcachedata(self, rev, node, branchidx):
    """Writes the node's branch data to the in-memory cache data.

    Grows the record buffer to cover the whole changelog if needed,
    packs the (node prefix, branch index) record in place, and schedules
    a write-out at transaction finalization when one is active.
    """
    if rev == nullrev:
        # nullrev has no record slot; nothing to store
        return
    rbcrevidx = rev * _rbcrecsize
    if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
        # zero-fill up to the current changelog length in one step
        self._rbcrevs.extend('\0' *
                             (len(self._repo.changelog) * _rbcrecsize -
                              len(self._rbcrevs)))
    pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
    # everything from this revision on is dirty and must be rewritten
    self._rbcrevslen = min(self._rbcrevslen, rev)

    tr = self._repo.currenttransaction()
    if tr:
        tr.addfinalize('write-revbranchcache', self.write)
616
615
def write(self, tr=None):
    """Save branch cache if it is dirty.

    Appends newly-seen branch names to rbc-names (rewriting the file if
    its on-disk length no longer matches what we read) and appends dirty
    records to rbc-revs, truncating first if the file diverged.  All
    failures (I/O, lock contention, abort) are non-fatal and only
    logged: the cache is best-effort.
    """
    repo = self._repo
    wlock = None
    step = ''
    try:
        if self._rbcnamescount < len(self._names):
            step = ' names'
            wlock = repo.wlock(wait=False)
            if self._rbcnamescount != 0:
                f = repo.cachevfs.open(_rbcnames, 'ab')
                if f.tell() == self._rbcsnameslen:
                    # file is as we left it - append a separator and go on
                    f.write('\0')
                else:
                    # someone else rewrote the names file - start over
                    f.close()
                    repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                    self._rbcnamescount = 0
                    self._rbcrevslen = 0
            if self._rbcnamescount == 0:
                # before rewriting names, make sure references are removed
                repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                f = repo.cachevfs.open(_rbcnames, 'wb')
            f.write('\0'.join(encoding.fromlocal(b)
                              for b in self._names[self._rbcnamescount:]))
            self._rbcsnameslen = f.tell()
            f.close()
            self._rbcnamescount = len(self._names)

        start = self._rbcrevslen * _rbcrecsize
        if start != len(self._rbcrevs):
            step = ''
            if wlock is None:
                wlock = repo.wlock(wait=False)
            revs = min(len(repo.changelog),
                       len(self._rbcrevs) // _rbcrecsize)
            f = repo.cachevfs.open(_rbcrevs, 'ab')
            if f.tell() != start:
                repo.ui.debug("truncating cache/%s to %d\n"
                              % (_rbcrevs, start))
                f.seek(start)
                if f.tell() != start:
                    # seek failed (sparse/short file) - rewrite from scratch
                    start = 0
                    f.seek(start)
                f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs[start:end])
            f.close()
            self._rbcrevslen = revs
    except (IOError, OSError, error.Abort, error.LockError) as inst:
        repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                      % (step, stringutil.forcebytestr(inst)))
    finally:
        if wlock is not None:
            wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now