# Mercurial changeset r42290:65786549 (default branch), by Pulkit Goyal:
# "branchcache: lazily validate nodes from the branchmap"
# Diff of mercurial/branchmap.py (post-change view reconstructed below).
@@ -1,667 +1,670 b''
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11
11
12 from .node import (
12 from .node import (
13 bin,
13 bin,
14 hex,
14 hex,
15 nullid,
15 nullid,
16 nullrev,
16 nullrev,
17 )
17 )
18 from . import (
18 from . import (
19 encoding,
19 encoding,
20 error,
20 error,
21 pycompat,
21 pycompat,
22 scmutil,
22 scmutil,
23 util,
23 util,
24 )
24 )
25 from .utils import (
25 from .utils import (
26 stringutil,
26 stringutil,
27 )
27 )
28
28
29 calcsize = struct.calcsize
29 calcsize = struct.calcsize
30 pack_into = struct.pack_into
30 pack_into = struct.pack_into
31 unpack_from = struct.unpack_from
31 unpack_from = struct.unpack_from
32
32
33
33
34 ### Nearest subset relation
34 ### Nearest subset relation
35 # Nearest subset of filter X is a filter Y so that:
35 # Nearest subset of filter X is a filter Y so that:
36 # * Y is included in X,
36 # * Y is included in X,
37 # * X - Y is as small as possible.
37 # * X - Y is as small as possible.
38 # This create and ordering used for branchmap purpose.
38 # This create and ordering used for branchmap purpose.
39 # the ordering may be partial
39 # the ordering may be partial
40 subsettable = {None: 'visible',
40 subsettable = {None: 'visible',
41 'visible-hidden': 'visible',
41 'visible-hidden': 'visible',
42 'visible': 'served',
42 'visible': 'served',
43 'served': 'immutable',
43 'served': 'immutable',
44 'immutable': 'base'}
44 'immutable': 'base'}
45
45
46
46
47 class BranchMapCache(object):
47 class BranchMapCache(object):
48 """mapping of filtered views of repo with their branchcache"""
48 """mapping of filtered views of repo with their branchcache"""
49 def __init__(self):
49 def __init__(self):
50 self._per_filter = {}
50 self._per_filter = {}
51
51
52 def __getitem__(self, repo):
52 def __getitem__(self, repo):
53 self.updatecache(repo)
53 self.updatecache(repo)
54 return self._per_filter[repo.filtername]
54 return self._per_filter[repo.filtername]
55
55
56 def updatecache(self, repo):
56 def updatecache(self, repo):
57 """Update the cache for the given filtered view on a repository"""
57 """Update the cache for the given filtered view on a repository"""
58 # This can trigger updates for the caches for subsets of the filtered
58 # This can trigger updates for the caches for subsets of the filtered
59 # view, e.g. when there is no cache for this filtered view or the cache
59 # view, e.g. when there is no cache for this filtered view or the cache
60 # is stale.
60 # is stale.
61
61
62 cl = repo.changelog
62 cl = repo.changelog
63 filtername = repo.filtername
63 filtername = repo.filtername
64 bcache = self._per_filter.get(filtername)
64 bcache = self._per_filter.get(filtername)
65 if bcache is None or not bcache.validfor(repo):
65 if bcache is None or not bcache.validfor(repo):
66 # cache object missing or cache object stale? Read from disk
66 # cache object missing or cache object stale? Read from disk
67 bcache = branchcache.fromfile(repo)
67 bcache = branchcache.fromfile(repo)
68
68
69 revs = []
69 revs = []
70 if bcache is None:
70 if bcache is None:
71 # no (fresh) cache available anymore, perhaps we can re-use
71 # no (fresh) cache available anymore, perhaps we can re-use
72 # the cache for a subset, then extend that to add info on missing
72 # the cache for a subset, then extend that to add info on missing
73 # revisions.
73 # revisions.
74 subsetname = subsettable.get(filtername)
74 subsetname = subsettable.get(filtername)
75 if subsetname is not None:
75 if subsetname is not None:
76 subset = repo.filtered(subsetname)
76 subset = repo.filtered(subsetname)
77 bcache = self[subset].copy()
77 bcache = self[subset].copy()
78 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
78 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
79 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
79 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
80 else:
80 else:
81 # nothing to fall back on, start empty.
81 # nothing to fall back on, start empty.
82 bcache = branchcache()
82 bcache = branchcache()
83
83
84 revs.extend(cl.revs(start=bcache.tiprev + 1))
84 revs.extend(cl.revs(start=bcache.tiprev + 1))
85 if revs:
85 if revs:
86 bcache.update(repo, revs)
86 bcache.update(repo, revs)
87
87
88 assert bcache.validfor(repo), filtername
88 assert bcache.validfor(repo), filtername
89 self._per_filter[repo.filtername] = bcache
89 self._per_filter[repo.filtername] = bcache
90
90
91 def replace(self, repo, remotebranchmap):
91 def replace(self, repo, remotebranchmap):
92 """Replace the branchmap cache for a repo with a branch mapping.
92 """Replace the branchmap cache for a repo with a branch mapping.
93
93
94 This is likely only called during clone with a branch map from a
94 This is likely only called during clone with a branch map from a
95 remote.
95 remote.
96
96
97 """
97 """
98 cl = repo.changelog
98 cl = repo.changelog
99 clrev = cl.rev
99 clrev = cl.rev
100 clbranchinfo = cl.branchinfo
100 clbranchinfo = cl.branchinfo
101 rbheads = []
101 rbheads = []
102 closed = []
102 closed = []
103 for bheads in remotebranchmap.itervalues():
103 for bheads in remotebranchmap.itervalues():
104 rbheads += bheads
104 rbheads += bheads
105 for h in bheads:
105 for h in bheads:
106 r = clrev(h)
106 r = clrev(h)
107 b, c = clbranchinfo(r)
107 b, c = clbranchinfo(r)
108 if c:
108 if c:
109 closed.append(h)
109 closed.append(h)
110
110
111 if rbheads:
111 if rbheads:
112 rtiprev = max((int(clrev(node)) for node in rbheads))
112 rtiprev = max((int(clrev(node)) for node in rbheads))
113 cache = branchcache(
113 cache = branchcache(
114 remotebranchmap, repo[rtiprev].node(), rtiprev,
114 remotebranchmap, repo[rtiprev].node(), rtiprev,
115 closednodes=closed)
115 closednodes=closed)
116
116
117 # Try to stick it as low as possible
117 # Try to stick it as low as possible
118 # filter above served are unlikely to be fetch from a clone
118 # filter above served are unlikely to be fetch from a clone
119 for candidate in ('base', 'immutable', 'served'):
119 for candidate in ('base', 'immutable', 'served'):
120 rview = repo.filtered(candidate)
120 rview = repo.filtered(candidate)
121 if cache.validfor(rview):
121 if cache.validfor(rview):
122 self._per_filter[candidate] = cache
122 self._per_filter[candidate] = cache
123 cache.write(rview)
123 cache.write(rview)
124 return
124 return
125
125
126 def clear(self):
126 def clear(self):
127 self._per_filter.clear()
127 self._per_filter.clear()
128
128
129 def _unknownnode(node):
129 def _unknownnode(node):
130 """ raises ValueError when branchcache found a node which does not exists
130 """ raises ValueError when branchcache found a node which does not exists
131 """
131 """
132 raise ValueError(r'node %s does not exist' % pycompat.sysstr(hex(node)))
132 raise ValueError(r'node %s does not exist' % pycompat.sysstr(hex(node)))
133
133
134 class branchcache(object):
134 class branchcache(object):
135 """A dict like object that hold branches heads cache.
135 """A dict like object that hold branches heads cache.
136
136
137 This cache is used to avoid costly computations to determine all the
137 This cache is used to avoid costly computations to determine all the
138 branch heads of a repo.
138 branch heads of a repo.
139
139
140 The cache is serialized on disk in the following format:
140 The cache is serialized on disk in the following format:
141
141
142 <tip hex node> <tip rev number> [optional filtered repo hex hash]
142 <tip hex node> <tip rev number> [optional filtered repo hex hash]
143 <branch head hex node> <open/closed state> <branch name>
143 <branch head hex node> <open/closed state> <branch name>
144 <branch head hex node> <open/closed state> <branch name>
144 <branch head hex node> <open/closed state> <branch name>
145 ...
145 ...
146
146
147 The first line is used to check if the cache is still valid. If the
147 The first line is used to check if the cache is still valid. If the
148 branch cache is for a filtered repo view, an optional third hash is
148 branch cache is for a filtered repo view, an optional third hash is
149 included that hashes the hashes of all filtered revisions.
149 included that hashes the hashes of all filtered revisions.
150
150
151 The open/closed state is represented by a single letter 'o' or 'c'.
151 The open/closed state is represented by a single letter 'o' or 'c'.
152 This field can be used to avoid changelog reads when determining if a
152 This field can be used to avoid changelog reads when determining if a
153 branch head closes a branch or not.
153 branch head closes a branch or not.
154 """
154 """
155
155
156 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
156 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
157 filteredhash=None, closednodes=None, hasnode=None):
157 filteredhash=None, closednodes=None, hasnode=None):
158 """ hasnode is a function which can be used to verify whether changelog
158 """ hasnode is a function which can be used to verify whether changelog
159 has a given node or not. If it's not provided, we assume that every node
159 has a given node or not. If it's not provided, we assume that every node
160 we have exists in changelog """
160 we have exists in changelog """
161 self.tipnode = tipnode
161 self.tipnode = tipnode
162 self.tiprev = tiprev
162 self.tiprev = tiprev
163 self.filteredhash = filteredhash
163 self.filteredhash = filteredhash
164 # closednodes is a set of nodes that close their branch. If the branch
164 # closednodes is a set of nodes that close their branch. If the branch
165 # cache has been updated, it may contain nodes that are no longer
165 # cache has been updated, it may contain nodes that are no longer
166 # heads.
166 # heads.
167 if closednodes is None:
167 if closednodes is None:
168 self._closednodes = set()
168 self._closednodes = set()
169 else:
169 else:
170 self._closednodes = closednodes
170 self._closednodes = closednodes
171 self._entries = dict(entries)
171 self._entries = dict(entries)
172 # whether closed nodes are verified or not
172 # whether closed nodes are verified or not
173 self._closedverified = False
173 self._closedverified = False
174 # branches for which nodes are verified
174 # branches for which nodes are verified
175 self._verifiedbranches = set()
175 self._verifiedbranches = set()
176 self._hasnode = hasnode
176 self._hasnode = hasnode
177 if self._hasnode is None:
177 if self._hasnode is None:
178 self._hasnode = lambda x: True
178 self._hasnode = lambda x: True
179
179
180 def _verifyclosed(self):
180 def _verifyclosed(self):
181 """ verify the closed nodes we have """
181 """ verify the closed nodes we have """
182 if self._closedverified:
182 if self._closedverified:
183 return
183 return
184 for node in self._closednodes:
184 for node in self._closednodes:
185 if not self._hasnode(node):
185 if not self._hasnode(node):
186 _unknownnode(node)
186 _unknownnode(node)
187
187
188 self._closedverified = True
188 self._closedverified = True
189
189
190 def _verifybranch(self, branch):
190 def _verifybranch(self, branch):
191 """ verify head nodes for the given branch. If branch is None, verify
191 """ verify head nodes for the given branch. If branch is None, verify
192 for all the branches """
192 for all the branches """
193 if branch not in self._entries or branch in self._verifiedbranches:
193 if branch not in self._entries or branch in self._verifiedbranches:
194 return
194 return
195 for n in self._entries[branch]:
195 for n in self._entries[branch]:
196 if not self._hasnode(n):
196 if not self._hasnode(n):
197 _unknownnode(n)
197 _unknownnode(n)
198
198
199 self._verifiedbranches.add(branch)
199 self._verifiedbranches.add(branch)
200
200
201 def _verifyall(self):
201 def _verifyall(self):
202 """ verifies nodes of all the branches """
202 """ verifies nodes of all the branches """
203 for b in self._entries:
203 for b in self._entries:
204 self._verifybranch(b)
204 self._verifybranch(b)
205
205
206 def __iter__(self):
206 def __iter__(self):
207 return iter(self._entries)
207 return iter(self._entries)
208
208
209 def __setitem__(self, key, value):
209 def __setitem__(self, key, value):
210 self._entries[key] = value
210 self._entries[key] = value
211
211
212 def __getitem__(self, key):
212 def __getitem__(self, key):
213 self._verifybranch(key)
213 return self._entries[key]
214 return self._entries[key]
214
215
215 def __contains__(self, key):
216 def __contains__(self, key):
217 self._verifybranch(key)
216 return key in self._entries
218 return key in self._entries
217
219
218 def iteritems(self):
220 def iteritems(self):
221 self._verifyall()
219 return self._entries.iteritems()
222 return self._entries.iteritems()
220
223
221 def hasbranch(self, label):
224 def hasbranch(self, label):
222 """ checks whether a branch of this name exists or not """
225 """ checks whether a branch of this name exists or not """
226 self._verifybranch(label)
223 return label in self._entries
227 return label in self._entries
224
228
225 @classmethod
229 @classmethod
226 def fromfile(cls, repo):
230 def fromfile(cls, repo):
227 f = None
231 f = None
228 try:
232 try:
229 f = repo.cachevfs(cls._filename(repo))
233 f = repo.cachevfs(cls._filename(repo))
230 lineiter = iter(f)
234 lineiter = iter(f)
231 cachekey = next(lineiter).rstrip('\n').split(" ", 2)
235 cachekey = next(lineiter).rstrip('\n').split(" ", 2)
232 last, lrev = cachekey[:2]
236 last, lrev = cachekey[:2]
233 last, lrev = bin(last), int(lrev)
237 last, lrev = bin(last), int(lrev)
234 filteredhash = None
238 filteredhash = None
235 hasnode = repo.changelog.hasnode
239 hasnode = repo.changelog.hasnode
236 if len(cachekey) > 2:
240 if len(cachekey) > 2:
237 filteredhash = bin(cachekey[2])
241 filteredhash = bin(cachekey[2])
238 bcache = cls(tipnode=last, tiprev=lrev, filteredhash=filteredhash,
242 bcache = cls(tipnode=last, tiprev=lrev, filteredhash=filteredhash,
239 hasnode=hasnode)
243 hasnode=hasnode)
240 if not bcache.validfor(repo):
244 if not bcache.validfor(repo):
241 # invalidate the cache
245 # invalidate the cache
242 raise ValueError(r'tip differs')
246 raise ValueError(r'tip differs')
243 bcache.load(repo, lineiter)
247 bcache.load(repo, lineiter)
244 except (IOError, OSError):
248 except (IOError, OSError):
245 return None
249 return None
246
250
247 except Exception as inst:
251 except Exception as inst:
248 if repo.ui.debugflag:
252 if repo.ui.debugflag:
249 msg = 'invalid branchheads cache'
253 msg = 'invalid branchheads cache'
250 if repo.filtername is not None:
254 if repo.filtername is not None:
251 msg += ' (%s)' % repo.filtername
255 msg += ' (%s)' % repo.filtername
252 msg += ': %s\n'
256 msg += ': %s\n'
253 repo.ui.debug(msg % pycompat.bytestr(inst))
257 repo.ui.debug(msg % pycompat.bytestr(inst))
254 bcache = None
258 bcache = None
255
259
256 finally:
260 finally:
257 if f:
261 if f:
258 f.close()
262 f.close()
259
263
260 return bcache
264 return bcache
261
265
262 def load(self, repo, lineiter):
266 def load(self, repo, lineiter):
263 """ fully loads the branchcache by reading from the file using the line
267 """ fully loads the branchcache by reading from the file using the line
264 iterator passed"""
268 iterator passed"""
265 cl = repo.changelog
266 for line in lineiter:
269 for line in lineiter:
267 line = line.rstrip('\n')
270 line = line.rstrip('\n')
268 if not line:
271 if not line:
269 continue
272 continue
270 node, state, label = line.split(" ", 2)
273 node, state, label = line.split(" ", 2)
271 if state not in 'oc':
274 if state not in 'oc':
272 raise ValueError(r'invalid branch state')
275 raise ValueError(r'invalid branch state')
273 label = encoding.tolocal(label.strip())
276 label = encoding.tolocal(label.strip())
274 node = bin(node)
277 node = bin(node)
275 if not cl.hasnode(node):
276 raise ValueError(
277 r'node %s does not exist' % pycompat.sysstr(hex(node)))
278 self._entries.setdefault(label, []).append(node)
278 self._entries.setdefault(label, []).append(node)
279 self._verifiedbranches.add(label)
280 if state == 'c':
279 if state == 'c':
281 self._closednodes.add(node)
280 self._closednodes.add(node)
282 self._closedverified = True
283
281
284 @staticmethod
282 @staticmethod
285 def _filename(repo):
283 def _filename(repo):
286 """name of a branchcache file for a given repo or repoview"""
284 """name of a branchcache file for a given repo or repoview"""
287 filename = "branch2"
285 filename = "branch2"
288 if repo.filtername:
286 if repo.filtername:
289 filename = '%s-%s' % (filename, repo.filtername)
287 filename = '%s-%s' % (filename, repo.filtername)
290 return filename
288 return filename
291
289
292 def validfor(self, repo):
290 def validfor(self, repo):
293 """Is the cache content valid regarding a repo
291 """Is the cache content valid regarding a repo
294
292
295 - False when cached tipnode is unknown or if we detect a strip.
293 - False when cached tipnode is unknown or if we detect a strip.
296 - True when cache is up to date or a subset of current repo."""
294 - True when cache is up to date or a subset of current repo."""
297 try:
295 try:
298 return ((self.tipnode == repo.changelog.node(self.tiprev))
296 return ((self.tipnode == repo.changelog.node(self.tiprev))
299 and (self.filteredhash ==
297 and (self.filteredhash ==
300 scmutil.filteredhash(repo, self.tiprev)))
298 scmutil.filteredhash(repo, self.tiprev)))
301 except IndexError:
299 except IndexError:
302 return False
300 return False
303
301
304 def _branchtip(self, heads):
302 def _branchtip(self, heads):
305 '''Return tuple with last open head in heads and false,
303 '''Return tuple with last open head in heads and false,
306 otherwise return last closed head and true.'''
304 otherwise return last closed head and true.'''
307 tip = heads[-1]
305 tip = heads[-1]
308 closed = True
306 closed = True
307 self._verifyclosed()
309 for h in reversed(heads):
308 for h in reversed(heads):
310 if h not in self._closednodes:
309 if h not in self._closednodes:
311 tip = h
310 tip = h
312 closed = False
311 closed = False
313 break
312 break
314 return tip, closed
313 return tip, closed
315
314
316 def branchtip(self, branch):
315 def branchtip(self, branch):
317 '''Return the tipmost open head on branch head, otherwise return the
316 '''Return the tipmost open head on branch head, otherwise return the
318 tipmost closed head on branch.
317 tipmost closed head on branch.
319 Raise KeyError for unknown branch.'''
318 Raise KeyError for unknown branch.'''
320 return self._branchtip(self[branch])[0]
319 return self._branchtip(self[branch])[0]
321
320
322 def iteropen(self, nodes):
321 def iteropen(self, nodes):
322 self._verifyclosed()
323 return (n for n in nodes if n not in self._closednodes)
323 return (n for n in nodes if n not in self._closednodes)
324
324
325 def branchheads(self, branch, closed=False):
325 def branchheads(self, branch, closed=False):
326 self._verifybranch(branch)
326 heads = self._entries[branch]
327 heads = self._entries[branch]
327 if not closed:
328 if not closed:
328 heads = list(self.iteropen(heads))
329 heads = list(self.iteropen(heads))
329 return heads
330 return heads
330
331
331 def iterbranches(self):
332 def iterbranches(self):
332 for bn, heads in self.iteritems():
333 for bn, heads in self.iteritems():
333 yield (bn, heads) + self._branchtip(heads)
334 yield (bn, heads) + self._branchtip(heads)
334
335
335 def iterheads(self):
336 def iterheads(self):
336 """ returns all the heads """
337 """ returns all the heads """
338 self._verifyall()
337 return self._entries.itervalues()
339 return self._entries.itervalues()
338
340
339 def copy(self):
341 def copy(self):
340 """return an deep copy of the branchcache object"""
342 """return an deep copy of the branchcache object"""
343 self._verifyall()
341 return type(self)(
344 return type(self)(
342 self._entries, self.tipnode, self.tiprev, self.filteredhash,
345 self._entries, self.tipnode, self.tiprev, self.filteredhash,
343 self._closednodes)
346 self._closednodes)
344
347
345 def write(self, repo):
348 def write(self, repo):
346 try:
349 try:
347 f = repo.cachevfs(self._filename(repo), "w", atomictemp=True)
350 f = repo.cachevfs(self._filename(repo), "w", atomictemp=True)
348 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
351 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
349 if self.filteredhash is not None:
352 if self.filteredhash is not None:
350 cachekey.append(hex(self.filteredhash))
353 cachekey.append(hex(self.filteredhash))
351 f.write(" ".join(cachekey) + '\n')
354 f.write(" ".join(cachekey) + '\n')
352 nodecount = 0
355 nodecount = 0
353 for label, nodes in sorted(self.iteritems()):
356 for label, nodes in sorted(self.iteritems()):
354 label = encoding.fromlocal(label)
357 label = encoding.fromlocal(label)
355 for node in nodes:
358 for node in nodes:
356 nodecount += 1
359 nodecount += 1
357 if node in self._closednodes:
360 if node in self._closednodes:
358 state = 'c'
361 state = 'c'
359 else:
362 else:
360 state = 'o'
363 state = 'o'
361 f.write("%s %s %s\n" % (hex(node), state, label))
364 f.write("%s %s %s\n" % (hex(node), state, label))
362 f.close()
365 f.close()
363 repo.ui.log('branchcache',
366 repo.ui.log('branchcache',
364 'wrote %s branch cache with %d labels and %d nodes\n',
367 'wrote %s branch cache with %d labels and %d nodes\n',
365 repo.filtername, len(self._entries), nodecount)
368 repo.filtername, len(self._entries), nodecount)
366 except (IOError, OSError, error.Abort) as inst:
369 except (IOError, OSError, error.Abort) as inst:
367 # Abort may be raised by read only opener, so log and continue
370 # Abort may be raised by read only opener, so log and continue
368 repo.ui.debug("couldn't write branch cache: %s\n" %
371 repo.ui.debug("couldn't write branch cache: %s\n" %
369 stringutil.forcebytestr(inst))
372 stringutil.forcebytestr(inst))
370
373
371 def update(self, repo, revgen):
374 def update(self, repo, revgen):
372 """Given a branchhead cache, self, that may have extra nodes or be
375 """Given a branchhead cache, self, that may have extra nodes or be
373 missing heads, and a generator of nodes that are strictly a superset of
376 missing heads, and a generator of nodes that are strictly a superset of
374 heads missing, this function updates self to be correct.
377 heads missing, this function updates self to be correct.
375 """
378 """
376 starttime = util.timer()
379 starttime = util.timer()
377 cl = repo.changelog
380 cl = repo.changelog
378 # collect new branch entries
381 # collect new branch entries
379 newbranches = {}
382 newbranches = {}
380 getbranchinfo = repo.revbranchcache().branchinfo
383 getbranchinfo = repo.revbranchcache().branchinfo
381 for r in revgen:
384 for r in revgen:
382 branch, closesbranch = getbranchinfo(r)
385 branch, closesbranch = getbranchinfo(r)
383 newbranches.setdefault(branch, []).append(r)
386 newbranches.setdefault(branch, []).append(r)
384 if closesbranch:
387 if closesbranch:
385 self._closednodes.add(cl.node(r))
388 self._closednodes.add(cl.node(r))
386
389
387 # fetch current topological heads to speed up filtering
390 # fetch current topological heads to speed up filtering
388 topoheads = set(cl.headrevs())
391 topoheads = set(cl.headrevs())
389
392
390 # if older branchheads are reachable from new ones, they aren't
393 # if older branchheads are reachable from new ones, they aren't
391 # really branchheads. Note checking parents is insufficient:
394 # really branchheads. Note checking parents is insufficient:
392 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
395 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
393 for branch, newheadrevs in newbranches.iteritems():
396 for branch, newheadrevs in newbranches.iteritems():
394 bheads = self._entries.setdefault(branch, [])
397 bheads = self._entries.setdefault(branch, [])
395 bheadset = set(cl.rev(node) for node in bheads)
398 bheadset = set(cl.rev(node) for node in bheads)
396
399
397 # This have been tested True on all internal usage of this function.
400 # This have been tested True on all internal usage of this function.
398 # run it again in case of doubt
401 # run it again in case of doubt
399 # assert not (set(bheadrevs) & set(newheadrevs))
402 # assert not (set(bheadrevs) & set(newheadrevs))
400 bheadset.update(newheadrevs)
403 bheadset.update(newheadrevs)
401
404
402 # This prunes out two kinds of heads - heads that are superseded by
405 # This prunes out two kinds of heads - heads that are superseded by
403 # a head in newheadrevs, and newheadrevs that are not heads because
406 # a head in newheadrevs, and newheadrevs that are not heads because
404 # an existing head is their descendant.
407 # an existing head is their descendant.
405 uncertain = bheadset - topoheads
408 uncertain = bheadset - topoheads
406 if uncertain:
409 if uncertain:
407 floorrev = min(uncertain)
410 floorrev = min(uncertain)
408 ancestors = set(cl.ancestors(newheadrevs, floorrev))
411 ancestors = set(cl.ancestors(newheadrevs, floorrev))
409 bheadset -= ancestors
412 bheadset -= ancestors
410 bheadrevs = sorted(bheadset)
413 bheadrevs = sorted(bheadset)
411 self[branch] = [cl.node(rev) for rev in bheadrevs]
414 self[branch] = [cl.node(rev) for rev in bheadrevs]
412 tiprev = bheadrevs[-1]
415 tiprev = bheadrevs[-1]
413 if tiprev > self.tiprev:
416 if tiprev > self.tiprev:
414 self.tipnode = cl.node(tiprev)
417 self.tipnode = cl.node(tiprev)
415 self.tiprev = tiprev
418 self.tiprev = tiprev
416
419
417 if not self.validfor(repo):
420 if not self.validfor(repo):
418 # cache key are not valid anymore
421 # cache key are not valid anymore
419 self.tipnode = nullid
422 self.tipnode = nullid
420 self.tiprev = nullrev
423 self.tiprev = nullrev
421 for heads in self.iterheads():
424 for heads in self.iterheads():
422 tiprev = max(cl.rev(node) for node in heads)
425 tiprev = max(cl.rev(node) for node in heads)
423 if tiprev > self.tiprev:
426 if tiprev > self.tiprev:
424 self.tipnode = cl.node(tiprev)
427 self.tipnode = cl.node(tiprev)
425 self.tiprev = tiprev
428 self.tiprev = tiprev
426 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
429 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
427
430
428 duration = util.timer() - starttime
431 duration = util.timer() - starttime
429 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
432 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
430 repo.filtername or b'None', duration)
433 repo.filtername or b'None', duration)
431
434
432 self.write(repo)
435 self.write(repo)
433
436
434
437
435 class remotebranchcache(branchcache):
438 class remotebranchcache(branchcache):
436 """Branchmap info for a remote connection, should not write locally"""
439 """Branchmap info for a remote connection, should not write locally"""
437 def write(self, repo):
440 def write(self, repo):
438 pass
441 pass
439
442
440
443
441 # Revision branch info cache
444 # Revision branch info cache
442
445
443 _rbcversion = '-v1'
446 _rbcversion = '-v1'
444 _rbcnames = 'rbc-names' + _rbcversion
447 _rbcnames = 'rbc-names' + _rbcversion
445 _rbcrevs = 'rbc-revs' + _rbcversion
448 _rbcrevs = 'rbc-revs' + _rbcversion
446 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
449 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
447 _rbcrecfmt = '>4sI'
450 _rbcrecfmt = '>4sI'
448 _rbcrecsize = calcsize(_rbcrecfmt)
451 _rbcrecsize = calcsize(_rbcrecfmt)
449 _rbcnodelen = 4
452 _rbcnodelen = 4
450 _rbcbranchidxmask = 0x7fffffff
453 _rbcbranchidxmask = 0x7fffffff
451 _rbccloseflag = 0x80000000
454 _rbccloseflag = 0x80000000
452
455
453 class revbranchcache(object):
456 class revbranchcache(object):
454 """Persistent cache, mapping from revision number to branch name and close.
457 """Persistent cache, mapping from revision number to branch name and close.
455 This is a low level cache, independent of filtering.
458 This is a low level cache, independent of filtering.
456
459
457 Branch names are stored in rbc-names in internal encoding separated by 0.
460 Branch names are stored in rbc-names in internal encoding separated by 0.
458 rbc-names is append-only, and each branch name is only stored once and will
461 rbc-names is append-only, and each branch name is only stored once and will
459 thus have a unique index.
462 thus have a unique index.
460
463
461 The branch info for each revision is stored in rbc-revs as constant size
464 The branch info for each revision is stored in rbc-revs as constant size
462 records. The whole file is read into memory, but it is only 'parsed' on
465 records. The whole file is read into memory, but it is only 'parsed' on
463 demand. The file is usually append-only but will be truncated if repo
466 demand. The file is usually append-only but will be truncated if repo
464 modification is detected.
467 modification is detected.
465 The record for each revision contains the first 4 bytes of the
468 The record for each revision contains the first 4 bytes of the
466 corresponding node hash, and the record is only used if it still matches.
469 corresponding node hash, and the record is only used if it still matches.
467 Even a completely trashed rbc-revs fill thus still give the right result
470 Even a completely trashed rbc-revs fill thus still give the right result
468 while converging towards full recovery ... assuming no incorrectly matching
471 while converging towards full recovery ... assuming no incorrectly matching
469 node hashes.
472 node hashes.
470 The record also contains 4 bytes where 31 bits contains the index of the
473 The record also contains 4 bytes where 31 bits contains the index of the
471 branch and the last bit indicate that it is a branch close commit.
474 branch and the last bit indicate that it is a branch close commit.
472 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
475 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
473 and will grow with it but be 1/8th of its size.
476 and will grow with it but be 1/8th of its size.
474 """
477 """
475
478
476 def __init__(self, repo, readonly=True):
479 def __init__(self, repo, readonly=True):
477 assert repo.filtername is None
480 assert repo.filtername is None
478 self._repo = repo
481 self._repo = repo
479 self._names = [] # branch names in local encoding with static index
482 self._names = [] # branch names in local encoding with static index
480 self._rbcrevs = bytearray()
483 self._rbcrevs = bytearray()
481 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
484 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
482 try:
485 try:
483 bndata = repo.cachevfs.read(_rbcnames)
486 bndata = repo.cachevfs.read(_rbcnames)
484 self._rbcsnameslen = len(bndata) # for verification before writing
487 self._rbcsnameslen = len(bndata) # for verification before writing
485 if bndata:
488 if bndata:
486 self._names = [encoding.tolocal(bn)
489 self._names = [encoding.tolocal(bn)
487 for bn in bndata.split('\0')]
490 for bn in bndata.split('\0')]
488 except (IOError, OSError):
491 except (IOError, OSError):
489 if readonly:
492 if readonly:
490 # don't try to use cache - fall back to the slow path
493 # don't try to use cache - fall back to the slow path
491 self.branchinfo = self._branchinfo
494 self.branchinfo = self._branchinfo
492
495
493 if self._names:
496 if self._names:
494 try:
497 try:
495 data = repo.cachevfs.read(_rbcrevs)
498 data = repo.cachevfs.read(_rbcrevs)
496 self._rbcrevs[:] = data
499 self._rbcrevs[:] = data
497 except (IOError, OSError) as inst:
500 except (IOError, OSError) as inst:
498 repo.ui.debug("couldn't read revision branch cache: %s\n" %
501 repo.ui.debug("couldn't read revision branch cache: %s\n" %
499 stringutil.forcebytestr(inst))
502 stringutil.forcebytestr(inst))
500 # remember number of good records on disk
503 # remember number of good records on disk
501 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
504 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
502 len(repo.changelog))
505 len(repo.changelog))
503 if self._rbcrevslen == 0:
506 if self._rbcrevslen == 0:
504 self._names = []
507 self._names = []
505 self._rbcnamescount = len(self._names) # number of names read at
508 self._rbcnamescount = len(self._names) # number of names read at
506 # _rbcsnameslen
509 # _rbcsnameslen
507
510
508 def _clear(self):
511 def _clear(self):
509 self._rbcsnameslen = 0
512 self._rbcsnameslen = 0
510 del self._names[:]
513 del self._names[:]
511 self._rbcnamescount = 0
514 self._rbcnamescount = 0
512 self._rbcrevslen = len(self._repo.changelog)
515 self._rbcrevslen = len(self._repo.changelog)
513 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
516 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
514 util.clearcachedproperty(self, '_namesreverse')
517 util.clearcachedproperty(self, '_namesreverse')
515
518
516 @util.propertycache
519 @util.propertycache
517 def _namesreverse(self):
520 def _namesreverse(self):
518 return dict((b, r) for r, b in enumerate(self._names))
521 return dict((b, r) for r, b in enumerate(self._names))
519
522
520 def branchinfo(self, rev):
523 def branchinfo(self, rev):
521 """Return branch name and close flag for rev, using and updating
524 """Return branch name and close flag for rev, using and updating
522 persistent cache."""
525 persistent cache."""
523 changelog = self._repo.changelog
526 changelog = self._repo.changelog
524 rbcrevidx = rev * _rbcrecsize
527 rbcrevidx = rev * _rbcrecsize
525
528
526 # avoid negative index, changelog.read(nullrev) is fast without cache
529 # avoid negative index, changelog.read(nullrev) is fast without cache
527 if rev == nullrev:
530 if rev == nullrev:
528 return changelog.branchinfo(rev)
531 return changelog.branchinfo(rev)
529
532
530 # if requested rev isn't allocated, grow and cache the rev info
533 # if requested rev isn't allocated, grow and cache the rev info
531 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
534 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
532 return self._branchinfo(rev)
535 return self._branchinfo(rev)
533
536
534 # fast path: extract data from cache, use it if node is matching
537 # fast path: extract data from cache, use it if node is matching
535 reponode = changelog.node(rev)[:_rbcnodelen]
538 reponode = changelog.node(rev)[:_rbcnodelen]
536 cachenode, branchidx = unpack_from(
539 cachenode, branchidx = unpack_from(
537 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
540 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
538 close = bool(branchidx & _rbccloseflag)
541 close = bool(branchidx & _rbccloseflag)
539 if close:
542 if close:
540 branchidx &= _rbcbranchidxmask
543 branchidx &= _rbcbranchidxmask
541 if cachenode == '\0\0\0\0':
544 if cachenode == '\0\0\0\0':
542 pass
545 pass
543 elif cachenode == reponode:
546 elif cachenode == reponode:
544 try:
547 try:
545 return self._names[branchidx], close
548 return self._names[branchidx], close
546 except IndexError:
549 except IndexError:
547 # recover from invalid reference to unknown branch
550 # recover from invalid reference to unknown branch
548 self._repo.ui.debug("referenced branch names not found"
551 self._repo.ui.debug("referenced branch names not found"
549 " - rebuilding revision branch cache from scratch\n")
552 " - rebuilding revision branch cache from scratch\n")
550 self._clear()
553 self._clear()
551 else:
554 else:
552 # rev/node map has changed, invalidate the cache from here up
555 # rev/node map has changed, invalidate the cache from here up
553 self._repo.ui.debug("history modification detected - truncating "
556 self._repo.ui.debug("history modification detected - truncating "
554 "revision branch cache to revision %d\n" % rev)
557 "revision branch cache to revision %d\n" % rev)
555 truncate = rbcrevidx + _rbcrecsize
558 truncate = rbcrevidx + _rbcrecsize
556 del self._rbcrevs[truncate:]
559 del self._rbcrevs[truncate:]
557 self._rbcrevslen = min(self._rbcrevslen, truncate)
560 self._rbcrevslen = min(self._rbcrevslen, truncate)
558
561
559 # fall back to slow path and make sure it will be written to disk
562 # fall back to slow path and make sure it will be written to disk
560 return self._branchinfo(rev)
563 return self._branchinfo(rev)
561
564
562 def _branchinfo(self, rev):
565 def _branchinfo(self, rev):
563 """Retrieve branch info from changelog and update _rbcrevs"""
566 """Retrieve branch info from changelog and update _rbcrevs"""
564 changelog = self._repo.changelog
567 changelog = self._repo.changelog
565 b, close = changelog.branchinfo(rev)
568 b, close = changelog.branchinfo(rev)
566 if b in self._namesreverse:
569 if b in self._namesreverse:
567 branchidx = self._namesreverse[b]
570 branchidx = self._namesreverse[b]
568 else:
571 else:
569 branchidx = len(self._names)
572 branchidx = len(self._names)
570 self._names.append(b)
573 self._names.append(b)
571 self._namesreverse[b] = branchidx
574 self._namesreverse[b] = branchidx
572 reponode = changelog.node(rev)
575 reponode = changelog.node(rev)
573 if close:
576 if close:
574 branchidx |= _rbccloseflag
577 branchidx |= _rbccloseflag
575 self._setcachedata(rev, reponode, branchidx)
578 self._setcachedata(rev, reponode, branchidx)
576 return b, close
579 return b, close
577
580
578 def setdata(self, branch, rev, node, close):
581 def setdata(self, branch, rev, node, close):
579 """add new data information to the cache"""
582 """add new data information to the cache"""
580 if branch in self._namesreverse:
583 if branch in self._namesreverse:
581 branchidx = self._namesreverse[branch]
584 branchidx = self._namesreverse[branch]
582 else:
585 else:
583 branchidx = len(self._names)
586 branchidx = len(self._names)
584 self._names.append(branch)
587 self._names.append(branch)
585 self._namesreverse[branch] = branchidx
588 self._namesreverse[branch] = branchidx
586 if close:
589 if close:
587 branchidx |= _rbccloseflag
590 branchidx |= _rbccloseflag
588 self._setcachedata(rev, node, branchidx)
591 self._setcachedata(rev, node, branchidx)
589 # If no cache data were readable (non exists, bad permission, etc)
592 # If no cache data were readable (non exists, bad permission, etc)
590 # the cache was bypassing itself by setting:
593 # the cache was bypassing itself by setting:
591 #
594 #
592 # self.branchinfo = self._branchinfo
595 # self.branchinfo = self._branchinfo
593 #
596 #
594 # Since we now have data in the cache, we need to drop this bypassing.
597 # Since we now have data in the cache, we need to drop this bypassing.
595 if r'branchinfo' in vars(self):
598 if r'branchinfo' in vars(self):
596 del self.branchinfo
599 del self.branchinfo
597
600
598 def _setcachedata(self, rev, node, branchidx):
601 def _setcachedata(self, rev, node, branchidx):
599 """Writes the node's branch data to the in-memory cache data."""
602 """Writes the node's branch data to the in-memory cache data."""
600 if rev == nullrev:
603 if rev == nullrev:
601 return
604 return
602 rbcrevidx = rev * _rbcrecsize
605 rbcrevidx = rev * _rbcrecsize
603 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
606 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
604 self._rbcrevs.extend('\0' *
607 self._rbcrevs.extend('\0' *
605 (len(self._repo.changelog) * _rbcrecsize -
608 (len(self._repo.changelog) * _rbcrecsize -
606 len(self._rbcrevs)))
609 len(self._rbcrevs)))
607 pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
610 pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
608 self._rbcrevslen = min(self._rbcrevslen, rev)
611 self._rbcrevslen = min(self._rbcrevslen, rev)
609
612
610 tr = self._repo.currenttransaction()
613 tr = self._repo.currenttransaction()
611 if tr:
614 if tr:
612 tr.addfinalize('write-revbranchcache', self.write)
615 tr.addfinalize('write-revbranchcache', self.write)
613
616
614 def write(self, tr=None):
617 def write(self, tr=None):
615 """Save branch cache if it is dirty."""
618 """Save branch cache if it is dirty."""
616 repo = self._repo
619 repo = self._repo
617 wlock = None
620 wlock = None
618 step = ''
621 step = ''
619 try:
622 try:
620 if self._rbcnamescount < len(self._names):
623 if self._rbcnamescount < len(self._names):
621 step = ' names'
624 step = ' names'
622 wlock = repo.wlock(wait=False)
625 wlock = repo.wlock(wait=False)
623 if self._rbcnamescount != 0:
626 if self._rbcnamescount != 0:
624 f = repo.cachevfs.open(_rbcnames, 'ab')
627 f = repo.cachevfs.open(_rbcnames, 'ab')
625 if f.tell() == self._rbcsnameslen:
628 if f.tell() == self._rbcsnameslen:
626 f.write('\0')
629 f.write('\0')
627 else:
630 else:
628 f.close()
631 f.close()
629 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
632 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
630 self._rbcnamescount = 0
633 self._rbcnamescount = 0
631 self._rbcrevslen = 0
634 self._rbcrevslen = 0
632 if self._rbcnamescount == 0:
635 if self._rbcnamescount == 0:
633 # before rewriting names, make sure references are removed
636 # before rewriting names, make sure references are removed
634 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
637 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
635 f = repo.cachevfs.open(_rbcnames, 'wb')
638 f = repo.cachevfs.open(_rbcnames, 'wb')
636 f.write('\0'.join(encoding.fromlocal(b)
639 f.write('\0'.join(encoding.fromlocal(b)
637 for b in self._names[self._rbcnamescount:]))
640 for b in self._names[self._rbcnamescount:]))
638 self._rbcsnameslen = f.tell()
641 self._rbcsnameslen = f.tell()
639 f.close()
642 f.close()
640 self._rbcnamescount = len(self._names)
643 self._rbcnamescount = len(self._names)
641
644
642 start = self._rbcrevslen * _rbcrecsize
645 start = self._rbcrevslen * _rbcrecsize
643 if start != len(self._rbcrevs):
646 if start != len(self._rbcrevs):
644 step = ''
647 step = ''
645 if wlock is None:
648 if wlock is None:
646 wlock = repo.wlock(wait=False)
649 wlock = repo.wlock(wait=False)
647 revs = min(len(repo.changelog),
650 revs = min(len(repo.changelog),
648 len(self._rbcrevs) // _rbcrecsize)
651 len(self._rbcrevs) // _rbcrecsize)
649 f = repo.cachevfs.open(_rbcrevs, 'ab')
652 f = repo.cachevfs.open(_rbcrevs, 'ab')
650 if f.tell() != start:
653 if f.tell() != start:
651 repo.ui.debug("truncating cache/%s to %d\n"
654 repo.ui.debug("truncating cache/%s to %d\n"
652 % (_rbcrevs, start))
655 % (_rbcrevs, start))
653 f.seek(start)
656 f.seek(start)
654 if f.tell() != start:
657 if f.tell() != start:
655 start = 0
658 start = 0
656 f.seek(start)
659 f.seek(start)
657 f.truncate()
660 f.truncate()
658 end = revs * _rbcrecsize
661 end = revs * _rbcrecsize
659 f.write(self._rbcrevs[start:end])
662 f.write(self._rbcrevs[start:end])
660 f.close()
663 f.close()
661 self._rbcrevslen = revs
664 self._rbcrevslen = revs
662 except (IOError, OSError, error.Abort, error.LockError) as inst:
665 except (IOError, OSError, error.Abort, error.LockError) as inst:
663 repo.ui.debug("couldn't write revision branch cache%s: %s\n"
666 repo.ui.debug("couldn't write revision branch cache%s: %s\n"
664 % (step, stringutil.forcebytestr(inst)))
667 % (step, stringutil.forcebytestr(inst)))
665 finally:
668 finally:
666 if wlock is not None:
669 if wlock is not None:
667 wlock.release()
670 wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now