##// END OF EJS Templates
branchmap: improve doc about BranchMapCache class...
Pulkit Goyal -
r41867:a87ca1d7 default
parent child Browse files
Show More
@@ -1,596 +1,596 b''
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11
11
12 from .node import (
12 from .node import (
13 bin,
13 bin,
14 hex,
14 hex,
15 nullid,
15 nullid,
16 nullrev,
16 nullrev,
17 )
17 )
18 from . import (
18 from . import (
19 encoding,
19 encoding,
20 error,
20 error,
21 pycompat,
21 pycompat,
22 scmutil,
22 scmutil,
23 util,
23 util,
24 )
24 )
25 from .utils import (
25 from .utils import (
26 stringutil,
26 stringutil,
27 )
27 )
28
28
# Module-level aliases for the struct helpers used by the revision branch
# cache below; binding them once avoids repeated attribute lookups in the
# per-revision hot path.
calcsize = struct.calcsize
pack_into = struct.pack_into
unpack_from = struct.unpack_from
32
32
33
33
### Nearest subset relation
# Nearest subset of filter X is a filter Y so that:
# * Y is included in X,
# * X - Y is as small as possible.
# This creates an ordering used for branchmap purposes.
# The ordering may be partial.
# Maps a filter level name to the name of the nearest subset it can fall
# back to when no cache exists for it (None is the unfiltered repo).
subsettable = {None: 'visible',
               'visible-hidden': 'visible',
               'visible': 'served',
               'served': 'immutable',
               'immutable': 'base'}
45
45
46
46
class BranchMapCache(object):
    """mapping of filtered views of repo with their branchcache"""

    def __init__(self):
        # one branchcache instance per filter level name
        self._per_filter = {}

    def __getitem__(self, repo):
        # refresh (or build) the entry for this view before handing it out
        self.updatecache(repo)
        return self._per_filter[repo.filtername]

    def updatecache(self, repo):
        """Update the cache for the given filtered view on a repository"""
        # This can trigger updates for the caches for subsets of the filtered
        # view, e.g. when there is no cache for this filtered view or the cache
        # is stale.

        changelog = repo.changelog
        name = repo.filtername
        cache = self._per_filter.get(name)
        if cache is None or not cache.validfor(repo):
            # in-memory entry missing or stale: try the on-disk copy
            cache = branchcache.fromfile(repo)

        missingrevs = []
        if cache is None:
            # no (fresh) cache available anymore, perhaps we can re-use
            # the cache for a subset, then extend that to add info on missing
            # revisions.
            fallback = subsettable.get(name)
            if fallback is None:
                # nothing to fall back on, start empty.
                cache = branchcache()
            else:
                subview = repo.filtered(fallback)
                cache = self[subview].copy()
                extra = subview.changelog.filteredrevs - changelog.filteredrevs
                missingrevs.extend(r for r in extra if r <= cache.tiprev)

        missingrevs.extend(changelog.revs(start=cache.tiprev + 1))
        if missingrevs:
            cache.update(repo, missingrevs)

        assert cache.validfor(repo), name
        self._per_filter[repo.filtername] = cache

    def replace(self, repo, remotebranchmap):
        """Replace the branchmap cache for a repo with a branch mapping.

        This is likely only called during clone with a branch map from a
        remote.

        """
        clog = repo.changelog
        torev = clog.rev
        branchinfo = clog.branchinfo
        remoteheads = []
        closedheads = []
        for heads in remotebranchmap.itervalues():
            remoteheads += heads
            for head in heads:
                _branch, isclosed = branchinfo(torev(head))
                if isclosed:
                    closedheads.append(head)

        if not remoteheads:
            return
        tiprev = max(int(torev(node)) for node in remoteheads)
        cache = branchcache(
            remotebranchmap, repo[tiprev].node(), tiprev,
            closednodes=closedheads)

        # Try to stick it as low as possible
        # filter above served are unlikely to be fetch from a clone
        for candidate in ('base', 'immutable', 'served'):
            view = repo.filtered(candidate)
            if cache.validfor(view):
                self._per_filter[candidate] = cache
                cache.write(view)
                return

    def clear(self):
        """Drop every cached branchmap entry."""
        self._per_filter.clear()
128
128
129
129
class branchcache(dict):
    """A dict like object that hold branches heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    It maps branch name (bytes, local encoding) -> list of head nodes.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                 filteredhash=None, closednodes=None):
        """Initialize the head mapping plus the cache-key metadata.

        tipnode/tiprev identify the repo tip this cache was computed at;
        filteredhash covers the filtered revisions of the view (may be None
        for an unfiltered repo).
        """
        super(branchcache, self).__init__(entries)
        self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closednodes is None:
            self._closednodes = set()
        else:
            self._closednodes = closednodes

    @classmethod
    def fromfile(cls, repo):
        """Load a branchcache from repo's cache vfs.

        Returns None when the file is missing/unreadable or its content is
        invalid for this repo (stale tip, unknown node, bad state letter).
        """
        f = None
        try:
            f = repo.cachevfs(cls._filename(repo))
            lineiter = iter(f)
            # first line is the cache key: tip node, tip rev, optional hash
            cachekey = next(lineiter).rstrip('\n').split(" ", 2)
            last, lrev = cachekey[:2]
            last, lrev = bin(last), int(lrev)
            filteredhash = None
            if len(cachekey) > 2:
                filteredhash = bin(cachekey[2])
            bcache = cls(tipnode=last, tiprev=lrev, filteredhash=filteredhash)
            if not bcache.validfor(repo):
                # invalidate the cache
                raise ValueError(r'tip differs')
            cl = repo.changelog
            for line in lineiter:
                line = line.rstrip('\n')
                if not line:
                    continue
                node, state, label = line.split(" ", 2)
                if state not in 'oc':
                    raise ValueError(r'invalid branch state')
                label = encoding.tolocal(label.strip())
                node = bin(node)
                if not cl.hasnode(node):
                    raise ValueError(
                        r'node %s does not exist' % pycompat.sysstr(hex(node)))
                bcache.setdefault(label, []).append(node)
                if state == 'c':
                    bcache._closednodes.add(node)

        except (IOError, OSError):
            # missing/unreadable cache file: caller will rebuild
            return None

        except Exception as inst:
            # any parse/validation failure invalidates the whole cache
            if repo.ui.debugflag:
                msg = 'invalid branchheads cache'
                if repo.filtername is not None:
                    msg += ' (%s)' % repo.filtername
                msg += ': %s\n'
                repo.ui.debug(msg % pycompat.bytestr(inst))
            bcache = None

        finally:
            if f:
                f.close()

        return bcache

    @staticmethod
    def _filename(repo):
        """name of a branchcache file for a given repo or repoview"""
        filename = "branch2"
        if repo.filtername:
            filename = '%s-%s' % (filename, repo.filtername)
        return filename

    def validfor(self, repo):
        """Is the cache content valid regarding a repo

        - False when cached tipnode is unknown or if we detect a strip.
        - True when cache is up to date or a subset of current repo."""
        try:
            return ((self.tipnode == repo.changelog.node(self.tiprev))
                    and (self.filteredhash == \
                         scmutil.filteredhash(repo, self.tiprev)))
        except IndexError:
            # tiprev no longer exists in the changelog (e.g. after a strip)
            return False

    def _branchtip(self, heads):
        '''Return tuple with last open head in heads and false,
        otherwise return last closed head and true.'''
        tip = heads[-1]
        closed = True
        # scan from the newest head backwards for the first open one
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        '''Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch.'''
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        """Iterate over nodes, skipping those that close their branch."""
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        """Return the heads of branch; include closed heads only if asked.

        Raises KeyError for an unknown branch.
        """
        heads = self[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        """Yield (branchname, heads, tipnode, isclosed) for each branch."""
        for bn, heads in self.iteritems():
            yield (bn, heads) + self._branchtip(heads)

    def copy(self):
        """return a deep copy of the branchcache object"""
        # NOTE(review): the dict mapping and metadata are copied, but the
        # per-branch head lists are shared with the copy — confirm callers
        # replace (not mutate) those lists.
        return type(self)(
            self, self.tipnode, self.tiprev, self.filteredhash,
            self._closednodes)

    def write(self, repo):
        """Serialize this cache to repo's cache vfs (best effort).

        Write failures are logged and swallowed: the cache file is an
        optimization, never a correctness requirement.
        """
        try:
            f = repo.cachevfs(self._filename(repo), "w", atomictemp=True)
            # cache key line: tip node, tip rev, optional filtered hash
            cachekey = [hex(self.tipnode), '%d' % self.tiprev]
            if self.filteredhash is not None:
                cachekey.append(hex(self.filteredhash))
            f.write(" ".join(cachekey) + '\n')
            nodecount = 0
            for label, nodes in sorted(self.iteritems()):
                label = encoding.fromlocal(label)
                for node in nodes:
                    nodecount += 1
                    if node in self._closednodes:
                        state = 'c'
                    else:
                        state = 'o'
                    f.write("%s %s %s\n" % (hex(node), state, label))
            f.close()
            repo.ui.log('branchcache',
                        'wrote %s branch cache with %d labels and %d nodes\n',
                        repo.filtername, len(self), nodecount)
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug("couldn't write branch cache: %s\n" %
                          stringutil.forcebytestr(inst))

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        heads missing, this function updates self to be correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))

        # fetch current topological heads to speed up filtering
        topoheads = set(cl.headrevs())

        # if older branchheads are reachable from new ones, they aren't
        # really branchheads. Note checking parents is insufficient:
        # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
        for branch, newheadrevs in newbranches.iteritems():
            bheads = self.setdefault(branch, [])
            bheadset = set(cl.rev(node) for node in bheads)

            # This have been tested True on all internal usage of this function.
            # run it again in case of doubt
            # assert not (set(bheadrevs) & set(newheadrevs))
            bheadset.update(newheadrevs)

            # This prunes out two kinds of heads - heads that are superseded by
            # a head in newheadrevs, and newheadrevs that are not heads because
            # an existing head is their descendant.
            uncertain = bheadset - topoheads
            if uncertain:
                floorrev = min(uncertain)
                ancestors = set(cl.ancestors(newheadrevs, floorrev))
                bheadset -= ancestors
            bheadrevs = sorted(bheadset)
            self[branch] = [cl.node(rev) for rev in bheadrevs]
            tiprev = bheadrevs[-1]
            if tiprev > self.tiprev:
                self.tipnode = cl.node(tiprev)
                self.tiprev = tiprev

        if not self.validfor(repo):
            # cache key are not valid anymore
            # recompute the tip from scratch over all known heads
            self.tipnode = nullid
            self.tiprev = nullrev
            for heads in self.values():
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(repo, self.tiprev)

        duration = util.timer() - starttime
        repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                    repo.filtername, duration)

        # persist the refreshed cache for the next process
        self.write(repo)
362
362
363
363
class remotebranchcache(branchcache):
    """Branchmap info for a remote connection, should not write locally"""

    def write(self, repo):
        # deliberately a no-op: remote-derived data must never be persisted
        # into the local cache area
        pass
368
368
369
369
# Revision branch info cache

_rbcversion = '-v1'
# file names for the two on-disk cache parts (see revbranchcache docstring)
_rbcnames = 'rbc-names' + _rbcversion
_rbcrevs = 'rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = '>4sI'
_rbcrecsize = calcsize(_rbcrecfmt)
# how many leading bytes of the node hash are stored per record
_rbcnodelen = 4
# low 31 bits of the second field: index into the branch name list
_rbcbranchidxmask = 0x7fffffff
# high bit of the second field: set when the commit closes its branch
_rbccloseflag = 0x80000000
381
381
382 class revbranchcache(object):
382 class revbranchcache(object):
383 """Persistent cache, mapping from revision number to branch name and close.
383 """Persistent cache, mapping from revision number to branch name and close.
384 This is a low level cache, independent of filtering.
384 This is a low level cache, independent of filtering.
385
385
386 Branch names are stored in rbc-names in internal encoding separated by 0.
386 Branch names are stored in rbc-names in internal encoding separated by 0.
387 rbc-names is append-only, and each branch name is only stored once and will
387 rbc-names is append-only, and each branch name is only stored once and will
388 thus have a unique index.
388 thus have a unique index.
389
389
390 The branch info for each revision is stored in rbc-revs as constant size
390 The branch info for each revision is stored in rbc-revs as constant size
391 records. The whole file is read into memory, but it is only 'parsed' on
391 records. The whole file is read into memory, but it is only 'parsed' on
392 demand. The file is usually append-only but will be truncated if repo
392 demand. The file is usually append-only but will be truncated if repo
393 modification is detected.
393 modification is detected.
394 The record for each revision contains the first 4 bytes of the
394 The record for each revision contains the first 4 bytes of the
395 corresponding node hash, and the record is only used if it still matches.
395 corresponding node hash, and the record is only used if it still matches.
396 Even a completely trashed rbc-revs fill thus still give the right result
396 Even a completely trashed rbc-revs fill thus still give the right result
397 while converging towards full recovery ... assuming no incorrectly matching
397 while converging towards full recovery ... assuming no incorrectly matching
398 node hashes.
398 node hashes.
399 The record also contains 4 bytes where 31 bits contains the index of the
399 The record also contains 4 bytes where 31 bits contains the index of the
400 branch and the last bit indicate that it is a branch close commit.
400 branch and the last bit indicate that it is a branch close commit.
401 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
401 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
402 and will grow with it but be 1/8th of its size.
402 and will grow with it but be 1/8th of its size.
403 """
403 """
404
404
    def __init__(self, repo, readonly=True):
        """Load the rbc-names and rbc-revs caches from repo's cache vfs.

        Missing or unreadable cache files are tolerated: the cache then
        starts empty, and when ``readonly`` is True lookups fall back to the
        uncached slow path (branchinfo is rebound to _branchinfo).
        """
        assert repo.filtername is None
        self._repo = repo
        self._names = [] # branch names in local encoding with static index
        self._rbcrevs = bytearray()
        self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
        try:
            bndata = repo.cachevfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata) # for verification before writing
            if bndata:
                self._names = [encoding.tolocal(bn)
                               for bn in bndata.split('\0')]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        # only bother loading the records if we have names to resolve into
        if self._names:
            try:
                data = repo.cachevfs.read(_rbcrevs)
                self._rbcrevs[:] = data
            except (IOError, OSError) as inst:
                repo.ui.debug("couldn't read revision branch cache: %s\n" %
                              stringutil.forcebytestr(inst))
        # remember number of good records on disk
        self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
                               len(repo.changelog))
        if self._rbcrevslen == 0:
            self._names = []
        self._rbcnamescount = len(self._names) # number of names read at
                                               # _rbcsnameslen
436
436
    def _clear(self):
        """Reset all in-memory state so the cache is rebuilt from scratch."""
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._rbcrevslen = len(self._repo.changelog)
        # pre-size one record per changelog revision; zeroed records are
        # treated as "not cached" by branchinfo
        self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
        # the name->index map is derived from _names and must be rebuilt
        util.clearcachedproperty(self, '_namesreverse')
444
444
445 @util.propertycache
445 @util.propertycache
446 def _namesreverse(self):
446 def _namesreverse(self):
447 return dict((b, r) for r, b in enumerate(self._names))
447 return dict((b, r) for r, b in enumerate(self._names))
448
448
    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache.

        Falls back to the changelog (slow path) whenever the cached record
        is absent, unallocated, or fails node verification.
        """
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = unpack_from(
            _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
        # high bit of the record carries the branch-close flag
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == '\0\0\0\0':
            # all-zero prefix means this revision was never cached
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug("referenced branch names not found"
                    " - rebuilding revision branch cache from scratch\n")
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug("history modification detected - truncating "
                                "revision branch cache to revision %d\n" % rev)
            truncate = rbcrevidx + _rbcrecsize
            del self._rbcrevs[truncate:]
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)
490
490
491 def _branchinfo(self, rev):
491 def _branchinfo(self, rev):
492 """Retrieve branch info from changelog and update _rbcrevs"""
492 """Retrieve branch info from changelog and update _rbcrevs"""
493 changelog = self._repo.changelog
493 changelog = self._repo.changelog
494 b, close = changelog.branchinfo(rev)
494 b, close = changelog.branchinfo(rev)
495 if b in self._namesreverse:
495 if b in self._namesreverse:
496 branchidx = self._namesreverse[b]
496 branchidx = self._namesreverse[b]
497 else:
497 else:
498 branchidx = len(self._names)
498 branchidx = len(self._names)
499 self._names.append(b)
499 self._names.append(b)
500 self._namesreverse[b] = branchidx
500 self._namesreverse[b] = branchidx
501 reponode = changelog.node(rev)
501 reponode = changelog.node(rev)
502 if close:
502 if close:
503 branchidx |= _rbccloseflag
503 branchidx |= _rbccloseflag
504 self._setcachedata(rev, reponode, branchidx)
504 self._setcachedata(rev, reponode, branchidx)
505 return b, close
505 return b, close
506
506
507 def setdata(self, branch, rev, node, close):
507 def setdata(self, branch, rev, node, close):
508 """add new data information to the cache"""
508 """add new data information to the cache"""
509 if branch in self._namesreverse:
509 if branch in self._namesreverse:
510 branchidx = self._namesreverse[branch]
510 branchidx = self._namesreverse[branch]
511 else:
511 else:
512 branchidx = len(self._names)
512 branchidx = len(self._names)
513 self._names.append(branch)
513 self._names.append(branch)
514 self._namesreverse[branch] = branchidx
514 self._namesreverse[branch] = branchidx
515 if close:
515 if close:
516 branchidx |= _rbccloseflag
516 branchidx |= _rbccloseflag
517 self._setcachedata(rev, node, branchidx)
517 self._setcachedata(rev, node, branchidx)
518 # If no cache data were readable (non exists, bad permission, etc)
518 # If no cache data were readable (non exists, bad permission, etc)
519 # the cache was bypassing itself by setting:
519 # the cache was bypassing itself by setting:
520 #
520 #
521 # self.branchinfo = self._branchinfo
521 # self.branchinfo = self._branchinfo
522 #
522 #
523 # Since we now have data in the cache, we need to drop this bypassing.
523 # Since we now have data in the cache, we need to drop this bypassing.
524 if r'branchinfo' in vars(self):
524 if r'branchinfo' in vars(self):
525 del self.branchinfo
525 del self.branchinfo
526
526
527 def _setcachedata(self, rev, node, branchidx):
527 def _setcachedata(self, rev, node, branchidx):
528 """Writes the node's branch data to the in-memory cache data."""
528 """Writes the node's branch data to the in-memory cache data."""
529 if rev == nullrev:
529 if rev == nullrev:
530 return
530 return
531 rbcrevidx = rev * _rbcrecsize
531 rbcrevidx = rev * _rbcrecsize
532 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
532 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
533 self._rbcrevs.extend('\0' *
533 self._rbcrevs.extend('\0' *
534 (len(self._repo.changelog) * _rbcrecsize -
534 (len(self._repo.changelog) * _rbcrecsize -
535 len(self._rbcrevs)))
535 len(self._rbcrevs)))
536 pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
536 pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
537 self._rbcrevslen = min(self._rbcrevslen, rev)
537 self._rbcrevslen = min(self._rbcrevslen, rev)
538
538
539 tr = self._repo.currenttransaction()
539 tr = self._repo.currenttransaction()
540 if tr:
540 if tr:
541 tr.addfinalize('write-revbranchcache', self.write)
541 tr.addfinalize('write-revbranchcache', self.write)
542
542
    def write(self, tr=None):
        """Save branch cache if it is dirty.

        Best effort: any I/O or locking failure is swallowed and only
        reported as a debug message, since the cache can always be
        rebuilt from the changelog.
        """
        repo = self._repo
        wlock = None
        step = ''
        try:
            # step 1: persist any branch names added since the last write
            if self._rbcnamescount < len(self._names):
                step = ' names'
                wlock = repo.wlock(wait=False)
                if self._rbcnamescount != 0:
                    f = repo.cachevfs.open(_rbcnames, 'ab')
                    if f.tell() == self._rbcsnameslen:
                        # file matches what we loaded; append new names only
                        f.write('\0')
                    else:
                        # on-disk names diverged from memory - rewrite fully
                        f.close()
                        repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                        self._rbcnamescount = 0
                        self._rbcrevslen = 0
                if self._rbcnamescount == 0:
                    # before rewriting names, make sure references are removed
                    repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                    f = repo.cachevfs.open(_rbcnames, 'wb')
                    f.write('\0'.join(encoding.fromlocal(b)
                                      for b in self._names[self._rbcnamescount:]))
                    self._rbcsnameslen = f.tell()
                    f.close()
                self._rbcnamescount = len(self._names)

            # step 2: persist the per-revision records past the clean prefix
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = ''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                # only write records for revisions the changelog still has
                revs = min(len(repo.changelog),
                           len(self._rbcrevs) // _rbcrecsize)
                f = repo.cachevfs.open(_rbcrevs, 'ab')
                if f.tell() != start:
                    # file is not where we expect it; truncate and, if we
                    # cannot even seek to ``start``, rewrite from the top
                    repo.ui.debug("truncating cache/%s to %d\n"
                                  % (_rbcrevs, start))
                    f.seek(start)
                    if f.tell() != start:
                        start = 0
                        f.seek(start)
                    f.truncate()
                end = revs * _rbcrecsize
                f.write(self._rbcrevs[start:end])
                f.close()
                self._rbcrevslen = revs
        except (IOError, OSError, error.Abort, error.LockError) as inst:
            # cache writing is best effort - report and move on
            repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                          % (step, stringutil.forcebytestr(inst)))
        finally:
            if wlock is not None:
                wlock.release()
596 wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now