branchmap: add some clarifications and clean up flow...
Martijn Pieters
r41708:c795c462 default
@@ -1,570 +1,581 @@
 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 from __future__ import absolute_import

 import struct

 from .node import (
     bin,
     hex,
     nullid,
     nullrev,
 )
 from . import (
     encoding,
     error,
     pycompat,
     scmutil,
     util,
 )
 from .utils import (
     stringutil,
 )

 calcsize = struct.calcsize
 pack_into = struct.pack_into
 unpack_from = struct.unpack_from


 ### Nearest subset relation
 # Nearest subset of filter X is a filter Y so that:
 # * Y is included in X,
 # * X - Y is as small as possible.
 # This create and ordering used for branchmap purpose.
 # the ordering may be partial
 subsettable = {None: 'visible',
                'visible-hidden': 'visible',
                'visible': 'served',
                'served': 'immutable',
                'immutable': 'base'}
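(Editorial note, not part of this changeset: the subsettable mapping above defines the fallback chain that updatecache() below walks one step at a time. A minimal sketch of resolving the whole chain for a filter name, assuming the subsettable dict defined above; fallbackchain is a hypothetical helper used only for illustration.)

def fallbackchain(filtername, table=subsettable):
    # Yield successively smaller ("nearest subset") views to try, e.g.
    # None -> 'visible' -> 'served' -> 'immutable' -> 'base'.
    name = table.get(filtername)
    while name is not None:
        yield name
        name = table.get(name)

# list(fallbackchain('visible')) == ['served', 'immutable', 'base']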

 def updatecache(repo):
+    """Update the cache for the given filtered view on a repository"""
+    # This can trigger updates for the caches for subsets of the filtered
+    # view, e.g. when there is no cache for this filtered view or the cache
+    # is stale.
+
     cl = repo.changelog
     filtername = repo.filtername
     bcache = repo._branchcaches.get(filtername)
+    if bcache is None or not bcache.validfor(repo):
+        # cache object missing or cache object stale? Read from disk
+        bcache = branchcache.fromfile(repo)

     revs = []
-    if bcache is None or not bcache.validfor(repo):
-        bcache = branchcache.fromfile(repo)
-        if bcache is None:
-            subsetname = subsettable.get(filtername)
-            if subsetname is None:
-                bcache = branchcache()
-            else:
-                subset = repo.filtered(subsetname)
-                bcache = subset.branchmap().copy()
-                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
-                revs.extend(r for r in extrarevs if r <= bcache.tiprev)
+    if bcache is None:
+        # no (fresh) cache available anymore, perhaps we can re-use
+        # the cache for a subset, then extend that to add info on missing
+        # revisions.
+        subsetname = subsettable.get(filtername)
+        if subsetname is not None:
+            subset = repo.filtered(subsetname)
+            bcache = subset.branchmap().copy()
+            extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
+            revs.extend(r for r in extrarevs if r <= bcache.tiprev)
+        else:
+            # nothing to fall back on, start empty.
+            bcache = branchcache()
+
     revs.extend(cl.revs(start=bcache.tiprev + 1))
     if revs:
         bcache.update(repo, revs)

     assert bcache.validfor(repo), filtername
     repo._branchcaches[repo.filtername] = bcache

 def replacecache(repo, bm):
     """Replace the branchmap cache for a repo with a branch mapping.

     This is likely only called during clone with a branch map from a remote.
     """
     cl = repo.changelog
     clrev = cl.rev
     clbranchinfo = cl.branchinfo
     rbheads = []
     closed = []
     for bheads in bm.itervalues():
         rbheads.extend(bheads)
         for h in bheads:
             r = clrev(h)
             b, c = clbranchinfo(r)
             if c:
                 closed.append(h)

     if rbheads:
         rtiprev = max((int(clrev(node))
                        for node in rbheads))
         cache = branchcache(bm,
                             repo[rtiprev].node(),
                             rtiprev,
                             closednodes=closed)

         # Try to stick it as low as possible
         # filter above served are unlikely to be fetch from a clone
         for candidate in ('base', 'immutable', 'served'):
             rview = repo.filtered(candidate)
             if cache.validfor(rview):
                 repo._branchcaches[candidate] = cache
                 cache.write(rview)
                 break

 class branchcache(dict):
     """A dict like object that hold branches heads cache.

     This cache is used to avoid costly computations to determine all the
     branch heads of a repo.

     The cache is serialized on disk in the following format:

     <tip hex node> <tip rev number> [optional filtered repo hex hash]
     <branch head hex node> <open/closed state> <branch name>
     <branch head hex node> <open/closed state> <branch name>
     ...

     The first line is used to check if the cache is still valid. If the
     branch cache is for a filtered repo view, an optional third hash is
     included that hashes the hashes of all filtered revisions.

     The open/closed state is represented by a single letter 'o' or 'c'.
     This field can be used to avoid changelog reads when determining if a
     branch head closes a branch or not.
     """
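(Editorial note, not part of this changeset: the on-disk layout described in the docstring above can be illustrated with a tiny parsing sketch. The sample line below is made up; the field order - head node, 'o'/'c' state, branch name - follows the documented format, and the maxsplit of 2 matters because branch names may contain spaces.)

sampleline = '1b3e9f6c1b3e9f6c1b3e9f6c1b3e9f6c1b3e9f6c c default'
node_hex, state, label = sampleline.split(' ', 2)
assert state in 'oc'          # 'o' = open head, 'c' = branch-closing head
print(label, state == 'c')    # -> default True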
     @classmethod
     def fromfile(cls, repo):
         f = None
         try:
             f = repo.cachevfs(cls._filename(repo))
             lineiter = iter(f)
             cachekey = next(lineiter).rstrip('\n').split(" ", 2)
             last, lrev = cachekey[:2]
             last, lrev = bin(last), int(lrev)
             filteredhash = None
             if len(cachekey) > 2:
                 filteredhash = bin(cachekey[2])
             bcache = cls(tipnode=last, tiprev=lrev, filteredhash=filteredhash)
             if not bcache.validfor(repo):
                 # invalidate the cache
                 raise ValueError(r'tip differs')
             cl = repo.changelog
             for line in lineiter:
                 line = line.rstrip('\n')
                 if not line:
                     continue
                 node, state, label = line.split(" ", 2)
                 if state not in 'oc':
                     raise ValueError(r'invalid branch state')
                 label = encoding.tolocal(label.strip())
                 node = bin(node)
                 if not cl.hasnode(node):
                     raise ValueError(
                         r'node %s does not exist' % pycompat.sysstr(hex(node)))
                 bcache.setdefault(label, []).append(node)
                 if state == 'c':
                     bcache._closednodes.add(node)

         except (IOError, OSError):
             return None

         except Exception as inst:
             if repo.ui.debugflag:
                 msg = 'invalid branchheads cache'
                 if repo.filtername is not None:
                     msg += ' (%s)' % repo.filtername
                 msg += ': %s\n'
                 repo.ui.debug(msg % pycompat.bytestr(inst))
             bcache = None

         finally:
             if f:
                 f.close()

         return bcache

     @staticmethod
     def _filename(repo):
         """name of a branchcache file for a given repo or repoview"""
         filename = "branch2"
         if repo.filtername:
             filename = '%s-%s' % (filename, repo.filtername)
         return filename

     def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                  filteredhash=None, closednodes=None):
         super(branchcache, self).__init__(entries)
         self.tipnode = tipnode
         self.tiprev = tiprev
         self.filteredhash = filteredhash
         # closednodes is a set of nodes that close their branch. If the branch
         # cache has been updated, it may contain nodes that are no longer
         # heads.
         if closednodes is None:
             self._closednodes = set()
         else:
             self._closednodes = closednodes

     def validfor(self, repo):
         """Is the cache content valid regarding a repo

         - False when cached tipnode is unknown or if we detect a strip.
         - True when cache is up to date or a subset of current repo."""
         try:
             return ((self.tipnode == repo.changelog.node(self.tiprev))
                     and (self.filteredhash == \
                          scmutil.filteredhash(repo, self.tiprev)))
         except IndexError:
             return False

     def _branchtip(self, heads):
         '''Return tuple with last open head in heads and false,
         otherwise return last closed head and true.'''
         tip = heads[-1]
         closed = True
         for h in reversed(heads):
             if h not in self._closednodes:
                 tip = h
                 closed = False
                 break
         return tip, closed

     def branchtip(self, branch):
         '''Return the tipmost open head on branch head, otherwise return the
         tipmost closed head on branch.
         Raise KeyError for unknown branch.'''
         return self._branchtip(self[branch])[0]

     def iteropen(self, nodes):
         return (n for n in nodes if n not in self._closednodes)

     def branchheads(self, branch, closed=False):
         heads = self[branch]
         if not closed:
             heads = list(self.iteropen(heads))
         return heads

     def iterbranches(self):
         for bn, heads in self.iteritems():
             yield (bn, heads) + self._branchtip(heads)

     def copy(self):
         """return an deep copy of the branchcache object"""
         return type(self)(
             self, self.tipnode, self.tiprev, self.filteredhash,
             self._closednodes)

     def write(self, repo):
         try:
             f = repo.cachevfs(self._filename(repo), "w", atomictemp=True)
             cachekey = [hex(self.tipnode), '%d' % self.tiprev]
             if self.filteredhash is not None:
                 cachekey.append(hex(self.filteredhash))
             f.write(" ".join(cachekey) + '\n')
             nodecount = 0
             for label, nodes in sorted(self.iteritems()):
                 for node in nodes:
                     nodecount += 1
                     if node in self._closednodes:
                         state = 'c'
                     else:
                         state = 'o'
                     f.write("%s %s %s\n" % (hex(node), state,
                                             encoding.fromlocal(label)))
             f.close()
             repo.ui.log('branchcache',
                         'wrote %s branch cache with %d labels and %d nodes\n',
                         repo.filtername, len(self), nodecount)
         except (IOError, OSError, error.Abort) as inst:
             # Abort may be raised by read only opener, so log and continue
             repo.ui.debug("couldn't write branch cache: %s\n" %
                           stringutil.forcebytestr(inst))

     def update(self, repo, revgen):
         """Given a branchhead cache, self, that may have extra nodes or be
         missing heads, and a generator of nodes that are strictly a superset of
         heads missing, this function updates self to be correct.
         """
         starttime = util.timer()
         cl = repo.changelog
         # collect new branch entries
         newbranches = {}
         getbranchinfo = repo.revbranchcache().branchinfo
         for r in revgen:
             branch, closesbranch = getbranchinfo(r)
             newbranches.setdefault(branch, []).append(r)
             if closesbranch:
                 self._closednodes.add(cl.node(r))

         # fetch current topological heads to speed up filtering
         topoheads = set(cl.headrevs())

         # if older branchheads are reachable from new ones, they aren't
         # really branchheads. Note checking parents is insufficient:
         # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
         for branch, newheadrevs in newbranches.iteritems():
             bheads = self.setdefault(branch, [])
             bheadset = set(cl.rev(node) for node in bheads)

             # This have been tested True on all internal usage of this function.
             # run it again in case of doubt
             # assert not (set(bheadrevs) & set(newheadrevs))
             bheadset.update(newheadrevs)

             # This prunes out two kinds of heads - heads that are superseded by
             # a head in newheadrevs, and newheadrevs that are not heads because
             # an existing head is their descendant.
             uncertain = bheadset - topoheads
             if uncertain:
                 floorrev = min(uncertain)
                 ancestors = set(cl.ancestors(newheadrevs, floorrev))
                 bheadset -= ancestors
             bheadrevs = sorted(bheadset)
             self[branch] = [cl.node(rev) for rev in bheadrevs]
             tiprev = bheadrevs[-1]
             if tiprev > self.tiprev:
                 self.tipnode = cl.node(tiprev)
                 self.tiprev = tiprev

         if not self.validfor(repo):
             # cache key are not valid anymore
             self.tipnode = nullid
             self.tiprev = nullrev
             for heads in self.values():
                 tiprev = max(cl.rev(node) for node in heads)
                 if tiprev > self.tiprev:
                     self.tipnode = cl.node(tiprev)
                     self.tiprev = tiprev
         self.filteredhash = scmutil.filteredhash(repo, self.tiprev)

         duration = util.timer() - starttime
         repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                     repo.filtername, duration)

         self.write(repo)


 class remotebranchcache(branchcache):
     """Branchmap info for a remote connection, should not write locally"""
     def write(self, repo):
         pass


 # Revision branch info cache

 _rbcversion = '-v1'
 _rbcnames = 'rbc-names' + _rbcversion
 _rbcrevs = 'rbc-revs' + _rbcversion
 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
 _rbcrecfmt = '>4sI'
 _rbcrecsize = calcsize(_rbcrecfmt)
 _rbcnodelen = 4
 _rbcbranchidxmask = 0x7fffffff
 _rbccloseflag = 0x80000000

 class revbranchcache(object):
     """Persistent cache, mapping from revision number to branch name and close.
     This is a low level cache, independent of filtering.

     Branch names are stored in rbc-names in internal encoding separated by 0.
     rbc-names is append-only, and each branch name is only stored once and will
     thus have a unique index.

     The branch info for each revision is stored in rbc-revs as constant size
     records. The whole file is read into memory, but it is only 'parsed' on
     demand. The file is usually append-only but will be truncated if repo
     modification is detected.
     The record for each revision contains the first 4 bytes of the
     corresponding node hash, and the record is only used if it still matches.
     Even a completely trashed rbc-revs fill thus still give the right result
     while converging towards full recovery ... assuming no incorrectly matching
     node hashes.
     The record also contains 4 bytes where 31 bits contains the index of the
     branch and the last bit indicate that it is a branch close commit.
     The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
     and will grow with it but be 1/8th of its size.
     """
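(Editorial note, not part of this changeset: the fixed-size rbc-revs record described above packs a 4-byte node-hash prefix and a 4-byte big-endian integer whose low 31 bits index into rbc-names and whose top bit flags a branch-closing commit. A self-contained sketch with made-up values; the local constants mirror _rbcrecfmt, _rbccloseflag and _rbcbranchidxmask defined above.)

import struct

RECFMT = '>4sI'            # mirrors _rbcrecfmt above
CLOSEFLAG = 0x80000000     # mirrors _rbccloseflag
IDXMASK = 0x7fffffff       # mirrors _rbcbranchidxmask

record = struct.pack(RECFMT, b'\xde\xad\xbe\xef', 5 | CLOSEFLAG)
nodeprefix, field = struct.unpack(RECFMT, record)
branchidx = field & IDXMASK             # 5: index into the rbc-names list
closesbranch = bool(field & CLOSEFLAG)  # True: this revision closes its branch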

     def __init__(self, repo, readonly=True):
         assert repo.filtername is None
         self._repo = repo
         self._names = [] # branch names in local encoding with static index
         self._rbcrevs = bytearray()
         self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
         try:
             bndata = repo.cachevfs.read(_rbcnames)
             self._rbcsnameslen = len(bndata) # for verification before writing
             if bndata:
                 self._names = [encoding.tolocal(bn)
                                for bn in bndata.split('\0')]
         except (IOError, OSError):
             if readonly:
                 # don't try to use cache - fall back to the slow path
                 self.branchinfo = self._branchinfo

         if self._names:
             try:
                 data = repo.cachevfs.read(_rbcrevs)
                 self._rbcrevs[:] = data
             except (IOError, OSError) as inst:
                 repo.ui.debug("couldn't read revision branch cache: %s\n" %
                               stringutil.forcebytestr(inst))
         # remember number of good records on disk
         self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
                                len(repo.changelog))
         if self._rbcrevslen == 0:
             self._names = []
         self._rbcnamescount = len(self._names) # number of names read at
                                                # _rbcsnameslen

     def _clear(self):
         self._rbcsnameslen = 0
         del self._names[:]
         self._rbcnamescount = 0
         self._rbcrevslen = len(self._repo.changelog)
         self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
         util.clearcachedproperty(self, '_namesreverse')

     @util.propertycache
     def _namesreverse(self):
         return dict((b, r) for r, b in enumerate(self._names))

     def branchinfo(self, rev):
         """Return branch name and close flag for rev, using and updating
         persistent cache."""
         changelog = self._repo.changelog
         rbcrevidx = rev * _rbcrecsize

         # avoid negative index, changelog.read(nullrev) is fast without cache
         if rev == nullrev:
             return changelog.branchinfo(rev)

         # if requested rev isn't allocated, grow and cache the rev info
         if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
             return self._branchinfo(rev)

         # fast path: extract data from cache, use it if node is matching
         reponode = changelog.node(rev)[:_rbcnodelen]
         cachenode, branchidx = unpack_from(
             _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
         close = bool(branchidx & _rbccloseflag)
         if close:
             branchidx &= _rbcbranchidxmask
         if cachenode == '\0\0\0\0':
             pass
         elif cachenode == reponode:
             try:
                 return self._names[branchidx], close
             except IndexError:
                 # recover from invalid reference to unknown branch
                 self._repo.ui.debug("referenced branch names not found"
                     " - rebuilding revision branch cache from scratch\n")
                 self._clear()
         else:
             # rev/node map has changed, invalidate the cache from here up
             self._repo.ui.debug("history modification detected - truncating "
                                 "revision branch cache to revision %d\n" % rev)
             truncate = rbcrevidx + _rbcrecsize
             del self._rbcrevs[truncate:]
             self._rbcrevslen = min(self._rbcrevslen, truncate)

         # fall back to slow path and make sure it will be written to disk
         return self._branchinfo(rev)

     def _branchinfo(self, rev):
         """Retrieve branch info from changelog and update _rbcrevs"""
         changelog = self._repo.changelog
         b, close = changelog.branchinfo(rev)
         if b in self._namesreverse:
             branchidx = self._namesreverse[b]
         else:
             branchidx = len(self._names)
             self._names.append(b)
             self._namesreverse[b] = branchidx
         reponode = changelog.node(rev)
         if close:
             branchidx |= _rbccloseflag
         self._setcachedata(rev, reponode, branchidx)
         return b, close

     def setdata(self, branch, rev, node, close):
         """add new data information to the cache"""
         if branch in self._namesreverse:
             branchidx = self._namesreverse[branch]
         else:
             branchidx = len(self._names)
             self._names.append(branch)
             self._namesreverse[branch] = branchidx
         if close:
             branchidx |= _rbccloseflag
         self._setcachedata(rev, node, branchidx)
         # If no cache data were readable (non exists, bad permission, etc)
         # the cache was bypassing itself by setting:
         #
         #   self.branchinfo = self._branchinfo
         #
         # Since we now have data in the cache, we need to drop this bypassing.
         if r'branchinfo' in vars(self):
             del self.branchinfo

     def _setcachedata(self, rev, node, branchidx):
         """Writes the node's branch data to the in-memory cache data."""
         if rev == nullrev:
             return
         rbcrevidx = rev * _rbcrecsize
         if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
             self._rbcrevs.extend('\0' *
                                  (len(self._repo.changelog) * _rbcrecsize -
                                   len(self._rbcrevs)))
         pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
         self._rbcrevslen = min(self._rbcrevslen, rev)

         tr = self._repo.currenttransaction()
         if tr:
             tr.addfinalize('write-revbranchcache', self.write)

     def write(self, tr=None):
         """Save branch cache if it is dirty."""
         repo = self._repo
         wlock = None
         step = ''
         try:
             if self._rbcnamescount < len(self._names):
                 step = ' names'
                 wlock = repo.wlock(wait=False)
                 if self._rbcnamescount != 0:
                     f = repo.cachevfs.open(_rbcnames, 'ab')
                     if f.tell() == self._rbcsnameslen:
                         f.write('\0')
                     else:
                         f.close()
                         repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                         self._rbcnamescount = 0
                         self._rbcrevslen = 0
                 if self._rbcnamescount == 0:
                     # before rewriting names, make sure references are removed
                     repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                     f = repo.cachevfs.open(_rbcnames, 'wb')
                 f.write('\0'.join(encoding.fromlocal(b)
                                   for b in self._names[self._rbcnamescount:]))
                 self._rbcsnameslen = f.tell()
                 f.close()
                 self._rbcnamescount = len(self._names)

             start = self._rbcrevslen * _rbcrecsize
             if start != len(self._rbcrevs):
                 step = ''
                 if wlock is None:
                     wlock = repo.wlock(wait=False)
                 revs = min(len(repo.changelog),
                            len(self._rbcrevs) // _rbcrecsize)
                 f = repo.cachevfs.open(_rbcrevs, 'ab')
                 if f.tell() != start:
                     repo.ui.debug("truncating cache/%s to %d\n"
                                   % (_rbcrevs, start))
                     f.seek(start)
                     if f.tell() != start:
                         start = 0
                         f.seek(start)
                     f.truncate()
                 end = revs * _rbcrecsize
                 f.write(self._rbcrevs[start:end])
                 f.close()
                 self._rbcrevslen = revs
         except (IOError, OSError, error.Abort, error.LockError) as inst:
             repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                           % (step, stringutil.forcebytestr(inst)))
         finally:
             if wlock is not None:
                 wlock.release()