##// END OF EJS Templates
branchcache: move loading of branch names and nodes into it's own function...
Pulkit Goyal -
r41959:68bbcc70 default
parent child Browse files
Show More
@@ -1,596 +1,600 b''
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11
11
12 from .node import (
12 from .node import (
13 bin,
13 bin,
14 hex,
14 hex,
15 nullid,
15 nullid,
16 nullrev,
16 nullrev,
17 )
17 )
18 from . import (
18 from . import (
19 encoding,
19 encoding,
20 error,
20 error,
21 pycompat,
21 pycompat,
22 scmutil,
22 scmutil,
23 util,
23 util,
24 )
24 )
25 from .utils import (
25 from .utils import (
26 stringutil,
26 stringutil,
27 )
27 )
28
28
29 calcsize = struct.calcsize
29 calcsize = struct.calcsize
30 pack_into = struct.pack_into
30 pack_into = struct.pack_into
31 unpack_from = struct.unpack_from
31 unpack_from = struct.unpack_from
32
32
33
33
34 ### Nearest subset relation
34 ### Nearest subset relation
35 # Nearest subset of filter X is a filter Y so that:
35 # Nearest subset of filter X is a filter Y so that:
36 # * Y is included in X,
36 # * Y is included in X,
37 # * X - Y is as small as possible.
37 # * X - Y is as small as possible.
38 # This create and ordering used for branchmap purpose.
38 # This create and ordering used for branchmap purpose.
39 # the ordering may be partial
39 # the ordering may be partial
40 subsettable = {None: 'visible',
40 subsettable = {None: 'visible',
41 'visible-hidden': 'visible',
41 'visible-hidden': 'visible',
42 'visible': 'served',
42 'visible': 'served',
43 'served': 'immutable',
43 'served': 'immutable',
44 'immutable': 'base'}
44 'immutable': 'base'}
45
45
46
46
47 class BranchMapCache(object):
47 class BranchMapCache(object):
48 """mapping of filtered views of repo with their branchcache"""
48 """mapping of filtered views of repo with their branchcache"""
49 def __init__(self):
49 def __init__(self):
50 self._per_filter = {}
50 self._per_filter = {}
51
51
52 def __getitem__(self, repo):
52 def __getitem__(self, repo):
53 self.updatecache(repo)
53 self.updatecache(repo)
54 return self._per_filter[repo.filtername]
54 return self._per_filter[repo.filtername]
55
55
56 def updatecache(self, repo):
56 def updatecache(self, repo):
57 """Update the cache for the given filtered view on a repository"""
57 """Update the cache for the given filtered view on a repository"""
58 # This can trigger updates for the caches for subsets of the filtered
58 # This can trigger updates for the caches for subsets of the filtered
59 # view, e.g. when there is no cache for this filtered view or the cache
59 # view, e.g. when there is no cache for this filtered view or the cache
60 # is stale.
60 # is stale.
61
61
62 cl = repo.changelog
62 cl = repo.changelog
63 filtername = repo.filtername
63 filtername = repo.filtername
64 bcache = self._per_filter.get(filtername)
64 bcache = self._per_filter.get(filtername)
65 if bcache is None or not bcache.validfor(repo):
65 if bcache is None or not bcache.validfor(repo):
66 # cache object missing or cache object stale? Read from disk
66 # cache object missing or cache object stale? Read from disk
67 bcache = branchcache.fromfile(repo)
67 bcache = branchcache.fromfile(repo)
68
68
69 revs = []
69 revs = []
70 if bcache is None:
70 if bcache is None:
71 # no (fresh) cache available anymore, perhaps we can re-use
71 # no (fresh) cache available anymore, perhaps we can re-use
72 # the cache for a subset, then extend that to add info on missing
72 # the cache for a subset, then extend that to add info on missing
73 # revisions.
73 # revisions.
74 subsetname = subsettable.get(filtername)
74 subsetname = subsettable.get(filtername)
75 if subsetname is not None:
75 if subsetname is not None:
76 subset = repo.filtered(subsetname)
76 subset = repo.filtered(subsetname)
77 bcache = self[subset].copy()
77 bcache = self[subset].copy()
78 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
78 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
79 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
79 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
80 else:
80 else:
81 # nothing to fall back on, start empty.
81 # nothing to fall back on, start empty.
82 bcache = branchcache()
82 bcache = branchcache()
83
83
84 revs.extend(cl.revs(start=bcache.tiprev + 1))
84 revs.extend(cl.revs(start=bcache.tiprev + 1))
85 if revs:
85 if revs:
86 bcache.update(repo, revs)
86 bcache.update(repo, revs)
87
87
88 assert bcache.validfor(repo), filtername
88 assert bcache.validfor(repo), filtername
89 self._per_filter[repo.filtername] = bcache
89 self._per_filter[repo.filtername] = bcache
90
90
91 def replace(self, repo, remotebranchmap):
91 def replace(self, repo, remotebranchmap):
92 """Replace the branchmap cache for a repo with a branch mapping.
92 """Replace the branchmap cache for a repo with a branch mapping.
93
93
94 This is likely only called during clone with a branch map from a
94 This is likely only called during clone with a branch map from a
95 remote.
95 remote.
96
96
97 """
97 """
98 cl = repo.changelog
98 cl = repo.changelog
99 clrev = cl.rev
99 clrev = cl.rev
100 clbranchinfo = cl.branchinfo
100 clbranchinfo = cl.branchinfo
101 rbheads = []
101 rbheads = []
102 closed = []
102 closed = []
103 for bheads in remotebranchmap.itervalues():
103 for bheads in remotebranchmap.itervalues():
104 rbheads += bheads
104 rbheads += bheads
105 for h in bheads:
105 for h in bheads:
106 r = clrev(h)
106 r = clrev(h)
107 b, c = clbranchinfo(r)
107 b, c = clbranchinfo(r)
108 if c:
108 if c:
109 closed.append(h)
109 closed.append(h)
110
110
111 if rbheads:
111 if rbheads:
112 rtiprev = max((int(clrev(node)) for node in rbheads))
112 rtiprev = max((int(clrev(node)) for node in rbheads))
113 cache = branchcache(
113 cache = branchcache(
114 remotebranchmap, repo[rtiprev].node(), rtiprev,
114 remotebranchmap, repo[rtiprev].node(), rtiprev,
115 closednodes=closed)
115 closednodes=closed)
116
116
117 # Try to stick it as low as possible
117 # Try to stick it as low as possible
118 # filter above served are unlikely to be fetch from a clone
118 # filter above served are unlikely to be fetch from a clone
119 for candidate in ('base', 'immutable', 'served'):
119 for candidate in ('base', 'immutable', 'served'):
120 rview = repo.filtered(candidate)
120 rview = repo.filtered(candidate)
121 if cache.validfor(rview):
121 if cache.validfor(rview):
122 self._per_filter[candidate] = cache
122 self._per_filter[candidate] = cache
123 cache.write(rview)
123 cache.write(rview)
124 return
124 return
125
125
126 def clear(self):
126 def clear(self):
127 self._per_filter.clear()
127 self._per_filter.clear()
128
128
129
129
130 class branchcache(dict):
130 class branchcache(dict):
131 """A dict like object that hold branches heads cache.
131 """A dict like object that hold branches heads cache.
132
132
133 This cache is used to avoid costly computations to determine all the
133 This cache is used to avoid costly computations to determine all the
134 branch heads of a repo.
134 branch heads of a repo.
135
135
136 The cache is serialized on disk in the following format:
136 The cache is serialized on disk in the following format:
137
137
138 <tip hex node> <tip rev number> [optional filtered repo hex hash]
138 <tip hex node> <tip rev number> [optional filtered repo hex hash]
139 <branch head hex node> <open/closed state> <branch name>
139 <branch head hex node> <open/closed state> <branch name>
140 <branch head hex node> <open/closed state> <branch name>
140 <branch head hex node> <open/closed state> <branch name>
141 ...
141 ...
142
142
143 The first line is used to check if the cache is still valid. If the
143 The first line is used to check if the cache is still valid. If the
144 branch cache is for a filtered repo view, an optional third hash is
144 branch cache is for a filtered repo view, an optional third hash is
145 included that hashes the hashes of all filtered revisions.
145 included that hashes the hashes of all filtered revisions.
146
146
147 The open/closed state is represented by a single letter 'o' or 'c'.
147 The open/closed state is represented by a single letter 'o' or 'c'.
148 This field can be used to avoid changelog reads when determining if a
148 This field can be used to avoid changelog reads when determining if a
149 branch head closes a branch or not.
149 branch head closes a branch or not.
150 """
150 """
151
151
152 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
152 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
153 filteredhash=None, closednodes=None):
153 filteredhash=None, closednodes=None):
154 super(branchcache, self).__init__(entries)
154 super(branchcache, self).__init__(entries)
155 self.tipnode = tipnode
155 self.tipnode = tipnode
156 self.tiprev = tiprev
156 self.tiprev = tiprev
157 self.filteredhash = filteredhash
157 self.filteredhash = filteredhash
158 # closednodes is a set of nodes that close their branch. If the branch
158 # closednodes is a set of nodes that close their branch. If the branch
159 # cache has been updated, it may contain nodes that are no longer
159 # cache has been updated, it may contain nodes that are no longer
160 # heads.
160 # heads.
161 if closednodes is None:
161 if closednodes is None:
162 self._closednodes = set()
162 self._closednodes = set()
163 else:
163 else:
164 self._closednodes = closednodes
164 self._closednodes = closednodes
165
165
166 @classmethod
166 @classmethod
167 def fromfile(cls, repo):
167 def fromfile(cls, repo):
168 f = None
168 f = None
169 try:
169 try:
170 f = repo.cachevfs(cls._filename(repo))
170 f = repo.cachevfs(cls._filename(repo))
171 lineiter = iter(f)
171 lineiter = iter(f)
172 cachekey = next(lineiter).rstrip('\n').split(" ", 2)
172 cachekey = next(lineiter).rstrip('\n').split(" ", 2)
173 last, lrev = cachekey[:2]
173 last, lrev = cachekey[:2]
174 last, lrev = bin(last), int(lrev)
174 last, lrev = bin(last), int(lrev)
175 filteredhash = None
175 filteredhash = None
176 if len(cachekey) > 2:
176 if len(cachekey) > 2:
177 filteredhash = bin(cachekey[2])
177 filteredhash = bin(cachekey[2])
178 bcache = cls(tipnode=last, tiprev=lrev, filteredhash=filteredhash)
178 bcache = cls(tipnode=last, tiprev=lrev, filteredhash=filteredhash)
179 if not bcache.validfor(repo):
179 if not bcache.validfor(repo):
180 # invalidate the cache
180 # invalidate the cache
181 raise ValueError(r'tip differs')
181 raise ValueError(r'tip differs')
182 cl = repo.changelog
182 bcache.load(repo, f)
183 for line in lineiter:
184 line = line.rstrip('\n')
185 if not line:
186 continue
187 node, state, label = line.split(" ", 2)
188 if state not in 'oc':
189 raise ValueError(r'invalid branch state')
190 label = encoding.tolocal(label.strip())
191 node = bin(node)
192 if not cl.hasnode(node):
193 raise ValueError(
194 r'node %s does not exist' % pycompat.sysstr(hex(node)))
195 bcache.setdefault(label, []).append(node)
196 if state == 'c':
197 bcache._closednodes.add(node)
198
199 except (IOError, OSError):
183 except (IOError, OSError):
200 return None
184 return None
201
185
202 except Exception as inst:
186 except Exception as inst:
203 if repo.ui.debugflag:
187 if repo.ui.debugflag:
204 msg = 'invalid branchheads cache'
188 msg = 'invalid branchheads cache'
205 if repo.filtername is not None:
189 if repo.filtername is not None:
206 msg += ' (%s)' % repo.filtername
190 msg += ' (%s)' % repo.filtername
207 msg += ': %s\n'
191 msg += ': %s\n'
208 repo.ui.debug(msg % pycompat.bytestr(inst))
192 repo.ui.debug(msg % pycompat.bytestr(inst))
209 bcache = None
193 bcache = None
210
194
211 finally:
195 finally:
212 if f:
196 if f:
213 f.close()
197 f.close()
214
198
215 return bcache
199 return bcache
216
200
201 def load(self, repo, f):
202 """ fully loads the branchcache by reading from the file f """
203 cl = repo.changelog
204 lineiter = iter(f)
205 for line in lineiter:
206 line = line.rstrip('\n')
207 if not line:
208 continue
209 node, state, label = line.split(" ", 2)
210 if state not in 'oc':
211 raise ValueError(r'invalid branch state')
212 label = encoding.tolocal(label.strip())
213 node = bin(node)
214 if not cl.hasnode(node):
215 raise ValueError(
216 r'node %s does not exist' % pycompat.sysstr(hex(node)))
217 self.setdefault(label, []).append(node)
218 if state == 'c':
219 self._closednodes.add(node)
220
217 @staticmethod
221 @staticmethod
218 def _filename(repo):
222 def _filename(repo):
219 """name of a branchcache file for a given repo or repoview"""
223 """name of a branchcache file for a given repo or repoview"""
220 filename = "branch2"
224 filename = "branch2"
221 if repo.filtername:
225 if repo.filtername:
222 filename = '%s-%s' % (filename, repo.filtername)
226 filename = '%s-%s' % (filename, repo.filtername)
223 return filename
227 return filename
224
228
225 def validfor(self, repo):
229 def validfor(self, repo):
226 """Is the cache content valid regarding a repo
230 """Is the cache content valid regarding a repo
227
231
228 - False when cached tipnode is unknown or if we detect a strip.
232 - False when cached tipnode is unknown or if we detect a strip.
229 - True when cache is up to date or a subset of current repo."""
233 - True when cache is up to date or a subset of current repo."""
230 try:
234 try:
231 return ((self.tipnode == repo.changelog.node(self.tiprev))
235 return ((self.tipnode == repo.changelog.node(self.tiprev))
232 and (self.filteredhash ==
236 and (self.filteredhash ==
233 scmutil.filteredhash(repo, self.tiprev)))
237 scmutil.filteredhash(repo, self.tiprev)))
234 except IndexError:
238 except IndexError:
235 return False
239 return False
236
240
237 def _branchtip(self, heads):
241 def _branchtip(self, heads):
238 '''Return tuple with last open head in heads and false,
242 '''Return tuple with last open head in heads and false,
239 otherwise return last closed head and true.'''
243 otherwise return last closed head and true.'''
240 tip = heads[-1]
244 tip = heads[-1]
241 closed = True
245 closed = True
242 for h in reversed(heads):
246 for h in reversed(heads):
243 if h not in self._closednodes:
247 if h not in self._closednodes:
244 tip = h
248 tip = h
245 closed = False
249 closed = False
246 break
250 break
247 return tip, closed
251 return tip, closed
248
252
249 def branchtip(self, branch):
253 def branchtip(self, branch):
250 '''Return the tipmost open head on branch head, otherwise return the
254 '''Return the tipmost open head on branch head, otherwise return the
251 tipmost closed head on branch.
255 tipmost closed head on branch.
252 Raise KeyError for unknown branch.'''
256 Raise KeyError for unknown branch.'''
253 return self._branchtip(self[branch])[0]
257 return self._branchtip(self[branch])[0]
254
258
255 def iteropen(self, nodes):
259 def iteropen(self, nodes):
256 return (n for n in nodes if n not in self._closednodes)
260 return (n for n in nodes if n not in self._closednodes)
257
261
258 def branchheads(self, branch, closed=False):
262 def branchheads(self, branch, closed=False):
259 heads = self[branch]
263 heads = self[branch]
260 if not closed:
264 if not closed:
261 heads = list(self.iteropen(heads))
265 heads = list(self.iteropen(heads))
262 return heads
266 return heads
263
267
264 def iterbranches(self):
268 def iterbranches(self):
265 for bn, heads in self.iteritems():
269 for bn, heads in self.iteritems():
266 yield (bn, heads) + self._branchtip(heads)
270 yield (bn, heads) + self._branchtip(heads)
267
271
268 def copy(self):
272 def copy(self):
269 """return an deep copy of the branchcache object"""
273 """return an deep copy of the branchcache object"""
270 return type(self)(
274 return type(self)(
271 self, self.tipnode, self.tiprev, self.filteredhash,
275 self, self.tipnode, self.tiprev, self.filteredhash,
272 self._closednodes)
276 self._closednodes)
273
277
274 def write(self, repo):
278 def write(self, repo):
275 try:
279 try:
276 f = repo.cachevfs(self._filename(repo), "w", atomictemp=True)
280 f = repo.cachevfs(self._filename(repo), "w", atomictemp=True)
277 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
281 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
278 if self.filteredhash is not None:
282 if self.filteredhash is not None:
279 cachekey.append(hex(self.filteredhash))
283 cachekey.append(hex(self.filteredhash))
280 f.write(" ".join(cachekey) + '\n')
284 f.write(" ".join(cachekey) + '\n')
281 nodecount = 0
285 nodecount = 0
282 for label, nodes in sorted(self.iteritems()):
286 for label, nodes in sorted(self.iteritems()):
283 label = encoding.fromlocal(label)
287 label = encoding.fromlocal(label)
284 for node in nodes:
288 for node in nodes:
285 nodecount += 1
289 nodecount += 1
286 if node in self._closednodes:
290 if node in self._closednodes:
287 state = 'c'
291 state = 'c'
288 else:
292 else:
289 state = 'o'
293 state = 'o'
290 f.write("%s %s %s\n" % (hex(node), state, label))
294 f.write("%s %s %s\n" % (hex(node), state, label))
291 f.close()
295 f.close()
292 repo.ui.log('branchcache',
296 repo.ui.log('branchcache',
293 'wrote %s branch cache with %d labels and %d nodes\n',
297 'wrote %s branch cache with %d labels and %d nodes\n',
294 repo.filtername, len(self), nodecount)
298 repo.filtername, len(self), nodecount)
295 except (IOError, OSError, error.Abort) as inst:
299 except (IOError, OSError, error.Abort) as inst:
296 # Abort may be raised by read only opener, so log and continue
300 # Abort may be raised by read only opener, so log and continue
297 repo.ui.debug("couldn't write branch cache: %s\n" %
301 repo.ui.debug("couldn't write branch cache: %s\n" %
298 stringutil.forcebytestr(inst))
302 stringutil.forcebytestr(inst))
299
303
300 def update(self, repo, revgen):
304 def update(self, repo, revgen):
301 """Given a branchhead cache, self, that may have extra nodes or be
305 """Given a branchhead cache, self, that may have extra nodes or be
302 missing heads, and a generator of nodes that are strictly a superset of
306 missing heads, and a generator of nodes that are strictly a superset of
303 heads missing, this function updates self to be correct.
307 heads missing, this function updates self to be correct.
304 """
308 """
305 starttime = util.timer()
309 starttime = util.timer()
306 cl = repo.changelog
310 cl = repo.changelog
307 # collect new branch entries
311 # collect new branch entries
308 newbranches = {}
312 newbranches = {}
309 getbranchinfo = repo.revbranchcache().branchinfo
313 getbranchinfo = repo.revbranchcache().branchinfo
310 for r in revgen:
314 for r in revgen:
311 branch, closesbranch = getbranchinfo(r)
315 branch, closesbranch = getbranchinfo(r)
312 newbranches.setdefault(branch, []).append(r)
316 newbranches.setdefault(branch, []).append(r)
313 if closesbranch:
317 if closesbranch:
314 self._closednodes.add(cl.node(r))
318 self._closednodes.add(cl.node(r))
315
319
316 # fetch current topological heads to speed up filtering
320 # fetch current topological heads to speed up filtering
317 topoheads = set(cl.headrevs())
321 topoheads = set(cl.headrevs())
318
322
319 # if older branchheads are reachable from new ones, they aren't
323 # if older branchheads are reachable from new ones, they aren't
320 # really branchheads. Note checking parents is insufficient:
324 # really branchheads. Note checking parents is insufficient:
321 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
325 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
322 for branch, newheadrevs in newbranches.iteritems():
326 for branch, newheadrevs in newbranches.iteritems():
323 bheads = self.setdefault(branch, [])
327 bheads = self.setdefault(branch, [])
324 bheadset = set(cl.rev(node) for node in bheads)
328 bheadset = set(cl.rev(node) for node in bheads)
325
329
326 # This have been tested True on all internal usage of this function.
330 # This have been tested True on all internal usage of this function.
327 # run it again in case of doubt
331 # run it again in case of doubt
328 # assert not (set(bheadrevs) & set(newheadrevs))
332 # assert not (set(bheadrevs) & set(newheadrevs))
329 bheadset.update(newheadrevs)
333 bheadset.update(newheadrevs)
330
334
331 # This prunes out two kinds of heads - heads that are superseded by
335 # This prunes out two kinds of heads - heads that are superseded by
332 # a head in newheadrevs, and newheadrevs that are not heads because
336 # a head in newheadrevs, and newheadrevs that are not heads because
333 # an existing head is their descendant.
337 # an existing head is their descendant.
334 uncertain = bheadset - topoheads
338 uncertain = bheadset - topoheads
335 if uncertain:
339 if uncertain:
336 floorrev = min(uncertain)
340 floorrev = min(uncertain)
337 ancestors = set(cl.ancestors(newheadrevs, floorrev))
341 ancestors = set(cl.ancestors(newheadrevs, floorrev))
338 bheadset -= ancestors
342 bheadset -= ancestors
339 bheadrevs = sorted(bheadset)
343 bheadrevs = sorted(bheadset)
340 self[branch] = [cl.node(rev) for rev in bheadrevs]
344 self[branch] = [cl.node(rev) for rev in bheadrevs]
341 tiprev = bheadrevs[-1]
345 tiprev = bheadrevs[-1]
342 if tiprev > self.tiprev:
346 if tiprev > self.tiprev:
343 self.tipnode = cl.node(tiprev)
347 self.tipnode = cl.node(tiprev)
344 self.tiprev = tiprev
348 self.tiprev = tiprev
345
349
346 if not self.validfor(repo):
350 if not self.validfor(repo):
347 # cache key are not valid anymore
351 # cache key are not valid anymore
348 self.tipnode = nullid
352 self.tipnode = nullid
349 self.tiprev = nullrev
353 self.tiprev = nullrev
350 for heads in self.values():
354 for heads in self.values():
351 tiprev = max(cl.rev(node) for node in heads)
355 tiprev = max(cl.rev(node) for node in heads)
352 if tiprev > self.tiprev:
356 if tiprev > self.tiprev:
353 self.tipnode = cl.node(tiprev)
357 self.tipnode = cl.node(tiprev)
354 self.tiprev = tiprev
358 self.tiprev = tiprev
355 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
359 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
356
360
357 duration = util.timer() - starttime
361 duration = util.timer() - starttime
358 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
362 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
359 repo.filtername, duration)
363 repo.filtername, duration)
360
364
361 self.write(repo)
365 self.write(repo)
362
366
363
367
364 class remotebranchcache(branchcache):
368 class remotebranchcache(branchcache):
365 """Branchmap info for a remote connection, should not write locally"""
369 """Branchmap info for a remote connection, should not write locally"""
366 def write(self, repo):
370 def write(self, repo):
367 pass
371 pass
368
372
369
373
370 # Revision branch info cache
374 # Revision branch info cache
371
375
372 _rbcversion = '-v1'
376 _rbcversion = '-v1'
373 _rbcnames = 'rbc-names' + _rbcversion
377 _rbcnames = 'rbc-names' + _rbcversion
374 _rbcrevs = 'rbc-revs' + _rbcversion
378 _rbcrevs = 'rbc-revs' + _rbcversion
375 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
379 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
376 _rbcrecfmt = '>4sI'
380 _rbcrecfmt = '>4sI'
377 _rbcrecsize = calcsize(_rbcrecfmt)
381 _rbcrecsize = calcsize(_rbcrecfmt)
378 _rbcnodelen = 4
382 _rbcnodelen = 4
379 _rbcbranchidxmask = 0x7fffffff
383 _rbcbranchidxmask = 0x7fffffff
380 _rbccloseflag = 0x80000000
384 _rbccloseflag = 0x80000000
381
385
382 class revbranchcache(object):
386 class revbranchcache(object):
383 """Persistent cache, mapping from revision number to branch name and close.
387 """Persistent cache, mapping from revision number to branch name and close.
384 This is a low level cache, independent of filtering.
388 This is a low level cache, independent of filtering.
385
389
386 Branch names are stored in rbc-names in internal encoding separated by 0.
390 Branch names are stored in rbc-names in internal encoding separated by 0.
387 rbc-names is append-only, and each branch name is only stored once and will
391 rbc-names is append-only, and each branch name is only stored once and will
388 thus have a unique index.
392 thus have a unique index.
389
393
390 The branch info for each revision is stored in rbc-revs as constant size
394 The branch info for each revision is stored in rbc-revs as constant size
391 records. The whole file is read into memory, but it is only 'parsed' on
395 records. The whole file is read into memory, but it is only 'parsed' on
392 demand. The file is usually append-only but will be truncated if repo
396 demand. The file is usually append-only but will be truncated if repo
393 modification is detected.
397 modification is detected.
394 The record for each revision contains the first 4 bytes of the
398 The record for each revision contains the first 4 bytes of the
395 corresponding node hash, and the record is only used if it still matches.
399 corresponding node hash, and the record is only used if it still matches.
396 Even a completely trashed rbc-revs fill thus still give the right result
400 Even a completely trashed rbc-revs fill thus still give the right result
397 while converging towards full recovery ... assuming no incorrectly matching
401 while converging towards full recovery ... assuming no incorrectly matching
398 node hashes.
402 node hashes.
399 The record also contains 4 bytes where 31 bits contains the index of the
403 The record also contains 4 bytes where 31 bits contains the index of the
400 branch and the last bit indicate that it is a branch close commit.
404 branch and the last bit indicate that it is a branch close commit.
401 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
405 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
402 and will grow with it but be 1/8th of its size.
406 and will grow with it but be 1/8th of its size.
403 """
407 """
404
408
405 def __init__(self, repo, readonly=True):
409 def __init__(self, repo, readonly=True):
406 assert repo.filtername is None
410 assert repo.filtername is None
407 self._repo = repo
411 self._repo = repo
408 self._names = [] # branch names in local encoding with static index
412 self._names = [] # branch names in local encoding with static index
409 self._rbcrevs = bytearray()
413 self._rbcrevs = bytearray()
410 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
414 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
411 try:
415 try:
412 bndata = repo.cachevfs.read(_rbcnames)
416 bndata = repo.cachevfs.read(_rbcnames)
413 self._rbcsnameslen = len(bndata) # for verification before writing
417 self._rbcsnameslen = len(bndata) # for verification before writing
414 if bndata:
418 if bndata:
415 self._names = [encoding.tolocal(bn)
419 self._names = [encoding.tolocal(bn)
416 for bn in bndata.split('\0')]
420 for bn in bndata.split('\0')]
417 except (IOError, OSError):
421 except (IOError, OSError):
418 if readonly:
422 if readonly:
419 # don't try to use cache - fall back to the slow path
423 # don't try to use cache - fall back to the slow path
420 self.branchinfo = self._branchinfo
424 self.branchinfo = self._branchinfo
421
425
422 if self._names:
426 if self._names:
423 try:
427 try:
424 data = repo.cachevfs.read(_rbcrevs)
428 data = repo.cachevfs.read(_rbcrevs)
425 self._rbcrevs[:] = data
429 self._rbcrevs[:] = data
426 except (IOError, OSError) as inst:
430 except (IOError, OSError) as inst:
427 repo.ui.debug("couldn't read revision branch cache: %s\n" %
431 repo.ui.debug("couldn't read revision branch cache: %s\n" %
428 stringutil.forcebytestr(inst))
432 stringutil.forcebytestr(inst))
429 # remember number of good records on disk
433 # remember number of good records on disk
430 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
434 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
431 len(repo.changelog))
435 len(repo.changelog))
432 if self._rbcrevslen == 0:
436 if self._rbcrevslen == 0:
433 self._names = []
437 self._names = []
434 self._rbcnamescount = len(self._names) # number of names read at
438 self._rbcnamescount = len(self._names) # number of names read at
435 # _rbcsnameslen
439 # _rbcsnameslen
436
440
437 def _clear(self):
441 def _clear(self):
438 self._rbcsnameslen = 0
442 self._rbcsnameslen = 0
439 del self._names[:]
443 del self._names[:]
440 self._rbcnamescount = 0
444 self._rbcnamescount = 0
441 self._rbcrevslen = len(self._repo.changelog)
445 self._rbcrevslen = len(self._repo.changelog)
442 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
446 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
443 util.clearcachedproperty(self, '_namesreverse')
447 util.clearcachedproperty(self, '_namesreverse')
444
448
445 @util.propertycache
449 @util.propertycache
446 def _namesreverse(self):
450 def _namesreverse(self):
447 return dict((b, r) for r, b in enumerate(self._names))
451 return dict((b, r) for r, b in enumerate(self._names))
448
452
449 def branchinfo(self, rev):
453 def branchinfo(self, rev):
450 """Return branch name and close flag for rev, using and updating
454 """Return branch name and close flag for rev, using and updating
451 persistent cache."""
455 persistent cache."""
452 changelog = self._repo.changelog
456 changelog = self._repo.changelog
453 rbcrevidx = rev * _rbcrecsize
457 rbcrevidx = rev * _rbcrecsize
454
458
455 # avoid negative index, changelog.read(nullrev) is fast without cache
459 # avoid negative index, changelog.read(nullrev) is fast without cache
456 if rev == nullrev:
460 if rev == nullrev:
457 return changelog.branchinfo(rev)
461 return changelog.branchinfo(rev)
458
462
459 # if requested rev isn't allocated, grow and cache the rev info
463 # if requested rev isn't allocated, grow and cache the rev info
460 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
464 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
461 return self._branchinfo(rev)
465 return self._branchinfo(rev)
462
466
463 # fast path: extract data from cache, use it if node is matching
467 # fast path: extract data from cache, use it if node is matching
464 reponode = changelog.node(rev)[:_rbcnodelen]
468 reponode = changelog.node(rev)[:_rbcnodelen]
465 cachenode, branchidx = unpack_from(
469 cachenode, branchidx = unpack_from(
466 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
470 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
467 close = bool(branchidx & _rbccloseflag)
471 close = bool(branchidx & _rbccloseflag)
468 if close:
472 if close:
469 branchidx &= _rbcbranchidxmask
473 branchidx &= _rbcbranchidxmask
470 if cachenode == '\0\0\0\0':
474 if cachenode == '\0\0\0\0':
471 pass
475 pass
472 elif cachenode == reponode:
476 elif cachenode == reponode:
473 try:
477 try:
474 return self._names[branchidx], close
478 return self._names[branchidx], close
475 except IndexError:
479 except IndexError:
476 # recover from invalid reference to unknown branch
480 # recover from invalid reference to unknown branch
477 self._repo.ui.debug("referenced branch names not found"
481 self._repo.ui.debug("referenced branch names not found"
478 " - rebuilding revision branch cache from scratch\n")
482 " - rebuilding revision branch cache from scratch\n")
479 self._clear()
483 self._clear()
480 else:
484 else:
481 # rev/node map has changed, invalidate the cache from here up
485 # rev/node map has changed, invalidate the cache from here up
482 self._repo.ui.debug("history modification detected - truncating "
486 self._repo.ui.debug("history modification detected - truncating "
483 "revision branch cache to revision %d\n" % rev)
487 "revision branch cache to revision %d\n" % rev)
484 truncate = rbcrevidx + _rbcrecsize
488 truncate = rbcrevidx + _rbcrecsize
485 del self._rbcrevs[truncate:]
489 del self._rbcrevs[truncate:]
486 self._rbcrevslen = min(self._rbcrevslen, truncate)
490 self._rbcrevslen = min(self._rbcrevslen, truncate)
487
491
488 # fall back to slow path and make sure it will be written to disk
492 # fall back to slow path and make sure it will be written to disk
489 return self._branchinfo(rev)
493 return self._branchinfo(rev)
490
494
def _branchinfo(self, rev):
    """Retrieve branch info from changelog and update _rbcrevs.

    Slow path used when the revision branch cache has no (valid) entry
    for ``rev``: asks the changelog directly, interns the branch name
    into the cache's name table, and records the result so the next
    lookup hits the fast path.

    Returns a ``(branchname, closesbranch)`` pair, same contract as the
    fast path in ``branchinfo``.
    """
    changelog = self._repo.changelog
    b, close = changelog.branchinfo(rev)
    # intern the branch name: reuse its index if already known,
    # otherwise append it and record the reverse mapping
    if b in self._namesreverse:
        branchidx = self._namesreverse[b]
    else:
        branchidx = len(self._names)
        self._names.append(b)
        self._namesreverse[b] = branchidx
    reponode = changelog.node(rev)
    # the "branch is closed" bit is folded into the index field
    # (high bit, _rbccloseflag) rather than stored separately
    if close:
        branchidx |= _rbccloseflag
    self._setcachedata(rev, reponode, branchidx)
    return b, close
506
510
def setdata(self, branch, rev, node, close):
    """add new data information to the cache

    ``branch`` is the branch name, ``rev``/``node`` identify the
    revision, and ``close`` is True when the revision closes the
    branch. Used by callers that already know the branch info and want
    to seed the cache without going through the changelog.
    """
    # intern the branch name into the name table (same scheme as
    # _branchinfo: index is reused when the name is already known)
    if branch in self._namesreverse:
        branchidx = self._namesreverse[branch]
    else:
        branchidx = len(self._names)
        self._names.append(branch)
        self._namesreverse[branch] = branchidx
    # close state rides in the high bit of the index
    if close:
        branchidx |= _rbccloseflag
    self._setcachedata(rev, node, branchidx)
    # If no cache data were readable (non exists, bad permission, etc)
    # the cache was bypassing itself by setting:
    #
    #   self.branchinfo = self._branchinfo
    #
    # Since we now have data in the cache, we need to drop this bypassing.
    # (the r'' prefix keeps the key a native str under the py2/py3
    # source transformer, matching what vars() returns)
    if r'branchinfo' in vars(self):
        del self.branchinfo
526
530
def _setcachedata(self, rev, node, branchidx):
    """Writes the node's branch data to the in-memory cache data.

    ``branchidx`` is the interned branch-name index, possibly with
    ``_rbccloseflag`` OR-ed in. Also arranges for the cache to be
    flushed to disk when the current transaction (if any) finalizes.
    """
    # nullrev has no slot in the fixed-size record array
    if rev == nullrev:
        return
    rbcrevidx = rev * _rbcrecsize
    # grow the record buffer up to the current changelog length so
    # pack_into below has room to write in place
    if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
        self._rbcrevs.extend('\0' *
                             (len(self._repo.changelog) * _rbcrecsize -
                              len(self._rbcrevs)))
    pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
    # remember the lowest dirtied revision: everything from here on
    # must be (re)written by write()
    self._rbcrevslen = min(self._rbcrevslen, rev)

    tr = self._repo.currenttransaction()
    if tr:
        tr.addfinalize('write-revbranchcache', self.write)
542
546
def write(self, tr=None):
    """Save branch cache if it is dirty.

    Best effort: any I/O, lock or abort error is swallowed and only
    reported at debug level, since the cache can always be rebuilt.
    The ``tr`` parameter is unused here; it matches the transaction
    finalizer callback signature (see _setcachedata).
    """
    repo = self._repo
    wlock = None
    step = ''  # human-readable tag for the debug message on failure
    try:
        # -- step 1: append any newly interned branch names ----------
        if self._rbcnamescount < len(self._names):
            step = ' names'
            wlock = repo.wlock(wait=False)
            if self._rbcnamescount != 0:
                f = repo.cachevfs.open(_rbcnames, 'ab')
                # only append if the on-disk file still matches what we
                # read; otherwise someone else rewrote it - start over
                if f.tell() == self._rbcsnameslen:
                    f.write('\0')
                else:
                    f.close()
                    repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                    self._rbcnamescount = 0
                    self._rbcrevslen = 0
            if self._rbcnamescount == 0:
                # before rewriting names, make sure references are removed
                repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                f = repo.cachevfs.open(_rbcnames, 'wb')
                f.write('\0'.join(encoding.fromlocal(b)
                                  for b in self._names[self._rbcnamescount:]))
                self._rbcsnameslen = f.tell()
                f.close()
            self._rbcnamescount = len(self._names)

        # -- step 2: write the dirty tail of the revision records ----
        start = self._rbcrevslen * _rbcrecsize
        if start != len(self._rbcrevs):
            step = ''
            if wlock is None:
                wlock = repo.wlock(wait=False)
            # only write records for revs the changelog actually has
            revs = min(len(repo.changelog),
                       len(self._rbcrevs) // _rbcrecsize)
            f = repo.cachevfs.open(_rbcrevs, 'ab')
            if f.tell() != start:
                # on-disk file is out of sync with our idea of its
                # length - truncate and rewrite from 'start' (or from
                # scratch if seeking there fails, e.g. short file)
                repo.ui.debug("truncating cache/%s to %d\n"
                              % (_rbcrevs, start))
                f.seek(start)
                if f.tell() != start:
                    start = 0
                    f.seek(start)
                    f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs[start:end])
            f.close()
            self._rbcrevslen = revs
    except (IOError, OSError, error.Abort, error.LockError) as inst:
        # cache write failures are non-fatal by design
        repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                      % (step, stringutil.forcebytestr(inst)))
    finally:
        if wlock is not None:
            wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now