##// END OF EJS Templates
branchmap: move __init__ up in branchcache class...
Pulkit Goyal -
r41826:bfc49f1d default
parent child Browse files
Show More
@@ -1,595 +1,596 b''
# branchmap.py - logic to compute, maintain and store branchmaps for a local repo
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import struct

from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from . import (
    encoding,
    error,
    pycompat,
    scmutil,
    util,
)
from .utils import (
    stringutil,
)
28
28
29 calcsize = struct.calcsize
29 calcsize = struct.calcsize
30 pack_into = struct.pack_into
30 pack_into = struct.pack_into
31 unpack_from = struct.unpack_from
31 unpack_from = struct.unpack_from
32
32
33
33
34 ### Nearest subset relation
34 ### Nearest subset relation
35 # Nearest subset of filter X is a filter Y so that:
35 # Nearest subset of filter X is a filter Y so that:
36 # * Y is included in X,
36 # * Y is included in X,
37 # * X - Y is as small as possible.
37 # * X - Y is as small as possible.
38 # This create and ordering used for branchmap purpose.
38 # This create and ordering used for branchmap purpose.
39 # the ordering may be partial
39 # the ordering may be partial
40 subsettable = {None: 'visible',
40 subsettable = {None: 'visible',
41 'visible-hidden': 'visible',
41 'visible-hidden': 'visible',
42 'visible': 'served',
42 'visible': 'served',
43 'served': 'immutable',
43 'served': 'immutable',
44 'immutable': 'base'}
44 'immutable': 'base'}
45
45
46
46
47 class BranchMapCache(object):
47 class BranchMapCache(object):
48 """Cache mapping"""
48 """Cache mapping"""
49 def __init__(self):
49 def __init__(self):
50 self._per_filter = {}
50 self._per_filter = {}
51
51
52 def __getitem__(self, repo):
52 def __getitem__(self, repo):
53 self.updatecache(repo)
53 self.updatecache(repo)
54 return self._per_filter[repo.filtername]
54 return self._per_filter[repo.filtername]
55
55
56 def updatecache(self, repo):
56 def updatecache(self, repo):
57 """Update the cache for the given filtered view on a repository"""
57 """Update the cache for the given filtered view on a repository"""
58 # This can trigger updates for the caches for subsets of the filtered
58 # This can trigger updates for the caches for subsets of the filtered
59 # view, e.g. when there is no cache for this filtered view or the cache
59 # view, e.g. when there is no cache for this filtered view or the cache
60 # is stale.
60 # is stale.
61
61
62 cl = repo.changelog
62 cl = repo.changelog
63 filtername = repo.filtername
63 filtername = repo.filtername
64 bcache = self._per_filter.get(filtername)
64 bcache = self._per_filter.get(filtername)
65 if bcache is None or not bcache.validfor(repo):
65 if bcache is None or not bcache.validfor(repo):
66 # cache object missing or cache object stale? Read from disk
66 # cache object missing or cache object stale? Read from disk
67 bcache = branchcache.fromfile(repo)
67 bcache = branchcache.fromfile(repo)
68
68
69 revs = []
69 revs = []
70 if bcache is None:
70 if bcache is None:
71 # no (fresh) cache available anymore, perhaps we can re-use
71 # no (fresh) cache available anymore, perhaps we can re-use
72 # the cache for a subset, then extend that to add info on missing
72 # the cache for a subset, then extend that to add info on missing
73 # revisions.
73 # revisions.
74 subsetname = subsettable.get(filtername)
74 subsetname = subsettable.get(filtername)
75 if subsetname is not None:
75 if subsetname is not None:
76 subset = repo.filtered(subsetname)
76 subset = repo.filtered(subsetname)
77 bcache = self[subset].copy()
77 bcache = self[subset].copy()
78 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
78 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
79 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
79 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
80 else:
80 else:
81 # nothing to fall back on, start empty.
81 # nothing to fall back on, start empty.
82 bcache = branchcache()
82 bcache = branchcache()
83
83
84 revs.extend(cl.revs(start=bcache.tiprev + 1))
84 revs.extend(cl.revs(start=bcache.tiprev + 1))
85 if revs:
85 if revs:
86 bcache.update(repo, revs)
86 bcache.update(repo, revs)
87
87
88 assert bcache.validfor(repo), filtername
88 assert bcache.validfor(repo), filtername
89 self._per_filter[repo.filtername] = bcache
89 self._per_filter[repo.filtername] = bcache
90
90
91 def replace(self, repo, remotebranchmap):
91 def replace(self, repo, remotebranchmap):
92 """Replace the branchmap cache for a repo with a branch mapping.
92 """Replace the branchmap cache for a repo with a branch mapping.
93
93
94 This is likely only called during clone with a branch map from a
94 This is likely only called during clone with a branch map from a
95 remote.
95 remote.
96
96
97 """
97 """
98 cl = repo.changelog
98 cl = repo.changelog
99 clrev = cl.rev
99 clrev = cl.rev
100 clbranchinfo = cl.branchinfo
100 clbranchinfo = cl.branchinfo
101 rbheads = []
101 rbheads = []
102 closed = []
102 closed = []
103 for bheads in remotebranchmap.itervalues():
103 for bheads in remotebranchmap.itervalues():
104 rbheads += bheads
104 rbheads += bheads
105 for h in bheads:
105 for h in bheads:
106 r = clrev(h)
106 r = clrev(h)
107 b, c = clbranchinfo(r)
107 b, c = clbranchinfo(r)
108 if c:
108 if c:
109 closed.append(h)
109 closed.append(h)
110
110
111 if rbheads:
111 if rbheads:
112 rtiprev = max((int(clrev(node)) for node in rbheads))
112 rtiprev = max((int(clrev(node)) for node in rbheads))
113 cache = branchcache(
113 cache = branchcache(
114 remotebranchmap, repo[rtiprev].node(), rtiprev,
114 remotebranchmap, repo[rtiprev].node(), rtiprev,
115 closednodes=closed)
115 closednodes=closed)
116
116
117 # Try to stick it as low as possible
117 # Try to stick it as low as possible
118 # filter above served are unlikely to be fetch from a clone
118 # filter above served are unlikely to be fetch from a clone
119 for candidate in ('base', 'immutable', 'served'):
119 for candidate in ('base', 'immutable', 'served'):
120 rview = repo.filtered(candidate)
120 rview = repo.filtered(candidate)
121 if cache.validfor(rview):
121 if cache.validfor(rview):
122 self._per_filter[candidate] = cache
122 self._per_filter[candidate] = cache
123 cache.write(rview)
123 cache.write(rview)
124 return
124 return
125
125
126 def clear(self):
126 def clear(self):
127 self._per_filter.clear()
127 self._per_filter.clear()
128
128
129
129
130 class branchcache(dict):
130 class branchcache(dict):
131 """A dict like object that hold branches heads cache.
131 """A dict like object that hold branches heads cache.
132
132
133 This cache is used to avoid costly computations to determine all the
133 This cache is used to avoid costly computations to determine all the
134 branch heads of a repo.
134 branch heads of a repo.
135
135
136 The cache is serialized on disk in the following format:
136 The cache is serialized on disk in the following format:
137
137
138 <tip hex node> <tip rev number> [optional filtered repo hex hash]
138 <tip hex node> <tip rev number> [optional filtered repo hex hash]
139 <branch head hex node> <open/closed state> <branch name>
139 <branch head hex node> <open/closed state> <branch name>
140 <branch head hex node> <open/closed state> <branch name>
140 <branch head hex node> <open/closed state> <branch name>
141 ...
141 ...
142
142
143 The first line is used to check if the cache is still valid. If the
143 The first line is used to check if the cache is still valid. If the
144 branch cache is for a filtered repo view, an optional third hash is
144 branch cache is for a filtered repo view, an optional third hash is
145 included that hashes the hashes of all filtered revisions.
145 included that hashes the hashes of all filtered revisions.
146
146
147 The open/closed state is represented by a single letter 'o' or 'c'.
147 The open/closed state is represented by a single letter 'o' or 'c'.
148 This field can be used to avoid changelog reads when determining if a
148 This field can be used to avoid changelog reads when determining if a
149 branch head closes a branch or not.
149 branch head closes a branch or not.
150 """
150 """
151
152 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
153 filteredhash=None, closednodes=None):
154 super(branchcache, self).__init__(entries)
155 self.tipnode = tipnode
156 self.tiprev = tiprev
157 self.filteredhash = filteredhash
158 # closednodes is a set of nodes that close their branch. If the branch
159 # cache has been updated, it may contain nodes that are no longer
160 # heads.
161 if closednodes is None:
162 self._closednodes = set()
163 else:
164 self._closednodes = closednodes
165
151 @classmethod
166 @classmethod
152 def fromfile(cls, repo):
167 def fromfile(cls, repo):
153 f = None
168 f = None
154 try:
169 try:
155 f = repo.cachevfs(cls._filename(repo))
170 f = repo.cachevfs(cls._filename(repo))
156 lineiter = iter(f)
171 lineiter = iter(f)
157 cachekey = next(lineiter).rstrip('\n').split(" ", 2)
172 cachekey = next(lineiter).rstrip('\n').split(" ", 2)
158 last, lrev = cachekey[:2]
173 last, lrev = cachekey[:2]
159 last, lrev = bin(last), int(lrev)
174 last, lrev = bin(last), int(lrev)
160 filteredhash = None
175 filteredhash = None
161 if len(cachekey) > 2:
176 if len(cachekey) > 2:
162 filteredhash = bin(cachekey[2])
177 filteredhash = bin(cachekey[2])
163 bcache = cls(tipnode=last, tiprev=lrev, filteredhash=filteredhash)
178 bcache = cls(tipnode=last, tiprev=lrev, filteredhash=filteredhash)
164 if not bcache.validfor(repo):
179 if not bcache.validfor(repo):
165 # invalidate the cache
180 # invalidate the cache
166 raise ValueError(r'tip differs')
181 raise ValueError(r'tip differs')
167 cl = repo.changelog
182 cl = repo.changelog
168 for line in lineiter:
183 for line in lineiter:
169 line = line.rstrip('\n')
184 line = line.rstrip('\n')
170 if not line:
185 if not line:
171 continue
186 continue
172 node, state, label = line.split(" ", 2)
187 node, state, label = line.split(" ", 2)
173 if state not in 'oc':
188 if state not in 'oc':
174 raise ValueError(r'invalid branch state')
189 raise ValueError(r'invalid branch state')
175 label = encoding.tolocal(label.strip())
190 label = encoding.tolocal(label.strip())
176 node = bin(node)
191 node = bin(node)
177 if not cl.hasnode(node):
192 if not cl.hasnode(node):
178 raise ValueError(
193 raise ValueError(
179 r'node %s does not exist' % pycompat.sysstr(hex(node)))
194 r'node %s does not exist' % pycompat.sysstr(hex(node)))
180 bcache.setdefault(label, []).append(node)
195 bcache.setdefault(label, []).append(node)
181 if state == 'c':
196 if state == 'c':
182 bcache._closednodes.add(node)
197 bcache._closednodes.add(node)
183
198
184 except (IOError, OSError):
199 except (IOError, OSError):
185 return None
200 return None
186
201
187 except Exception as inst:
202 except Exception as inst:
188 if repo.ui.debugflag:
203 if repo.ui.debugflag:
189 msg = 'invalid branchheads cache'
204 msg = 'invalid branchheads cache'
190 if repo.filtername is not None:
205 if repo.filtername is not None:
191 msg += ' (%s)' % repo.filtername
206 msg += ' (%s)' % repo.filtername
192 msg += ': %s\n'
207 msg += ': %s\n'
193 repo.ui.debug(msg % pycompat.bytestr(inst))
208 repo.ui.debug(msg % pycompat.bytestr(inst))
194 bcache = None
209 bcache = None
195
210
196 finally:
211 finally:
197 if f:
212 if f:
198 f.close()
213 f.close()
199
214
200 return bcache
215 return bcache
201
216
202 @staticmethod
217 @staticmethod
203 def _filename(repo):
218 def _filename(repo):
204 """name of a branchcache file for a given repo or repoview"""
219 """name of a branchcache file for a given repo or repoview"""
205 filename = "branch2"
220 filename = "branch2"
206 if repo.filtername:
221 if repo.filtername:
207 filename = '%s-%s' % (filename, repo.filtername)
222 filename = '%s-%s' % (filename, repo.filtername)
208 return filename
223 return filename
209
224
210 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
211 filteredhash=None, closednodes=None):
212 super(branchcache, self).__init__(entries)
213 self.tipnode = tipnode
214 self.tiprev = tiprev
215 self.filteredhash = filteredhash
216 # closednodes is a set of nodes that close their branch. If the branch
217 # cache has been updated, it may contain nodes that are no longer
218 # heads.
219 if closednodes is None:
220 self._closednodes = set()
221 else:
222 self._closednodes = closednodes
223
224 def validfor(self, repo):
225 def validfor(self, repo):
225 """Is the cache content valid regarding a repo
226 """Is the cache content valid regarding a repo
226
227
227 - False when cached tipnode is unknown or if we detect a strip.
228 - False when cached tipnode is unknown or if we detect a strip.
228 - True when cache is up to date or a subset of current repo."""
229 - True when cache is up to date or a subset of current repo."""
229 try:
230 try:
230 return ((self.tipnode == repo.changelog.node(self.tiprev))
231 return ((self.tipnode == repo.changelog.node(self.tiprev))
231 and (self.filteredhash == \
232 and (self.filteredhash == \
232 scmutil.filteredhash(repo, self.tiprev)))
233 scmutil.filteredhash(repo, self.tiprev)))
233 except IndexError:
234 except IndexError:
234 return False
235 return False
235
236
236 def _branchtip(self, heads):
237 def _branchtip(self, heads):
237 '''Return tuple with last open head in heads and false,
238 '''Return tuple with last open head in heads and false,
238 otherwise return last closed head and true.'''
239 otherwise return last closed head and true.'''
239 tip = heads[-1]
240 tip = heads[-1]
240 closed = True
241 closed = True
241 for h in reversed(heads):
242 for h in reversed(heads):
242 if h not in self._closednodes:
243 if h not in self._closednodes:
243 tip = h
244 tip = h
244 closed = False
245 closed = False
245 break
246 break
246 return tip, closed
247 return tip, closed
247
248
248 def branchtip(self, branch):
249 def branchtip(self, branch):
249 '''Return the tipmost open head on branch head, otherwise return the
250 '''Return the tipmost open head on branch head, otherwise return the
250 tipmost closed head on branch.
251 tipmost closed head on branch.
251 Raise KeyError for unknown branch.'''
252 Raise KeyError for unknown branch.'''
252 return self._branchtip(self[branch])[0]
253 return self._branchtip(self[branch])[0]
253
254
254 def iteropen(self, nodes):
255 def iteropen(self, nodes):
255 return (n for n in nodes if n not in self._closednodes)
256 return (n for n in nodes if n not in self._closednodes)
256
257
257 def branchheads(self, branch, closed=False):
258 def branchheads(self, branch, closed=False):
258 heads = self[branch]
259 heads = self[branch]
259 if not closed:
260 if not closed:
260 heads = list(self.iteropen(heads))
261 heads = list(self.iteropen(heads))
261 return heads
262 return heads
262
263
263 def iterbranches(self):
264 def iterbranches(self):
264 for bn, heads in self.iteritems():
265 for bn, heads in self.iteritems():
265 yield (bn, heads) + self._branchtip(heads)
266 yield (bn, heads) + self._branchtip(heads)
266
267
267 def copy(self):
268 def copy(self):
268 """return an deep copy of the branchcache object"""
269 """return an deep copy of the branchcache object"""
269 return type(self)(
270 return type(self)(
270 self, self.tipnode, self.tiprev, self.filteredhash,
271 self, self.tipnode, self.tiprev, self.filteredhash,
271 self._closednodes)
272 self._closednodes)
272
273
273 def write(self, repo):
274 def write(self, repo):
274 try:
275 try:
275 f = repo.cachevfs(self._filename(repo), "w", atomictemp=True)
276 f = repo.cachevfs(self._filename(repo), "w", atomictemp=True)
276 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
277 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
277 if self.filteredhash is not None:
278 if self.filteredhash is not None:
278 cachekey.append(hex(self.filteredhash))
279 cachekey.append(hex(self.filteredhash))
279 f.write(" ".join(cachekey) + '\n')
280 f.write(" ".join(cachekey) + '\n')
280 nodecount = 0
281 nodecount = 0
281 for label, nodes in sorted(self.iteritems()):
282 for label, nodes in sorted(self.iteritems()):
282 for node in nodes:
283 for node in nodes:
283 nodecount += 1
284 nodecount += 1
284 if node in self._closednodes:
285 if node in self._closednodes:
285 state = 'c'
286 state = 'c'
286 else:
287 else:
287 state = 'o'
288 state = 'o'
288 f.write("%s %s %s\n" % (hex(node), state,
289 f.write("%s %s %s\n" % (hex(node), state,
289 encoding.fromlocal(label)))
290 encoding.fromlocal(label)))
290 f.close()
291 f.close()
291 repo.ui.log('branchcache',
292 repo.ui.log('branchcache',
292 'wrote %s branch cache with %d labels and %d nodes\n',
293 'wrote %s branch cache with %d labels and %d nodes\n',
293 repo.filtername, len(self), nodecount)
294 repo.filtername, len(self), nodecount)
294 except (IOError, OSError, error.Abort) as inst:
295 except (IOError, OSError, error.Abort) as inst:
295 # Abort may be raised by read only opener, so log and continue
296 # Abort may be raised by read only opener, so log and continue
296 repo.ui.debug("couldn't write branch cache: %s\n" %
297 repo.ui.debug("couldn't write branch cache: %s\n" %
297 stringutil.forcebytestr(inst))
298 stringutil.forcebytestr(inst))
298
299
299 def update(self, repo, revgen):
300 def update(self, repo, revgen):
300 """Given a branchhead cache, self, that may have extra nodes or be
301 """Given a branchhead cache, self, that may have extra nodes or be
301 missing heads, and a generator of nodes that are strictly a superset of
302 missing heads, and a generator of nodes that are strictly a superset of
302 heads missing, this function updates self to be correct.
303 heads missing, this function updates self to be correct.
303 """
304 """
304 starttime = util.timer()
305 starttime = util.timer()
305 cl = repo.changelog
306 cl = repo.changelog
306 # collect new branch entries
307 # collect new branch entries
307 newbranches = {}
308 newbranches = {}
308 getbranchinfo = repo.revbranchcache().branchinfo
309 getbranchinfo = repo.revbranchcache().branchinfo
309 for r in revgen:
310 for r in revgen:
310 branch, closesbranch = getbranchinfo(r)
311 branch, closesbranch = getbranchinfo(r)
311 newbranches.setdefault(branch, []).append(r)
312 newbranches.setdefault(branch, []).append(r)
312 if closesbranch:
313 if closesbranch:
313 self._closednodes.add(cl.node(r))
314 self._closednodes.add(cl.node(r))
314
315
315 # fetch current topological heads to speed up filtering
316 # fetch current topological heads to speed up filtering
316 topoheads = set(cl.headrevs())
317 topoheads = set(cl.headrevs())
317
318
318 # if older branchheads are reachable from new ones, they aren't
319 # if older branchheads are reachable from new ones, they aren't
319 # really branchheads. Note checking parents is insufficient:
320 # really branchheads. Note checking parents is insufficient:
320 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
321 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
321 for branch, newheadrevs in newbranches.iteritems():
322 for branch, newheadrevs in newbranches.iteritems():
322 bheads = self.setdefault(branch, [])
323 bheads = self.setdefault(branch, [])
323 bheadset = set(cl.rev(node) for node in bheads)
324 bheadset = set(cl.rev(node) for node in bheads)
324
325
325 # This have been tested True on all internal usage of this function.
326 # This have been tested True on all internal usage of this function.
326 # run it again in case of doubt
327 # run it again in case of doubt
327 # assert not (set(bheadrevs) & set(newheadrevs))
328 # assert not (set(bheadrevs) & set(newheadrevs))
328 bheadset.update(newheadrevs)
329 bheadset.update(newheadrevs)
329
330
330 # This prunes out two kinds of heads - heads that are superseded by
331 # This prunes out two kinds of heads - heads that are superseded by
331 # a head in newheadrevs, and newheadrevs that are not heads because
332 # a head in newheadrevs, and newheadrevs that are not heads because
332 # an existing head is their descendant.
333 # an existing head is their descendant.
333 uncertain = bheadset - topoheads
334 uncertain = bheadset - topoheads
334 if uncertain:
335 if uncertain:
335 floorrev = min(uncertain)
336 floorrev = min(uncertain)
336 ancestors = set(cl.ancestors(newheadrevs, floorrev))
337 ancestors = set(cl.ancestors(newheadrevs, floorrev))
337 bheadset -= ancestors
338 bheadset -= ancestors
338 bheadrevs = sorted(bheadset)
339 bheadrevs = sorted(bheadset)
339 self[branch] = [cl.node(rev) for rev in bheadrevs]
340 self[branch] = [cl.node(rev) for rev in bheadrevs]
340 tiprev = bheadrevs[-1]
341 tiprev = bheadrevs[-1]
341 if tiprev > self.tiprev:
342 if tiprev > self.tiprev:
342 self.tipnode = cl.node(tiprev)
343 self.tipnode = cl.node(tiprev)
343 self.tiprev = tiprev
344 self.tiprev = tiprev
344
345
345 if not self.validfor(repo):
346 if not self.validfor(repo):
346 # cache key are not valid anymore
347 # cache key are not valid anymore
347 self.tipnode = nullid
348 self.tipnode = nullid
348 self.tiprev = nullrev
349 self.tiprev = nullrev
349 for heads in self.values():
350 for heads in self.values():
350 tiprev = max(cl.rev(node) for node in heads)
351 tiprev = max(cl.rev(node) for node in heads)
351 if tiprev > self.tiprev:
352 if tiprev > self.tiprev:
352 self.tipnode = cl.node(tiprev)
353 self.tipnode = cl.node(tiprev)
353 self.tiprev = tiprev
354 self.tiprev = tiprev
354 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
355 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
355
356
356 duration = util.timer() - starttime
357 duration = util.timer() - starttime
357 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
358 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
358 repo.filtername, duration)
359 repo.filtername, duration)
359
360
360 self.write(repo)
361 self.write(repo)
361
362
362
363
363 class remotebranchcache(branchcache):
364 class remotebranchcache(branchcache):
364 """Branchmap info for a remote connection, should not write locally"""
365 """Branchmap info for a remote connection, should not write locally"""
365 def write(self, repo):
366 def write(self, repo):
366 pass
367 pass
367
368
368
369
369 # Revision branch info cache
370 # Revision branch info cache
370
371
371 _rbcversion = '-v1'
372 _rbcversion = '-v1'
372 _rbcnames = 'rbc-names' + _rbcversion
373 _rbcnames = 'rbc-names' + _rbcversion
373 _rbcrevs = 'rbc-revs' + _rbcversion
374 _rbcrevs = 'rbc-revs' + _rbcversion
374 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
375 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
375 _rbcrecfmt = '>4sI'
376 _rbcrecfmt = '>4sI'
376 _rbcrecsize = calcsize(_rbcrecfmt)
377 _rbcrecsize = calcsize(_rbcrecfmt)
377 _rbcnodelen = 4
378 _rbcnodelen = 4
378 _rbcbranchidxmask = 0x7fffffff
379 _rbcbranchidxmask = 0x7fffffff
379 _rbccloseflag = 0x80000000
380 _rbccloseflag = 0x80000000
380
381
381 class revbranchcache(object):
382 class revbranchcache(object):
382 """Persistent cache, mapping from revision number to branch name and close.
383 """Persistent cache, mapping from revision number to branch name and close.
383 This is a low level cache, independent of filtering.
384 This is a low level cache, independent of filtering.
384
385
385 Branch names are stored in rbc-names in internal encoding separated by 0.
386 Branch names are stored in rbc-names in internal encoding separated by 0.
386 rbc-names is append-only, and each branch name is only stored once and will
387 rbc-names is append-only, and each branch name is only stored once and will
387 thus have a unique index.
388 thus have a unique index.
388
389
389 The branch info for each revision is stored in rbc-revs as constant size
390 The branch info for each revision is stored in rbc-revs as constant size
390 records. The whole file is read into memory, but it is only 'parsed' on
391 records. The whole file is read into memory, but it is only 'parsed' on
391 demand. The file is usually append-only but will be truncated if repo
392 demand. The file is usually append-only but will be truncated if repo
392 modification is detected.
393 modification is detected.
393 The record for each revision contains the first 4 bytes of the
394 The record for each revision contains the first 4 bytes of the
394 corresponding node hash, and the record is only used if it still matches.
395 corresponding node hash, and the record is only used if it still matches.
395 Even a completely trashed rbc-revs fill thus still give the right result
396 Even a completely trashed rbc-revs fill thus still give the right result
396 while converging towards full recovery ... assuming no incorrectly matching
397 while converging towards full recovery ... assuming no incorrectly matching
397 node hashes.
398 node hashes.
398 The record also contains 4 bytes where 31 bits contains the index of the
399 The record also contains 4 bytes where 31 bits contains the index of the
399 branch and the last bit indicate that it is a branch close commit.
400 branch and the last bit indicate that it is a branch close commit.
400 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
401 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
401 and will grow with it but be 1/8th of its size.
402 and will grow with it but be 1/8th of its size.
402 """
403 """
403
404
404 def __init__(self, repo, readonly=True):
405 def __init__(self, repo, readonly=True):
405 assert repo.filtername is None
406 assert repo.filtername is None
406 self._repo = repo
407 self._repo = repo
407 self._names = [] # branch names in local encoding with static index
408 self._names = [] # branch names in local encoding with static index
408 self._rbcrevs = bytearray()
409 self._rbcrevs = bytearray()
409 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
410 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
410 try:
411 try:
411 bndata = repo.cachevfs.read(_rbcnames)
412 bndata = repo.cachevfs.read(_rbcnames)
412 self._rbcsnameslen = len(bndata) # for verification before writing
413 self._rbcsnameslen = len(bndata) # for verification before writing
413 if bndata:
414 if bndata:
414 self._names = [encoding.tolocal(bn)
415 self._names = [encoding.tolocal(bn)
415 for bn in bndata.split('\0')]
416 for bn in bndata.split('\0')]
416 except (IOError, OSError):
417 except (IOError, OSError):
417 if readonly:
418 if readonly:
418 # don't try to use cache - fall back to the slow path
419 # don't try to use cache - fall back to the slow path
419 self.branchinfo = self._branchinfo
420 self.branchinfo = self._branchinfo
420
421
421 if self._names:
422 if self._names:
422 try:
423 try:
423 data = repo.cachevfs.read(_rbcrevs)
424 data = repo.cachevfs.read(_rbcrevs)
424 self._rbcrevs[:] = data
425 self._rbcrevs[:] = data
425 except (IOError, OSError) as inst:
426 except (IOError, OSError) as inst:
426 repo.ui.debug("couldn't read revision branch cache: %s\n" %
427 repo.ui.debug("couldn't read revision branch cache: %s\n" %
427 stringutil.forcebytestr(inst))
428 stringutil.forcebytestr(inst))
428 # remember number of good records on disk
429 # remember number of good records on disk
429 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
430 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
430 len(repo.changelog))
431 len(repo.changelog))
431 if self._rbcrevslen == 0:
432 if self._rbcrevslen == 0:
432 self._names = []
433 self._names = []
433 self._rbcnamescount = len(self._names) # number of names read at
434 self._rbcnamescount = len(self._names) # number of names read at
434 # _rbcsnameslen
435 # _rbcsnameslen
435
436
436 def _clear(self):
437 def _clear(self):
437 self._rbcsnameslen = 0
438 self._rbcsnameslen = 0
438 del self._names[:]
439 del self._names[:]
439 self._rbcnamescount = 0
440 self._rbcnamescount = 0
440 self._rbcrevslen = len(self._repo.changelog)
441 self._rbcrevslen = len(self._repo.changelog)
441 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
442 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
442 util.clearcachedproperty(self, '_namesreverse')
443 util.clearcachedproperty(self, '_namesreverse')
443
444
444 @util.propertycache
445 @util.propertycache
445 def _namesreverse(self):
446 def _namesreverse(self):
446 return dict((b, r) for r, b in enumerate(self._names))
447 return dict((b, r) for r, b in enumerate(self._names))
447
448
448 def branchinfo(self, rev):
449 def branchinfo(self, rev):
449 """Return branch name and close flag for rev, using and updating
450 """Return branch name and close flag for rev, using and updating
450 persistent cache."""
451 persistent cache."""
451 changelog = self._repo.changelog
452 changelog = self._repo.changelog
452 rbcrevidx = rev * _rbcrecsize
453 rbcrevidx = rev * _rbcrecsize
453
454
454 # avoid negative index, changelog.read(nullrev) is fast without cache
455 # avoid negative index, changelog.read(nullrev) is fast without cache
455 if rev == nullrev:
456 if rev == nullrev:
456 return changelog.branchinfo(rev)
457 return changelog.branchinfo(rev)
457
458
458 # if requested rev isn't allocated, grow and cache the rev info
459 # if requested rev isn't allocated, grow and cache the rev info
459 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
460 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
460 return self._branchinfo(rev)
461 return self._branchinfo(rev)
461
462
462 # fast path: extract data from cache, use it if node is matching
463 # fast path: extract data from cache, use it if node is matching
463 reponode = changelog.node(rev)[:_rbcnodelen]
464 reponode = changelog.node(rev)[:_rbcnodelen]
464 cachenode, branchidx = unpack_from(
465 cachenode, branchidx = unpack_from(
465 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
466 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
466 close = bool(branchidx & _rbccloseflag)
467 close = bool(branchidx & _rbccloseflag)
467 if close:
468 if close:
468 branchidx &= _rbcbranchidxmask
469 branchidx &= _rbcbranchidxmask
469 if cachenode == '\0\0\0\0':
470 if cachenode == '\0\0\0\0':
470 pass
471 pass
471 elif cachenode == reponode:
472 elif cachenode == reponode:
472 try:
473 try:
473 return self._names[branchidx], close
474 return self._names[branchidx], close
474 except IndexError:
475 except IndexError:
475 # recover from invalid reference to unknown branch
476 # recover from invalid reference to unknown branch
476 self._repo.ui.debug("referenced branch names not found"
477 self._repo.ui.debug("referenced branch names not found"
477 " - rebuilding revision branch cache from scratch\n")
478 " - rebuilding revision branch cache from scratch\n")
478 self._clear()
479 self._clear()
479 else:
480 else:
480 # rev/node map has changed, invalidate the cache from here up
481 # rev/node map has changed, invalidate the cache from here up
481 self._repo.ui.debug("history modification detected - truncating "
482 self._repo.ui.debug("history modification detected - truncating "
482 "revision branch cache to revision %d\n" % rev)
483 "revision branch cache to revision %d\n" % rev)
483 truncate = rbcrevidx + _rbcrecsize
484 truncate = rbcrevidx + _rbcrecsize
484 del self._rbcrevs[truncate:]
485 del self._rbcrevs[truncate:]
485 self._rbcrevslen = min(self._rbcrevslen, truncate)
486 self._rbcrevslen = min(self._rbcrevslen, truncate)
486
487
487 # fall back to slow path and make sure it will be written to disk
488 # fall back to slow path and make sure it will be written to disk
488 return self._branchinfo(rev)
489 return self._branchinfo(rev)
489
490
490 def _branchinfo(self, rev):
491 def _branchinfo(self, rev):
491 """Retrieve branch info from changelog and update _rbcrevs"""
492 """Retrieve branch info from changelog and update _rbcrevs"""
492 changelog = self._repo.changelog
493 changelog = self._repo.changelog
493 b, close = changelog.branchinfo(rev)
494 b, close = changelog.branchinfo(rev)
494 if b in self._namesreverse:
495 if b in self._namesreverse:
495 branchidx = self._namesreverse[b]
496 branchidx = self._namesreverse[b]
496 else:
497 else:
497 branchidx = len(self._names)
498 branchidx = len(self._names)
498 self._names.append(b)
499 self._names.append(b)
499 self._namesreverse[b] = branchidx
500 self._namesreverse[b] = branchidx
500 reponode = changelog.node(rev)
501 reponode = changelog.node(rev)
501 if close:
502 if close:
502 branchidx |= _rbccloseflag
503 branchidx |= _rbccloseflag
503 self._setcachedata(rev, reponode, branchidx)
504 self._setcachedata(rev, reponode, branchidx)
504 return b, close
505 return b, close
505
506
506 def setdata(self, branch, rev, node, close):
507 def setdata(self, branch, rev, node, close):
507 """add new data information to the cache"""
508 """add new data information to the cache"""
508 if branch in self._namesreverse:
509 if branch in self._namesreverse:
509 branchidx = self._namesreverse[branch]
510 branchidx = self._namesreverse[branch]
510 else:
511 else:
511 branchidx = len(self._names)
512 branchidx = len(self._names)
512 self._names.append(branch)
513 self._names.append(branch)
513 self._namesreverse[branch] = branchidx
514 self._namesreverse[branch] = branchidx
514 if close:
515 if close:
515 branchidx |= _rbccloseflag
516 branchidx |= _rbccloseflag
516 self._setcachedata(rev, node, branchidx)
517 self._setcachedata(rev, node, branchidx)
517 # If no cache data were readable (non exists, bad permission, etc)
518 # If no cache data were readable (non exists, bad permission, etc)
518 # the cache was bypassing itself by setting:
519 # the cache was bypassing itself by setting:
519 #
520 #
520 # self.branchinfo = self._branchinfo
521 # self.branchinfo = self._branchinfo
521 #
522 #
522 # Since we now have data in the cache, we need to drop this bypassing.
523 # Since we now have data in the cache, we need to drop this bypassing.
523 if r'branchinfo' in vars(self):
524 if r'branchinfo' in vars(self):
524 del self.branchinfo
525 del self.branchinfo
525
526
526 def _setcachedata(self, rev, node, branchidx):
527 def _setcachedata(self, rev, node, branchidx):
527 """Writes the node's branch data to the in-memory cache data."""
528 """Writes the node's branch data to the in-memory cache data."""
528 if rev == nullrev:
529 if rev == nullrev:
529 return
530 return
530 rbcrevidx = rev * _rbcrecsize
531 rbcrevidx = rev * _rbcrecsize
531 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
532 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
532 self._rbcrevs.extend('\0' *
533 self._rbcrevs.extend('\0' *
533 (len(self._repo.changelog) * _rbcrecsize -
534 (len(self._repo.changelog) * _rbcrecsize -
534 len(self._rbcrevs)))
535 len(self._rbcrevs)))
535 pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
536 pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
536 self._rbcrevslen = min(self._rbcrevslen, rev)
537 self._rbcrevslen = min(self._rbcrevslen, rev)
537
538
538 tr = self._repo.currenttransaction()
539 tr = self._repo.currenttransaction()
539 if tr:
540 if tr:
540 tr.addfinalize('write-revbranchcache', self.write)
541 tr.addfinalize('write-revbranchcache', self.write)
541
542
542 def write(self, tr=None):
543 def write(self, tr=None):
543 """Save branch cache if it is dirty."""
544 """Save branch cache if it is dirty."""
544 repo = self._repo
545 repo = self._repo
545 wlock = None
546 wlock = None
546 step = ''
547 step = ''
547 try:
548 try:
548 if self._rbcnamescount < len(self._names):
549 if self._rbcnamescount < len(self._names):
549 step = ' names'
550 step = ' names'
550 wlock = repo.wlock(wait=False)
551 wlock = repo.wlock(wait=False)
551 if self._rbcnamescount != 0:
552 if self._rbcnamescount != 0:
552 f = repo.cachevfs.open(_rbcnames, 'ab')
553 f = repo.cachevfs.open(_rbcnames, 'ab')
553 if f.tell() == self._rbcsnameslen:
554 if f.tell() == self._rbcsnameslen:
554 f.write('\0')
555 f.write('\0')
555 else:
556 else:
556 f.close()
557 f.close()
557 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
558 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
558 self._rbcnamescount = 0
559 self._rbcnamescount = 0
559 self._rbcrevslen = 0
560 self._rbcrevslen = 0
560 if self._rbcnamescount == 0:
561 if self._rbcnamescount == 0:
561 # before rewriting names, make sure references are removed
562 # before rewriting names, make sure references are removed
562 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
563 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
563 f = repo.cachevfs.open(_rbcnames, 'wb')
564 f = repo.cachevfs.open(_rbcnames, 'wb')
564 f.write('\0'.join(encoding.fromlocal(b)
565 f.write('\0'.join(encoding.fromlocal(b)
565 for b in self._names[self._rbcnamescount:]))
566 for b in self._names[self._rbcnamescount:]))
566 self._rbcsnameslen = f.tell()
567 self._rbcsnameslen = f.tell()
567 f.close()
568 f.close()
568 self._rbcnamescount = len(self._names)
569 self._rbcnamescount = len(self._names)
569
570
570 start = self._rbcrevslen * _rbcrecsize
571 start = self._rbcrevslen * _rbcrecsize
571 if start != len(self._rbcrevs):
572 if start != len(self._rbcrevs):
572 step = ''
573 step = ''
573 if wlock is None:
574 if wlock is None:
574 wlock = repo.wlock(wait=False)
575 wlock = repo.wlock(wait=False)
575 revs = min(len(repo.changelog),
576 revs = min(len(repo.changelog),
576 len(self._rbcrevs) // _rbcrecsize)
577 len(self._rbcrevs) // _rbcrecsize)
577 f = repo.cachevfs.open(_rbcrevs, 'ab')
578 f = repo.cachevfs.open(_rbcrevs, 'ab')
578 if f.tell() != start:
579 if f.tell() != start:
579 repo.ui.debug("truncating cache/%s to %d\n"
580 repo.ui.debug("truncating cache/%s to %d\n"
580 % (_rbcrevs, start))
581 % (_rbcrevs, start))
581 f.seek(start)
582 f.seek(start)
582 if f.tell() != start:
583 if f.tell() != start:
583 start = 0
584 start = 0
584 f.seek(start)
585 f.seek(start)
585 f.truncate()
586 f.truncate()
586 end = revs * _rbcrecsize
587 end = revs * _rbcrecsize
587 f.write(self._rbcrevs[start:end])
588 f.write(self._rbcrevs[start:end])
588 f.close()
589 f.close()
589 self._rbcrevslen = revs
590 self._rbcrevslen = revs
590 except (IOError, OSError, error.Abort, error.LockError) as inst:
591 except (IOError, OSError, error.Abort, error.LockError) as inst:
591 repo.ui.debug("couldn't write revision branch cache%s: %s\n"
592 repo.ui.debug("couldn't write revision branch cache%s: %s\n"
592 % (step, stringutil.forcebytestr(inst)))
593 % (step, stringutil.forcebytestr(inst)))
593 finally:
594 finally:
594 if wlock is not None:
595 if wlock is not None:
595 wlock.release()
596 wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now