# Source: Mercurial changeset r40411:76d4272b (branch: default), by Gregory Szorc
# "branchmap: avoid changelog and attribute lookups in replacecache()"
# branchmap.py - logic to compute, maintain and store branchmaps for local repos
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import

import struct

from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from . import (
    encoding,
    error,
    pycompat,
    scmutil,
    util,
)
from .utils import (
    stringutil,
)
28
28
29 calcsize = struct.calcsize
29 calcsize = struct.calcsize
30 pack_into = struct.pack_into
30 pack_into = struct.pack_into
31 unpack_from = struct.unpack_from
31 unpack_from = struct.unpack_from
32
32
33 def _filename(repo):
33 def _filename(repo):
34 """name of a branchcache file for a given repo or repoview"""
34 """name of a branchcache file for a given repo or repoview"""
35 filename = "branch2"
35 filename = "branch2"
36 if repo.filtername:
36 if repo.filtername:
37 filename = '%s-%s' % (filename, repo.filtername)
37 filename = '%s-%s' % (filename, repo.filtername)
38 return filename
38 return filename
39
39
40 def read(repo):
40 def read(repo):
41 f = None
41 f = None
42 try:
42 try:
43 f = repo.cachevfs(_filename(repo))
43 f = repo.cachevfs(_filename(repo))
44 lineiter = iter(f)
44 lineiter = iter(f)
45 cachekey = next(lineiter).rstrip('\n').split(" ", 2)
45 cachekey = next(lineiter).rstrip('\n').split(" ", 2)
46 last, lrev = cachekey[:2]
46 last, lrev = cachekey[:2]
47 last, lrev = bin(last), int(lrev)
47 last, lrev = bin(last), int(lrev)
48 filteredhash = None
48 filteredhash = None
49 if len(cachekey) > 2:
49 if len(cachekey) > 2:
50 filteredhash = bin(cachekey[2])
50 filteredhash = bin(cachekey[2])
51 partial = branchcache(tipnode=last, tiprev=lrev,
51 partial = branchcache(tipnode=last, tiprev=lrev,
52 filteredhash=filteredhash)
52 filteredhash=filteredhash)
53 if not partial.validfor(repo):
53 if not partial.validfor(repo):
54 # invalidate the cache
54 # invalidate the cache
55 raise ValueError(r'tip differs')
55 raise ValueError(r'tip differs')
56 cl = repo.changelog
56 cl = repo.changelog
57 for l in lineiter:
57 for l in lineiter:
58 l = l.rstrip('\n')
58 l = l.rstrip('\n')
59 if not l:
59 if not l:
60 continue
60 continue
61 node, state, label = l.split(" ", 2)
61 node, state, label = l.split(" ", 2)
62 if state not in 'oc':
62 if state not in 'oc':
63 raise ValueError(r'invalid branch state')
63 raise ValueError(r'invalid branch state')
64 label = encoding.tolocal(label.strip())
64 label = encoding.tolocal(label.strip())
65 node = bin(node)
65 node = bin(node)
66 if not cl.hasnode(node):
66 if not cl.hasnode(node):
67 raise ValueError(
67 raise ValueError(
68 r'node %s does not exist' % pycompat.sysstr(hex(node)))
68 r'node %s does not exist' % pycompat.sysstr(hex(node)))
69 partial.setdefault(label, []).append(node)
69 partial.setdefault(label, []).append(node)
70 if state == 'c':
70 if state == 'c':
71 partial._closednodes.add(node)
71 partial._closednodes.add(node)
72
72
73 except (IOError, OSError):
73 except (IOError, OSError):
74 return None
74 return None
75
75
76 except Exception as inst:
76 except Exception as inst:
77 if repo.ui.debugflag:
77 if repo.ui.debugflag:
78 msg = 'invalid branchheads cache'
78 msg = 'invalid branchheads cache'
79 if repo.filtername is not None:
79 if repo.filtername is not None:
80 msg += ' (%s)' % repo.filtername
80 msg += ' (%s)' % repo.filtername
81 msg += ': %s\n'
81 msg += ': %s\n'
82 repo.ui.debug(msg % pycompat.bytestr(inst))
82 repo.ui.debug(msg % pycompat.bytestr(inst))
83 partial = None
83 partial = None
84
84
85 finally:
85 finally:
86 if f:
86 if f:
87 f.close()
87 f.close()
88
88
89 return partial
89 return partial
90
90
91 ### Nearest subset relation
91 ### Nearest subset relation
92 # Nearest subset of filter X is a filter Y so that:
92 # Nearest subset of filter X is a filter Y so that:
93 # * Y is included in X,
93 # * Y is included in X,
94 # * X - Y is as small as possible.
94 # * X - Y is as small as possible.
95 # This create and ordering used for branchmap purpose.
95 # This create and ordering used for branchmap purpose.
96 # the ordering may be partial
96 # the ordering may be partial
97 subsettable = {None: 'visible',
97 subsettable = {None: 'visible',
98 'visible-hidden': 'visible',
98 'visible-hidden': 'visible',
99 'visible': 'served',
99 'visible': 'served',
100 'served': 'immutable',
100 'served': 'immutable',
101 'immutable': 'base'}
101 'immutable': 'base'}
102
102
103 def updatecache(repo):
103 def updatecache(repo):
104 cl = repo.changelog
104 cl = repo.changelog
105 filtername = repo.filtername
105 filtername = repo.filtername
106 partial = repo._branchcaches.get(filtername)
106 partial = repo._branchcaches.get(filtername)
107
107
108 revs = []
108 revs = []
109 if partial is None or not partial.validfor(repo):
109 if partial is None or not partial.validfor(repo):
110 partial = read(repo)
110 partial = read(repo)
111 if partial is None:
111 if partial is None:
112 subsetname = subsettable.get(filtername)
112 subsetname = subsettable.get(filtername)
113 if subsetname is None:
113 if subsetname is None:
114 partial = branchcache()
114 partial = branchcache()
115 else:
115 else:
116 subset = repo.filtered(subsetname)
116 subset = repo.filtered(subsetname)
117 partial = subset.branchmap().copy()
117 partial = subset.branchmap().copy()
118 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
118 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
119 revs.extend(r for r in extrarevs if r <= partial.tiprev)
119 revs.extend(r for r in extrarevs if r <= partial.tiprev)
120 revs.extend(cl.revs(start=partial.tiprev + 1))
120 revs.extend(cl.revs(start=partial.tiprev + 1))
121 if revs:
121 if revs:
122 partial.update(repo, revs)
122 partial.update(repo, revs)
123 partial.write(repo)
123 partial.write(repo)
124
124
125 assert partial.validfor(repo), filtername
125 assert partial.validfor(repo), filtername
126 repo._branchcaches[repo.filtername] = partial
126 repo._branchcaches[repo.filtername] = partial
127
127
128 def replacecache(repo, bm):
128 def replacecache(repo, bm):
129 """Replace the branchmap cache for a repo with a branch mapping.
129 """Replace the branchmap cache for a repo with a branch mapping.
130
130
131 This is likely only called during clone with a branch map from a remote.
131 This is likely only called during clone with a branch map from a remote.
132 """
132 """
133 cl = repo.changelog
134 clrev = cl.rev
135 clbranchinfo = cl.branchinfo
133 rbheads = []
136 rbheads = []
134 closed = []
137 closed = []
135 for bheads in bm.itervalues():
138 for bheads in bm.itervalues():
136 rbheads.extend(bheads)
139 rbheads.extend(bheads)
137 for h in bheads:
140 for h in bheads:
138 r = repo.changelog.rev(h)
141 r = clrev(h)
139 b, c = repo.changelog.branchinfo(r)
142 b, c = clbranchinfo(r)
140 if c:
143 if c:
141 closed.append(h)
144 closed.append(h)
142
145
143 if rbheads:
146 if rbheads:
144 rtiprev = max((int(repo.changelog.rev(node))
147 rtiprev = max((int(clrev(node))
145 for node in rbheads))
148 for node in rbheads))
146 cache = branchcache(bm,
149 cache = branchcache(bm,
147 repo[rtiprev].node(),
150 repo[rtiprev].node(),
148 rtiprev,
151 rtiprev,
149 closednodes=closed)
152 closednodes=closed)
150
153
151 # Try to stick it as low as possible
154 # Try to stick it as low as possible
152 # filter above served are unlikely to be fetch from a clone
155 # filter above served are unlikely to be fetch from a clone
153 for candidate in ('base', 'immutable', 'served'):
156 for candidate in ('base', 'immutable', 'served'):
154 rview = repo.filtered(candidate)
157 rview = repo.filtered(candidate)
155 if cache.validfor(rview):
158 if cache.validfor(rview):
156 repo._branchcaches[candidate] = cache
159 repo._branchcaches[candidate] = cache
157 cache.write(rview)
160 cache.write(rview)
158 break
161 break
159
162
160 class branchcache(dict):
163 class branchcache(dict):
161 """A dict like object that hold branches heads cache.
164 """A dict like object that hold branches heads cache.
162
165
163 This cache is used to avoid costly computations to determine all the
166 This cache is used to avoid costly computations to determine all the
164 branch heads of a repo.
167 branch heads of a repo.
165
168
166 The cache is serialized on disk in the following format:
169 The cache is serialized on disk in the following format:
167
170
168 <tip hex node> <tip rev number> [optional filtered repo hex hash]
171 <tip hex node> <tip rev number> [optional filtered repo hex hash]
169 <branch head hex node> <open/closed state> <branch name>
172 <branch head hex node> <open/closed state> <branch name>
170 <branch head hex node> <open/closed state> <branch name>
173 <branch head hex node> <open/closed state> <branch name>
171 ...
174 ...
172
175
173 The first line is used to check if the cache is still valid. If the
176 The first line is used to check if the cache is still valid. If the
174 branch cache is for a filtered repo view, an optional third hash is
177 branch cache is for a filtered repo view, an optional third hash is
175 included that hashes the hashes of all filtered revisions.
178 included that hashes the hashes of all filtered revisions.
176
179
177 The open/closed state is represented by a single letter 'o' or 'c'.
180 The open/closed state is represented by a single letter 'o' or 'c'.
178 This field can be used to avoid changelog reads when determining if a
181 This field can be used to avoid changelog reads when determining if a
179 branch head closes a branch or not.
182 branch head closes a branch or not.
180 """
183 """
181
184
182 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
185 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
183 filteredhash=None, closednodes=None):
186 filteredhash=None, closednodes=None):
184 super(branchcache, self).__init__(entries)
187 super(branchcache, self).__init__(entries)
185 self.tipnode = tipnode
188 self.tipnode = tipnode
186 self.tiprev = tiprev
189 self.tiprev = tiprev
187 self.filteredhash = filteredhash
190 self.filteredhash = filteredhash
188 # closednodes is a set of nodes that close their branch. If the branch
191 # closednodes is a set of nodes that close their branch. If the branch
189 # cache has been updated, it may contain nodes that are no longer
192 # cache has been updated, it may contain nodes that are no longer
190 # heads.
193 # heads.
191 if closednodes is None:
194 if closednodes is None:
192 self._closednodes = set()
195 self._closednodes = set()
193 else:
196 else:
194 self._closednodes = closednodes
197 self._closednodes = closednodes
195
198
196 def validfor(self, repo):
199 def validfor(self, repo):
197 """Is the cache content valid regarding a repo
200 """Is the cache content valid regarding a repo
198
201
199 - False when cached tipnode is unknown or if we detect a strip.
202 - False when cached tipnode is unknown or if we detect a strip.
200 - True when cache is up to date or a subset of current repo."""
203 - True when cache is up to date or a subset of current repo."""
201 try:
204 try:
202 return ((self.tipnode == repo.changelog.node(self.tiprev))
205 return ((self.tipnode == repo.changelog.node(self.tiprev))
203 and (self.filteredhash == \
206 and (self.filteredhash == \
204 scmutil.filteredhash(repo, self.tiprev)))
207 scmutil.filteredhash(repo, self.tiprev)))
205 except IndexError:
208 except IndexError:
206 return False
209 return False
207
210
208 def _branchtip(self, heads):
211 def _branchtip(self, heads):
209 '''Return tuple with last open head in heads and false,
212 '''Return tuple with last open head in heads and false,
210 otherwise return last closed head and true.'''
213 otherwise return last closed head and true.'''
211 tip = heads[-1]
214 tip = heads[-1]
212 closed = True
215 closed = True
213 for h in reversed(heads):
216 for h in reversed(heads):
214 if h not in self._closednodes:
217 if h not in self._closednodes:
215 tip = h
218 tip = h
216 closed = False
219 closed = False
217 break
220 break
218 return tip, closed
221 return tip, closed
219
222
220 def branchtip(self, branch):
223 def branchtip(self, branch):
221 '''Return the tipmost open head on branch head, otherwise return the
224 '''Return the tipmost open head on branch head, otherwise return the
222 tipmost closed head on branch.
225 tipmost closed head on branch.
223 Raise KeyError for unknown branch.'''
226 Raise KeyError for unknown branch.'''
224 return self._branchtip(self[branch])[0]
227 return self._branchtip(self[branch])[0]
225
228
226 def iteropen(self, nodes):
229 def iteropen(self, nodes):
227 return (n for n in nodes if n not in self._closednodes)
230 return (n for n in nodes if n not in self._closednodes)
228
231
229 def branchheads(self, branch, closed=False):
232 def branchheads(self, branch, closed=False):
230 heads = self[branch]
233 heads = self[branch]
231 if not closed:
234 if not closed:
232 heads = list(self.iteropen(heads))
235 heads = list(self.iteropen(heads))
233 return heads
236 return heads
234
237
235 def iterbranches(self):
238 def iterbranches(self):
236 for bn, heads in self.iteritems():
239 for bn, heads in self.iteritems():
237 yield (bn, heads) + self._branchtip(heads)
240 yield (bn, heads) + self._branchtip(heads)
238
241
239 def copy(self):
242 def copy(self):
240 """return an deep copy of the branchcache object"""
243 """return an deep copy of the branchcache object"""
241 return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
244 return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
242 self._closednodes)
245 self._closednodes)
243
246
244 def write(self, repo):
247 def write(self, repo):
245 try:
248 try:
246 f = repo.cachevfs(_filename(repo), "w", atomictemp=True)
249 f = repo.cachevfs(_filename(repo), "w", atomictemp=True)
247 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
250 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
248 if self.filteredhash is not None:
251 if self.filteredhash is not None:
249 cachekey.append(hex(self.filteredhash))
252 cachekey.append(hex(self.filteredhash))
250 f.write(" ".join(cachekey) + '\n')
253 f.write(" ".join(cachekey) + '\n')
251 nodecount = 0
254 nodecount = 0
252 for label, nodes in sorted(self.iteritems()):
255 for label, nodes in sorted(self.iteritems()):
253 for node in nodes:
256 for node in nodes:
254 nodecount += 1
257 nodecount += 1
255 if node in self._closednodes:
258 if node in self._closednodes:
256 state = 'c'
259 state = 'c'
257 else:
260 else:
258 state = 'o'
261 state = 'o'
259 f.write("%s %s %s\n" % (hex(node), state,
262 f.write("%s %s %s\n" % (hex(node), state,
260 encoding.fromlocal(label)))
263 encoding.fromlocal(label)))
261 f.close()
264 f.close()
262 repo.ui.log('branchcache',
265 repo.ui.log('branchcache',
263 'wrote %s branch cache with %d labels and %d nodes\n',
266 'wrote %s branch cache with %d labels and %d nodes\n',
264 repo.filtername, len(self), nodecount)
267 repo.filtername, len(self), nodecount)
265 except (IOError, OSError, error.Abort) as inst:
268 except (IOError, OSError, error.Abort) as inst:
266 # Abort may be raised by read only opener, so log and continue
269 # Abort may be raised by read only opener, so log and continue
267 repo.ui.debug("couldn't write branch cache: %s\n" %
270 repo.ui.debug("couldn't write branch cache: %s\n" %
268 stringutil.forcebytestr(inst))
271 stringutil.forcebytestr(inst))
269
272
270 def update(self, repo, revgen):
273 def update(self, repo, revgen):
271 """Given a branchhead cache, self, that may have extra nodes or be
274 """Given a branchhead cache, self, that may have extra nodes or be
272 missing heads, and a generator of nodes that are strictly a superset of
275 missing heads, and a generator of nodes that are strictly a superset of
273 heads missing, this function updates self to be correct.
276 heads missing, this function updates self to be correct.
274 """
277 """
275 starttime = util.timer()
278 starttime = util.timer()
276 cl = repo.changelog
279 cl = repo.changelog
277 # collect new branch entries
280 # collect new branch entries
278 newbranches = {}
281 newbranches = {}
279 getbranchinfo = repo.revbranchcache().branchinfo
282 getbranchinfo = repo.revbranchcache().branchinfo
280 for r in revgen:
283 for r in revgen:
281 branch, closesbranch = getbranchinfo(r, changelog=cl)
284 branch, closesbranch = getbranchinfo(r, changelog=cl)
282 newbranches.setdefault(branch, []).append(r)
285 newbranches.setdefault(branch, []).append(r)
283 if closesbranch:
286 if closesbranch:
284 self._closednodes.add(cl.node(r))
287 self._closednodes.add(cl.node(r))
285
288
286 # fetch current topological heads to speed up filtering
289 # fetch current topological heads to speed up filtering
287 topoheads = set(cl.headrevs())
290 topoheads = set(cl.headrevs())
288
291
289 # if older branchheads are reachable from new ones, they aren't
292 # if older branchheads are reachable from new ones, they aren't
290 # really branchheads. Note checking parents is insufficient:
293 # really branchheads. Note checking parents is insufficient:
291 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
294 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
292 for branch, newheadrevs in newbranches.iteritems():
295 for branch, newheadrevs in newbranches.iteritems():
293 bheads = self.setdefault(branch, [])
296 bheads = self.setdefault(branch, [])
294 bheadset = set(cl.rev(node) for node in bheads)
297 bheadset = set(cl.rev(node) for node in bheads)
295
298
296 # This have been tested True on all internal usage of this function.
299 # This have been tested True on all internal usage of this function.
297 # run it again in case of doubt
300 # run it again in case of doubt
298 # assert not (set(bheadrevs) & set(newheadrevs))
301 # assert not (set(bheadrevs) & set(newheadrevs))
299 bheadset.update(newheadrevs)
302 bheadset.update(newheadrevs)
300
303
301 # This prunes out two kinds of heads - heads that are superseded by
304 # This prunes out two kinds of heads - heads that are superseded by
302 # a head in newheadrevs, and newheadrevs that are not heads because
305 # a head in newheadrevs, and newheadrevs that are not heads because
303 # an existing head is their descendant.
306 # an existing head is their descendant.
304 uncertain = bheadset - topoheads
307 uncertain = bheadset - topoheads
305 if uncertain:
308 if uncertain:
306 floorrev = min(uncertain)
309 floorrev = min(uncertain)
307 ancestors = set(cl.ancestors(newheadrevs, floorrev))
310 ancestors = set(cl.ancestors(newheadrevs, floorrev))
308 bheadset -= ancestors
311 bheadset -= ancestors
309 bheadrevs = sorted(bheadset)
312 bheadrevs = sorted(bheadset)
310 self[branch] = [cl.node(rev) for rev in bheadrevs]
313 self[branch] = [cl.node(rev) for rev in bheadrevs]
311 tiprev = bheadrevs[-1]
314 tiprev = bheadrevs[-1]
312 if tiprev > self.tiprev:
315 if tiprev > self.tiprev:
313 self.tipnode = cl.node(tiprev)
316 self.tipnode = cl.node(tiprev)
314 self.tiprev = tiprev
317 self.tiprev = tiprev
315
318
316 if not self.validfor(repo):
319 if not self.validfor(repo):
317 # cache key are not valid anymore
320 # cache key are not valid anymore
318 self.tipnode = nullid
321 self.tipnode = nullid
319 self.tiprev = nullrev
322 self.tiprev = nullrev
320 for heads in self.values():
323 for heads in self.values():
321 tiprev = max(cl.rev(node) for node in heads)
324 tiprev = max(cl.rev(node) for node in heads)
322 if tiprev > self.tiprev:
325 if tiprev > self.tiprev:
323 self.tipnode = cl.node(tiprev)
326 self.tipnode = cl.node(tiprev)
324 self.tiprev = tiprev
327 self.tiprev = tiprev
325 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
328 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
326
329
327 duration = util.timer() - starttime
330 duration = util.timer() - starttime
328 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
331 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
329 repo.filtername, duration)
332 repo.filtername, duration)
330
333
331 # Revision branch info cache
334 # Revision branch info cache
332
335
333 _rbcversion = '-v1'
336 _rbcversion = '-v1'
334 _rbcnames = 'rbc-names' + _rbcversion
337 _rbcnames = 'rbc-names' + _rbcversion
335 _rbcrevs = 'rbc-revs' + _rbcversion
338 _rbcrevs = 'rbc-revs' + _rbcversion
336 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
339 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
337 _rbcrecfmt = '>4sI'
340 _rbcrecfmt = '>4sI'
338 _rbcrecsize = calcsize(_rbcrecfmt)
341 _rbcrecsize = calcsize(_rbcrecfmt)
339 _rbcnodelen = 4
342 _rbcnodelen = 4
340 _rbcbranchidxmask = 0x7fffffff
343 _rbcbranchidxmask = 0x7fffffff
341 _rbccloseflag = 0x80000000
344 _rbccloseflag = 0x80000000
342
345
343 class revbranchcache(object):
346 class revbranchcache(object):
344 """Persistent cache, mapping from revision number to branch name and close.
347 """Persistent cache, mapping from revision number to branch name and close.
345 This is a low level cache, independent of filtering.
348 This is a low level cache, independent of filtering.
346
349
347 Branch names are stored in rbc-names in internal encoding separated by 0.
350 Branch names are stored in rbc-names in internal encoding separated by 0.
348 rbc-names is append-only, and each branch name is only stored once and will
351 rbc-names is append-only, and each branch name is only stored once and will
349 thus have a unique index.
352 thus have a unique index.
350
353
351 The branch info for each revision is stored in rbc-revs as constant size
354 The branch info for each revision is stored in rbc-revs as constant size
352 records. The whole file is read into memory, but it is only 'parsed' on
355 records. The whole file is read into memory, but it is only 'parsed' on
353 demand. The file is usually append-only but will be truncated if repo
356 demand. The file is usually append-only but will be truncated if repo
354 modification is detected.
357 modification is detected.
355 The record for each revision contains the first 4 bytes of the
358 The record for each revision contains the first 4 bytes of the
356 corresponding node hash, and the record is only used if it still matches.
359 corresponding node hash, and the record is only used if it still matches.
357 Even a completely trashed rbc-revs fill thus still give the right result
360 Even a completely trashed rbc-revs fill thus still give the right result
358 while converging towards full recovery ... assuming no incorrectly matching
361 while converging towards full recovery ... assuming no incorrectly matching
359 node hashes.
362 node hashes.
360 The record also contains 4 bytes where 31 bits contains the index of the
363 The record also contains 4 bytes where 31 bits contains the index of the
361 branch and the last bit indicate that it is a branch close commit.
364 branch and the last bit indicate that it is a branch close commit.
362 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
365 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
363 and will grow with it but be 1/8th of its size.
366 and will grow with it but be 1/8th of its size.
364 """
367 """
365
368
366 def __init__(self, repo, readonly=True):
369 def __init__(self, repo, readonly=True):
367 assert repo.filtername is None
370 assert repo.filtername is None
368 self._repo = repo
371 self._repo = repo
369 self._names = [] # branch names in local encoding with static index
372 self._names = [] # branch names in local encoding with static index
370 self._rbcrevs = bytearray()
373 self._rbcrevs = bytearray()
371 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
374 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
372 try:
375 try:
373 bndata = repo.cachevfs.read(_rbcnames)
376 bndata = repo.cachevfs.read(_rbcnames)
374 self._rbcsnameslen = len(bndata) # for verification before writing
377 self._rbcsnameslen = len(bndata) # for verification before writing
375 if bndata:
378 if bndata:
376 self._names = [encoding.tolocal(bn)
379 self._names = [encoding.tolocal(bn)
377 for bn in bndata.split('\0')]
380 for bn in bndata.split('\0')]
378 except (IOError, OSError):
381 except (IOError, OSError):
379 if readonly:
382 if readonly:
380 # don't try to use cache - fall back to the slow path
383 # don't try to use cache - fall back to the slow path
381 self.branchinfo = self._branchinfo
384 self.branchinfo = self._branchinfo
382
385
383 if self._names:
386 if self._names:
384 try:
387 try:
385 data = repo.cachevfs.read(_rbcrevs)
388 data = repo.cachevfs.read(_rbcrevs)
386 self._rbcrevs[:] = data
389 self._rbcrevs[:] = data
387 except (IOError, OSError) as inst:
390 except (IOError, OSError) as inst:
388 repo.ui.debug("couldn't read revision branch cache: %s\n" %
391 repo.ui.debug("couldn't read revision branch cache: %s\n" %
389 stringutil.forcebytestr(inst))
392 stringutil.forcebytestr(inst))
390 # remember number of good records on disk
393 # remember number of good records on disk
391 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
394 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
392 len(repo.changelog))
395 len(repo.changelog))
393 if self._rbcrevslen == 0:
396 if self._rbcrevslen == 0:
394 self._names = []
397 self._names = []
395 self._rbcnamescount = len(self._names) # number of names read at
398 self._rbcnamescount = len(self._names) # number of names read at
396 # _rbcsnameslen
399 # _rbcsnameslen
397 self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
400 self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
398
401
399 def _clear(self):
402 def _clear(self):
400 self._rbcsnameslen = 0
403 self._rbcsnameslen = 0
401 del self._names[:]
404 del self._names[:]
402 self._rbcnamescount = 0
405 self._rbcnamescount = 0
403 self._namesreverse.clear()
406 self._namesreverse.clear()
404 self._rbcrevslen = len(self._repo.changelog)
407 self._rbcrevslen = len(self._repo.changelog)
405 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
408 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
406
409
407 def branchinfo(self, rev, changelog=None):
410 def branchinfo(self, rev, changelog=None):
408 """Return branch name and close flag for rev, using and updating
411 """Return branch name and close flag for rev, using and updating
409 persistent cache."""
412 persistent cache."""
410 changelog = changelog or self._repo.changelog
413 changelog = changelog or self._repo.changelog
411 rbcrevidx = rev * _rbcrecsize
414 rbcrevidx = rev * _rbcrecsize
412
415
413 # avoid negative index, changelog.read(nullrev) is fast without cache
416 # avoid negative index, changelog.read(nullrev) is fast without cache
414 if rev == nullrev:
417 if rev == nullrev:
415 return changelog.branchinfo(rev)
418 return changelog.branchinfo(rev)
416
419
417 # if requested rev isn't allocated, grow and cache the rev info
420 # if requested rev isn't allocated, grow and cache the rev info
418 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
421 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
419 return self._branchinfo(rev, changelog=changelog)
422 return self._branchinfo(rev, changelog=changelog)
420
423
421 # fast path: extract data from cache, use it if node is matching
424 # fast path: extract data from cache, use it if node is matching
422 reponode = changelog.node(rev)[:_rbcnodelen]
425 reponode = changelog.node(rev)[:_rbcnodelen]
423 cachenode, branchidx = unpack_from(
426 cachenode, branchidx = unpack_from(
424 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
427 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
425 close = bool(branchidx & _rbccloseflag)
428 close = bool(branchidx & _rbccloseflag)
426 if close:
429 if close:
427 branchidx &= _rbcbranchidxmask
430 branchidx &= _rbcbranchidxmask
428 if cachenode == '\0\0\0\0':
431 if cachenode == '\0\0\0\0':
429 pass
432 pass
430 elif cachenode == reponode:
433 elif cachenode == reponode:
431 try:
434 try:
432 return self._names[branchidx], close
435 return self._names[branchidx], close
433 except IndexError:
436 except IndexError:
434 # recover from invalid reference to unknown branch
437 # recover from invalid reference to unknown branch
435 self._repo.ui.debug("referenced branch names not found"
438 self._repo.ui.debug("referenced branch names not found"
436 " - rebuilding revision branch cache from scratch\n")
439 " - rebuilding revision branch cache from scratch\n")
437 self._clear()
440 self._clear()
438 else:
441 else:
439 # rev/node map has changed, invalidate the cache from here up
442 # rev/node map has changed, invalidate the cache from here up
440 self._repo.ui.debug("history modification detected - truncating "
443 self._repo.ui.debug("history modification detected - truncating "
441 "revision branch cache to revision %d\n" % rev)
444 "revision branch cache to revision %d\n" % rev)
442 truncate = rbcrevidx + _rbcrecsize
445 truncate = rbcrevidx + _rbcrecsize
443 del self._rbcrevs[truncate:]
446 del self._rbcrevs[truncate:]
444 self._rbcrevslen = min(self._rbcrevslen, truncate)
447 self._rbcrevslen = min(self._rbcrevslen, truncate)
445
448
446 # fall back to slow path and make sure it will be written to disk
449 # fall back to slow path and make sure it will be written to disk
447 return self._branchinfo(rev, changelog=changelog)
450 return self._branchinfo(rev, changelog=changelog)
448
451
449 def _branchinfo(self, rev, changelog=None):
452 def _branchinfo(self, rev, changelog=None):
450 """Retrieve branch info from changelog and update _rbcrevs"""
453 """Retrieve branch info from changelog and update _rbcrevs"""
451 changelog = changelog or self._repo.changelog
454 changelog = changelog or self._repo.changelog
452 b, close = changelog.branchinfo(rev)
455 b, close = changelog.branchinfo(rev)
453 if b in self._namesreverse:
456 if b in self._namesreverse:
454 branchidx = self._namesreverse[b]
457 branchidx = self._namesreverse[b]
455 else:
458 else:
456 branchidx = len(self._names)
459 branchidx = len(self._names)
457 self._names.append(b)
460 self._names.append(b)
458 self._namesreverse[b] = branchidx
461 self._namesreverse[b] = branchidx
459 reponode = changelog.node(rev)
462 reponode = changelog.node(rev)
460 if close:
463 if close:
461 branchidx |= _rbccloseflag
464 branchidx |= _rbccloseflag
462 self._setcachedata(rev, reponode, branchidx, changelog)
465 self._setcachedata(rev, reponode, branchidx, changelog)
463 return b, close
466 return b, close
464
467
465 def setdata(self, branch, rev, node, close):
468 def setdata(self, branch, rev, node, close):
466 """add new data information to the cache"""
469 """add new data information to the cache"""
467 if branch in self._namesreverse:
470 if branch in self._namesreverse:
468 branchidx = self._namesreverse[branch]
471 branchidx = self._namesreverse[branch]
469 else:
472 else:
470 branchidx = len(self._names)
473 branchidx = len(self._names)
471 self._names.append(branch)
474 self._names.append(branch)
472 self._namesreverse[branch] = branchidx
475 self._namesreverse[branch] = branchidx
473 if close:
476 if close:
474 branchidx |= _rbccloseflag
477 branchidx |= _rbccloseflag
475 self._setcachedata(rev, node, branchidx)
478 self._setcachedata(rev, node, branchidx)
476 # If no cache data were readable (non exists, bad permission, etc)
479 # If no cache data were readable (non exists, bad permission, etc)
477 # the cache was bypassing itself by setting:
480 # the cache was bypassing itself by setting:
478 #
481 #
479 # self.branchinfo = self._branchinfo
482 # self.branchinfo = self._branchinfo
480 #
483 #
481 # Since we now have data in the cache, we need to drop this bypassing.
484 # Since we now have data in the cache, we need to drop this bypassing.
482 if r'branchinfo' in vars(self):
485 if r'branchinfo' in vars(self):
483 del self.branchinfo
486 del self.branchinfo
484
487
485 def _setcachedata(self, rev, node, branchidx, changelog=None):
488 def _setcachedata(self, rev, node, branchidx, changelog=None):
486 """Writes the node's branch data to the in-memory cache data."""
489 """Writes the node's branch data to the in-memory cache data."""
487 if rev == nullrev:
490 if rev == nullrev:
488 return
491 return
489
492
490 changelog = changelog or self._repo.changelog
493 changelog = changelog or self._repo.changelog
491 rbcrevidx = rev * _rbcrecsize
494 rbcrevidx = rev * _rbcrecsize
492 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
495 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
493 self._rbcrevs.extend('\0' *
496 self._rbcrevs.extend('\0' *
494 (len(changelog) * _rbcrecsize -
497 (len(changelog) * _rbcrecsize -
495 len(self._rbcrevs)))
498 len(self._rbcrevs)))
496 pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
499 pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
497 self._rbcrevslen = min(self._rbcrevslen, rev)
500 self._rbcrevslen = min(self._rbcrevslen, rev)
498
501
499 tr = self._repo.currenttransaction()
502 tr = self._repo.currenttransaction()
500 if tr:
503 if tr:
501 tr.addfinalize('write-revbranchcache', self.write)
504 tr.addfinalize('write-revbranchcache', self.write)
502
505
    def write(self, tr=None):
        """Save branch cache if it is dirty.

        Best-effort: any I/O, abort, or lock-contention error is swallowed
        and only reported at debug level, since the cache can always be
        rebuilt.  ``tr`` is accepted for the transaction-finalizer calling
        convention but is not used here.
        """
        repo = self._repo
        wlock = None
        step = ''
        try:
            # Part 1: persist any branch names added since the last write.
            if self._rbcnamescount < len(self._names):
                step = ' names'
                wlock = repo.wlock(wait=False)
                if self._rbcnamescount != 0:
                    # Try to append to the existing names file.
                    f = repo.cachevfs.open(_rbcnames, 'ab')
                    if f.tell() == self._rbcsnameslen:
                        f.write('\0')
                    else:
                        # On-disk file diverged from our view of it:
                        # invalidate and rewrite both files from scratch.
                        f.close()
                        repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                        self._rbcnamescount = 0
                        self._rbcrevslen = 0
                if self._rbcnamescount == 0:
                    # before rewriting names, make sure references are removed
                    repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                    f = repo.cachevfs.open(_rbcnames, 'wb')
                    f.write('\0'.join(encoding.fromlocal(b)
                                      for b in self._names[self._rbcnamescount:]))
                    self._rbcsnameslen = f.tell()
                    f.close()
                self._rbcnamescount = len(self._names)

            # Part 2: persist revision records past the already-written
            # prefix (``_rbcrevslen`` counts records known to be on disk).
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = ''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                # Never write records for revisions beyond the changelog.
                revs = min(len(repo.changelog),
                           len(self._rbcrevs) // _rbcrecsize)
                f = repo.cachevfs.open(_rbcrevs, 'ab')
                if f.tell() != start:
                    repo.ui.debug("truncating cache/%s to %d\n"
                                  % (_rbcrevs, start))
                    f.seek(start)
                    if f.tell() != start:
                        # Seek failed to reach the target offset (e.g. the
                        # file is shorter than expected): rewrite fully.
                        start = 0
                        f.seek(start)
                    f.truncate()
                end = revs * _rbcrecsize
                f.write(self._rbcrevs[start:end])
                f.close()
                self._rbcrevslen = revs
        except (IOError, OSError, error.Abort, error.LockError) as inst:
            repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                          % (step, stringutil.forcebytestr(inst)))
        finally:
            if wlock is not None:
                wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now