##// END OF EJS Templates
Mercurial changeset r39174:8547c859 (branch: default) by Yuya Nishihara —
"branchmap: explicitly convert file into iterator"
(diff hunk: @@ -1,554 +1,555 @'')
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11
11
12 from .node import (
12 from .node import (
13 bin,
13 bin,
14 hex,
14 hex,
15 nullid,
15 nullid,
16 nullrev,
16 nullrev,
17 )
17 )
18 from . import (
18 from . import (
19 encoding,
19 encoding,
20 error,
20 error,
21 pycompat,
21 pycompat,
22 scmutil,
22 scmutil,
23 util,
23 util,
24 )
24 )
25 from .utils import (
25 from .utils import (
26 stringutil,
26 stringutil,
27 )
27 )
28
28
29 calcsize = struct.calcsize
29 calcsize = struct.calcsize
30 pack_into = struct.pack_into
30 pack_into = struct.pack_into
31 unpack_from = struct.unpack_from
31 unpack_from = struct.unpack_from
32
32
33 def _filename(repo):
33 def _filename(repo):
34 """name of a branchcache file for a given repo or repoview"""
34 """name of a branchcache file for a given repo or repoview"""
35 filename = "branch2"
35 filename = "branch2"
36 if repo.filtername:
36 if repo.filtername:
37 filename = '%s-%s' % (filename, repo.filtername)
37 filename = '%s-%s' % (filename, repo.filtername)
38 return filename
38 return filename
39
39
def read(repo):
    """Read the on-disk branch cache for *repo* and return a branchcache.

    Returns None when the cache file is missing/unreadable, or when its
    content is stale or malformed for the current repo state.
    """
    f = None
    try:
        f = repo.cachevfs(_filename(repo))
        # Wrap the file in an explicit iterator so the header consumed by
        # next() and the body consumed by the for-loop share one cursor.
        lineiter = iter(f)
        # Header line: "<tip hex node> <tip rev> [optional filtered hash]"
        cachekey = next(lineiter).rstrip('\n').split(" ", 2)
        last, lrev = cachekey[:2]
        last, lrev = bin(last), int(lrev)
        filteredhash = None
        if len(cachekey) > 2:
            filteredhash = bin(cachekey[2])
        partial = branchcache(tipnode=last, tiprev=lrev,
                              filteredhash=filteredhash)
        if not partial.validfor(repo):
            # invalidate the cache
            raise ValueError(r'tip differs')
        cl = repo.changelog
        # Body lines: "<branch head hex node> <o|c> <branch name>"
        for l in lineiter:
            l = l.rstrip('\n')
            if not l:
                continue
            node, state, label = l.split(" ", 2)
            if state not in 'oc':
                raise ValueError(r'invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            if not cl.hasnode(node):
                raise ValueError(
                    r'node %s does not exist' % pycompat.sysstr(hex(node)))
            partial.setdefault(label, []).append(node)
            if state == 'c':
                partial._closednodes.add(node)

    except (IOError, OSError):
        # No usable cache file: caller falls back to recomputation.
        return None

    except Exception as inst:
        # Any parse/validation failure discards the cache; only report it
        # when debugging, since recomputation is the normal recovery path.
        if repo.ui.debugflag:
            msg = 'invalid branchheads cache'
            if repo.filtername is not None:
                msg += ' (%s)' % repo.filtername
            msg += ': %s\n'
            repo.ui.debug(msg % pycompat.bytestr(inst))
        partial = None

    finally:
        if f:
            f.close()

    return partial
89
90
### Nearest subset relation
# Nearest subset of filter X is a filter Y so that:
# * Y is included in X,
# * X - Y is as small as possible.
# This creates an ordering used for branchmap purposes.
# The ordering may be partial.
subsettable = {None: 'visible',
               'visible-hidden': 'visible',
               'visible': 'served',
               'served': 'immutable',
               'immutable': 'base'}
101
102
def updatecache(repo):
    """Bring the branch cache for *repo* up to date, creating it if needed.

    Lookup order: in-memory cache, on-disk cache, then a copy of the
    nearest subset filter's cache (see ``subsettable``), before falling
    back to a full recomputation from the changelog.
    """
    cl = repo.changelog
    filtername = repo.filtername
    partial = repo._branchcaches.get(filtername)

    revs = []
    if partial is None or not partial.validfor(repo):
        partial = read(repo)
        if partial is None:
            subsetname = subsettable.get(filtername)
            if subsetname is None:
                # No subset to start from: build from scratch.
                partial = branchcache()
            else:
                # Seed from the nearest subset's cache and replay the
                # revisions visible here but filtered out there.
                subset = repo.filtered(subsetname)
                partial = subset.branchmap().copy()
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= partial.tiprev)
    revs.extend(cl.revs(start=partial.tiprev + 1))
    if revs:
        partial.update(repo, revs)
        partial.write(repo)

    assert partial.validfor(repo), filtername
    repo._branchcaches[repo.filtername] = partial
126
127
def replacecache(repo, bm):
    """Replace the branchmap cache for a repo with a branch mapping.

    This is likely only called during clone with a branch map from a remote.
    """
    rbheads = []
    closed = []
    # Flatten the mapping's heads and record which of them close a branch.
    for bheads in bm.itervalues():
        rbheads.extend(bheads)
        for h in bheads:
            r = repo.changelog.rev(h)
            b, c = repo.changelog.branchinfo(r)
            if c:
                closed.append(h)

    if rbheads:
        # The cache tip is the highest-revision head in the mapping.
        rtiprev = max((int(repo.changelog.rev(node))
                       for node in rbheads))
        cache = branchcache(bm,
                            repo[rtiprev].node(),
                            rtiprev,
                            closednodes=closed)

        # Try to stick it as low as possible
        # filter above served are unlikely to be fetch from a clone
        for candidate in ('base', 'immutable', 'served'):
            rview = repo.filtered(candidate)
            if cache.validfor(rview):
                repo._branchcaches[candidate] = cache
                cache.write(rview)
                break
158
159
class branchcache(dict):
    """A dict like object that holds branches heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.  It maps branch name -> list of head nodes.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                 filteredhash=None, closednodes=None):
        super(branchcache, self).__init__(entries)
        self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closednodes is None:
            self._closednodes = set()
        else:
            self._closednodes = closednodes

    def validfor(self, repo):
        """Is the cache content valid regarding a repo

        - False when cached tipnode is unknown or if we detect a strip.
        - True when cache is up to date or a subset of current repo."""
        try:
            return ((self.tipnode == repo.changelog.node(self.tiprev))
                    and (self.filteredhash == \
                         scmutil.filteredhash(repo, self.tiprev)))
        except IndexError:
            # tiprev no longer exists in the changelog (e.g. after a strip).
            return False

    def _branchtip(self, heads):
        '''Return tuple with last open head in heads and false,
        otherwise return last closed head and true.'''
        tip = heads[-1]
        closed = True
        # Walk backwards so the most recent open head wins.
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        '''Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch.'''
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        # Lazily filter out heads that close their branch.
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        """Return the head nodes of *branch*; include closed heads only
        when *closed* is true.  Raises KeyError for an unknown branch."""
        heads = self[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        # Yield (branchname, heads, branchtip, isclosed) tuples.
        for bn, heads in self.iteritems():
            yield (bn, heads) + self._branchtip(heads)

    def copy(self):
        """return a deep copy of the branchcache object"""
        return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
                           self._closednodes)

    def write(self, repo):
        """Serialize the cache to disk (best effort; failures are only
        logged, since the cache can always be recomputed)."""
        try:
            f = repo.cachevfs(_filename(repo), "w", atomictemp=True)
            # Header line (see class docstring for the format).
            cachekey = [hex(self.tipnode), '%d' % self.tiprev]
            if self.filteredhash is not None:
                cachekey.append(hex(self.filteredhash))
            f.write(" ".join(cachekey) + '\n')
            nodecount = 0
            for label, nodes in sorted(self.iteritems()):
                for node in nodes:
                    nodecount += 1
                    if node in self._closednodes:
                        state = 'c'
                    else:
                        state = 'o'
                    f.write("%s %s %s\n" % (hex(node), state,
                                            encoding.fromlocal(label)))
            f.close()
            repo.ui.log('branchcache',
                        'wrote %s branch cache with %d labels and %d nodes\n',
                        repo.filtername, len(self), nodecount)
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug("couldn't write branch cache: %s\n" %
                          stringutil.forcebytestr(inst))

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        heads missing, this function updates self to be correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))

        # fetch current topological heads to speed up filtering
        topoheads = set(cl.headrevs())

        # if older branchheads are reachable from new ones, they aren't
        # really branchheads. Note checking parents is insufficient:
        # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
        for branch, newheadrevs in newbranches.iteritems():
            bheads = self.setdefault(branch, [])
            bheadset = set(cl.rev(node) for node in bheads)

            # This have been tested True on all internal usage of this function.
            # run it again in case of doubt
            # assert not (set(bheadrevs) & set(newheadrevs))
            newheadrevs.sort()
            bheadset.update(newheadrevs)

            # This prunes out two kinds of heads - heads that are superseded by
            # a head in newheadrevs, and newheadrevs that are not heads because
            # an existing head is their descendant.
            uncertain = bheadset - topoheads
            if uncertain:
                floorrev = min(uncertain)
                ancestors = set(cl.ancestors(newheadrevs, floorrev))
                bheadset -= ancestors
            bheadrevs = sorted(bheadset)
            self[branch] = [cl.node(rev) for rev in bheadrevs]
            tiprev = bheadrevs[-1]
            if tiprev > self.tiprev:
                self.tipnode = cl.node(tiprev)
                self.tiprev = tiprev

        if not self.validfor(repo):
            # cache key are not valid anymore
            self.tipnode = nullid
            self.tiprev = nullrev
            # Recompute the tip from the surviving heads.
            for heads in self.values():
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(repo, self.tiprev)

        duration = util.timer() - starttime
        repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                    repo.filtername, duration)
330
331
# Revision branch info cache

_rbcversion = '-v1'
_rbcnames = 'rbc-names' + _rbcversion
_rbcrevs = 'rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = '>4sI'
_rbcrecsize = calcsize(_rbcrecfmt)
_rbcnodelen = 4
# Lower 31 bits of the record's second field index into rbc-names;
# the high bit flags a branch-closing commit.
_rbcbranchidxmask = 0x7fffffff
_rbccloseflag = 0x80000000
342
343
343 class revbranchcache(object):
344 class revbranchcache(object):
344 """Persistent cache, mapping from revision number to branch name and close.
345 """Persistent cache, mapping from revision number to branch name and close.
345 This is a low level cache, independent of filtering.
346 This is a low level cache, independent of filtering.
346
347
347 Branch names are stored in rbc-names in internal encoding separated by 0.
348 Branch names are stored in rbc-names in internal encoding separated by 0.
348 rbc-names is append-only, and each branch name is only stored once and will
349 rbc-names is append-only, and each branch name is only stored once and will
349 thus have a unique index.
350 thus have a unique index.
350
351
351 The branch info for each revision is stored in rbc-revs as constant size
352 The branch info for each revision is stored in rbc-revs as constant size
352 records. The whole file is read into memory, but it is only 'parsed' on
353 records. The whole file is read into memory, but it is only 'parsed' on
353 demand. The file is usually append-only but will be truncated if repo
354 demand. The file is usually append-only but will be truncated if repo
354 modification is detected.
355 modification is detected.
355 The record for each revision contains the first 4 bytes of the
356 The record for each revision contains the first 4 bytes of the
356 corresponding node hash, and the record is only used if it still matches.
357 corresponding node hash, and the record is only used if it still matches.
357 Even a completely trashed rbc-revs fill thus still give the right result
358 Even a completely trashed rbc-revs fill thus still give the right result
358 while converging towards full recovery ... assuming no incorrectly matching
359 while converging towards full recovery ... assuming no incorrectly matching
359 node hashes.
360 node hashes.
360 The record also contains 4 bytes where 31 bits contains the index of the
361 The record also contains 4 bytes where 31 bits contains the index of the
361 branch and the last bit indicate that it is a branch close commit.
362 branch and the last bit indicate that it is a branch close commit.
362 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
363 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
363 and will grow with it but be 1/8th of its size.
364 and will grow with it but be 1/8th of its size.
364 """
365 """
365
366
    def __init__(self, repo, readonly=True):
        """Load the rbc-names and rbc-revs cache files, tolerating any
        read failure (the cache degrades to the slow path)."""
        # This low-level cache is keyed on unfiltered revision numbers.
        assert repo.filtername is None
        self._repo = repo
        self._names = [] # branch names in local encoding with static index
        self._rbcrevs = bytearray()
        self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
        try:
            bndata = repo.cachevfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata) # for verification before writing
            if bndata:
                self._names = [encoding.tolocal(bn)
                               for bn in bndata.split('\0')]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        if self._names:
            try:
                data = repo.cachevfs.read(_rbcrevs)
                self._rbcrevs[:] = data
            except (IOError, OSError) as inst:
                repo.ui.debug("couldn't read revision branch cache: %s\n" %
                              stringutil.forcebytestr(inst))
        # remember number of good records on disk
        self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
                               len(repo.changelog))
        if self._rbcrevslen == 0:
            # No usable records: the names list is meaningless too.
            self._names = []
        self._rbcnamescount = len(self._names) # number of names read at
                                               # _rbcsnameslen
        self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
398
399
    def _clear(self):
        """Discard all cached data and start over with zeroed records."""
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._namesreverse.clear()
        self._rbcrevslen = len(self._repo.changelog)
        # Zero-filled records: the all-zero node prefix marks "not cached".
        self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
406
407
    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = unpack_from(
            _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == '\0\0\0\0':
            # Record was never filled in; fall through to the slow path.
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug("referenced branch names not found"
                    " - rebuilding revision branch cache from scratch\n")
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug("history modification detected - truncating "
                "revision branch cache to revision %d\n" % rev)
            truncate = rbcrevidx + _rbcrecsize
            del self._rbcrevs[truncate:]
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)
448
449
    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        # Intern the branch name, assigning a new static index if unseen.
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close
464
465
    def setdata(self, branch, rev, node, close):
        """add new data information to the cache"""
        # Intern the branch name, assigning a new static index if unseen.
        if branch in self._namesreverse:
            branchidx = self._namesreverse[branch]
        else:
            branchidx = len(self._names)
            self._names.append(branch)
            self._namesreverse[branch] = branchidx
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, node, branchidx)
        # If no cache data were readable (non exists, bad permission, etc)
        # the cache was bypassing itself by setting:
        #
        #   self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypassing.
        if 'branchinfo' in vars(self):
            del self.branchinfo
484
485
def _setcachedata(self, rev, node, branchidx):
    """Writes the node's branch data to the in-memory cache data.

    The record for ``rev`` (its node hash plus the interned branch
    index) is packed in place into the ``self._rbcrevs`` byte buffer;
    the buffer is grown with NUL padding first if it is too short.
    ``nullrev`` is never cached.
    """
    if rev == nullrev:
        return
    rbcrevidx = rev * _rbcrecsize
    if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
        # pad the buffer out to one record per changelog revision
        self._rbcrevs.extend('\0' *
                             (len(self._repo.changelog) * _rbcrecsize -
                              len(self._rbcrevs)))
    pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
    # remember the lowest rewritten revision so write() knows where the
    # on-disk cache file diverges from memory
    self._rbcrevslen = min(self._rbcrevslen, rev)

    tr = self._repo.currenttransaction()
    if tr:
        tr.addfinalize('write-revbranchcache', self.write)
500
501
def write(self, tr=None):
    """Save branch cache if it is dirty.

    Appends any newly interned branch names to the names cache file and
    any new/changed revision records to the revs cache file, taking the
    wlock lazily (non-blocking) only when there is something to write.
    All I/O and locking errors are swallowed after a debug message: the
    rev branch cache is best-effort and must never abort the caller.
    """
    repo = self._repo
    wlock = None
    step = ''
    try:
        if self._rbcnamescount < len(self._names):
            step = ' names'
            wlock = repo.wlock(wait=False)
            if self._rbcnamescount != 0:
                f = repo.cachevfs.open(_rbcnames, 'ab')
                if f.tell() == self._rbcsnameslen:
                    # file still matches what we last wrote: append
                    f.write('\0')
                else:
                    # someone else rewrote it: start over from scratch
                    f.close()
                    repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                    self._rbcnamescount = 0
                    self._rbcrevslen = 0
            if self._rbcnamescount == 0:
                # before rewriting names, make sure references are removed
                repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                f = repo.cachevfs.open(_rbcnames, 'wb')
            f.write('\0'.join(encoding.fromlocal(b)
                              for b in self._names[self._rbcnamescount:]))
            self._rbcsnameslen = f.tell()
            f.close()
            self._rbcnamescount = len(self._names)

        start = self._rbcrevslen * _rbcrecsize
        if start != len(self._rbcrevs):
            step = ''
            if wlock is None:
                wlock = repo.wlock(wait=False)
            revs = min(len(repo.changelog),
                       len(self._rbcrevs) // _rbcrecsize)
            f = repo.cachevfs.open(_rbcrevs, 'ab')
            if f.tell() != start:
                # on-disk file is shorter/longer than expected; truncate
                # back to the last point we agree on (or rewrite fully)
                repo.ui.debug("truncating cache/%s to %d\n"
                              % (_rbcrevs, start))
                f.seek(start)
                if f.tell() != start:
                    start = 0
                    f.seek(start)
                f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs[start:end])
            f.close()
            self._rbcrevslen = revs
    except (IOError, OSError, error.Abort, error.LockError) as inst:
        repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                      % (step, stringutil.forcebytestr(inst)))
    finally:
        if wlock is not None:
            wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now