##// END OF EJS Templates
branchmap: prevent reading the file twice through different iterators
Author: Pulkit Goyal
Changeset: r41974:8ad46ac6 (branch: default)
@@ -1,600 +1,600 b''
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11
11
12 from .node import (
12 from .node import (
13 bin,
13 bin,
14 hex,
14 hex,
15 nullid,
15 nullid,
16 nullrev,
16 nullrev,
17 )
17 )
18 from . import (
18 from . import (
19 encoding,
19 encoding,
20 error,
20 error,
21 pycompat,
21 pycompat,
22 scmutil,
22 scmutil,
23 util,
23 util,
24 )
24 )
25 from .utils import (
25 from .utils import (
26 stringutil,
26 stringutil,
27 )
27 )
28
28
29 calcsize = struct.calcsize
29 calcsize = struct.calcsize
30 pack_into = struct.pack_into
30 pack_into = struct.pack_into
31 unpack_from = struct.unpack_from
31 unpack_from = struct.unpack_from
32
32
33
33
34 ### Nearest subset relation
34 ### Nearest subset relation
35 # Nearest subset of filter X is a filter Y so that:
35 # Nearest subset of filter X is a filter Y so that:
36 # * Y is included in X,
36 # * Y is included in X,
37 # * X - Y is as small as possible.
37 # * X - Y is as small as possible.
38 # This create and ordering used for branchmap purpose.
38 # This create and ordering used for branchmap purpose.
39 # the ordering may be partial
39 # the ordering may be partial
40 subsettable = {None: 'visible',
40 subsettable = {None: 'visible',
41 'visible-hidden': 'visible',
41 'visible-hidden': 'visible',
42 'visible': 'served',
42 'visible': 'served',
43 'served': 'immutable',
43 'served': 'immutable',
44 'immutable': 'base'}
44 'immutable': 'base'}
45
45
46
46
47 class BranchMapCache(object):
47 class BranchMapCache(object):
48 """mapping of filtered views of repo with their branchcache"""
48 """mapping of filtered views of repo with their branchcache"""
49 def __init__(self):
49 def __init__(self):
50 self._per_filter = {}
50 self._per_filter = {}
51
51
52 def __getitem__(self, repo):
52 def __getitem__(self, repo):
53 self.updatecache(repo)
53 self.updatecache(repo)
54 return self._per_filter[repo.filtername]
54 return self._per_filter[repo.filtername]
55
55
56 def updatecache(self, repo):
56 def updatecache(self, repo):
57 """Update the cache for the given filtered view on a repository"""
57 """Update the cache for the given filtered view on a repository"""
58 # This can trigger updates for the caches for subsets of the filtered
58 # This can trigger updates for the caches for subsets of the filtered
59 # view, e.g. when there is no cache for this filtered view or the cache
59 # view, e.g. when there is no cache for this filtered view or the cache
60 # is stale.
60 # is stale.
61
61
62 cl = repo.changelog
62 cl = repo.changelog
63 filtername = repo.filtername
63 filtername = repo.filtername
64 bcache = self._per_filter.get(filtername)
64 bcache = self._per_filter.get(filtername)
65 if bcache is None or not bcache.validfor(repo):
65 if bcache is None or not bcache.validfor(repo):
66 # cache object missing or cache object stale? Read from disk
66 # cache object missing or cache object stale? Read from disk
67 bcache = branchcache.fromfile(repo)
67 bcache = branchcache.fromfile(repo)
68
68
69 revs = []
69 revs = []
70 if bcache is None:
70 if bcache is None:
71 # no (fresh) cache available anymore, perhaps we can re-use
71 # no (fresh) cache available anymore, perhaps we can re-use
72 # the cache for a subset, then extend that to add info on missing
72 # the cache for a subset, then extend that to add info on missing
73 # revisions.
73 # revisions.
74 subsetname = subsettable.get(filtername)
74 subsetname = subsettable.get(filtername)
75 if subsetname is not None:
75 if subsetname is not None:
76 subset = repo.filtered(subsetname)
76 subset = repo.filtered(subsetname)
77 bcache = self[subset].copy()
77 bcache = self[subset].copy()
78 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
78 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
79 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
79 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
80 else:
80 else:
81 # nothing to fall back on, start empty.
81 # nothing to fall back on, start empty.
82 bcache = branchcache()
82 bcache = branchcache()
83
83
84 revs.extend(cl.revs(start=bcache.tiprev + 1))
84 revs.extend(cl.revs(start=bcache.tiprev + 1))
85 if revs:
85 if revs:
86 bcache.update(repo, revs)
86 bcache.update(repo, revs)
87
87
88 assert bcache.validfor(repo), filtername
88 assert bcache.validfor(repo), filtername
89 self._per_filter[repo.filtername] = bcache
89 self._per_filter[repo.filtername] = bcache
90
90
91 def replace(self, repo, remotebranchmap):
91 def replace(self, repo, remotebranchmap):
92 """Replace the branchmap cache for a repo with a branch mapping.
92 """Replace the branchmap cache for a repo with a branch mapping.
93
93
94 This is likely only called during clone with a branch map from a
94 This is likely only called during clone with a branch map from a
95 remote.
95 remote.
96
96
97 """
97 """
98 cl = repo.changelog
98 cl = repo.changelog
99 clrev = cl.rev
99 clrev = cl.rev
100 clbranchinfo = cl.branchinfo
100 clbranchinfo = cl.branchinfo
101 rbheads = []
101 rbheads = []
102 closed = []
102 closed = []
103 for bheads in remotebranchmap.itervalues():
103 for bheads in remotebranchmap.itervalues():
104 rbheads += bheads
104 rbheads += bheads
105 for h in bheads:
105 for h in bheads:
106 r = clrev(h)
106 r = clrev(h)
107 b, c = clbranchinfo(r)
107 b, c = clbranchinfo(r)
108 if c:
108 if c:
109 closed.append(h)
109 closed.append(h)
110
110
111 if rbheads:
111 if rbheads:
112 rtiprev = max((int(clrev(node)) for node in rbheads))
112 rtiprev = max((int(clrev(node)) for node in rbheads))
113 cache = branchcache(
113 cache = branchcache(
114 remotebranchmap, repo[rtiprev].node(), rtiprev,
114 remotebranchmap, repo[rtiprev].node(), rtiprev,
115 closednodes=closed)
115 closednodes=closed)
116
116
117 # Try to stick it as low as possible
117 # Try to stick it as low as possible
118 # filter above served are unlikely to be fetch from a clone
118 # filter above served are unlikely to be fetch from a clone
119 for candidate in ('base', 'immutable', 'served'):
119 for candidate in ('base', 'immutable', 'served'):
120 rview = repo.filtered(candidate)
120 rview = repo.filtered(candidate)
121 if cache.validfor(rview):
121 if cache.validfor(rview):
122 self._per_filter[candidate] = cache
122 self._per_filter[candidate] = cache
123 cache.write(rview)
123 cache.write(rview)
124 return
124 return
125
125
126 def clear(self):
126 def clear(self):
127 self._per_filter.clear()
127 self._per_filter.clear()
128
128
129
129
130 class branchcache(dict):
130 class branchcache(dict):
131 """A dict like object that hold branches heads cache.
131 """A dict like object that hold branches heads cache.
132
132
133 This cache is used to avoid costly computations to determine all the
133 This cache is used to avoid costly computations to determine all the
134 branch heads of a repo.
134 branch heads of a repo.
135
135
136 The cache is serialized on disk in the following format:
136 The cache is serialized on disk in the following format:
137
137
138 <tip hex node> <tip rev number> [optional filtered repo hex hash]
138 <tip hex node> <tip rev number> [optional filtered repo hex hash]
139 <branch head hex node> <open/closed state> <branch name>
139 <branch head hex node> <open/closed state> <branch name>
140 <branch head hex node> <open/closed state> <branch name>
140 <branch head hex node> <open/closed state> <branch name>
141 ...
141 ...
142
142
143 The first line is used to check if the cache is still valid. If the
143 The first line is used to check if the cache is still valid. If the
144 branch cache is for a filtered repo view, an optional third hash is
144 branch cache is for a filtered repo view, an optional third hash is
145 included that hashes the hashes of all filtered revisions.
145 included that hashes the hashes of all filtered revisions.
146
146
147 The open/closed state is represented by a single letter 'o' or 'c'.
147 The open/closed state is represented by a single letter 'o' or 'c'.
148 This field can be used to avoid changelog reads when determining if a
148 This field can be used to avoid changelog reads when determining if a
149 branch head closes a branch or not.
149 branch head closes a branch or not.
150 """
150 """
151
151
152 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
152 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
153 filteredhash=None, closednodes=None):
153 filteredhash=None, closednodes=None):
154 super(branchcache, self).__init__(entries)
154 super(branchcache, self).__init__(entries)
155 self.tipnode = tipnode
155 self.tipnode = tipnode
156 self.tiprev = tiprev
156 self.tiprev = tiprev
157 self.filteredhash = filteredhash
157 self.filteredhash = filteredhash
158 # closednodes is a set of nodes that close their branch. If the branch
158 # closednodes is a set of nodes that close their branch. If the branch
159 # cache has been updated, it may contain nodes that are no longer
159 # cache has been updated, it may contain nodes that are no longer
160 # heads.
160 # heads.
161 if closednodes is None:
161 if closednodes is None:
162 self._closednodes = set()
162 self._closednodes = set()
163 else:
163 else:
164 self._closednodes = closednodes
164 self._closednodes = closednodes
165
165
166 @classmethod
166 @classmethod
167 def fromfile(cls, repo):
167 def fromfile(cls, repo):
168 f = None
168 f = None
169 try:
169 try:
170 f = repo.cachevfs(cls._filename(repo))
170 f = repo.cachevfs(cls._filename(repo))
171 lineiter = iter(f)
171 lineiter = iter(f)
172 cachekey = next(lineiter).rstrip('\n').split(" ", 2)
172 cachekey = next(lineiter).rstrip('\n').split(" ", 2)
173 last, lrev = cachekey[:2]
173 last, lrev = cachekey[:2]
174 last, lrev = bin(last), int(lrev)
174 last, lrev = bin(last), int(lrev)
175 filteredhash = None
175 filteredhash = None
176 if len(cachekey) > 2:
176 if len(cachekey) > 2:
177 filteredhash = bin(cachekey[2])
177 filteredhash = bin(cachekey[2])
178 bcache = cls(tipnode=last, tiprev=lrev, filteredhash=filteredhash)
178 bcache = cls(tipnode=last, tiprev=lrev, filteredhash=filteredhash)
179 if not bcache.validfor(repo):
179 if not bcache.validfor(repo):
180 # invalidate the cache
180 # invalidate the cache
181 raise ValueError(r'tip differs')
181 raise ValueError(r'tip differs')
182 bcache.load(repo, f)
182 bcache.load(repo, lineiter)
183 except (IOError, OSError):
183 except (IOError, OSError):
184 return None
184 return None
185
185
186 except Exception as inst:
186 except Exception as inst:
187 if repo.ui.debugflag:
187 if repo.ui.debugflag:
188 msg = 'invalid branchheads cache'
188 msg = 'invalid branchheads cache'
189 if repo.filtername is not None:
189 if repo.filtername is not None:
190 msg += ' (%s)' % repo.filtername
190 msg += ' (%s)' % repo.filtername
191 msg += ': %s\n'
191 msg += ': %s\n'
192 repo.ui.debug(msg % pycompat.bytestr(inst))
192 repo.ui.debug(msg % pycompat.bytestr(inst))
193 bcache = None
193 bcache = None
194
194
195 finally:
195 finally:
196 if f:
196 if f:
197 f.close()
197 f.close()
198
198
199 return bcache
199 return bcache
200
200
201 def load(self, repo, f):
201 def load(self, repo, lineiter):
202 """ fully loads the branchcache by reading from the file f """
202 """ fully loads the branchcache by reading from the file using the line
203 iterator passed"""
203 cl = repo.changelog
204 cl = repo.changelog
204 lineiter = iter(f)
205 for line in lineiter:
205 for line in lineiter:
206 line = line.rstrip('\n')
206 line = line.rstrip('\n')
207 if not line:
207 if not line:
208 continue
208 continue
209 node, state, label = line.split(" ", 2)
209 node, state, label = line.split(" ", 2)
210 if state not in 'oc':
210 if state not in 'oc':
211 raise ValueError(r'invalid branch state')
211 raise ValueError(r'invalid branch state')
212 label = encoding.tolocal(label.strip())
212 label = encoding.tolocal(label.strip())
213 node = bin(node)
213 node = bin(node)
214 if not cl.hasnode(node):
214 if not cl.hasnode(node):
215 raise ValueError(
215 raise ValueError(
216 r'node %s does not exist' % pycompat.sysstr(hex(node)))
216 r'node %s does not exist' % pycompat.sysstr(hex(node)))
217 self.setdefault(label, []).append(node)
217 self.setdefault(label, []).append(node)
218 if state == 'c':
218 if state == 'c':
219 self._closednodes.add(node)
219 self._closednodes.add(node)
220
220
221 @staticmethod
221 @staticmethod
222 def _filename(repo):
222 def _filename(repo):
223 """name of a branchcache file for a given repo or repoview"""
223 """name of a branchcache file for a given repo or repoview"""
224 filename = "branch2"
224 filename = "branch2"
225 if repo.filtername:
225 if repo.filtername:
226 filename = '%s-%s' % (filename, repo.filtername)
226 filename = '%s-%s' % (filename, repo.filtername)
227 return filename
227 return filename
228
228
229 def validfor(self, repo):
229 def validfor(self, repo):
230 """Is the cache content valid regarding a repo
230 """Is the cache content valid regarding a repo
231
231
232 - False when cached tipnode is unknown or if we detect a strip.
232 - False when cached tipnode is unknown or if we detect a strip.
233 - True when cache is up to date or a subset of current repo."""
233 - True when cache is up to date or a subset of current repo."""
234 try:
234 try:
235 return ((self.tipnode == repo.changelog.node(self.tiprev))
235 return ((self.tipnode == repo.changelog.node(self.tiprev))
236 and (self.filteredhash ==
236 and (self.filteredhash ==
237 scmutil.filteredhash(repo, self.tiprev)))
237 scmutil.filteredhash(repo, self.tiprev)))
238 except IndexError:
238 except IndexError:
239 return False
239 return False
240
240
241 def _branchtip(self, heads):
241 def _branchtip(self, heads):
242 '''Return tuple with last open head in heads and false,
242 '''Return tuple with last open head in heads and false,
243 otherwise return last closed head and true.'''
243 otherwise return last closed head and true.'''
244 tip = heads[-1]
244 tip = heads[-1]
245 closed = True
245 closed = True
246 for h in reversed(heads):
246 for h in reversed(heads):
247 if h not in self._closednodes:
247 if h not in self._closednodes:
248 tip = h
248 tip = h
249 closed = False
249 closed = False
250 break
250 break
251 return tip, closed
251 return tip, closed
252
252
253 def branchtip(self, branch):
253 def branchtip(self, branch):
254 '''Return the tipmost open head on branch head, otherwise return the
254 '''Return the tipmost open head on branch head, otherwise return the
255 tipmost closed head on branch.
255 tipmost closed head on branch.
256 Raise KeyError for unknown branch.'''
256 Raise KeyError for unknown branch.'''
257 return self._branchtip(self[branch])[0]
257 return self._branchtip(self[branch])[0]
258
258
259 def iteropen(self, nodes):
259 def iteropen(self, nodes):
260 return (n for n in nodes if n not in self._closednodes)
260 return (n for n in nodes if n not in self._closednodes)
261
261
262 def branchheads(self, branch, closed=False):
262 def branchheads(self, branch, closed=False):
263 heads = self[branch]
263 heads = self[branch]
264 if not closed:
264 if not closed:
265 heads = list(self.iteropen(heads))
265 heads = list(self.iteropen(heads))
266 return heads
266 return heads
267
267
268 def iterbranches(self):
268 def iterbranches(self):
269 for bn, heads in self.iteritems():
269 for bn, heads in self.iteritems():
270 yield (bn, heads) + self._branchtip(heads)
270 yield (bn, heads) + self._branchtip(heads)
271
271
272 def copy(self):
272 def copy(self):
273 """return an deep copy of the branchcache object"""
273 """return an deep copy of the branchcache object"""
274 return type(self)(
274 return type(self)(
275 self, self.tipnode, self.tiprev, self.filteredhash,
275 self, self.tipnode, self.tiprev, self.filteredhash,
276 self._closednodes)
276 self._closednodes)
277
277
278 def write(self, repo):
278 def write(self, repo):
279 try:
279 try:
280 f = repo.cachevfs(self._filename(repo), "w", atomictemp=True)
280 f = repo.cachevfs(self._filename(repo), "w", atomictemp=True)
281 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
281 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
282 if self.filteredhash is not None:
282 if self.filteredhash is not None:
283 cachekey.append(hex(self.filteredhash))
283 cachekey.append(hex(self.filteredhash))
284 f.write(" ".join(cachekey) + '\n')
284 f.write(" ".join(cachekey) + '\n')
285 nodecount = 0
285 nodecount = 0
286 for label, nodes in sorted(self.iteritems()):
286 for label, nodes in sorted(self.iteritems()):
287 label = encoding.fromlocal(label)
287 label = encoding.fromlocal(label)
288 for node in nodes:
288 for node in nodes:
289 nodecount += 1
289 nodecount += 1
290 if node in self._closednodes:
290 if node in self._closednodes:
291 state = 'c'
291 state = 'c'
292 else:
292 else:
293 state = 'o'
293 state = 'o'
294 f.write("%s %s %s\n" % (hex(node), state, label))
294 f.write("%s %s %s\n" % (hex(node), state, label))
295 f.close()
295 f.close()
296 repo.ui.log('branchcache',
296 repo.ui.log('branchcache',
297 'wrote %s branch cache with %d labels and %d nodes\n',
297 'wrote %s branch cache with %d labels and %d nodes\n',
298 repo.filtername, len(self), nodecount)
298 repo.filtername, len(self), nodecount)
299 except (IOError, OSError, error.Abort) as inst:
299 except (IOError, OSError, error.Abort) as inst:
300 # Abort may be raised by read only opener, so log and continue
300 # Abort may be raised by read only opener, so log and continue
301 repo.ui.debug("couldn't write branch cache: %s\n" %
301 repo.ui.debug("couldn't write branch cache: %s\n" %
302 stringutil.forcebytestr(inst))
302 stringutil.forcebytestr(inst))
303
303
304 def update(self, repo, revgen):
304 def update(self, repo, revgen):
305 """Given a branchhead cache, self, that may have extra nodes or be
305 """Given a branchhead cache, self, that may have extra nodes or be
306 missing heads, and a generator of nodes that are strictly a superset of
306 missing heads, and a generator of nodes that are strictly a superset of
307 heads missing, this function updates self to be correct.
307 heads missing, this function updates self to be correct.
308 """
308 """
309 starttime = util.timer()
309 starttime = util.timer()
310 cl = repo.changelog
310 cl = repo.changelog
311 # collect new branch entries
311 # collect new branch entries
312 newbranches = {}
312 newbranches = {}
313 getbranchinfo = repo.revbranchcache().branchinfo
313 getbranchinfo = repo.revbranchcache().branchinfo
314 for r in revgen:
314 for r in revgen:
315 branch, closesbranch = getbranchinfo(r)
315 branch, closesbranch = getbranchinfo(r)
316 newbranches.setdefault(branch, []).append(r)
316 newbranches.setdefault(branch, []).append(r)
317 if closesbranch:
317 if closesbranch:
318 self._closednodes.add(cl.node(r))
318 self._closednodes.add(cl.node(r))
319
319
320 # fetch current topological heads to speed up filtering
320 # fetch current topological heads to speed up filtering
321 topoheads = set(cl.headrevs())
321 topoheads = set(cl.headrevs())
322
322
323 # if older branchheads are reachable from new ones, they aren't
323 # if older branchheads are reachable from new ones, they aren't
324 # really branchheads. Note checking parents is insufficient:
324 # really branchheads. Note checking parents is insufficient:
325 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
325 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
326 for branch, newheadrevs in newbranches.iteritems():
326 for branch, newheadrevs in newbranches.iteritems():
327 bheads = self.setdefault(branch, [])
327 bheads = self.setdefault(branch, [])
328 bheadset = set(cl.rev(node) for node in bheads)
328 bheadset = set(cl.rev(node) for node in bheads)
329
329
330 # This have been tested True on all internal usage of this function.
330 # This have been tested True on all internal usage of this function.
331 # run it again in case of doubt
331 # run it again in case of doubt
332 # assert not (set(bheadrevs) & set(newheadrevs))
332 # assert not (set(bheadrevs) & set(newheadrevs))
333 bheadset.update(newheadrevs)
333 bheadset.update(newheadrevs)
334
334
335 # This prunes out two kinds of heads - heads that are superseded by
335 # This prunes out two kinds of heads - heads that are superseded by
336 # a head in newheadrevs, and newheadrevs that are not heads because
336 # a head in newheadrevs, and newheadrevs that are not heads because
337 # an existing head is their descendant.
337 # an existing head is their descendant.
338 uncertain = bheadset - topoheads
338 uncertain = bheadset - topoheads
339 if uncertain:
339 if uncertain:
340 floorrev = min(uncertain)
340 floorrev = min(uncertain)
341 ancestors = set(cl.ancestors(newheadrevs, floorrev))
341 ancestors = set(cl.ancestors(newheadrevs, floorrev))
342 bheadset -= ancestors
342 bheadset -= ancestors
343 bheadrevs = sorted(bheadset)
343 bheadrevs = sorted(bheadset)
344 self[branch] = [cl.node(rev) for rev in bheadrevs]
344 self[branch] = [cl.node(rev) for rev in bheadrevs]
345 tiprev = bheadrevs[-1]
345 tiprev = bheadrevs[-1]
346 if tiprev > self.tiprev:
346 if tiprev > self.tiprev:
347 self.tipnode = cl.node(tiprev)
347 self.tipnode = cl.node(tiprev)
348 self.tiprev = tiprev
348 self.tiprev = tiprev
349
349
350 if not self.validfor(repo):
350 if not self.validfor(repo):
351 # cache key are not valid anymore
351 # cache key are not valid anymore
352 self.tipnode = nullid
352 self.tipnode = nullid
353 self.tiprev = nullrev
353 self.tiprev = nullrev
354 for heads in self.values():
354 for heads in self.values():
355 tiprev = max(cl.rev(node) for node in heads)
355 tiprev = max(cl.rev(node) for node in heads)
356 if tiprev > self.tiprev:
356 if tiprev > self.tiprev:
357 self.tipnode = cl.node(tiprev)
357 self.tipnode = cl.node(tiprev)
358 self.tiprev = tiprev
358 self.tiprev = tiprev
359 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
359 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
360
360
361 duration = util.timer() - starttime
361 duration = util.timer() - starttime
362 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
362 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
363 repo.filtername, duration)
363 repo.filtername, duration)
364
364
365 self.write(repo)
365 self.write(repo)
366
366
367
367
368 class remotebranchcache(branchcache):
368 class remotebranchcache(branchcache):
369 """Branchmap info for a remote connection, should not write locally"""
369 """Branchmap info for a remote connection, should not write locally"""
370 def write(self, repo):
370 def write(self, repo):
371 pass
371 pass
372
372
373
373
374 # Revision branch info cache
374 # Revision branch info cache
375
375
376 _rbcversion = '-v1'
376 _rbcversion = '-v1'
377 _rbcnames = 'rbc-names' + _rbcversion
377 _rbcnames = 'rbc-names' + _rbcversion
378 _rbcrevs = 'rbc-revs' + _rbcversion
378 _rbcrevs = 'rbc-revs' + _rbcversion
379 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
379 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
380 _rbcrecfmt = '>4sI'
380 _rbcrecfmt = '>4sI'
381 _rbcrecsize = calcsize(_rbcrecfmt)
381 _rbcrecsize = calcsize(_rbcrecfmt)
382 _rbcnodelen = 4
382 _rbcnodelen = 4
383 _rbcbranchidxmask = 0x7fffffff
383 _rbcbranchidxmask = 0x7fffffff
384 _rbccloseflag = 0x80000000
384 _rbccloseflag = 0x80000000
385
385
386 class revbranchcache(object):
386 class revbranchcache(object):
387 """Persistent cache, mapping from revision number to branch name and close.
387 """Persistent cache, mapping from revision number to branch name and close.
388 This is a low level cache, independent of filtering.
388 This is a low level cache, independent of filtering.
389
389
390 Branch names are stored in rbc-names in internal encoding separated by 0.
390 Branch names are stored in rbc-names in internal encoding separated by 0.
391 rbc-names is append-only, and each branch name is only stored once and will
391 rbc-names is append-only, and each branch name is only stored once and will
392 thus have a unique index.
392 thus have a unique index.
393
393
394 The branch info for each revision is stored in rbc-revs as constant size
394 The branch info for each revision is stored in rbc-revs as constant size
395 records. The whole file is read into memory, but it is only 'parsed' on
395 records. The whole file is read into memory, but it is only 'parsed' on
396 demand. The file is usually append-only but will be truncated if repo
396 demand. The file is usually append-only but will be truncated if repo
397 modification is detected.
397 modification is detected.
398 The record for each revision contains the first 4 bytes of the
398 The record for each revision contains the first 4 bytes of the
399 corresponding node hash, and the record is only used if it still matches.
399 corresponding node hash, and the record is only used if it still matches.
400 Even a completely trashed rbc-revs fill thus still give the right result
400 Even a completely trashed rbc-revs fill thus still give the right result
401 while converging towards full recovery ... assuming no incorrectly matching
401 while converging towards full recovery ... assuming no incorrectly matching
402 node hashes.
402 node hashes.
403 The record also contains 4 bytes where 31 bits contains the index of the
403 The record also contains 4 bytes where 31 bits contains the index of the
404 branch and the last bit indicate that it is a branch close commit.
404 branch and the last bit indicate that it is a branch close commit.
405 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
405 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
406 and will grow with it but be 1/8th of its size.
406 and will grow with it but be 1/8th of its size.
407 """
407 """
408
408
409 def __init__(self, repo, readonly=True):
409 def __init__(self, repo, readonly=True):
410 assert repo.filtername is None
410 assert repo.filtername is None
411 self._repo = repo
411 self._repo = repo
412 self._names = [] # branch names in local encoding with static index
412 self._names = [] # branch names in local encoding with static index
413 self._rbcrevs = bytearray()
413 self._rbcrevs = bytearray()
414 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
414 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
415 try:
415 try:
416 bndata = repo.cachevfs.read(_rbcnames)
416 bndata = repo.cachevfs.read(_rbcnames)
417 self._rbcsnameslen = len(bndata) # for verification before writing
417 self._rbcsnameslen = len(bndata) # for verification before writing
418 if bndata:
418 if bndata:
419 self._names = [encoding.tolocal(bn)
419 self._names = [encoding.tolocal(bn)
420 for bn in bndata.split('\0')]
420 for bn in bndata.split('\0')]
421 except (IOError, OSError):
421 except (IOError, OSError):
422 if readonly:
422 if readonly:
423 # don't try to use cache - fall back to the slow path
423 # don't try to use cache - fall back to the slow path
424 self.branchinfo = self._branchinfo
424 self.branchinfo = self._branchinfo
425
425
426 if self._names:
426 if self._names:
427 try:
427 try:
428 data = repo.cachevfs.read(_rbcrevs)
428 data = repo.cachevfs.read(_rbcrevs)
429 self._rbcrevs[:] = data
429 self._rbcrevs[:] = data
430 except (IOError, OSError) as inst:
430 except (IOError, OSError) as inst:
431 repo.ui.debug("couldn't read revision branch cache: %s\n" %
431 repo.ui.debug("couldn't read revision branch cache: %s\n" %
432 stringutil.forcebytestr(inst))
432 stringutil.forcebytestr(inst))
433 # remember number of good records on disk
433 # remember number of good records on disk
434 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
434 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
435 len(repo.changelog))
435 len(repo.changelog))
436 if self._rbcrevslen == 0:
436 if self._rbcrevslen == 0:
437 self._names = []
437 self._names = []
438 self._rbcnamescount = len(self._names) # number of names read at
438 self._rbcnamescount = len(self._names) # number of names read at
439 # _rbcsnameslen
439 # _rbcsnameslen
440
440
441 def _clear(self):
441 def _clear(self):
442 self._rbcsnameslen = 0
442 self._rbcsnameslen = 0
443 del self._names[:]
443 del self._names[:]
444 self._rbcnamescount = 0
444 self._rbcnamescount = 0
445 self._rbcrevslen = len(self._repo.changelog)
445 self._rbcrevslen = len(self._repo.changelog)
446 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
446 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
447 util.clearcachedproperty(self, '_namesreverse')
447 util.clearcachedproperty(self, '_namesreverse')
448
448
449 @util.propertycache
449 @util.propertycache
450 def _namesreverse(self):
450 def _namesreverse(self):
451 return dict((b, r) for r, b in enumerate(self._names))
451 return dict((b, r) for r, b in enumerate(self._names))
452
452
453 def branchinfo(self, rev):
453 def branchinfo(self, rev):
454 """Return branch name and close flag for rev, using and updating
454 """Return branch name and close flag for rev, using and updating
455 persistent cache."""
455 persistent cache."""
456 changelog = self._repo.changelog
456 changelog = self._repo.changelog
457 rbcrevidx = rev * _rbcrecsize
457 rbcrevidx = rev * _rbcrecsize
458
458
459 # avoid negative index, changelog.read(nullrev) is fast without cache
459 # avoid negative index, changelog.read(nullrev) is fast without cache
460 if rev == nullrev:
460 if rev == nullrev:
461 return changelog.branchinfo(rev)
461 return changelog.branchinfo(rev)
462
462
463 # if requested rev isn't allocated, grow and cache the rev info
463 # if requested rev isn't allocated, grow and cache the rev info
464 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
464 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
465 return self._branchinfo(rev)
465 return self._branchinfo(rev)
466
466
467 # fast path: extract data from cache, use it if node is matching
467 # fast path: extract data from cache, use it if node is matching
468 reponode = changelog.node(rev)[:_rbcnodelen]
468 reponode = changelog.node(rev)[:_rbcnodelen]
469 cachenode, branchidx = unpack_from(
469 cachenode, branchidx = unpack_from(
470 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
470 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
471 close = bool(branchidx & _rbccloseflag)
471 close = bool(branchidx & _rbccloseflag)
472 if close:
472 if close:
473 branchidx &= _rbcbranchidxmask
473 branchidx &= _rbcbranchidxmask
474 if cachenode == '\0\0\0\0':
474 if cachenode == '\0\0\0\0':
475 pass
475 pass
476 elif cachenode == reponode:
476 elif cachenode == reponode:
477 try:
477 try:
478 return self._names[branchidx], close
478 return self._names[branchidx], close
479 except IndexError:
479 except IndexError:
480 # recover from invalid reference to unknown branch
480 # recover from invalid reference to unknown branch
481 self._repo.ui.debug("referenced branch names not found"
481 self._repo.ui.debug("referenced branch names not found"
482 " - rebuilding revision branch cache from scratch\n")
482 " - rebuilding revision branch cache from scratch\n")
483 self._clear()
483 self._clear()
484 else:
484 else:
485 # rev/node map has changed, invalidate the cache from here up
485 # rev/node map has changed, invalidate the cache from here up
486 self._repo.ui.debug("history modification detected - truncating "
486 self._repo.ui.debug("history modification detected - truncating "
487 "revision branch cache to revision %d\n" % rev)
487 "revision branch cache to revision %d\n" % rev)
488 truncate = rbcrevidx + _rbcrecsize
488 truncate = rbcrevidx + _rbcrecsize
489 del self._rbcrevs[truncate:]
489 del self._rbcrevs[truncate:]
490 self._rbcrevslen = min(self._rbcrevslen, truncate)
490 self._rbcrevslen = min(self._rbcrevslen, truncate)
491
491
492 # fall back to slow path and make sure it will be written to disk
492 # fall back to slow path and make sure it will be written to disk
493 return self._branchinfo(rev)
493 return self._branchinfo(rev)
494
494
def _branchinfo(self, rev):
    """Read branch info for ``rev`` from the changelog and record it in
    the in-memory revision branch cache.

    Returns a ``(branchname, closesbranch)`` pair, like
    ``changelog.branchinfo``.
    """
    cl = self._repo.changelog
    branch, closesbranch = cl.branchinfo(rev)
    # map the branch name to its cache index, registering it on first use
    if branch not in self._namesreverse:
        self._namesreverse[branch] = len(self._names)
        self._names.append(branch)
    idx = self._namesreverse[branch]
    if closesbranch:
        # the close flag is folded into the high bit of the index field
        idx |= _rbccloseflag
    self._setcachedata(rev, cl.node(rev), idx)
    return branch, closesbranch
510
510
def setdata(self, branch, rev, node, close):
    """Record branch information for a revision in the cache.

    ``branch`` is the branch name, ``rev``/``node`` identify the
    revision and ``close`` tells whether the revision closes its branch.
    """
    try:
        idx = self._namesreverse[branch]
    except KeyError:
        # first time this branch name is seen: assign it the next index
        idx = len(self._names)
        self._names.append(branch)
        self._namesreverse[branch] = idx
    if close:
        idx |= _rbccloseflag
    self._setcachedata(rev, node, idx)
    # When no cache data was readable (file missing, bad permissions,
    # etc.) the cache bypassed itself by binding
    #
    #   self.branchinfo = self._branchinfo
    #
    # directly on the instance. Now that the cache holds data, drop that
    # instance attribute so lookups use the cached path again.
    if r'branchinfo' in vars(self):
        del self.branchinfo
530
530
def _setcachedata(self, rev, node, branchidx):
    """Write ``node``/``branchidx`` for ``rev`` into the in-memory
    record array and mark the on-disk cache as dirty from ``rev`` on."""
    # the null revision is never cached
    if rev == nullrev:
        return
    recordidx = rev * _rbcrecsize
    if len(self._rbcrevs) < recordidx + _rbcrecsize:
        # slot not allocated yet: grow the array to cover the whole
        # changelog in one go
        grow = len(self._repo.changelog) * _rbcrecsize - len(self._rbcrevs)
        self._rbcrevs.extend('\0' * grow)
    pack_into(_rbcrecfmt, self._rbcrevs, recordidx, node, branchidx)
    # everything from this revision onward must be rewritten to disk
    self._rbcrevslen = min(self._rbcrevslen, rev)

    # if a transaction is active, schedule a write when it finalizes
    tr = self._repo.currenttransaction()
    if tr:
        tr.addfinalize('write-revbranchcache', self.write)
546
546
def write(self, tr=None):
    """Save branch cache if it is dirty.

    Best effort: locking is attempted with ``wait=False`` and any
    I/O, abort or lock error is swallowed (logged at debug level) —
    failing to persist the cache must never break the caller.
    ``step`` tracks which phase failed, for the debug message.
    """
    repo = self._repo
    wlock = None
    step = ''
    try:
        # names file is dirty when new branch names were registered
        if self._rbcnamescount < len(self._names):
            step = ' names'
            wlock = repo.wlock(wait=False)
            if self._rbcnamescount != 0:
                # append-only fast path: verify the on-disk file still
                # ends where we last left it before appending
                f = repo.cachevfs.open(_rbcnames, 'ab')
                if f.tell() == self._rbcsnameslen:
                    f.write('\0')
                else:
                    # someone else rewrote the file - start over
                    f.close()
                    repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                    self._rbcnamescount = 0
                    self._rbcrevslen = 0
            if self._rbcnamescount == 0:
                # before rewriting names, make sure references are removed
                repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                f = repo.cachevfs.open(_rbcnames, 'wb')
                f.write('\0'.join(encoding.fromlocal(b)
                                  for b in self._names[self._rbcnamescount:]))
                self._rbcsnameslen = f.tell()
                f.close()
            self._rbcnamescount = len(self._names)

        # revs file is dirty when records past _rbcrevslen exist in memory
        start = self._rbcrevslen * _rbcrecsize
        if start != len(self._rbcrevs):
            step = ''
            if wlock is None:
                wlock = repo.wlock(wait=False)
            # never write records for revisions beyond the changelog
            revs = min(len(repo.changelog),
                       len(self._rbcrevs) // _rbcrecsize)
            f = repo.cachevfs.open(_rbcrevs, 'ab')
            if f.tell() != start:
                # on-disk size disagrees with what we expect to append at;
                # seek back, and if even that does not land on ``start``
                # rewrite the file from scratch
                repo.ui.debug("truncating cache/%s to %d\n"
                              % (_rbcrevs, start))
                f.seek(start)
                if f.tell() != start:
                    start = 0
                    f.seek(start)
                    f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs[start:end])
            f.close()
            self._rbcrevslen = revs
    except (IOError, OSError, error.Abort, error.LockError) as inst:
        repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                      % (step, stringutil.forcebytestr(inst)))
    finally:
        if wlock is not None:
            wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now