##// END OF EJS Templates
branchcache: have a hasnode function to validate nodes...
Pulkit Goyal -
r42174:b5511845 default
parent child Browse files
Show More
@@ -1,626 +1,634 b''
1 # branchmap.py - logic to compute, maintain and store branchmap for local repo
1 # branchmap.py - logic to compute, maintain and store branchmap for local repo
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11
11
12 from .node import (
12 from .node import (
13 bin,
13 bin,
14 hex,
14 hex,
15 nullid,
15 nullid,
16 nullrev,
16 nullrev,
17 )
17 )
18 from . import (
18 from . import (
19 encoding,
19 encoding,
20 error,
20 error,
21 pycompat,
21 pycompat,
22 scmutil,
22 scmutil,
23 util,
23 util,
24 )
24 )
25 from .utils import (
25 from .utils import (
26 stringutil,
26 stringutil,
27 )
27 )
28
28
29 calcsize = struct.calcsize
29 calcsize = struct.calcsize
30 pack_into = struct.pack_into
30 pack_into = struct.pack_into
31 unpack_from = struct.unpack_from
31 unpack_from = struct.unpack_from
32
32
33
33
34 ### Nearest subset relation
34 ### Nearest subset relation
35 # Nearest subset of filter X is a filter Y so that:
35 # Nearest subset of filter X is a filter Y so that:
36 # * Y is included in X,
36 # * Y is included in X,
37 # * X - Y is as small as possible.
37 # * X - Y is as small as possible.
38 # This creates an ordering used for branchmap purposes.
38 # This creates an ordering used for branchmap purposes.
39 # the ordering may be partial
39 # the ordering may be partial
40 subsettable = {None: 'visible',
40 subsettable = {None: 'visible',
41 'visible-hidden': 'visible',
41 'visible-hidden': 'visible',
42 'visible': 'served',
42 'visible': 'served',
43 'served': 'immutable',
43 'served': 'immutable',
44 'immutable': 'base'}
44 'immutable': 'base'}
45
45
46
46
47 class BranchMapCache(object):
47 class BranchMapCache(object):
48 """mapping of filtered views of repo with their branchcache"""
48 """mapping of filtered views of repo with their branchcache"""
49 def __init__(self):
49 def __init__(self):
50 self._per_filter = {}
50 self._per_filter = {}
51
51
52 def __getitem__(self, repo):
52 def __getitem__(self, repo):
53 self.updatecache(repo)
53 self.updatecache(repo)
54 return self._per_filter[repo.filtername]
54 return self._per_filter[repo.filtername]
55
55
56 def updatecache(self, repo):
56 def updatecache(self, repo):
57 """Update the cache for the given filtered view on a repository"""
57 """Update the cache for the given filtered view on a repository"""
58 # This can trigger updates for the caches for subsets of the filtered
58 # This can trigger updates for the caches for subsets of the filtered
59 # view, e.g. when there is no cache for this filtered view or the cache
59 # view, e.g. when there is no cache for this filtered view or the cache
60 # is stale.
60 # is stale.
61
61
62 cl = repo.changelog
62 cl = repo.changelog
63 filtername = repo.filtername
63 filtername = repo.filtername
64 bcache = self._per_filter.get(filtername)
64 bcache = self._per_filter.get(filtername)
65 if bcache is None or not bcache.validfor(repo):
65 if bcache is None or not bcache.validfor(repo):
66 # cache object missing or cache object stale? Read from disk
66 # cache object missing or cache object stale? Read from disk
67 bcache = branchcache.fromfile(repo)
67 bcache = branchcache.fromfile(repo)
68
68
69 revs = []
69 revs = []
70 if bcache is None:
70 if bcache is None:
71 # no (fresh) cache available anymore, perhaps we can re-use
71 # no (fresh) cache available anymore, perhaps we can re-use
72 # the cache for a subset, then extend that to add info on missing
72 # the cache for a subset, then extend that to add info on missing
73 # revisions.
73 # revisions.
74 subsetname = subsettable.get(filtername)
74 subsetname = subsettable.get(filtername)
75 if subsetname is not None:
75 if subsetname is not None:
76 subset = repo.filtered(subsetname)
76 subset = repo.filtered(subsetname)
77 bcache = self[subset].copy()
77 bcache = self[subset].copy()
78 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
78 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
79 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
79 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
80 else:
80 else:
81 # nothing to fall back on, start empty.
81 # nothing to fall back on, start empty.
82 bcache = branchcache()
82 bcache = branchcache()
83
83
84 revs.extend(cl.revs(start=bcache.tiprev + 1))
84 revs.extend(cl.revs(start=bcache.tiprev + 1))
85 if revs:
85 if revs:
86 bcache.update(repo, revs)
86 bcache.update(repo, revs)
87
87
88 assert bcache.validfor(repo), filtername
88 assert bcache.validfor(repo), filtername
89 self._per_filter[repo.filtername] = bcache
89 self._per_filter[repo.filtername] = bcache
90
90
91 def replace(self, repo, remotebranchmap):
91 def replace(self, repo, remotebranchmap):
92 """Replace the branchmap cache for a repo with a branch mapping.
92 """Replace the branchmap cache for a repo with a branch mapping.
93
93
94 This is likely only called during clone with a branch map from a
94 This is likely only called during clone with a branch map from a
95 remote.
95 remote.
96
96
97 """
97 """
98 cl = repo.changelog
98 cl = repo.changelog
99 clrev = cl.rev
99 clrev = cl.rev
100 clbranchinfo = cl.branchinfo
100 clbranchinfo = cl.branchinfo
101 rbheads = []
101 rbheads = []
102 closed = []
102 closed = []
103 for bheads in remotebranchmap.itervalues():
103 for bheads in remotebranchmap.itervalues():
104 rbheads += bheads
104 rbheads += bheads
105 for h in bheads:
105 for h in bheads:
106 r = clrev(h)
106 r = clrev(h)
107 b, c = clbranchinfo(r)
107 b, c = clbranchinfo(r)
108 if c:
108 if c:
109 closed.append(h)
109 closed.append(h)
110
110
111 if rbheads:
111 if rbheads:
112 rtiprev = max((int(clrev(node)) for node in rbheads))
112 rtiprev = max((int(clrev(node)) for node in rbheads))
113 cache = branchcache(
113 cache = branchcache(
114 remotebranchmap, repo[rtiprev].node(), rtiprev,
114 remotebranchmap, repo[rtiprev].node(), rtiprev,
115 closednodes=closed)
115 closednodes=closed)
116
116
117 # Try to stick it as low as possible
117 # Try to stick it as low as possible
118 # filters above served are unlikely to be fetched from a clone
118 # filters above served are unlikely to be fetched from a clone
119 for candidate in ('base', 'immutable', 'served'):
119 for candidate in ('base', 'immutable', 'served'):
120 rview = repo.filtered(candidate)
120 rview = repo.filtered(candidate)
121 if cache.validfor(rview):
121 if cache.validfor(rview):
122 self._per_filter[candidate] = cache
122 self._per_filter[candidate] = cache
123 cache.write(rview)
123 cache.write(rview)
124 return
124 return
125
125
126 def clear(self):
126 def clear(self):
127 self._per_filter.clear()
127 self._per_filter.clear()
128
128
129
129
130 class branchcache(object):
130 class branchcache(object):
131 """A dict like object that holds branches heads cache.
131 """A dict like object that holds branches heads cache.
132
132
133 This cache is used to avoid costly computations to determine all the
133 This cache is used to avoid costly computations to determine all the
134 branch heads of a repo.
134 branch heads of a repo.
135
135
136 The cache is serialized on disk in the following format:
136 The cache is serialized on disk in the following format:
137
137
138 <tip hex node> <tip rev number> [optional filtered repo hex hash]
138 <tip hex node> <tip rev number> [optional filtered repo hex hash]
139 <branch head hex node> <open/closed state> <branch name>
139 <branch head hex node> <open/closed state> <branch name>
140 <branch head hex node> <open/closed state> <branch name>
140 <branch head hex node> <open/closed state> <branch name>
141 ...
141 ...
142
142
143 The first line is used to check if the cache is still valid. If the
143 The first line is used to check if the cache is still valid. If the
144 branch cache is for a filtered repo view, an optional third hash is
144 branch cache is for a filtered repo view, an optional third hash is
145 included that hashes the hashes of all filtered revisions.
145 included that hashes the hashes of all filtered revisions.
146
146
147 The open/closed state is represented by a single letter 'o' or 'c'.
147 The open/closed state is represented by a single letter 'o' or 'c'.
148 This field can be used to avoid changelog reads when determining if a
148 This field can be used to avoid changelog reads when determining if a
149 branch head closes a branch or not.
149 branch head closes a branch or not.
150 """
150 """
151
151
152 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
152 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
153 filteredhash=None, closednodes=None):
153 filteredhash=None, closednodes=None, hasnode=None):
154 """ hasnode is a function which can be used to verify whether changelog
155 has a given node or not. If it's not provided, we assume that every node
156 we have exists in changelog """
154 self.tipnode = tipnode
157 self.tipnode = tipnode
155 self.tiprev = tiprev
158 self.tiprev = tiprev
156 self.filteredhash = filteredhash
159 self.filteredhash = filteredhash
157 # closednodes is a set of nodes that close their branch. If the branch
160 # closednodes is a set of nodes that close their branch. If the branch
158 # cache has been updated, it may contain nodes that are no longer
161 # cache has been updated, it may contain nodes that are no longer
159 # heads.
162 # heads.
160 if closednodes is None:
163 if closednodes is None:
161 self._closednodes = set()
164 self._closednodes = set()
162 else:
165 else:
163 self._closednodes = closednodes
166 self._closednodes = closednodes
164 self._entries = dict(entries)
167 self._entries = dict(entries)
165 # whether closed nodes are verified or not
168 # whether closed nodes are verified or not
166 self._closedverified = False
169 self._closedverified = False
167 # branches for which nodes are verified
170 # branches for which nodes are verified
168 self._verifiedbranches = set()
171 self._verifiedbranches = set()
172 self._hasnode = hasnode
173 if self._hasnode is None:
174 self._hasnode = lambda x: True
169
175
170 def __iter__(self):
176 def __iter__(self):
171 return iter(self._entries)
177 return iter(self._entries)
172
178
173 def __setitem__(self, key, value):
179 def __setitem__(self, key, value):
174 self._entries[key] = value
180 self._entries[key] = value
175
181
176 def __getitem__(self, key):
182 def __getitem__(self, key):
177 return self._entries[key]
183 return self._entries[key]
178
184
179 def iteritems(self):
185 def iteritems(self):
180 return self._entries.iteritems()
186 return self._entries.iteritems()
181
187
182 def hasbranch(self, label):
188 def hasbranch(self, label):
183 """ checks whether a branch of this name exists or not """
189 """ checks whether a branch of this name exists or not """
184 return label in self._entries
190 return label in self._entries
185
191
186 @classmethod
192 @classmethod
187 def fromfile(cls, repo):
193 def fromfile(cls, repo):
188 f = None
194 f = None
189 try:
195 try:
190 f = repo.cachevfs(cls._filename(repo))
196 f = repo.cachevfs(cls._filename(repo))
191 lineiter = iter(f)
197 lineiter = iter(f)
192 cachekey = next(lineiter).rstrip('\n').split(" ", 2)
198 cachekey = next(lineiter).rstrip('\n').split(" ", 2)
193 last, lrev = cachekey[:2]
199 last, lrev = cachekey[:2]
194 last, lrev = bin(last), int(lrev)
200 last, lrev = bin(last), int(lrev)
195 filteredhash = None
201 filteredhash = None
202 hasnode = repo.changelog.hasnode
196 if len(cachekey) > 2:
203 if len(cachekey) > 2:
197 filteredhash = bin(cachekey[2])
204 filteredhash = bin(cachekey[2])
198 bcache = cls(tipnode=last, tiprev=lrev, filteredhash=filteredhash)
205 bcache = cls(tipnode=last, tiprev=lrev, filteredhash=filteredhash,
206 hasnode=hasnode)
199 if not bcache.validfor(repo):
207 if not bcache.validfor(repo):
200 # invalidate the cache
208 # invalidate the cache
201 raise ValueError(r'tip differs')
209 raise ValueError(r'tip differs')
202 bcache.load(repo, lineiter)
210 bcache.load(repo, lineiter)
203 except (IOError, OSError):
211 except (IOError, OSError):
204 return None
212 return None
205
213
206 except Exception as inst:
214 except Exception as inst:
207 if repo.ui.debugflag:
215 if repo.ui.debugflag:
208 msg = 'invalid branchheads cache'
216 msg = 'invalid branchheads cache'
209 if repo.filtername is not None:
217 if repo.filtername is not None:
210 msg += ' (%s)' % repo.filtername
218 msg += ' (%s)' % repo.filtername
211 msg += ': %s\n'
219 msg += ': %s\n'
212 repo.ui.debug(msg % pycompat.bytestr(inst))
220 repo.ui.debug(msg % pycompat.bytestr(inst))
213 bcache = None
221 bcache = None
214
222
215 finally:
223 finally:
216 if f:
224 if f:
217 f.close()
225 f.close()
218
226
219 return bcache
227 return bcache
220
228
221 def load(self, repo, lineiter):
229 def load(self, repo, lineiter):
222 """ fully loads the branchcache by reading from the file using the line
230 """ fully loads the branchcache by reading from the file using the line
223 iterator passed"""
231 iterator passed"""
224 cl = repo.changelog
232 cl = repo.changelog
225 for line in lineiter:
233 for line in lineiter:
226 line = line.rstrip('\n')
234 line = line.rstrip('\n')
227 if not line:
235 if not line:
228 continue
236 continue
229 node, state, label = line.split(" ", 2)
237 node, state, label = line.split(" ", 2)
230 if state not in 'oc':
238 if state not in 'oc':
231 raise ValueError(r'invalid branch state')
239 raise ValueError(r'invalid branch state')
232 label = encoding.tolocal(label.strip())
240 label = encoding.tolocal(label.strip())
233 node = bin(node)
241 node = bin(node)
234 if not cl.hasnode(node):
242 if not cl.hasnode(node):
235 raise ValueError(
243 raise ValueError(
236 r'node %s does not exist' % pycompat.sysstr(hex(node)))
244 r'node %s does not exist' % pycompat.sysstr(hex(node)))
237 self._entries.setdefault(label, []).append(node)
245 self._entries.setdefault(label, []).append(node)
238 self._verifiedbranches.add(label)
246 self._verifiedbranches.add(label)
239 if state == 'c':
247 if state == 'c':
240 self._closednodes.add(node)
248 self._closednodes.add(node)
241 self._closedverified = True
249 self._closedverified = True
242
250
243 @staticmethod
251 @staticmethod
244 def _filename(repo):
252 def _filename(repo):
245 """name of a branchcache file for a given repo or repoview"""
253 """name of a branchcache file for a given repo or repoview"""
246 filename = "branch2"
254 filename = "branch2"
247 if repo.filtername:
255 if repo.filtername:
248 filename = '%s-%s' % (filename, repo.filtername)
256 filename = '%s-%s' % (filename, repo.filtername)
249 return filename
257 return filename
250
258
251 def validfor(self, repo):
259 def validfor(self, repo):
252 """Is the cache content valid regarding a repo
260 """Is the cache content valid regarding a repo
253
261
254 - False when cached tipnode is unknown or if we detect a strip.
262 - False when cached tipnode is unknown or if we detect a strip.
255 - True when cache is up to date or a subset of current repo."""
263 - True when cache is up to date or a subset of current repo."""
256 try:
264 try:
257 return ((self.tipnode == repo.changelog.node(self.tiprev))
265 return ((self.tipnode == repo.changelog.node(self.tiprev))
258 and (self.filteredhash ==
266 and (self.filteredhash ==
259 scmutil.filteredhash(repo, self.tiprev)))
267 scmutil.filteredhash(repo, self.tiprev)))
260 except IndexError:
268 except IndexError:
261 return False
269 return False
262
270
263 def _branchtip(self, heads):
271 def _branchtip(self, heads):
264 '''Return tuple with last open head in heads and false,
272 '''Return tuple with last open head in heads and false,
265 otherwise return last closed head and true.'''
273 otherwise return last closed head and true.'''
266 tip = heads[-1]
274 tip = heads[-1]
267 closed = True
275 closed = True
268 for h in reversed(heads):
276 for h in reversed(heads):
269 if h not in self._closednodes:
277 if h not in self._closednodes:
270 tip = h
278 tip = h
271 closed = False
279 closed = False
272 break
280 break
273 return tip, closed
281 return tip, closed
274
282
275 def branchtip(self, branch):
283 def branchtip(self, branch):
276 '''Return the tipmost open head on branch head, otherwise return the
284 '''Return the tipmost open head on branch head, otherwise return the
277 tipmost closed head on branch.
285 tipmost closed head on branch.
278 Raise KeyError for unknown branch.'''
286 Raise KeyError for unknown branch.'''
279 return self._branchtip(self[branch])[0]
287 return self._branchtip(self[branch])[0]
280
288
281 def iteropen(self, nodes):
289 def iteropen(self, nodes):
282 return (n for n in nodes if n not in self._closednodes)
290 return (n for n in nodes if n not in self._closednodes)
283
291
284 def branchheads(self, branch, closed=False):
292 def branchheads(self, branch, closed=False):
285 heads = self[branch]
293 heads = self[branch]
286 if not closed:
294 if not closed:
287 heads = list(self.iteropen(heads))
295 heads = list(self.iteropen(heads))
288 return heads
296 return heads
289
297
290 def iterbranches(self):
298 def iterbranches(self):
291 for bn, heads in self.iteritems():
299 for bn, heads in self.iteritems():
292 yield (bn, heads) + self._branchtip(heads)
300 yield (bn, heads) + self._branchtip(heads)
293
301
294 def iterheads(self):
302 def iterheads(self):
295 """ returns all the heads """
303 """ returns all the heads """
296 return self._entries.itervalues()
304 return self._entries.itervalues()
297
305
298 def copy(self):
306 def copy(self):
299 """return an deep copy of the branchcache object"""
307 """return an deep copy of the branchcache object"""
300 return branchcache(
308 return branchcache(
301 self._entries, self.tipnode, self.tiprev, self.filteredhash,
309 self._entries, self.tipnode, self.tiprev, self.filteredhash,
302 self._closednodes)
310 self._closednodes)
303
311
304 def write(self, repo):
312 def write(self, repo):
305 try:
313 try:
306 f = repo.cachevfs(self._filename(repo), "w", atomictemp=True)
314 f = repo.cachevfs(self._filename(repo), "w", atomictemp=True)
307 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
315 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
308 if self.filteredhash is not None:
316 if self.filteredhash is not None:
309 cachekey.append(hex(self.filteredhash))
317 cachekey.append(hex(self.filteredhash))
310 f.write(" ".join(cachekey) + '\n')
318 f.write(" ".join(cachekey) + '\n')
311 nodecount = 0
319 nodecount = 0
312 for label, nodes in sorted(self.iteritems()):
320 for label, nodes in sorted(self.iteritems()):
313 label = encoding.fromlocal(label)
321 label = encoding.fromlocal(label)
314 for node in nodes:
322 for node in nodes:
315 nodecount += 1
323 nodecount += 1
316 if node in self._closednodes:
324 if node in self._closednodes:
317 state = 'c'
325 state = 'c'
318 else:
326 else:
319 state = 'o'
327 state = 'o'
320 f.write("%s %s %s\n" % (hex(node), state, label))
328 f.write("%s %s %s\n" % (hex(node), state, label))
321 f.close()
329 f.close()
322 repo.ui.log('branchcache',
330 repo.ui.log('branchcache',
323 'wrote %s branch cache with %d labels and %d nodes\n',
331 'wrote %s branch cache with %d labels and %d nodes\n',
324 repo.filtername, len(self._entries), nodecount)
332 repo.filtername, len(self._entries), nodecount)
325 except (IOError, OSError, error.Abort) as inst:
333 except (IOError, OSError, error.Abort) as inst:
326 # Abort may be raised by read only opener, so log and continue
334 # Abort may be raised by read only opener, so log and continue
327 repo.ui.debug("couldn't write branch cache: %s\n" %
335 repo.ui.debug("couldn't write branch cache: %s\n" %
328 stringutil.forcebytestr(inst))
336 stringutil.forcebytestr(inst))
329
337
330 def update(self, repo, revgen):
338 def update(self, repo, revgen):
331 """Given a branchhead cache, self, that may have extra nodes or be
339 """Given a branchhead cache, self, that may have extra nodes or be
332 missing heads, and a generator of nodes that are strictly a superset of
340 missing heads, and a generator of nodes that are strictly a superset of
333 heads missing, this function updates self to be correct.
341 heads missing, this function updates self to be correct.
334 """
342 """
335 starttime = util.timer()
343 starttime = util.timer()
336 cl = repo.changelog
344 cl = repo.changelog
337 # collect new branch entries
345 # collect new branch entries
338 newbranches = {}
346 newbranches = {}
339 getbranchinfo = repo.revbranchcache().branchinfo
347 getbranchinfo = repo.revbranchcache().branchinfo
340 for r in revgen:
348 for r in revgen:
341 branch, closesbranch = getbranchinfo(r)
349 branch, closesbranch = getbranchinfo(r)
342 newbranches.setdefault(branch, []).append(r)
350 newbranches.setdefault(branch, []).append(r)
343 if closesbranch:
351 if closesbranch:
344 self._closednodes.add(cl.node(r))
352 self._closednodes.add(cl.node(r))
345
353
346 # fetch current topological heads to speed up filtering
354 # fetch current topological heads to speed up filtering
347 topoheads = set(cl.headrevs())
355 topoheads = set(cl.headrevs())
348
356
349 # if older branchheads are reachable from new ones, they aren't
357 # if older branchheads are reachable from new ones, they aren't
350 # really branchheads. Note checking parents is insufficient:
358 # really branchheads. Note checking parents is insufficient:
351 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
359 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
352 for branch, newheadrevs in newbranches.iteritems():
360 for branch, newheadrevs in newbranches.iteritems():
353 bheads = self._entries.setdefault(branch, [])
361 bheads = self._entries.setdefault(branch, [])
354 bheadset = set(cl.rev(node) for node in bheads)
362 bheadset = set(cl.rev(node) for node in bheads)
355
363
356 # This has been tested True on all internal usages of this function.
364 # This has been tested True on all internal usages of this function.
357 # run it again in case of doubt
365 # run it again in case of doubt
358 # assert not (set(bheadrevs) & set(newheadrevs))
366 # assert not (set(bheadrevs) & set(newheadrevs))
359 bheadset.update(newheadrevs)
367 bheadset.update(newheadrevs)
360
368
361 # This prunes out two kinds of heads - heads that are superseded by
369 # This prunes out two kinds of heads - heads that are superseded by
362 # a head in newheadrevs, and newheadrevs that are not heads because
370 # a head in newheadrevs, and newheadrevs that are not heads because
363 # an existing head is their descendant.
371 # an existing head is their descendant.
364 uncertain = bheadset - topoheads
372 uncertain = bheadset - topoheads
365 if uncertain:
373 if uncertain:
366 floorrev = min(uncertain)
374 floorrev = min(uncertain)
367 ancestors = set(cl.ancestors(newheadrevs, floorrev))
375 ancestors = set(cl.ancestors(newheadrevs, floorrev))
368 bheadset -= ancestors
376 bheadset -= ancestors
369 bheadrevs = sorted(bheadset)
377 bheadrevs = sorted(bheadset)
370 self[branch] = [cl.node(rev) for rev in bheadrevs]
378 self[branch] = [cl.node(rev) for rev in bheadrevs]
371 tiprev = bheadrevs[-1]
379 tiprev = bheadrevs[-1]
372 if tiprev > self.tiprev:
380 if tiprev > self.tiprev:
373 self.tipnode = cl.node(tiprev)
381 self.tipnode = cl.node(tiprev)
374 self.tiprev = tiprev
382 self.tiprev = tiprev
375
383
376 if not self.validfor(repo):
384 if not self.validfor(repo):
377 # cache key are not valid anymore
385 # cache key are not valid anymore
378 self.tipnode = nullid
386 self.tipnode = nullid
379 self.tiprev = nullrev
387 self.tiprev = nullrev
380 for heads in self.iterheads():
388 for heads in self.iterheads():
381 tiprev = max(cl.rev(node) for node in heads)
389 tiprev = max(cl.rev(node) for node in heads)
382 if tiprev > self.tiprev:
390 if tiprev > self.tiprev:
383 self.tipnode = cl.node(tiprev)
391 self.tipnode = cl.node(tiprev)
384 self.tiprev = tiprev
392 self.tiprev = tiprev
385 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
393 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
386
394
387 duration = util.timer() - starttime
395 duration = util.timer() - starttime
388 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
396 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
389 repo.filtername or b'None', duration)
397 repo.filtername or b'None', duration)
390
398
391 self.write(repo)
399 self.write(repo)
392
400
393
401
394 class remotebranchcache(branchcache):
402 class remotebranchcache(branchcache):
395 """Branchmap info for a remote connection, should not write locally"""
403 """Branchmap info for a remote connection, should not write locally"""
396 def write(self, repo):
404 def write(self, repo):
397 pass
405 pass
398
406
399
407
400 # Revision branch info cache
408 # Revision branch info cache
401
409
402 _rbcversion = '-v1'
410 _rbcversion = '-v1'
403 _rbcnames = 'rbc-names' + _rbcversion
411 _rbcnames = 'rbc-names' + _rbcversion
404 _rbcrevs = 'rbc-revs' + _rbcversion
412 _rbcrevs = 'rbc-revs' + _rbcversion
405 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
413 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
406 _rbcrecfmt = '>4sI'
414 _rbcrecfmt = '>4sI'
407 _rbcrecsize = calcsize(_rbcrecfmt)
415 _rbcrecsize = calcsize(_rbcrecfmt)
408 _rbcnodelen = 4
416 _rbcnodelen = 4
409 _rbcbranchidxmask = 0x7fffffff
417 _rbcbranchidxmask = 0x7fffffff
410 _rbccloseflag = 0x80000000
418 _rbccloseflag = 0x80000000
411
419
412 class revbranchcache(object):
420 class revbranchcache(object):
413 """Persistent cache, mapping from revision number to branch name and close.
421 """Persistent cache, mapping from revision number to branch name and close.
414 This is a low level cache, independent of filtering.
422 This is a low level cache, independent of filtering.
415
423
416 Branch names are stored in rbc-names in internal encoding separated by 0.
424 Branch names are stored in rbc-names in internal encoding separated by 0.
417 rbc-names is append-only, and each branch name is only stored once and will
425 rbc-names is append-only, and each branch name is only stored once and will
418 thus have a unique index.
426 thus have a unique index.
419
427
420 The branch info for each revision is stored in rbc-revs as constant size
428 The branch info for each revision is stored in rbc-revs as constant size
421 records. The whole file is read into memory, but it is only 'parsed' on
429 records. The whole file is read into memory, but it is only 'parsed' on
422 demand. The file is usually append-only but will be truncated if repo
430 demand. The file is usually append-only but will be truncated if repo
423 modification is detected.
431 modification is detected.
424 The record for each revision contains the first 4 bytes of the
432 The record for each revision contains the first 4 bytes of the
425 corresponding node hash, and the record is only used if it still matches.
433 corresponding node hash, and the record is only used if it still matches.
426 Even a completely trashed rbc-revs file will thus still give the right result
434 Even a completely trashed rbc-revs file will thus still give the right result
427 while converging towards full recovery ... assuming no incorrectly matching
435 while converging towards full recovery ... assuming no incorrectly matching
428 node hashes.
436 node hashes.
429 The record also contains 4 bytes where 31 bits contains the index of the
437 The record also contains 4 bytes where 31 bits contains the index of the
430 branch and the last bit indicate that it is a branch close commit.
438 branch and the last bit indicate that it is a branch close commit.
431 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
439 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
432 and will grow with it but be 1/8th of its size.
440 and will grow with it but be 1/8th of its size.
433 """
441 """
434
442
435 def __init__(self, repo, readonly=True):
443 def __init__(self, repo, readonly=True):
436 assert repo.filtername is None
444 assert repo.filtername is None
437 self._repo = repo
445 self._repo = repo
438 self._names = [] # branch names in local encoding with static index
446 self._names = [] # branch names in local encoding with static index
439 self._rbcrevs = bytearray()
447 self._rbcrevs = bytearray()
440 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
448 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
441 try:
449 try:
442 bndata = repo.cachevfs.read(_rbcnames)
450 bndata = repo.cachevfs.read(_rbcnames)
443 self._rbcsnameslen = len(bndata) # for verification before writing
451 self._rbcsnameslen = len(bndata) # for verification before writing
444 if bndata:
452 if bndata:
445 self._names = [encoding.tolocal(bn)
453 self._names = [encoding.tolocal(bn)
446 for bn in bndata.split('\0')]
454 for bn in bndata.split('\0')]
447 except (IOError, OSError):
455 except (IOError, OSError):
448 if readonly:
456 if readonly:
449 # don't try to use cache - fall back to the slow path
457 # don't try to use cache - fall back to the slow path
450 self.branchinfo = self._branchinfo
458 self.branchinfo = self._branchinfo
451
459
452 if self._names:
460 if self._names:
453 try:
461 try:
454 data = repo.cachevfs.read(_rbcrevs)
462 data = repo.cachevfs.read(_rbcrevs)
455 self._rbcrevs[:] = data
463 self._rbcrevs[:] = data
456 except (IOError, OSError) as inst:
464 except (IOError, OSError) as inst:
457 repo.ui.debug("couldn't read revision branch cache: %s\n" %
465 repo.ui.debug("couldn't read revision branch cache: %s\n" %
458 stringutil.forcebytestr(inst))
466 stringutil.forcebytestr(inst))
459 # remember number of good records on disk
467 # remember number of good records on disk
460 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
468 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
461 len(repo.changelog))
469 len(repo.changelog))
462 if self._rbcrevslen == 0:
470 if self._rbcrevslen == 0:
463 self._names = []
471 self._names = []
464 self._rbcnamescount = len(self._names) # number of names read at
472 self._rbcnamescount = len(self._names) # number of names read at
465 # _rbcsnameslen
473 # _rbcsnameslen
466
474
467 def _clear(self):
475 def _clear(self):
468 self._rbcsnameslen = 0
476 self._rbcsnameslen = 0
469 del self._names[:]
477 del self._names[:]
470 self._rbcnamescount = 0
478 self._rbcnamescount = 0
471 self._rbcrevslen = len(self._repo.changelog)
479 self._rbcrevslen = len(self._repo.changelog)
472 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
480 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
473 util.clearcachedproperty(self, '_namesreverse')
481 util.clearcachedproperty(self, '_namesreverse')
474
482
475 @util.propertycache
483 @util.propertycache
476 def _namesreverse(self):
484 def _namesreverse(self):
477 return dict((b, r) for r, b in enumerate(self._names))
485 return dict((b, r) for r, b in enumerate(self._names))
478
486
def branchinfo(self, rev):
    """Return branch name and close flag for rev, using and updating
    persistent cache.

    Returns a ``(branchname, closed)`` tuple.  Cache records are keyed by
    a node-hash prefix so that history rewrites can be detected; any
    mismatch truncates the cache from ``rev`` upward and falls back to
    reading the changelog directly via :meth:`_branchinfo`.
    """
    changelog = self._repo.changelog
    # byte offset of this revision's fixed-size record in _rbcrevs
    rbcrevidx = rev * _rbcrecsize

    # avoid negative index, changelog.read(nullrev) is fast without cache
    if rev == nullrev:
        return changelog.branchinfo(rev)

    # if requested rev isn't allocated, grow and cache the rev info
    if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
        return self._branchinfo(rev)

    # fast path: extract data from cache, use it if node is matching
    reponode = changelog.node(rev)[:_rbcnodelen]
    cachenode, branchidx = unpack_from(
        _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
    # the close flag is stored in the high bit of the branch index
    close = bool(branchidx & _rbccloseflag)
    if close:
        branchidx &= _rbcbranchidxmask
    if cachenode == '\0\0\0\0':
        # all-zero node means "no record yet" -- fall through to slow path
        pass
    elif cachenode == reponode:
        try:
            return self._names[branchidx], close
        except IndexError:
            # recover from invalid reference to unknown branch
            self._repo.ui.debug("referenced branch names not found"
                " - rebuilding revision branch cache from scratch\n")
            self._clear()
    else:
        # rev/node map has changed, invalidate the cache from here up
        self._repo.ui.debug("history modification detected - truncating "
                            "revision branch cache to revision %d\n" % rev)
        truncate = rbcrevidx + _rbcrecsize
        del self._rbcrevs[truncate:]
        self._rbcrevslen = min(self._rbcrevslen, truncate)

    # fall back to slow path and make sure it will be written to disk
    return self._branchinfo(rev)
520
528
def _branchinfo(self, rev):
    """Read branch name and close flag for ``rev`` from the changelog and
    record the result in the in-memory revision branch cache."""
    cl = self._repo.changelog
    branch, closed = cl.branchinfo(rev)
    # map the branch name to its stable index, registering it on first use
    try:
        idx = self._namesreverse[branch]
    except KeyError:
        idx = len(self._names)
        self._names.append(branch)
        self._namesreverse[branch] = idx
    node = cl.node(rev)
    # the close flag rides in the high bit of the stored branch index
    record = (idx | _rbccloseflag) if closed else idx
    self._setcachedata(rev, node, record)
    return branch, closed
536
544
def setdata(self, branch, rev, node, close):
    """add new data information to the cache"""
    # resolve (or allocate) the index for this branch name
    try:
        branchidx = self._namesreverse[branch]
    except KeyError:
        branchidx = len(self._names)
        self._names.append(branch)
        self._namesreverse[branch] = branchidx
    if close:
        # the close flag is encoded in the high bit of the index
        branchidx |= _rbccloseflag
    self._setcachedata(rev, node, branchidx)
    # If no cache data were readable (non exists, bad permission, etc)
    # the cache was bypassing itself by setting:
    #
    #   self.branchinfo = self._branchinfo
    #
    # Since we now have data in the cache, we need to drop this bypassing.
    if r'branchinfo' in vars(self):
        del self.branchinfo
556
564
def _setcachedata(self, rev, node, branchidx):
    """Write one (node, branch index) record into the in-memory cache."""
    if rev == nullrev:
        # nullrev has no record slot in the cache
        return
    recordidx = rev * _rbcrecsize
    if recordidx + _rbcrecsize > len(self._rbcrevs):
        # grow the record area to cover the whole changelog
        missing = (len(self._repo.changelog) * _rbcrecsize
                   - len(self._rbcrevs))
        self._rbcrevs.extend('\0' * missing)
    pack_into(_rbcrecfmt, self._rbcrevs, recordidx, node, branchidx)
    # everything from this revision onward must be rewritten to disk
    self._rbcrevslen = min(self._rbcrevslen, rev)

    # make sure the dirty cache is flushed when the transaction finalizes
    tr = self._repo.currenttransaction()
    if tr:
        tr.addfinalize('write-revbranchcache', self.write)
572
580
def write(self, tr=None):
    """Save branch cache if it is dirty.

    Writes two cache files: the branch-name list (``_rbcnames``) and the
    fixed-size per-revision records (``_rbcrevs``).  Both are appended to
    when possible and rewritten from scratch when an on-disk inconsistency
    is detected.  All failures (missing lock, bad permissions, ...) are
    non-fatal: the cache simply is not persisted.
    """
    repo = self._repo
    wlock = None
    step = ''
    try:
        # step 1: persist any branch names added since the last write
        if self._rbcnamescount < len(self._names):
            step = ' names'
            wlock = repo.wlock(wait=False)
            if self._rbcnamescount != 0:
                f = repo.cachevfs.open(_rbcnames, 'ab')
                # only append if the file still ends where we left it
                if f.tell() == self._rbcsnameslen:
                    f.write('\0')
                else:
                    # file changed behind our back - start over
                    f.close()
                    repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                    self._rbcnamescount = 0
                    self._rbcrevslen = 0
            if self._rbcnamescount == 0:
                # before rewriting names, make sure references are removed
                repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                f = repo.cachevfs.open(_rbcnames, 'wb')
            f.write('\0'.join(encoding.fromlocal(b)
                              for b in self._names[self._rbcnamescount:]))
            self._rbcsnameslen = f.tell()
            f.close()
            self._rbcnamescount = len(self._names)

        # step 2: persist any per-revision records past the clean prefix
        start = self._rbcrevslen * _rbcrecsize
        if start != len(self._rbcrevs):
            step = ''
            if wlock is None:
                wlock = repo.wlock(wait=False)
            # do not write records for revisions the changelog doesn't have
            revs = min(len(repo.changelog),
                       len(self._rbcrevs) // _rbcrecsize)
            f = repo.cachevfs.open(_rbcrevs, 'ab')
            if f.tell() != start:
                # on-disk size differs from expectation - truncate to the
                # clean prefix, or rewrite everything if seeking fails
                repo.ui.debug("truncating cache/%s to %d\n"
                              % (_rbcrevs, start))
                f.seek(start)
                if f.tell() != start:
                    start = 0
                    f.seek(start)
                    f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs[start:end])
            f.close()
            self._rbcrevslen = revs
    except (IOError, OSError, error.Abort, error.LockError) as inst:
        # best-effort cache: report and carry on
        repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                      % (step, stringutil.forcebytestr(inst)))
    finally:
        if wlock is not None:
            wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now