##// END OF EJS Templates
branchcache: lazily validate nodes in iteritems()...
Pulkit Goyal -
r42303:f0203c34 default
parent child Browse files
Show More
@@ -1,668 +1,669 b''
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11
11
12 from .node import (
12 from .node import (
13 bin,
13 bin,
14 hex,
14 hex,
15 nullid,
15 nullid,
16 nullrev,
16 nullrev,
17 )
17 )
18 from . import (
18 from . import (
19 encoding,
19 encoding,
20 error,
20 error,
21 pycompat,
21 pycompat,
22 scmutil,
22 scmutil,
23 util,
23 util,
24 )
24 )
25 from .utils import (
25 from .utils import (
26 stringutil,
26 stringutil,
27 )
27 )
28
28
# Short module-level aliases for the struct helpers used by the revision
# branch cache below; binding them once avoids repeated attribute lookups
# in hot record (un)packing loops.
calcsize = struct.calcsize
pack_into = struct.pack_into
unpack_from = struct.unpack_from
32
32
33
33
### Nearest subset relation
# Nearest subset of filter X is a filter Y so that:
# * Y is included in X,
# * X - Y is as small as possible.
# This creates an ordering used for branchmap purposes.
# The ordering may be partial.
subsettable = {None: 'visible',
               'visible-hidden': 'visible',
               'visible': 'served',
               'served.hidden': 'served',
               'served': 'immutable',
               'immutable': 'base'}
46
46
47
47
class BranchMapCache(object):
    """mapping of filtered views of repo with their branchcache"""

    def __init__(self):
        # one branchcache instance per repo filter name
        self._per_filter = {}

    def __getitem__(self, repo):
        # always refresh before handing out a cache for this view
        self.updatecache(repo)
        return self._per_filter[repo.filtername]

    def updatecache(self, repo):
        """Update the cache for the given filtered view on a repository"""
        # This can trigger updates for the caches for subsets of the filtered
        # view, e.g. when there is no cache for this filtered view or the cache
        # is stale.

        cl = repo.changelog
        filtername = repo.filtername
        bcache = self._per_filter.get(filtername)
        if bcache is None or not bcache.validfor(repo):
            # cache object missing or cache object stale? Read from disk
            bcache = branchcache.fromfile(repo)

        revs = []
        if bcache is None:
            # no (fresh) cache available anymore, perhaps we can re-use
            # the cache for a subset, then extend that to add info on missing
            # revisions.
            subsetname = subsettable.get(filtername)
            if subsetname is not None:
                subset = repo.filtered(subsetname)
                # recursive lookup updates the subset's cache first
                bcache = self[subset].copy()
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= bcache.tiprev)
            else:
                # nothing to fall back on, start empty.
                bcache = branchcache()

        revs.extend(cl.revs(start=bcache.tiprev + 1))
        if revs:
            bcache.update(repo, revs)

        assert bcache.validfor(repo), filtername
        self._per_filter[repo.filtername] = bcache

    def replace(self, repo, remotebranchmap):
        """Replace the branchmap cache for a repo with a branch mapping.

        This is likely only called during clone with a branch map from a
        remote.

        """
        cl = repo.changelog
        clrev = cl.rev
        clbranchinfo = cl.branchinfo
        rbheads = []
        closed = []
        for bheads in remotebranchmap.itervalues():
            rbheads += bheads
            for h in bheads:
                r = clrev(h)
                b, c = clbranchinfo(r)
                if c:
                    closed.append(h)

        if rbheads:
            rtiprev = max((int(clrev(node)) for node in rbheads))
            cache = branchcache(
                remotebranchmap, repo[rtiprev].node(), rtiprev,
                closednodes=closed)

            # Try to stick it as low as possible
            # filter above served are unlikely to be fetch from a clone
            for candidate in ('base', 'immutable', 'served'):
                rview = repo.filtered(candidate)
                if cache.validfor(rview):
                    self._per_filter[candidate] = cache
                    cache.write(rview)
                    return

    def clear(self):
        # drop all cached views; they will be lazily rebuilt on next access
        self._per_filter.clear()
129
129
def _unknownnode(node):
    """ raises ValueError when branchcache finds a node which does not exist
    """
    raise ValueError(r'node %s does not exist' % pycompat.sysstr(hex(node)))
134
134
class branchcache(object):
    """A dict like object that holds branches heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.

    Nodes loaded from disk are validated lazily against the changelog: a
    branch's head nodes are only checked when that branch is accessed.
    """

    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                 filteredhash=None, closednodes=None, hasnode=None):
        """ hasnode is a function which can be used to verify whether changelog
        has a given node or not. If it's not provided, we assume that every node
        we have exists in changelog """
        self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closednodes is None:
            self._closednodes = set()
        else:
            self._closednodes = closednodes
        self._entries = dict(entries)
        # whether closed nodes are verified or not
        self._closedverified = False
        # branches for which nodes are verified
        self._verifiedbranches = set()
        self._hasnode = hasnode
        if self._hasnode is None:
            # no verifier provided: assume every node exists
            self._hasnode = lambda x: True

    def _verifyclosed(self):
        """ verify the closed nodes we have """
        if self._closedverified:
            return
        for node in self._closednodes:
            if not self._hasnode(node):
                _unknownnode(node)

        self._closedverified = True

    def _verifybranch(self, branch):
        """ verify head nodes for the given branch. """
        # already verified (or unknown) branches are skipped cheaply
        if branch not in self._entries or branch in self._verifiedbranches:
            return
        for n in self._entries[branch]:
            if not self._hasnode(n):
                _unknownnode(n)

        self._verifiedbranches.add(branch)

    def _verifyall(self):
        """ verifies nodes of all the branches """
        needverification = set(self._entries.keys()) - self._verifiedbranches
        for b in needverification:
            self._verifybranch(b)

    def __iter__(self):
        return iter(self._entries)

    def __setitem__(self, key, value):
        self._entries[key] = value

    def __getitem__(self, key):
        self._verifybranch(key)
        return self._entries[key]

    def __contains__(self, key):
        self._verifybranch(key)
        return key in self._entries

    def iteritems(self):
        # validate each branch lazily, right before it is yielded, instead
        # of eagerly validating every branch up front
        for k, v in self._entries.iteritems():
            self._verifybranch(k)
            yield k, v

    def hasbranch(self, label):
        """ checks whether a branch of this name exists or not """
        self._verifybranch(label)
        return label in self._entries

    @classmethod
    def fromfile(cls, repo):
        # Returns a branchcache read from the on-disk cache file, or None if
        # the file is missing/unreadable or its cache key is stale.
        f = None
        try:
            f = repo.cachevfs(cls._filename(repo))
            lineiter = iter(f)
            cachekey = next(lineiter).rstrip('\n').split(" ", 2)
            last, lrev = cachekey[:2]
            last, lrev = bin(last), int(lrev)
            filteredhash = None
            hasnode = repo.changelog.hasnode
            if len(cachekey) > 2:
                filteredhash = bin(cachekey[2])
            bcache = cls(tipnode=last, tiprev=lrev, filteredhash=filteredhash,
                         hasnode=hasnode)
            if not bcache.validfor(repo):
                # invalidate the cache
                raise ValueError(r'tip differs')
            bcache.load(repo, lineiter)
        except (IOError, OSError):
            return None

        except Exception as inst:
            if repo.ui.debugflag:
                msg = 'invalid branchheads cache'
                if repo.filtername is not None:
                    msg += ' (%s)' % repo.filtername
                msg += ': %s\n'
                repo.ui.debug(msg % pycompat.bytestr(inst))
            bcache = None

        finally:
            if f:
                f.close()

        return bcache

    def load(self, repo, lineiter):
        """ fully loads the branchcache by reading from the file using the line
        iterator passed"""
        for line in lineiter:
            line = line.rstrip('\n')
            if not line:
                continue
            node, state, label = line.split(" ", 2)
            if state not in 'oc':
                raise ValueError(r'invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            self._entries.setdefault(label, []).append(node)
            if state == 'c':
                self._closednodes.add(node)

    @staticmethod
    def _filename(repo):
        """name of a branchcache file for a given repo or repoview"""
        filename = "branch2"
        if repo.filtername:
            filename = '%s-%s' % (filename, repo.filtername)
        return filename

    def validfor(self, repo):
        """Is the cache content valid regarding a repo

        - False when cached tipnode is unknown or if we detect a strip.
        - True when cache is up to date or a subset of current repo."""
        try:
            return ((self.tipnode == repo.changelog.node(self.tiprev))
                    and (self.filteredhash ==
                         scmutil.filteredhash(repo, self.tiprev)))
        except IndexError:
            # tiprev no longer exists: the repo was stripped
            return False

    def _branchtip(self, heads):
        '''Return tuple with last open head in heads and false,
        otherwise return last closed head and true.'''
        tip = heads[-1]
        closed = True
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        '''Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch.'''
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        # filter out heads that close their branch
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        self._verifybranch(branch)
        heads = self._entries[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        for bn, heads in self.iteritems():
            yield (bn, heads) + self._branchtip(heads)

    def iterheads(self):
        """ returns all the heads """
        self._verifyall()
        return self._entries.itervalues()

    def copy(self):
        """return an deep copy of the branchcache object"""
        return type(self)(
            self._entries, self.tipnode, self.tiprev, self.filteredhash,
            self._closednodes)

    def write(self, repo):
        # Persist the cache to disk; failures are logged and ignored since
        # the cache can always be recomputed.
        try:
            f = repo.cachevfs(self._filename(repo), "w", atomictemp=True)
            cachekey = [hex(self.tipnode), '%d' % self.tiprev]
            if self.filteredhash is not None:
                cachekey.append(hex(self.filteredhash))
            f.write(" ".join(cachekey) + '\n')
            nodecount = 0
            for label, nodes in sorted(self.iteritems()):
                label = encoding.fromlocal(label)
                for node in nodes:
                    nodecount += 1
                    if node in self._closednodes:
                        state = 'c'
                    else:
                        state = 'o'
                    f.write("%s %s %s\n" % (hex(node), state, label))
            f.close()
            repo.ui.log('branchcache',
                        'wrote %s branch cache with %d labels and %d nodes\n',
                        repo.filtername, len(self._entries), nodecount)
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug("couldn't write branch cache: %s\n" %
                          stringutil.forcebytestr(inst))

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        heads missing, this function updates self to be correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))

        # fetch current topological heads to speed up filtering
        topoheads = set(cl.headrevs())

        # if older branchheads are reachable from new ones, they aren't
        # really branchheads. Note checking parents is insufficient:
        # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
        for branch, newheadrevs in newbranches.iteritems():
            bheads = self._entries.setdefault(branch, [])
            bheadset = set(cl.rev(node) for node in bheads)

            # This have been tested True on all internal usage of this function.
            # run it again in case of doubt
            # assert not (set(bheadrevs) & set(newheadrevs))
            bheadset.update(newheadrevs)

            # This prunes out two kinds of heads - heads that are superseded by
            # a head in newheadrevs, and newheadrevs that are not heads because
            # an existing head is their descendant.
            uncertain = bheadset - topoheads
            if uncertain:
                floorrev = min(uncertain)
                ancestors = set(cl.ancestors(newheadrevs, floorrev))
                bheadset -= ancestors
            bheadrevs = sorted(bheadset)
            self[branch] = [cl.node(rev) for rev in bheadrevs]
            tiprev = bheadrevs[-1]
            if tiprev > self.tiprev:
                self.tipnode = cl.node(tiprev)
                self.tiprev = tiprev

        if not self.validfor(repo):
            # cache key are not valid anymore
            self.tipnode = nullid
            self.tiprev = nullrev
            for heads in self.iterheads():
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(repo, self.tiprev)

        duration = util.timer() - starttime
        repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                    repo.filtername or b'None', duration)

        self.write(repo)
434
435
435
436
class remotebranchcache(branchcache):
    """Branchmap info for a remote connection, should not write locally"""
    def write(self, repo):
        # intentionally a no-op: a remote's branchmap must never be
        # persisted into the local repository's cache files
        pass
440
441
441
442
442 # Revision branch info cache
443 # Revision branch info cache
443
444
444 _rbcversion = '-v1'
445 _rbcversion = '-v1'
445 _rbcnames = 'rbc-names' + _rbcversion
446 _rbcnames = 'rbc-names' + _rbcversion
446 _rbcrevs = 'rbc-revs' + _rbcversion
447 _rbcrevs = 'rbc-revs' + _rbcversion
447 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
448 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
448 _rbcrecfmt = '>4sI'
449 _rbcrecfmt = '>4sI'
449 _rbcrecsize = calcsize(_rbcrecfmt)
450 _rbcrecsize = calcsize(_rbcrecfmt)
450 _rbcnodelen = 4
451 _rbcnodelen = 4
451 _rbcbranchidxmask = 0x7fffffff
452 _rbcbranchidxmask = 0x7fffffff
452 _rbccloseflag = 0x80000000
453 _rbccloseflag = 0x80000000
453
454
454 class revbranchcache(object):
455 class revbranchcache(object):
455 """Persistent cache, mapping from revision number to branch name and close.
456 """Persistent cache, mapping from revision number to branch name and close.
456 This is a low level cache, independent of filtering.
457 This is a low level cache, independent of filtering.
457
458
458 Branch names are stored in rbc-names in internal encoding separated by 0.
459 Branch names are stored in rbc-names in internal encoding separated by 0.
459 rbc-names is append-only, and each branch name is only stored once and will
460 rbc-names is append-only, and each branch name is only stored once and will
460 thus have a unique index.
461 thus have a unique index.
461
462
462 The branch info for each revision is stored in rbc-revs as constant size
463 The branch info for each revision is stored in rbc-revs as constant size
463 records. The whole file is read into memory, but it is only 'parsed' on
464 records. The whole file is read into memory, but it is only 'parsed' on
464 demand. The file is usually append-only but will be truncated if repo
465 demand. The file is usually append-only but will be truncated if repo
465 modification is detected.
466 modification is detected.
466 The record for each revision contains the first 4 bytes of the
467 The record for each revision contains the first 4 bytes of the
467 corresponding node hash, and the record is only used if it still matches.
468 corresponding node hash, and the record is only used if it still matches.
468 Even a completely trashed rbc-revs fill thus still give the right result
469 Even a completely trashed rbc-revs fill thus still give the right result
469 while converging towards full recovery ... assuming no incorrectly matching
470 while converging towards full recovery ... assuming no incorrectly matching
470 node hashes.
471 node hashes.
471 The record also contains 4 bytes where 31 bits contains the index of the
472 The record also contains 4 bytes where 31 bits contains the index of the
472 branch and the last bit indicate that it is a branch close commit.
473 branch and the last bit indicate that it is a branch close commit.
473 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
474 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
474 and will grow with it but be 1/8th of its size.
475 and will grow with it but be 1/8th of its size.
475 """
476 """
476
477
    def __init__(self, repo, readonly=True):
        """Load the revision branch cache (rbc-names + rbc-revs) from disk.

        Read failures are tolerated: the cache then starts (partially)
        empty and is rebuilt on demand.  With readonly=True a missing
        names file makes branchinfo() bypass the cache entirely.
        """
        assert repo.filtername is None
        self._repo = repo
        self._names = [] # branch names in local encoding with static index
        self._rbcrevs = bytearray()
        self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
        try:
            bndata = repo.cachevfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata) # for verification before writing
            if bndata:
                self._names = [encoding.tolocal(bn)
                               for bn in bndata.split('\0')]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        if self._names:
            # only bother reading the revision records if we have names to
            # resolve the stored branch indices against
            try:
                data = repo.cachevfs.read(_rbcrevs)
                self._rbcrevs[:] = data
            except (IOError, OSError) as inst:
                repo.ui.debug("couldn't read revision branch cache: %s\n" %
                              stringutil.forcebytestr(inst))
        # remember number of good records on disk
        self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
                               len(repo.changelog))
        if self._rbcrevslen == 0:
            # no usable revision records: the loaded names are useless too
            self._names = []
        self._rbcnamescount = len(self._names) # number of names read at
                                               # _rbcsnameslen
508
509
509 def _clear(self):
510 def _clear(self):
510 self._rbcsnameslen = 0
511 self._rbcsnameslen = 0
511 del self._names[:]
512 del self._names[:]
512 self._rbcnamescount = 0
513 self._rbcnamescount = 0
513 self._rbcrevslen = len(self._repo.changelog)
514 self._rbcrevslen = len(self._repo.changelog)
514 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
515 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
515 util.clearcachedproperty(self, '_namesreverse')
516 util.clearcachedproperty(self, '_namesreverse')
516
517
517 @util.propertycache
518 @util.propertycache
518 def _namesreverse(self):
519 def _namesreverse(self):
519 return dict((b, r) for r, b in enumerate(self._names))
520 return dict((b, r) for r, b in enumerate(self._names))
520
521
521 def branchinfo(self, rev):
522 def branchinfo(self, rev):
522 """Return branch name and close flag for rev, using and updating
523 """Return branch name and close flag for rev, using and updating
523 persistent cache."""
524 persistent cache."""
524 changelog = self._repo.changelog
525 changelog = self._repo.changelog
525 rbcrevidx = rev * _rbcrecsize
526 rbcrevidx = rev * _rbcrecsize
526
527
527 # avoid negative index, changelog.read(nullrev) is fast without cache
528 # avoid negative index, changelog.read(nullrev) is fast without cache
528 if rev == nullrev:
529 if rev == nullrev:
529 return changelog.branchinfo(rev)
530 return changelog.branchinfo(rev)
530
531
531 # if requested rev isn't allocated, grow and cache the rev info
532 # if requested rev isn't allocated, grow and cache the rev info
532 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
533 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
533 return self._branchinfo(rev)
534 return self._branchinfo(rev)
534
535
535 # fast path: extract data from cache, use it if node is matching
536 # fast path: extract data from cache, use it if node is matching
536 reponode = changelog.node(rev)[:_rbcnodelen]
537 reponode = changelog.node(rev)[:_rbcnodelen]
537 cachenode, branchidx = unpack_from(
538 cachenode, branchidx = unpack_from(
538 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
539 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
539 close = bool(branchidx & _rbccloseflag)
540 close = bool(branchidx & _rbccloseflag)
540 if close:
541 if close:
541 branchidx &= _rbcbranchidxmask
542 branchidx &= _rbcbranchidxmask
542 if cachenode == '\0\0\0\0':
543 if cachenode == '\0\0\0\0':
543 pass
544 pass
544 elif cachenode == reponode:
545 elif cachenode == reponode:
545 try:
546 try:
546 return self._names[branchidx], close
547 return self._names[branchidx], close
547 except IndexError:
548 except IndexError:
548 # recover from invalid reference to unknown branch
549 # recover from invalid reference to unknown branch
549 self._repo.ui.debug("referenced branch names not found"
550 self._repo.ui.debug("referenced branch names not found"
550 " - rebuilding revision branch cache from scratch\n")
551 " - rebuilding revision branch cache from scratch\n")
551 self._clear()
552 self._clear()
552 else:
553 else:
553 # rev/node map has changed, invalidate the cache from here up
554 # rev/node map has changed, invalidate the cache from here up
554 self._repo.ui.debug("history modification detected - truncating "
555 self._repo.ui.debug("history modification detected - truncating "
555 "revision branch cache to revision %d\n" % rev)
556 "revision branch cache to revision %d\n" % rev)
556 truncate = rbcrevidx + _rbcrecsize
557 truncate = rbcrevidx + _rbcrecsize
557 del self._rbcrevs[truncate:]
558 del self._rbcrevs[truncate:]
558 self._rbcrevslen = min(self._rbcrevslen, truncate)
559 self._rbcrevslen = min(self._rbcrevslen, truncate)
559
560
560 # fall back to slow path and make sure it will be written to disk
561 # fall back to slow path and make sure it will be written to disk
561 return self._branchinfo(rev)
562 return self._branchinfo(rev)
562
563
563 def _branchinfo(self, rev):
564 def _branchinfo(self, rev):
564 """Retrieve branch info from changelog and update _rbcrevs"""
565 """Retrieve branch info from changelog and update _rbcrevs"""
565 changelog = self._repo.changelog
566 changelog = self._repo.changelog
566 b, close = changelog.branchinfo(rev)
567 b, close = changelog.branchinfo(rev)
567 if b in self._namesreverse:
568 if b in self._namesreverse:
568 branchidx = self._namesreverse[b]
569 branchidx = self._namesreverse[b]
569 else:
570 else:
570 branchidx = len(self._names)
571 branchidx = len(self._names)
571 self._names.append(b)
572 self._names.append(b)
572 self._namesreverse[b] = branchidx
573 self._namesreverse[b] = branchidx
573 reponode = changelog.node(rev)
574 reponode = changelog.node(rev)
574 if close:
575 if close:
575 branchidx |= _rbccloseflag
576 branchidx |= _rbccloseflag
576 self._setcachedata(rev, reponode, branchidx)
577 self._setcachedata(rev, reponode, branchidx)
577 return b, close
578 return b, close
578
579
579 def setdata(self, branch, rev, node, close):
580 def setdata(self, branch, rev, node, close):
580 """add new data information to the cache"""
581 """add new data information to the cache"""
581 if branch in self._namesreverse:
582 if branch in self._namesreverse:
582 branchidx = self._namesreverse[branch]
583 branchidx = self._namesreverse[branch]
583 else:
584 else:
584 branchidx = len(self._names)
585 branchidx = len(self._names)
585 self._names.append(branch)
586 self._names.append(branch)
586 self._namesreverse[branch] = branchidx
587 self._namesreverse[branch] = branchidx
587 if close:
588 if close:
588 branchidx |= _rbccloseflag
589 branchidx |= _rbccloseflag
589 self._setcachedata(rev, node, branchidx)
590 self._setcachedata(rev, node, branchidx)
590 # If no cache data were readable (non exists, bad permission, etc)
591 # If no cache data were readable (non exists, bad permission, etc)
591 # the cache was bypassing itself by setting:
592 # the cache was bypassing itself by setting:
592 #
593 #
593 # self.branchinfo = self._branchinfo
594 # self.branchinfo = self._branchinfo
594 #
595 #
595 # Since we now have data in the cache, we need to drop this bypassing.
596 # Since we now have data in the cache, we need to drop this bypassing.
596 if r'branchinfo' in vars(self):
597 if r'branchinfo' in vars(self):
597 del self.branchinfo
598 del self.branchinfo
598
599
599 def _setcachedata(self, rev, node, branchidx):
600 def _setcachedata(self, rev, node, branchidx):
600 """Writes the node's branch data to the in-memory cache data."""
601 """Writes the node's branch data to the in-memory cache data."""
601 if rev == nullrev:
602 if rev == nullrev:
602 return
603 return
603 rbcrevidx = rev * _rbcrecsize
604 rbcrevidx = rev * _rbcrecsize
604 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
605 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
605 self._rbcrevs.extend('\0' *
606 self._rbcrevs.extend('\0' *
606 (len(self._repo.changelog) * _rbcrecsize -
607 (len(self._repo.changelog) * _rbcrecsize -
607 len(self._rbcrevs)))
608 len(self._rbcrevs)))
608 pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
609 pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
609 self._rbcrevslen = min(self._rbcrevslen, rev)
610 self._rbcrevslen = min(self._rbcrevslen, rev)
610
611
611 tr = self._repo.currenttransaction()
612 tr = self._repo.currenttransaction()
612 if tr:
613 if tr:
613 tr.addfinalize('write-revbranchcache', self.write)
614 tr.addfinalize('write-revbranchcache', self.write)
614
615
    def write(self, tr=None):
        """Save branch cache if it is dirty.

        Appends to the on-disk rbc-names/rbc-revs files when the files
        still match what was read at load time, and rewrites them from
        scratch otherwise.  All failures (I/O errors, lock contention,
        aborts) are swallowed and only reported as debug output: the
        cache is best-effort by design.
        """
        repo = self._repo
        wlock = None
        step = ''  # names which phase failed, for the debug message below
        try:
            if self._rbcnamescount < len(self._names):
                # new branch names were added since load: persist them
                step = ' names'
                wlock = repo.wlock(wait=False)
                if self._rbcnamescount != 0:
                    f = repo.cachevfs.open(_rbcnames, 'ab')
                    if f.tell() == self._rbcsnameslen:
                        # file unchanged since load: append separator + names
                        f.write('\0')
                    else:
                        # file was modified behind our back: rewrite both
                        # caches from scratch
                        f.close()
                        repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                        self._rbcnamescount = 0
                        self._rbcrevslen = 0
                if self._rbcnamescount == 0:
                    # before rewriting names, make sure references are removed
                    repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                    f = repo.cachevfs.open(_rbcnames, 'wb')
                f.write('\0'.join(encoding.fromlocal(b)
                                  for b in self._names[self._rbcnamescount:]))
                self._rbcsnameslen = f.tell()
                f.close()
                self._rbcnamescount = len(self._names)

            # persist revision records past the last known-good one
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = ''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                # only write records that have a corresponding changelog rev
                revs = min(len(repo.changelog),
                           len(self._rbcrevs) // _rbcrecsize)
                f = repo.cachevfs.open(_rbcrevs, 'ab')
                if f.tell() != start:
                    repo.ui.debug("truncating cache/%s to %d\n"
                                  % (_rbcrevs, start))
                    f.seek(start)
                    if f.tell() != start:
                        # seek failed (file shorter than expected):
                        # rewrite everything from the beginning
                        start = 0
                        f.seek(start)
                    f.truncate()
                end = revs * _rbcrecsize
                f.write(self._rbcrevs[start:end])
                f.close()
                self._rbcrevslen = revs
        except (IOError, OSError, error.Abort, error.LockError) as inst:
            repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                          % (step, stringutil.forcebytestr(inst)))
        finally:
            if wlock is not None:
                wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now