mmap: only use mmap to read rev-branch-cache data if it is safe...
marmoute
r52547:c0e30a01 stable
@@ -1,1412 +1,1413
# branchmap.py - logic to compute, maintain and store the branchmap for a local repo
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import struct

from .node import (
    bin,
    hex,
    nullrev,
)

from typing import (
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Optional,
    Set,
    TYPE_CHECKING,
    Tuple,
    Union,
    cast,
)

from . import (
    encoding,
    error,
    obsolete,
    scmutil,
    util,
)

from .utils import (
    repoviewutil,
    stringutil,
)

if TYPE_CHECKING:
    from . import localrepo

    assert [localrepo]

subsettable = repoviewutil.subsettable

calcsize = struct.calcsize
pack_into = struct.pack_into
unpack_from = struct.unpack_from


class BranchMapCache:
    """mapping of filtered views of repo with their branchcache"""

    def __init__(self):
        self._per_filter = {}

    def __getitem__(self, repo):
        self.updatecache(repo)
        bcache = self._per_filter[repo.filtername]
        bcache._ensure_populated(repo)
        assert bcache._filtername == repo.filtername, (
            bcache._filtername,
            repo.filtername,
        )
        return bcache
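
    # Illustrative usage sketch (added note, not part of the original
    # file): callers normally go through this mapping instead of building
    # caches by hand, roughly:
    #
    #     heads = repo._branchcaches[repo].branchheads(b'default')
    #
    # The `_branchcaches` attribute name is an assumption about the
    # surrounding localrepo code, which is not shown in this hunk.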

    def update_disk(self, repo, detect_pure_topo=False):
        """ensure an up-to-date cache is (or will be) written on disk

        The cache for this repository view is updated if needed and written on
        disk.

        If a transaction is in progress, the write is scheduled at
        transaction close. See the `BranchMapCache.write_dirty` method.

        This method exists independently of __getitem__ as it is sometimes
        useful to signal that we have no intent to use the data in memory yet.
        """
        self.updatecache(repo)
        bcache = self._per_filter[repo.filtername]
        assert bcache._filtername == repo.filtername, (
            bcache._filtername,
            repo.filtername,
        )
        if detect_pure_topo:
            bcache._detect_pure_topo(repo)
        tr = repo.currenttransaction()
        if getattr(tr, 'finalized', True):
            bcache.sync_disk(repo)

    def updatecache(self, repo):
        """Update the cache for the given filtered view on a repository"""
        # This can trigger updates for the caches for subsets of the filtered
        # view, e.g. when there is no cache for this filtered view or the cache
        # is stale.

        cl = repo.changelog
        filtername = repo.filtername
        bcache = self._per_filter.get(filtername)
        if bcache is None or not bcache.validfor(repo):
            # cache object missing or cache object stale? Read from disk
            bcache = branch_cache_from_file(repo)

        revs = []
        if bcache is None:
            # no (fresh) cache available anymore, perhaps we can re-use
            # the cache for a subset, then extend that to add info on missing
            # revisions.
            subsetname = subsettable.get(filtername)
            if subsetname is not None:
                subset = repo.filtered(subsetname)
                self.updatecache(subset)
                bcache = self._per_filter[subset.filtername].inherit_for(repo)
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= bcache.tiprev)
            else:
                # nothing to fall back on, start empty.
                bcache = new_branch_cache(repo)

        revs.extend(cl.revs(start=bcache.tiprev + 1))
        if revs:
            bcache.update(repo, revs)

        assert bcache.validfor(repo), filtername
        self._per_filter[repo.filtername] = bcache
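
    # Note added for clarity: `subsettable` maps a filter level to a
    # smaller "subset" view (for instance `visible` -> `served`), so a
    # cache miss at one level can recursively warm the subset's cache,
    # inherit it, and then replay only the revisions that the subset
    # filtered out (the `extrarevs` computation above).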

    def replace(self, repo, remotebranchmap):
        """Replace the branchmap cache for a repo with a branch mapping.

        This is likely only called during clone with a branch map from a
        remote.

        """
        cl = repo.changelog
        clrev = cl.rev
        clbranchinfo = cl.branchinfo
        rbheads = []
        closed = set()
        for bheads in remotebranchmap.values():
            rbheads += bheads
            for h in bheads:
                r = clrev(h)
                b, c = clbranchinfo(r)
                if c:
                    closed.add(h)

        if rbheads:
            rtiprev = max((int(clrev(node)) for node in rbheads))
            cache = new_branch_cache(
                repo,
                remotebranchmap,
                repo[rtiprev].node(),
                rtiprev,
                closednodes=closed,
            )

            # Try to stick it as low as possible
            # filters above served are unlikely to be fetched from a clone
            for candidate in (b'base', b'immutable', b'served'):
                rview = repo.filtered(candidate)
                if cache.validfor(rview):
                    cache._filtername = candidate
                    self._per_filter[candidate] = cache
                    cache._state = STATE_DIRTY
                    cache.write(rview)
                    return

    def clear(self):
        self._per_filter.clear()

    def write_dirty(self, repo):
        unfi = repo.unfiltered()
        for filtername in repoviewutil.get_ordered_subset():
            cache = self._per_filter.get(filtername)
            if cache is None:
                continue
            if filtername is None:
                repo = unfi
            else:
                repo = unfi.filtered(filtername)
            cache.sync_disk(repo)


def _unknownnode(node):
    """raises ValueError when branchcache finds a node which does not exist"""
    raise ValueError('node %s does not exist' % node.hex())


def _branchcachedesc(repo):
    if repo.filtername is not None:
        return b'branch cache (%s)' % repo.filtername
    else:
        return b'branch cache'


class _BaseBranchCache:
    """A dict-like object that holds the branch heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.
    """

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closed_nodes: Optional[Set[bytes]] = None,
    ) -> None:
        """hasnode is a function which can be used to verify whether changelog
        has a given node or not. If it's not provided, we assume that every node
        we have exists in changelog"""
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closed_nodes is None:
            closed_nodes = set()
        self._closednodes = set(closed_nodes)
        self._entries = dict(entries)

    def __iter__(self):
        return iter(self._entries)

    def __setitem__(self, key, value):
        self._entries[key] = value

    def __getitem__(self, key):
        return self._entries[key]

    def __contains__(self, key):
        return key in self._entries

    def iteritems(self):
        return self._entries.items()

    items = iteritems

    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        return label in self._entries

    def _branchtip(self, heads):
        """Return tuple with last open head in heads and false,
        otherwise return last closed head and true."""
        tip = heads[-1]
        closed = True
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed
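
    # Illustrative example (added note): with heads == [n1, n2, n3] and
    # only n3 in self._closednodes, the reverse scan skips n3 and returns
    # (n2, False); if every head were closed it would return (n3, True).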

    def branchtip(self, branch):
        """Return the tipmost open head on branch, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch."""
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        heads = self._entries[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        for bn, heads in self.items():
            yield (bn, heads) + self._branchtip(heads)

    def iterheads(self):
        """returns all the heads"""
        return self._entries.values()

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of revisions that is strictly a
        superset of the missing heads, this function updates self to be
        correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # Faster than using ctx.obsolete()
        obsrevs = obsolete.getrevs(repo, b'obsolete')
        # collect new branch entries
        newbranches = {}
        new_closed = set()
        obs_ignored = set()
        getbranchinfo = repo.revbranchcache().branchinfo
        max_rev = -1
        for r in revgen:
            max_rev = max(max_rev, r)
            if r in obsrevs:
                # We ignore obsolete changesets as they shouldn't be
                # considered heads.
                obs_ignored.add(r)
                continue
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                new_closed.add(r)
        if max_rev < 0:
            msg = "running branchcache.update without revision to update"
            raise error.ProgrammingError(msg)

        self._process_new(
            repo,
            newbranches,
            new_closed,
            obs_ignored,
            max_rev,
        )

        self._closednodes.update(cl.node(rev) for rev in new_closed)

        duration = util.timer() - starttime
        repo.ui.log(
            b'branchcache',
            b'updated %s in %.4f seconds\n',
            _branchcachedesc(repo),
            duration,
        )
        return max_rev
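
    # Typical call shape (added note): `BranchMapCache.updatecache` feeds
    # this method every revision past the cached tip, roughly
    #
    #     bcache.update(repo, cl.revs(start=bcache.tiprev + 1))
    #
    # so `revgen` is expected to yield revision numbers, not nodes.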

    def _process_new(
        self,
        repo,
        newbranches,
        new_closed,
        obs_ignored,
        max_rev,
    ):
        """update the branchmap from a set of new information"""
        # Delay fetching the topological heads until they are needed.
        # A repository without non-contiguous branches can skip this part.
        topoheads = None

        cl = repo.changelog
        getbranchinfo = repo.revbranchcache().branchinfo
        # Faster than using ctx.obsolete()
        obsrevs = obsolete.getrevs(repo, b'obsolete')

        # If a changeset is visible, its parents must be visible too, so
        # use the faster unfiltered parent accessor.
        parentrevs = cl._uncheckedparentrevs

        for branch, newheadrevs in newbranches.items():
            # For every branch, compute the new branchheads.
            # A branchhead is a revision such that no descendant is on
            # the same branch.
            #
            # The branchheads are computed iteratively in revision order.
            # This ensures topological order, i.e. parents are processed
            # before their children. Ancestors are inclusive here, i.e.
            # any revision is an ancestor of itself.
            #
            # Core observations:
            # - The current revision is always a branchhead for the
            #   repository up to that point.
            # - It is the first revision of the branch if and only if
            #   there was no branchhead before. In that case, it is the
            #   only branchhead as there are no possible ancestors on
            #   the same branch.
            # - If a parent is on the same branch, a branchhead can
            #   only be an ancestor of that parent if it is the parent
            #   itself. Otherwise it would have been removed as an
            #   ancestor of that parent before.
            # - Therefore, if all parents are on the same branch, they
            #   can just be removed from the branchhead set.
            # - If one parent is on the same branch and the other is not
            #   and there was exactly one branchhead known, the existing
            #   branchhead can only be an ancestor if it is the parent.
            #   Otherwise it would have been removed as an ancestor of
            #   the parent before. The other parent therefore can't have
            #   a branchhead as ancestor.
            # - In all other cases, the parents on different branches
            #   could have a branchhead as ancestor. Those parents are
            #   kept in the "uncertain" set. If all branchheads are also
            #   topological heads, they can't have descendants and further
            #   checks can be skipped. Otherwise, the ancestors of the
            #   "uncertain" set are removed from branchheads.
            #   This computation is heavy and avoided if at all possible.
            bheads = self._entries.get(branch, [])
            bheadset = {cl.rev(node) for node in bheads}
            uncertain = set()
            for newrev in sorted(newheadrevs):
                if not bheadset:
                    bheadset.add(newrev)
                    continue

                parents = [p for p in parentrevs(newrev) if p != nullrev]
                samebranch = set()
                otherbranch = set()
                obsparents = set()
                for p in parents:
                    if p in obsrevs:
                        # We ignored this obsolete changeset earlier, but now
                        # that it has non-ignored children, we need to make
                        # sure their ancestors are not considered heads. To
                        # achieve that, we will simply treat this obsolete
                        # changeset as a parent from other branch.
                        obsparents.add(p)
                    elif p in bheadset or getbranchinfo(p)[0] == branch:
                        samebranch.add(p)
                    else:
                        otherbranch.add(p)
                if not (len(bheadset) == len(samebranch) == 1):
                    uncertain.update(otherbranch)
                    uncertain.update(obsparents)
                bheadset.difference_update(samebranch)
                bheadset.add(newrev)

            if uncertain:
                if topoheads is None:
                    topoheads = set(cl.headrevs())
                if bheadset - topoheads:
                    floorrev = min(bheadset)
                    if floorrev <= max(uncertain):
                        ancestors = set(cl.ancestors(uncertain, floorrev))
                        bheadset -= ancestors
            if bheadset:
                self[branch] = [cl.node(rev) for rev in sorted(bheadset)]


STATE_CLEAN = 1
STATE_INHERITED = 2
STATE_DIRTY = 3
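
# State lifecycle (note added for clarity): a cache starts CLEAN (in sync
# with its on-disk file), becomes INHERITED when copied from a subset view
# via `inherit_for` (its own on-disk file, if any, is then stale), and
# becomes DIRTY once updated in memory. `sync_disk` below writes DIRTY
# caches and deletes the stale file of INHERITED ones.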


class _LocalBranchCache(_BaseBranchCache):
    """base class of branch-map info for a local repo or repoview"""

    _base_filename = None
    _default_key_hashes: Tuple[bytes] = cast(Tuple[bytes], ())

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        tipnode: Optional[bytes] = None,
        tiprev: Optional[int] = nullrev,
        key_hashes: Optional[Tuple[bytes]] = None,
        closednodes: Optional[Set[bytes]] = None,
        hasnode: Optional[Callable[[bytes], bool]] = None,
        verify_node: bool = False,
        inherited: bool = False,
    ) -> None:
        """hasnode is a function which can be used to verify whether changelog
        has a given node or not. If it's not provided, we assume that every node
        we have exists in changelog"""
        self._filtername = repo.filtername
        if tipnode is None:
            self.tipnode = repo.nullid
        else:
            self.tipnode = tipnode
        self.tiprev = tiprev
        if key_hashes is None:
            self.key_hashes = self._default_key_hashes
        else:
            self.key_hashes = key_hashes
        self._state = STATE_CLEAN
        if inherited:
            self._state = STATE_INHERITED

        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.

        # Do we need to verify branch nodes at all?
        self._verify_node = verify_node
        # branches for which nodes are verified
        self._verifiedbranches = set()
        self._hasnode = None
        if self._verify_node:
            self._hasnode = repo.changelog.hasnode

    def _compute_key_hashes(self, repo) -> Tuple[bytes]:
        raise NotImplementedError

    def _ensure_populated(self, repo):
        """make sure any lazily loaded values are fully populated"""

    def _detect_pure_topo(self, repo) -> None:
        pass

    def validfor(self, repo):
        """check that cache contents are valid for (a subset of) this repo

        - False when the order of changesets changed or if we detect a strip.
        - True when cache is up-to-date for the current repo or its subset."""
        try:
            node = repo.changelog.node(self.tiprev)
        except IndexError:
            # changesets were stripped and now we don't even have enough to
            # find tiprev
            return False
        if self.tipnode != node:
            # tiprev doesn't correspond to tipnode: repo was stripped, or this
            # repo has a different order of changesets
            return False
        repo_key_hashes = self._compute_key_hashes(repo)
        # hashes don't match if this repo view has a different set of filtered
        # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
        # history was rewritten)
        return self.key_hashes == repo_key_hashes

    @classmethod
    def fromfile(cls, repo):
        f = None
        try:
            f = repo.cachevfs(cls._filename(repo))
            lineiter = iter(f)
            init_kwargs = cls._load_header(repo, lineiter)
            bcache = cls(
                repo,
                verify_node=True,
                **init_kwargs,
            )
            if not bcache.validfor(repo):
                # invalidate the cache
                raise ValueError('tip differs')
            bcache._load_heads(repo, lineiter)
        except (IOError, OSError):
            return None

        except Exception as inst:
            if repo.ui.debugflag:
                msg = b'invalid %s: %s\n'
                msg %= (
                    _branchcachedesc(repo),
                    stringutil.forcebytestr(inst),
                )
                repo.ui.debug(msg)
            bcache = None

        finally:
            if f:
                f.close()

        return bcache
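
    # Note added for clarity: returning None here, whether the file is
    # missing, unreadable, or fails validation, signals "no usable cache"
    # to `branch_cache_from_file`, and `BranchMapCache.updatecache` then
    # rebuilds from a subset view or from scratch.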

    @classmethod
    def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
        raise NotImplementedError

    def _load_heads(self, repo, lineiter):
        """fully loads the branchcache by reading from the file using the line
        iterator passed"""
        for line in lineiter:
            line = line.rstrip(b'\n')
            if not line:
                continue
            node, state, label = line.split(b" ", 2)
            if state not in b'oc':
                raise ValueError('invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            self._entries.setdefault(label, []).append(node)
            if state == b'c':
                self._closednodes.add(node)

    @classmethod
    def _filename(cls, repo):
        """name of a branchcache file for a given repo or repoview"""
        filename = cls._base_filename
        assert filename is not None
        if repo.filtername:
            filename = b'%s-%s' % (filename, repo.filtername)
        return filename
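
    # Example (added note): for `BranchCacheV2` on the `served` repoview
    # this yields b'branch2-served'; on an unfiltered repo (filtername is
    # None) it stays b'branch2'.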

    def inherit_for(self, repo):
        """return a deep copy of the branchcache object"""
        assert repo.filtername != self._filtername
        other = type(self)(
            repo=repo,
            # we always do a shallow copy of self._entries, and the values
            # are always replaced, so no need to deepcopy as long as the
            # above remains true.
            entries=self._entries,
            tipnode=self.tipnode,
            tiprev=self.tiprev,
            key_hashes=self.key_hashes,
            closednodes=set(self._closednodes),
            verify_node=self._verify_node,
            inherited=True,
        )
        # also copy information about the current verification state
        other._verifiedbranches = set(self._verifiedbranches)
        return other

    def sync_disk(self, repo):
        """synchronise the on disk file with the cache state

        If new values specific to this filter level need to be written, the
        file will be updated; if the state of the branchcache is inherited
        from a subset, any stale on-disk file will be deleted.

        This method does nothing if there is nothing to do.
        """
        if self._state == STATE_DIRTY:
            self.write(repo)
        elif self._state == STATE_INHERITED:
            filename = self._filename(repo)
            repo.cachevfs.tryunlink(filename)

    def write(self, repo):
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        assert self._state == STATE_DIRTY, self._state
        # This method should not be called during an open transaction
        tr = repo.currenttransaction()
        if not getattr(tr, 'finalized', True):
            msg = "writing branchcache in the middle of a transaction"
            raise error.ProgrammingError(msg)
        try:
            filename = self._filename(repo)
            with repo.cachevfs(filename, b"w", atomictemp=True) as f:
                self._write_header(f)
                nodecount = self._write_heads(repo, f)
            repo.ui.log(
                b'branchcache',
                b'wrote %s with %d labels and %d nodes\n',
                _branchcachedesc(repo),
                len(self._entries),
                nodecount,
            )
            self._state = STATE_CLEAN
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug(
                b"couldn't write branch cache: %s\n"
                % stringutil.forcebytestr(inst)
            )

    def _write_header(self, fp) -> None:
        raise NotImplementedError

    def _write_heads(self, repo, fp) -> int:
        """write list of heads to a file

        Return the number of heads written."""
        nodecount = 0
        for label, nodes in sorted(self._entries.items()):
            label = encoding.fromlocal(label)
            for node in nodes:
                nodecount += 1
                if node in self._closednodes:
                    state = b'c'
                else:
                    state = b'o'
                fp.write(b"%s %s %s\n" % (hex(node), state, label))
        return nodecount

    def _verifybranch(self, branch):
        """verify head nodes for the given branch."""
        if not self._verify_node:
            return
        if branch not in self._entries or branch in self._verifiedbranches:
            return
        assert self._hasnode is not None
        for n in self._entries[branch]:
            if not self._hasnode(n):
                _unknownnode(n)

        self._verifiedbranches.add(branch)

    def _verifyall(self):
        """verifies nodes of all the branches"""
        for b in self._entries.keys():
            if b not in self._verifiedbranches:
                self._verifybranch(b)

    def __getitem__(self, key):
        self._verifybranch(key)
        return super().__getitem__(key)

    def __contains__(self, key):
        self._verifybranch(key)
        return super().__contains__(key)

    def iteritems(self):
        self._verifyall()
        return super().iteritems()

    items = iteritems

    def iterheads(self):
        """returns all the heads"""
        self._verifyall()
        return super().iterheads()

    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        self._verifybranch(label)
        return super().hasbranch(label)

    def branchheads(self, branch, closed=False):
        self._verifybranch(branch)
        return super().branchheads(branch, closed=closed)

    def update(self, repo, revgen):
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        cl = repo.changelog
        max_rev = super().update(repo, revgen)
        # new tip revision which we found after iterating items from new
        # branches
        if max_rev is not None and max_rev > self.tiprev:
            self.tiprev = max_rev
            self.tipnode = cl.node(max_rev)
        else:
            # We should not be here if this is false
            assert cl.node(self.tiprev) == self.tipnode

        if not self.validfor(repo):
            # the tiprev and tipnode should be aligned, so if the current repo
            # is not seen as valid this is because the old cache key is now
            # invalid for the repo.
            #
            # However, we've just updated the cache and we assume it's valid,
            # so let's make the cache key valid as well by recomputing it from
            # the cached data
            self.key_hashes = self._compute_key_hashes(repo)
            self.filteredhash = scmutil.combined_filtered_and_obsolete_hash(
                repo,
                self.tiprev,
            )

        self._state = STATE_DIRTY
        tr = repo.currenttransaction()
        if getattr(tr, 'finalized', True):
            # Avoid premature writing.
            #
            # (The cache warming setup by localrepo will update the file later.)
            self.write(repo)


def branch_cache_from_file(repo) -> Optional[_LocalBranchCache]:
    """Build a branch cache from on-disk data if possible

    Return a branch cache of the right format depending on the repository.
    """
    if repo.ui.configbool(b"experimental", b"branch-cache-v3"):
        return BranchCacheV3.fromfile(repo)
    else:
        return BranchCacheV2.fromfile(repo)


def new_branch_cache(repo, *args, **kwargs):
    """Build a new branch cache from arguments

    Return a branch cache of the right format depending on the repository.
    """
    if repo.ui.configbool(b"experimental", b"branch-cache-v3"):
        return BranchCacheV3(repo, *args, **kwargs)
    else:
        return BranchCacheV2(repo, *args, **kwargs)
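
# Note added for clarity: both helpers are gated on the same knob, so a
# repository opts into the experimental v3 format with
#
#   [experimental]
#   branch-cache-v3 = yes
#
# in its configuration; otherwise the stable v2 files are read and written.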


class BranchCacheV2(_LocalBranchCache):
    """a branch cache using version 2 of the format on disk

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered and obsolete revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    _base_filename = b"branch2"

    @classmethod
    def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
        """parse the head of a branchmap file

        return parameters to pass to a newly created class instance.
        """
        cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
        last, lrev = cachekey[:2]
        last, lrev = bin(last), int(lrev)
        filteredhash = ()
        if len(cachekey) > 2:
            filteredhash = (bin(cachekey[2]),)
        return {
            "tipnode": last,
            "tiprev": lrev,
            "key_hashes": filteredhash,
        }

    def _write_header(self, fp) -> None:
        """write the branch cache header to a file"""
        cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
        if self.key_hashes:
            cachekey.append(hex(self.key_hashes[0]))
        fp.write(b" ".join(cachekey) + b'\n')

    def _compute_key_hashes(self, repo) -> Tuple[bytes]:
        """return the cache key hashes that match this repoview state"""
        filtered_hash = scmutil.combined_filtered_and_obsolete_hash(
            repo,
            self.tiprev,
            needobsolete=True,
        )
        keys: Tuple[bytes] = cast(Tuple[bytes], ())
        if filtered_hash is not None:
            keys: Tuple[bytes] = (filtered_hash,)
        return keys


class BranchCacheV3(_LocalBranchCache):
    """a branch cache using version 3 of the format on disk

    This version is still EXPERIMENTAL and the format is subject to changes.

    The cache is serialized on disk in the following format:

    <cache-key-xxx>=<xxx-value> <cache-key-yyy>=<yyy-value> […]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. It is a
    series of key-value pairs. The following keys are recognized:

    - tip-rev: the rev-num of the tip-most revision seen by this cache
    - tip-node: the node-id of the tip-most revision seen by this cache
    - filtered-hash: the hash of all filtered revisions (before tip-rev)
      ignored by this cache.
    - obsolete-hash: the hash of all non-filtered obsolete revisions (before
      tip-rev) ignored by this cache.

    The tip-rev is used to know how far behind the values in the file are
    compared to the current repository state.

    The tip-node, filtered-hash and obsolete-hash are used to detect if this
    cache can be used for this repository state at all.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.

    Topological heads are not included in the listing and should be dispatched
    on the right branch at read time. Obsolete topological heads should be
    ignored.
    """
869
869
870 _base_filename = b"branch3-exp"
870 _base_filename = b"branch3-exp"
871 _default_key_hashes = (None, None)
871 _default_key_hashes = (None, None)
872
872
873 def __init__(self, *args, pure_topo_branch=None, **kwargs):
873 def __init__(self, *args, pure_topo_branch=None, **kwargs):
874 super().__init__(*args, **kwargs)
874 super().__init__(*args, **kwargs)
875 self._pure_topo_branch = pure_topo_branch
875 self._pure_topo_branch = pure_topo_branch
876 self._needs_populate = self._pure_topo_branch is not None
876 self._needs_populate = self._pure_topo_branch is not None
877
877
878 def inherit_for(self, repo):
878 def inherit_for(self, repo):
879 new = super().inherit_for(repo)
879 new = super().inherit_for(repo)
880 new._pure_topo_branch = self._pure_topo_branch
880 new._pure_topo_branch = self._pure_topo_branch
881 new._needs_populate = self._needs_populate
881 new._needs_populate = self._needs_populate
882 return new
882 return new
883
883
884 def _get_topo_heads(self, repo):
884 def _get_topo_heads(self, repo):
885 """returns the topological head of a repoview content up to self.tiprev"""
885 """returns the topological head of a repoview content up to self.tiprev"""
886 cl = repo.changelog
886 cl = repo.changelog
887 if self.tiprev == nullrev:
887 if self.tiprev == nullrev:
888 return []
888 return []
889 elif self.tiprev == cl.tiprev():
889 elif self.tiprev == cl.tiprev():
890 return cl.headrevs()
890 return cl.headrevs()
891 else:
891 else:
892 # XXX passing tiprev as ceiling of cl.headrevs could be faster
892 # XXX passing tiprev as ceiling of cl.headrevs could be faster
893 heads = cl.headrevs(cl.revs(stop=self.tiprev))
893 heads = cl.headrevs(cl.revs(stop=self.tiprev))
894 return heads
894 return heads

    def _write_header(self, fp) -> None:
        cache_keys = {
            b"tip-node": hex(self.tipnode),
            b"tip-rev": b'%d' % self.tiprev,
        }
        if self.key_hashes:
            if self.key_hashes[0] is not None:
                cache_keys[b"filtered-hash"] = hex(self.key_hashes[0])
            if self.key_hashes[1] is not None:
                cache_keys[b"obsolete-hash"] = hex(self.key_hashes[1])
        if self._pure_topo_branch is not None:
            cache_keys[b"topo-mode"] = b"pure"
        pieces = (b"%s=%s" % i for i in sorted(cache_keys.items()))
        fp.write(b" ".join(pieces) + b'\n')
        if self._pure_topo_branch is not None:
            label = encoding.fromlocal(self._pure_topo_branch)
            fp.write(label + b'\n')

    def _write_heads(self, repo, fp) -> int:
        """write list of heads to a file

        Return the number of heads written."""
        nodecount = 0
        topo_heads = None
        if self._pure_topo_branch is None:
            topo_heads = set(self._get_topo_heads(repo))
        to_rev = repo.changelog.index.rev
        for label, nodes in sorted(self._entries.items()):
            if label == self._pure_topo_branch:
                # no need to write anything, the header took care of that
                continue
            label = encoding.fromlocal(label)
            for node in nodes:
                if topo_heads is not None:
                    rev = to_rev(node)
                    if rev in topo_heads:
                        continue
                if node in self._closednodes:
                    state = b'c'
                else:
                    state = b'o'
                nodecount += 1
                fp.write(b"%s %s %s\n" % (hex(node), state, label))
        return nodecount

    @classmethod
    def _load_header(cls, repo, lineiter):
        header_line = next(lineiter)
        pieces = header_line.rstrip(b'\n').split(b" ")
        cache_keys = dict(p.split(b'=', 1) for p in pieces)

        args = {}
        filtered_hash = None
        obsolete_hash = None
        has_pure_topo_heads = False
        for k, v in cache_keys.items():
            if k == b"tip-rev":
                args["tiprev"] = int(v)
            elif k == b"tip-node":
                args["tipnode"] = bin(v)
            elif k == b"filtered-hash":
                filtered_hash = bin(v)
            elif k == b"obsolete-hash":
                obsolete_hash = bin(v)
            elif k == b"topo-mode":
                if v == b"pure":
                    has_pure_topo_heads = True
                else:
                    msg = b"unknown topo-mode: %r" % v
                    raise ValueError(msg)
            else:
                msg = b"unknown cache key: %r" % k
                raise ValueError(msg)
        args["key_hashes"] = (filtered_hash, obsolete_hash)
        if has_pure_topo_heads:
            pure_line = next(lineiter).rstrip(b'\n')
            args["pure_topo_branch"] = encoding.tolocal(pure_line)
        return args
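
    # For illustration (hypothetical values): a header line such as
    #
    #   tip-node=<40 hex digits> tip-rev=42 topo-mode=pure
    #
    # followed by a line holding the branch name (say, "default") parses into
    #
    #   {"tiprev": 42, "tipnode": <20-byte node>,
    #    "key_hashes": (None, None), "pure_topo_branch": b"default"}
    #
    # which the loading machinery feeds back as constructor arguments.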

    def _load_heads(self, repo, lineiter):
        """fully loads the branchcache by reading from the file using the line
        iterator passed"""
        super()._load_heads(repo, lineiter)
        if self._pure_topo_branch is not None:
            # no need to read the repository heads, we know their value already.
            return
        cl = repo.changelog
        getbranchinfo = repo.revbranchcache().branchinfo
        obsrevs = obsolete.getrevs(repo, b'obsolete')
        to_node = cl.node
        touched_branch = set()
        for head in self._get_topo_heads(repo):
            if head in obsrevs:
                continue
            node = to_node(head)
            branch, closed = getbranchinfo(head)
            self._entries.setdefault(branch, []).append(node)
            if closed:
                self._closednodes.add(node)
            touched_branch.add(branch)
        to_rev = cl.index.rev
        for branch in touched_branch:
            self._entries[branch].sort(key=to_rev)

    def _compute_key_hashes(self, repo) -> Tuple[bytes]:
        """return the cache key hashes that match this repoview state"""
        return scmutil.filtered_and_obsolete_hash(
            repo,
            self.tiprev,
        )

    def _process_new(
        self,
        repo,
        newbranches,
        new_closed,
        obs_ignored,
        max_rev,
    ) -> None:
        if (
            # note: the check about `obs_ignored` is too strict as the
            # obsolete revision could be non-topological, but let's keep
            # things simple for now
            #
            # The same applies to `new_closed`: if the closed changesets
            # are not heads, we don't care that they are closed, but let's
            # keep things simple here too.
            not (obs_ignored or new_closed)
            and (
                not newbranches
                or (
                    len(newbranches) == 1
                    and (
                        self.tiprev == nullrev
                        or self._pure_topo_branch in newbranches
                    )
                )
            )
        ):
            if newbranches:
                assert len(newbranches) == 1
                self._pure_topo_branch = list(newbranches.keys())[0]
                self._needs_populate = True
                self._entries.pop(self._pure_topo_branch, None)
            return

        self._ensure_populated(repo)
        self._pure_topo_branch = None
        super()._process_new(
            repo,
            newbranches,
            new_closed,
            obs_ignored,
            max_rev,
        )

    def _ensure_populated(self, repo):
        """make sure any lazily loaded values are fully populated"""
        if self._needs_populate:
            assert self._pure_topo_branch is not None
            cl = repo.changelog
            to_node = cl.node
            topo_heads = self._get_topo_heads(repo)
            heads = [to_node(r) for r in topo_heads]
            self._entries[self._pure_topo_branch] = heads
            self._needs_populate = False

    def _detect_pure_topo(self, repo) -> None:
        if self._pure_topo_branch is not None:
            # we are pure topological already
            return
        to_node = repo.changelog.node
        topo_heads = [to_node(r) for r in self._get_topo_heads(repo)]
        if any(n in self._closednodes for n in topo_heads):
            return
        for branch, heads in self._entries.items():
            if heads == topo_heads:
                self._pure_topo_branch = branch
                break


class remotebranchcache(_BaseBranchCache):
    """Branchmap info for a remote connection, should not write locally"""

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closednodes: Optional[Set[bytes]] = None,
    ) -> None:
        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)


# Revision branch info cache

_rbcversion = b'-v1'
_rbcnames = b'rbc-names' + _rbcversion
_rbcrevs = b'rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = b'>4sI'
_rbcrecsize = calcsize(_rbcrecfmt)
_rbcmininc = 64 * _rbcrecsize
_rbcnodelen = 4
_rbcbranchidxmask = 0x7FFFFFFF
_rbccloseflag = 0x80000000
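
# A minimal sketch (illustration only) of how one such 8-byte record
# round-trips through the plain struct module; `node` stands for any 20-byte
# changelog node and `branchidx` for an index into rbc-names:
#
#   rec = struct.pack(_rbcrecfmt, node[:_rbcnodelen], branchidx | _rbccloseflag)
#   prefix, field = struct.unpack(_rbcrecfmt, rec)
#   closed = bool(field & _rbccloseflag)  # high bit marks a branch-closing commit
#   assert field & _rbcbranchidxmask == branchidx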


class rbcrevs:
    """a byte string consisting of an immutable prefix followed by a mutable suffix"""

    def __init__(self, revs):
        self._prefix = revs
        self._rest = bytearray()

    def __len__(self):
        return len(self._prefix) + len(self._rest)

    def unpack_record(self, rbcrevidx):
        if rbcrevidx < len(self._prefix):
            return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
        else:
            return unpack_from(
                _rbcrecfmt,
                util.buffer(self._rest),
                rbcrevidx - len(self._prefix),
            )

    def make_mutable(self):
        if len(self._prefix) > 0:
            entirety = bytearray()
            entirety[:] = self._prefix
            entirety.extend(self._rest)
            self._rest = entirety
            self._prefix = bytearray()

    def truncate(self, pos):
        self.make_mutable()
        del self._rest[pos:]

    def pack_into(self, rbcrevidx, node, branchidx):
        if rbcrevidx < len(self._prefix):
            self.make_mutable()
        buf = self._rest
        start_offset = rbcrevidx - len(self._prefix)
        end_offset = start_offset + _rbcrecsize

        if len(self._rest) < end_offset:
            # bytearray doesn't allocate extra space at least in Python 3.7.
            # When multiple changesets are added in a row, precise resize would
            # result in quadratic complexity. Overallocate to compensate by
            # using the classic doubling technique for dynamic arrays instead.
            # If there was a gap in the map before, less space will be reserved.
            self._rest.extend(b'\0' * end_offset)
        return pack_into(
            _rbcrecfmt,
            buf,
            start_offset,
            node,
            branchidx,
        )

    def extend(self, extension):
        return self._rest.extend(extension)

    def slice(self, begin, end):
        if begin < len(self._prefix):
            acc = bytearray()
            acc[:] = self._prefix[begin:end]
            acc.extend(
                self._rest[begin - len(self._prefix) : end - len(self._prefix)]
            )
            return acc
        return self._rest[begin - len(self._prefix) : end - len(self._prefix)]


class revbranchcache:
    """Persistent cache, mapping from revision number to branch name and close
    flag. This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs file will thus still give the right
    result while converging towards full recovery ... assuming no incorrectly
    matching node hashes.
    The record also contains 4 bytes where 31 bits contain the index of the
    branch and the last bit indicates that it is a branch close commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """

    def __init__(self, repo, readonly=True):
        assert repo.filtername is None
        self._repo = repo
        self._names = []  # branch names in local encoding with static index
        self._rbcrevs = rbcrevs(bytearray())
        self._rbcsnameslen = 0  # length of names read at _rbcsnameslen
        try:
            bndata = repo.cachevfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata)  # for verification before writing
            if bndata:
                self._names = [
                    encoding.tolocal(bn) for bn in bndata.split(b'\0')
                ]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        if self._names:
            try:
                usemmap = repo.ui.configbool(b'storage', b'revbranchcache.mmap')
                with repo.cachevfs(_rbcrevs) as fp:
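                    # mmap is only attempted when the vfs reports the file
                    # safe to map (is_mmap_safe); otherwise, or when disabled
                    # by the config knob above, fall back to a plain read.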
                    if usemmap and repo.cachevfs.is_mmap_safe(_rbcrevs):
                        data = util.buffer(util.mmapread(fp))
                    else:
                        data = fp.read()
                self._rbcrevs = rbcrevs(data)
            except (IOError, OSError) as inst:
                repo.ui.debug(
                    b"couldn't read revision branch cache: %s\n"
                    % stringutil.forcebytestr(inst)
                )
        # remember number of good records on disk
        self._rbcrevslen = min(
            len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
        )
        if self._rbcrevslen == 0:
            self._names = []
        self._rbcnamescount = len(self._names)  # number of names read at
        # _rbcsnameslen

    def _clear(self):
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._rbcrevslen = len(self._repo.changelog)
        self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
        util.clearcachedproperty(self, b'_namesreverse')

    @util.propertycache
    def _namesreverse(self):
        return {b: r for r, b in enumerate(self._names)}

    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == b'\0\0\0\0':
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug(
                    b"referenced branch names not found"
                    b" - rebuilding revision branch cache from scratch\n"
                )
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug(
                b"history modification detected - truncating "
                b"revision branch cache to revision %d\n" % rev
            )
            truncate = rbcrevidx + _rbcrecsize
            self._rbcrevs.truncate(truncate)
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def setdata(self, rev, changelogrevision):
        """add new data information to the cache"""
        branch, close = changelogrevision.branchinfo

        if branch in self._namesreverse:
            branchidx = self._namesreverse[branch]
        else:
            branchidx = len(self._names)
            self._names.append(branch)
            self._namesreverse[branch] = branchidx
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
        # If no cache data was readable (none exists, bad permissions, etc.),
        # the cache was bypassing itself by setting:
        #
        # self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypassing.
        if 'branchinfo' in vars(self):
            del self.branchinfo

    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize(b'write-revbranchcache', self.write)

    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = b''
        try:
            # write the new names
            if self._rbcnamescount < len(self._names):
                wlock = repo.wlock(wait=False)
                step = b' names'
                self._writenames(repo)

            # write the new revs
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = b' revs'
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                self._writerevs(repo, start)

        except (IOError, OSError, error.Abort, error.LockError) as inst:
            repo.ui.debug(
                b"couldn't write revision branch cache%s: %s\n"
                % (step, stringutil.forcebytestr(inst))
            )
        finally:
            if wlock is not None:
                wlock.release()

    def _writenames(self, repo):
        """write the new branch names to revbranchcache"""
        if self._rbcnamescount != 0:
            f = repo.cachevfs.open(_rbcnames, b'ab')
            if f.tell() == self._rbcsnameslen:
                f.write(b'\0')
            else:
                f.close()
                repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
                self._rbcnamescount = 0
                self._rbcrevslen = 0
        if self._rbcnamescount == 0:
            # before rewriting names, make sure references are removed
            repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
            f = repo.cachevfs.open(_rbcnames, b'wb')
        f.write(
            b'\0'.join(
                encoding.fromlocal(b)
                for b in self._names[self._rbcnamescount :]
            )
        )
        self._rbcsnameslen = f.tell()
        f.close()
        self._rbcnamescount = len(self._names)

    def _writerevs(self, repo, start):
        """write the new revs to revbranchcache"""
        revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
        with repo.cachevfs.open(_rbcrevs, b'ab') as f:
            if f.tell() != start:
                repo.ui.debug(
                    b"truncating cache/%s to %d\n" % (_rbcrevs, start)
                )
                f.seek(start)
                if f.tell() != start:
                    start = 0
                    f.seek(start)
                f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs.slice(start, end))
        self._rbcrevslen = revs
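

# A minimal usage sketch (illustration only, assuming an existing `repo`
# object): the cache is normally obtained through the repository and queried
# one revision at a time; dirty data is flushed at transaction close, or
# explicitly via write().
#
#   rbc = repo.revbranchcache()
#   branch, closed = rbc.branchinfo(rev)
#   rbc.write()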