branchcache: do not copy the `_dirty` flag...
marmoute - r52384:dd5b5f99 default
@@ -1,1051 +1,1048 @@
# branchmap.py - logic to compute, maintain and store the branchmap for a local repo
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import struct

from .node import (
    bin,
    hex,
    nullrev,
)

from typing import (
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Optional,
    Set,
    TYPE_CHECKING,
    Tuple,
    Union,
)

from . import (
    encoding,
    error,
    obsolete,
    scmutil,
    util,
)

from .utils import (
    repoviewutil,
    stringutil,
)

if TYPE_CHECKING:
    from . import localrepo

    assert [localrepo]

subsettable = repoviewutil.subsettable

calcsize = struct.calcsize
pack_into = struct.pack_into
unpack_from = struct.unpack_from


class BranchMapCache:
    """mapping of filtered views of repo with their branchcache"""

    def __init__(self):
        self._per_filter = {}

    def __getitem__(self, repo):
        self.updatecache(repo)
        bcache = self._per_filter[repo.filtername]
        assert bcache._filtername == repo.filtername, (
            bcache._filtername,
            repo.filtername,
        )
        return bcache

    def update_disk(self, repo):
        """ensure an up-to-date cache is (or will be) written on disk

        The cache for this repository view is updated if needed and written on
        disk.

        If a transaction is in progress, the writing is scheduled at
        transaction close. See the `BranchMapCache.write_dirty` method.

        This method exists independently of __getitem__ as it is sometimes
        useful to signal that we have no intent to use the data in memory yet.
        """
        self.updatecache(repo)
        bcache = self._per_filter[repo.filtername]
        assert bcache._filtername == repo.filtername, (
            bcache._filtername,
            repo.filtername,
        )
        bcache.write(repo)

    def updatecache(self, repo):
        """Update the cache for the given filtered view on a repository"""
        # This can trigger updates for the caches for subsets of the filtered
        # view, e.g. when there is no cache for this filtered view or the cache
        # is stale.

        cl = repo.changelog
        filtername = repo.filtername
        bcache = self._per_filter.get(filtername)
        if bcache is None or not bcache.validfor(repo):
            # cache object missing or cache object stale? Read from disk
            bcache = branchcache.fromfile(repo)

        revs = []
        if bcache is None:
            # no (fresh) cache available anymore, perhaps we can re-use
            # the cache for a subset, then extend that to add info on missing
            # revisions.
            subsetname = subsettable.get(filtername)
            if subsetname is not None:
                subset = repo.filtered(subsetname)
                bcache = self[subset].copy(repo)
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= bcache.tiprev)
            else:
                # nothing to fall back on, start empty.
                bcache = branchcache(repo)

        revs.extend(cl.revs(start=bcache.tiprev + 1))
        if revs:
            bcache.update(repo, revs)

        assert bcache.validfor(repo), filtername
        self._per_filter[repo.filtername] = bcache

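    # Illustrative note (not in the original file): `subsettable` maps a
    # filter name to a broader view whose cache can seed it, roughly
    # {None: b'visible', b'visible': b'served', b'served': b'immutable',
    #  b'immutable': b'base'}; e.g. a missing 'visible' cache is rebuilt by
    # copying the 'served' cache and extending it with the extra revisions.
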
    def replace(self, repo, remotebranchmap):
        """Replace the branchmap cache for a repo with a branch mapping.

        This is likely only called during clone with a branch map from a
        remote.

        """
        cl = repo.changelog
        clrev = cl.rev
        clbranchinfo = cl.branchinfo
        rbheads = []
        closed = set()
        for bheads in remotebranchmap.values():
            rbheads += bheads
            for h in bheads:
                r = clrev(h)
                b, c = clbranchinfo(r)
                if c:
                    closed.add(h)

        if rbheads:
            rtiprev = max((int(clrev(node)) for node in rbheads))
            cache = branchcache(
                repo,
                remotebranchmap,
                repo[rtiprev].node(),
                rtiprev,
                closednodes=closed,
            )

            # Try to stick it as low as possible
            # filters above served are unlikely to be fetched from a clone
            for candidate in (b'base', b'immutable', b'served'):
                rview = repo.filtered(candidate)
                if cache.validfor(rview):
                    cache._filtername = candidate
                    self._per_filter[candidate] = cache
                    cache._dirty = True
                    cache.write(rview)
                    return

    def clear(self):
        self._per_filter.clear()

    def write_dirty(self, repo):
        unfi = repo.unfiltered()
        for filtername in repoviewutil.get_ordered_subset():
            cache = self._per_filter.get(filtername)
            if cache is None:
                continue
            if cache._dirty:
                if filtername is None:
                    repo = unfi
                else:
                    repo = unfi.filtered(filtername)
                cache.write(repo)


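# Illustrative usage sketch (hypothetical, not part of the original module):
# roughly how repository-level code drives BranchMapCache.
#
#     caches = BranchMapCache()
#     bcache = caches[repo]                   # load/update cache for this view
#     heads = bcache.branchheads(b'default')  # head nodes of branch "default"
#     caches.write_dirty(repo)                # flush dirty caches to disk

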
def _unknownnode(node):
    """raises ValueError when branchcache finds a node which does not exist"""
    raise ValueError('node %s does not exist' % node.hex())


def _branchcachedesc(repo):
    if repo.filtername is not None:
        return b'branch cache (%s)' % repo.filtername
    else:
        return b'branch cache'


class _BaseBranchCache:
    """A dict-like object that holds the branch heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered and obsolete revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """
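    # Illustrative example (values invented) of the serialized form described
    # above:
    #
    #   <tip node: 40 hex digits> 4521 <filtered hash: 40 hex digits>
    #   <head node: 40 hex digits> o default
    #   <head node: 40 hex digits> c stable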

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closed_nodes: Optional[Set[bytes]] = None,
    ) -> None:
        """entries is a dict of {branch: [head nodes]} or an iterable of
        (branch, [head nodes]) pairs; closed_nodes is the set of nodes that
        close their branch (assumed empty when not provided)"""
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closed_nodes is None:
            closed_nodes = set()
        self._closednodes = set(closed_nodes)
        self._entries = dict(entries)

    def __iter__(self):
        return iter(self._entries)

    def __setitem__(self, key, value):
        self._entries[key] = value

    def __getitem__(self, key):
        return self._entries[key]

    def __contains__(self, key):
        return key in self._entries

    def iteritems(self):
        return self._entries.items()

    items = iteritems

    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        return label in self._entries

    def _branchtip(self, heads):
        """Return tuple with last open head in heads and false,
        otherwise return last closed head and true."""
        tip = heads[-1]
        closed = True
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

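    # Illustrative note (not in the original): for heads == [n1, n2, n3] with
    # n3 closed and n2 open, _branchtip returns (n2, False); if every head is
    # closed, it returns (n3, True).
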
    def branchtip(self, branch):
        """Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch."""
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        heads = self._entries[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        for bn, heads in self.items():
            yield (bn, heads) + self._branchtip(heads)

    def iterheads(self):
        """returns all the heads"""
        return self._entries.values()

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        heads missing, this function updates self to be correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        max_rev = -1
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))
            max_rev = max(max_rev, r)
        if max_rev < 0:
            msg = "running branchcache.update without revision to update"
            raise error.ProgrammingError(msg)

        # Delay fetching the topological heads until they are needed.
        # A repository without non-continuous branches can skip this part.
        topoheads = None

        # If a changeset is visible, its parents must be visible too, so
        # use the faster unfiltered parent accessor.
        parentrevs = repo.unfiltered().changelog.parentrevs

        # Faster than using ctx.obsolete()
        obsrevs = obsolete.getrevs(repo, b'obsolete')

        for branch, newheadrevs in newbranches.items():
            # For every branch, compute the new branchheads.
            # A branchhead is a revision such that no descendant is on
            # the same branch.
            #
            # The branchheads are computed iteratively in revision order.
            # This ensures topological order, i.e. parents are processed
            # before their children. Ancestors are inclusive here, i.e.
            # any revision is an ancestor of itself.
            #
            # Core observations:
            # - The current revision is always a branchhead for the
            #   repository up to that point.
            # - It is the first revision of the branch if and only if
            #   there was no branchhead before. In that case, it is the
            #   only branchhead as there are no possible ancestors on
            #   the same branch.
            # - If a parent is on the same branch, a branchhead can
            #   only be an ancestor of that parent if it is the parent
            #   itself. Otherwise it would have been removed as an
            #   ancestor of that parent before.
            # - Therefore, if all parents are on the same branch, they
            #   can just be removed from the branchhead set.
            # - If one parent is on the same branch and the other is not
            #   and there was exactly one branchhead known, the existing
            #   branchhead can only be an ancestor if it is the parent.
            #   Otherwise it would have been removed as an ancestor of
            #   the parent before. The other parent therefore can't have
            #   a branchhead as an ancestor.
            # - In all other cases, the parents on different branches
            #   could have a branchhead as an ancestor. Those parents are
            #   kept in the "uncertain" set. If all branchheads are also
            #   topological heads, they can't have descendants and further
            #   checks can be skipped. Otherwise, the ancestors of the
            #   "uncertain" set are removed from branchheads.
            #   This computation is heavy and avoided if at all possible.
            bheads = self._entries.get(branch, [])
            bheadset = {cl.rev(node) for node in bheads}
            uncertain = set()
            for newrev in sorted(newheadrevs):
                if newrev in obsrevs:
                    # We ignore obsolete changesets as they shouldn't be
                    # considered heads.
                    continue

                if not bheadset:
                    bheadset.add(newrev)
                    continue

                parents = [p for p in parentrevs(newrev) if p != nullrev]
                samebranch = set()
                otherbranch = set()
                obsparents = set()
                for p in parents:
                    if p in obsrevs:
                        # We ignored this obsolete changeset earlier, but now
                        # that it has non-ignored children, we need to make
                        # sure their ancestors are not considered heads. To
                        # achieve that, we will simply treat this obsolete
                        # changeset as a parent from another branch.
                        obsparents.add(p)
                    elif p in bheadset or getbranchinfo(p)[0] == branch:
                        samebranch.add(p)
                    else:
                        otherbranch.add(p)
                if not (len(bheadset) == len(samebranch) == 1):
                    uncertain.update(otherbranch)
                    uncertain.update(obsparents)
                bheadset.difference_update(samebranch)
                bheadset.add(newrev)

            if uncertain:
                if topoheads is None:
                    topoheads = set(cl.headrevs())
                if bheadset - topoheads:
                    floorrev = min(bheadset)
                    if floorrev <= max(uncertain):
                        ancestors = set(cl.ancestors(uncertain, floorrev))
                        bheadset -= ancestors
            if bheadset:
                self[branch] = [cl.node(rev) for rev in sorted(bheadset)]

        duration = util.timer() - starttime
        repo.ui.log(
            b'branchcache',
            b'updated %s in %.4f seconds\n',
            _branchcachedesc(repo),
            duration,
        )
        return max_rev


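# Illustrative walk-through of _BaseBranchCache.update (hypothetical revision
# numbers, not from the original file): suppose branch b'default' currently
# has bheadset == {3, 5} and newrev 7 has parents 5 (same branch) and 4
# (another branch). Then samebranch == {5} and otherbranch == {4}; since
# len(bheadset) != 1, rev 4 joins `uncertain`; 5 is dropped and 7 added,
# giving bheadset == {3, 7}. If some branch head (here 3) is not a
# topological head, the ancestors of `uncertain` down to floorrev == 3 are
# computed and pruned, removing 3 when it turns out to be an ancestor of 4.

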
class branchcache(_BaseBranchCache):
    """Branchmap info for a local repo or repoview"""

    _base_filename = b"branch2"

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        tipnode: Optional[bytes] = None,
        tiprev: Optional[int] = nullrev,
        filteredhash: Optional[bytes] = None,
        closednodes: Optional[Set[bytes]] = None,
        hasnode: Optional[Callable[[bytes], bool]] = None,
        verify_node: bool = False,
    ) -> None:
        """hasnode is a function which can be used to verify whether changelog
        has a given node or not. If it's not provided, we assume that every node
        we have exists in changelog"""
        self._filtername = repo.filtername
        if tipnode is None:
            self.tipnode = repo.nullid
        else:
            self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash
        self._dirty = False

        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.

        # Do we need to verify branches at all?
        self._verify_node = verify_node
        # branches for which nodes are verified
        self._verifiedbranches = set()
        self._hasnode = None
        if self._verify_node:
            self._hasnode = repo.changelog.hasnode

    def validfor(self, repo):
        """check that cache contents are valid for (a subset of) this repo

        - False when the order of changesets changed or if we detect a strip.
        - True when cache is up-to-date for the current repo or its subset."""
        try:
            node = repo.changelog.node(self.tiprev)
        except IndexError:
            # changesets were stripped and now we don't even have enough to
            # find tiprev
            return False
        if self.tipnode != node:
            # tiprev doesn't correspond to tipnode: repo was stripped, or this
            # repo has a different order of changesets
            return False
        tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
        # hashes don't match if this repo view has a different set of filtered
        # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
        # history was rewritten)
        return self.filteredhash == tiphash

    @classmethod
    def fromfile(cls, repo):
        f = None
        try:
            f = repo.cachevfs(cls._filename(repo))
            lineiter = iter(f)
            init_kwargs = cls._load_header(repo, lineiter)
            bcache = cls(
                repo,
                verify_node=True,
                **init_kwargs,
            )
            if not bcache.validfor(repo):
                # invalidate the cache
                raise ValueError('tip differs')
            bcache._load_heads(repo, lineiter)
        except (IOError, OSError):
            return None

        except Exception as inst:
            if repo.ui.debugflag:
                msg = b'invalid %s: %s\n'
                msg %= (
                    _branchcachedesc(repo),
                    stringutil.forcebytestr(inst),
                )
                repo.ui.debug(msg)
            bcache = None

        finally:
            if f:
                f.close()

        return bcache

    @classmethod
    def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
        """parse the head of a branchmap file

        return parameters to pass to a newly created class instance.
        """
        cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
        last, lrev = cachekey[:2]
        last, lrev = bin(last), int(lrev)
        filteredhash = None
        if len(cachekey) > 2:
            filteredhash = bin(cachekey[2])
        return {
            "tipnode": last,
            "tiprev": lrev,
            "filteredhash": filteredhash,
        }

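    # Illustrative example (invented values): a header line such as
    #   b"<40 hex digits> 4521 <40 hex digits>\n"
    # parses to {"tipnode": bin(...), "tiprev": 4521, "filteredhash": bin(...)};
    # the third field is optional (see the _BaseBranchCache docstring).
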
    def _load_heads(self, repo, lineiter):
        """fully loads the branchcache by reading from the file using the line
        iterator passed"""
        for line in lineiter:
            line = line.rstrip(b'\n')
            if not line:
                continue
            node, state, label = line.split(b" ", 2)
            if state not in b'oc':
                raise ValueError('invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            self._entries.setdefault(label, []).append(node)
            if state == b'c':
                self._closednodes.add(node)

    @classmethod
    def _filename(cls, repo):
        """name of a branchcache file for a given repo or repoview"""
        filename = cls._base_filename
        if repo.filtername:
            filename = b'%s-%s' % (filename, repo.filtername)
        return filename

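    # e.g. (from the naming scheme above): the unfiltered repo uses b"branch2",
    # while the b"served" repoview uses b"branch2-served".
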
    def copy(self, repo):
        """return a deep copy of the branchcache object"""
        assert repo.filtername != self._filtername
        other = type(self)(
            repo=repo,
            # we always do a shallow copy of self._entries, and the values are
            # always replaced, so no need to deepcopy as long as the above
            # remains true.
            entries=self._entries,
            tipnode=self.tipnode,
            tiprev=self.tiprev,
            filteredhash=self.filteredhash,
            closednodes=set(self._closednodes),
            verify_node=self._verify_node,
        )
-        # we copy will likely schedule a write anyway, but that does not seems
-        # to hurt to overschedule
-        other._dirty = self._dirty
        # also copy information about the current verification state
        other._verifiedbranches = set(self._verifiedbranches)
        return other

    def write(self, repo):
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        tr = repo.currenttransaction()
        if not getattr(tr, 'finalized', True):
            # Avoid premature writing.
            #
            # (The cache warming setup by localrepo will update the file later.)
            return
        try:
            filename = self._filename(repo)
            with repo.cachevfs(filename, b"w", atomictemp=True) as f:
                self._write_header(f)
                nodecount = self._write_heads(f)
            repo.ui.log(
                b'branchcache',
                b'wrote %s with %d labels and %d nodes\n',
                _branchcachedesc(repo),
                len(self._entries),
                nodecount,
            )
            self._dirty = False
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug(
                b"couldn't write branch cache: %s\n"
                % stringutil.forcebytestr(inst)
            )

    def _write_header(self, fp) -> None:
        """write the branch cache header to a file"""
        cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
        if self.filteredhash is not None:
            cachekey.append(hex(self.filteredhash))
        fp.write(b" ".join(cachekey) + b'\n')

    def _write_heads(self, fp) -> int:
        """write list of heads to a file

        Return the number of heads written."""
        nodecount = 0
        for label, nodes in sorted(self._entries.items()):
            label = encoding.fromlocal(label)
            for node in nodes:
                nodecount += 1
                if node in self._closednodes:
                    state = b'c'
                else:
                    state = b'o'
                fp.write(b"%s %s %s\n" % (hex(node), state, label))
        return nodecount

    def _verifybranch(self, branch):
        """verify head nodes for the given branch."""
        if not self._verify_node:
            return
        if branch not in self._entries or branch in self._verifiedbranches:
            return
        assert self._hasnode is not None
        for n in self._entries[branch]:
            if not self._hasnode(n):
                _unknownnode(n)

        self._verifiedbranches.add(branch)

    def _verifyall(self):
        """verifies nodes of all the branches"""
        for b in self._entries.keys():
            if b not in self._verifiedbranches:
                self._verifybranch(b)

    def __getitem__(self, key):
        self._verifybranch(key)
        return super().__getitem__(key)

    def __contains__(self, key):
        self._verifybranch(key)
        return super().__contains__(key)

    def iteritems(self):
        self._verifyall()
        return super().iteritems()

    items = iteritems

    def iterheads(self):
        """returns all the heads"""
        self._verifyall()
        return super().iterheads()

    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        self._verifybranch(label)
        return super().hasbranch(label)

    def branchheads(self, branch, closed=False):
        self._verifybranch(branch)
        return super().branchheads(branch, closed=closed)

    def update(self, repo, revgen):
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        cl = repo.changelog
        max_rev = super().update(repo, revgen)
        # new tip revision which we found after iterating items from new
        # branches
        if max_rev is not None and max_rev > self.tiprev:
            self.tiprev = max_rev
            self.tipnode = cl.node(max_rev)

        if not self.validfor(repo):
            # old cache key is now invalid for the repo, but we've just updated
            # the cache and we assume it's valid, so let's make the cache key
            # valid as well by recomputing it from the cached data
            self.tipnode = repo.nullid
            self.tiprev = nullrev
            for heads in self.iterheads():
                if not heads:
                    # all revisions on a branch are obsolete
                    continue
                # note: tiprev is not necessarily the tip revision of repo,
                # because the tip could be obsolete (i.e. not a head)
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
            self.filteredhash = scmutil.filteredhash(
                repo, self.tiprev, needobsolete=True
            )
        self._dirty = True
        self.write(repo)


class remotebranchcache(_BaseBranchCache):
    """Branchmap info for a remote connection, should not write locally"""

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closednodes: Optional[Set[bytes]] = None,
    ) -> None:
        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)


# Revision branch info cache

_rbcversion = b'-v1'
_rbcnames = b'rbc-names' + _rbcversion
_rbcrevs = b'rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number; the sign bit flags a branch-closing commit]
_rbcrecfmt = b'>4sI'
_rbcrecsize = calcsize(_rbcrecfmt)
_rbcmininc = 64 * _rbcrecsize
_rbcnodelen = 4
_rbcbranchidxmask = 0x7FFFFFFF
_rbccloseflag = 0x80000000


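# Illustrative sketch (not part of the original file): packing and unpacking
# one rbc-revs record with the format defined above.
#
#     import struct
#     node_prefix = b'\x12\x34\x56\x78'      # first 4 bytes of a node hash
#     branchidx = 5 | _rbccloseflag          # branch name #5, a closing commit
#     rec = struct.pack(_rbcrecfmt, node_prefix, branchidx)
#     assert len(rec) == _rbcrecsize         # 8 bytes per revision
#     prefix, idx = struct.unpack(_rbcrecfmt, rec)
#     closed = bool(idx & _rbccloseflag)     # True
#     idx &= _rbcbranchidxmask               # back to 5

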
class rbcrevs:
    """a byte string consisting of an immutable prefix followed by a mutable suffix"""

    def __init__(self, revs):
        self._prefix = revs
        self._rest = bytearray()

    def __len__(self):
        return len(self._prefix) + len(self._rest)

    def unpack_record(self, rbcrevidx):
        if rbcrevidx < len(self._prefix):
            return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
        else:
            return unpack_from(
                _rbcrecfmt,
                util.buffer(self._rest),
                rbcrevidx - len(self._prefix),
            )

    def make_mutable(self):
        if len(self._prefix) > 0:
            entirety = bytearray()
            entirety[:] = self._prefix
            entirety.extend(self._rest)
            self._rest = entirety
            self._prefix = bytearray()

    def truncate(self, pos):
        self.make_mutable()
        del self._rest[pos:]

    def pack_into(self, rbcrevidx, node, branchidx):
        if rbcrevidx < len(self._prefix):
            self.make_mutable()
        buf = self._rest
        start_offset = rbcrevidx - len(self._prefix)
        end_offset = start_offset + _rbcrecsize

        if len(self._rest) < end_offset:
            # bytearray doesn't allocate extra space at least in Python 3.7.
            # When multiple changesets are added in a row, precise resize would
            # result in quadratic complexity. Overallocate to compensate by
            # using the classic doubling technique for dynamic arrays instead.
            # If there was a gap in the map before, less space will be reserved.
            self._rest.extend(b'\0' * end_offset)
        return pack_into(
            _rbcrecfmt,
            buf,
            start_offset,
            node,
            branchidx,
        )

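    # Illustrative note (not in the original): extending by `end_offset` bytes
    # more than doubles the buffer whenever it must grow, so a long run of
    # appends costs amortized O(1) per record instead of O(n) each.
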
    def extend(self, extension):
        return self._rest.extend(extension)

    def slice(self, begin, end):
        if begin < len(self._prefix):
            acc = bytearray()
            acc[:] = self._prefix[begin:end]
            acc.extend(
                self._rest[begin - len(self._prefix) : end - len(self._prefix)]
            )
            return acc
        return self._rest[begin - len(self._prefix) : end - len(self._prefix)]


class revbranchcache:
    """Persistent cache, mapping from revision number to branch name and close.
    This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs file will thus still give the right
    result while converging towards full recovery ... assuming no incorrectly
    matching node hashes.
    The record also contains 4 bytes where 31 bits contain the index of the
    branch and the last bit indicates that it is a branch close commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """
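    # Illustrative note (not in the original): with the 8-byte records defined
    # above (_rbcrecsize) against 64-byte 00changelog.i index entries, rbc-revs
    # comes out at about 1/8th of the changelog index size, as claimed.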

    def __init__(self, repo, readonly=True):
        assert repo.filtername is None
        self._repo = repo
        self._names = []  # branch names in local encoding with static index
        self._rbcrevs = rbcrevs(bytearray())
        self._rbcsnameslen = 0  # length of names read at _rbcsnameslen
        try:
            bndata = repo.cachevfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata)  # for verification before writing
            if bndata:
                self._names = [
                    encoding.tolocal(bn) for bn in bndata.split(b'\0')
                ]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        if self._names:
            try:
                if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
                    with repo.cachevfs(_rbcrevs) as fp:
                        data = util.buffer(util.mmapread(fp))
                else:
                    data = repo.cachevfs.read(_rbcrevs)
                self._rbcrevs = rbcrevs(data)
            except (IOError, OSError) as inst:
                repo.ui.debug(
                    b"couldn't read revision branch cache: %s\n"
                    % stringutil.forcebytestr(inst)
                )
        # remember number of good records on disk
        self._rbcrevslen = min(
            len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
        )
        if self._rbcrevslen == 0:
            self._names = []
        self._rbcnamescount = len(self._names)  # number of names read at
        # _rbcsnameslen

    def _clear(self):
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._rbcrevslen = len(self._repo.changelog)
        self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
        util.clearcachedproperty(self, b'_namesreverse')

    @util.propertycache
    def _namesreverse(self):
        return {b: r for r, b in enumerate(self._names)}

    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == b'\0\0\0\0':
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug(
                    b"referenced branch names not found"
                    b" - rebuilding revision branch cache from scratch\n"
                )
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug(
                b"history modification detected - truncating "
                b"revision branch cache to revision %d\n" % rev
            )
            truncate = rbcrevidx + _rbcrecsize
            self._rbcrevs.truncate(truncate)
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

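    # A hedged usage sketch (assuming the usual `repo.revbranchcache()`
    # accessor): callers treat `branchinfo` as a memoized drop-in for
    # `changelog.branchinfo`, O(1) per already-cached revision:
    #
    #     rbc = repo.revbranchcache()
    #     for rev in repo.changelog.revs():
    #         name, closed = rbc.branchinfo(rev)
    #
    # The three outcomes above are: zeroed record -> slow path; node match
    # -> cached answer; node mismatch -> history was rewritten, so truncate
    # the cache from this revision up and recompute.
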
    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

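    # Sketch of the interning scheme above (values purely illustrative):
    # branch names are appended to `_names` once and referenced by position
    # forever after, so each record stores a small integer, not the name:
    #
    #     self._names        == [b'default', b'stable']
    #     self._namesreverse == {b'default': 0, b'stable': 1}
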
    def setdata(self, rev, changelogrevision):
        """add new branch data for a revision to the cache"""
        branch, close = changelogrevision.branchinfo

        if branch in self._namesreverse:
            branchidx = self._namesreverse[branch]
        else:
            branchidx = len(self._names)
            self._names.append(branch)
            self._namesreverse[branch] = branchidx
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
        # If no cache data were readable (none exists, bad permissions,
        # etc.), the cache was bypassing itself by setting:
        #
        #   self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypass.
        if 'branchinfo' in vars(self):
            del self.branchinfo

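    # A hedged example of when `setdata` pays off: a caller that has just
    # appended a revision and still holds its parsed `changelogrevision` can
    # prime the cache directly, skipping the changelog re-read that
    # `_branchinfo()` would otherwise do:
    #
    #     rbc.setdata(rev, changelogrevision)
    #
    # The trailing `del self.branchinfo` restores the class-level fast path
    # that was shadowed when the on-disk cache failed to load.
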
    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize(b'write-revbranchcache', self.write)

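    # The finalizer registered above means dirty records are flushed at most
    # once per transaction, at close time.  Illustrative shape only:
    #
    #     with repo.transaction(b'example') as tr:
    #         rbc.branchinfo(rev)        # may dirty the cache
    #     # closing tr runs b'write-revbranchcache', i.e. self.write()
    #
    # Outside a transaction nothing is scheduled; callers must invoke
    # `write()` themselves.
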
    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = b''
        try:
            # write the new names
            if self._rbcnamescount < len(self._names):
                wlock = repo.wlock(wait=False)
                step = b' names'
                self._writenames(repo)

            # write the new revs
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = b''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                self._writerevs(repo, start)

        except (IOError, OSError, error.Abort, error.LockError) as inst:
            repo.ui.debug(
                b"couldn't write revision branch cache%s: %s\n"
                % (step, stringutil.forcebytestr(inst))
            )
        finally:
            if wlock is not None:
                wlock.release()

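    # Note the non-blocking lock: `wlock(wait=False)` raises a `LockError`
    # when another process holds the write lock, and the except clause above
    # deliberately downgrades that (like any I/O failure) to a debug message.
    # The cache is purely an optimization, so losing one write is harmless.
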
    def _writenames(self, repo):
        """write the new branch names to revbranchcache"""
        if self._rbcnamescount != 0:
            f = repo.cachevfs.open(_rbcnames, b'ab')
            if f.tell() == self._rbcsnameslen:
                f.write(b'\0')
            else:
                f.close()
                repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
                self._rbcnamescount = 0
                self._rbcrevslen = 0
        if self._rbcnamescount == 0:
            # before rewriting names, make sure references are removed
            repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
            f = repo.cachevfs.open(_rbcnames, b'wb')
        f.write(
            b'\0'.join(
                encoding.fromlocal(b)
                for b in self._names[self._rbcnamescount :]
            )
        )
        self._rbcsnameslen = f.tell()
        f.close()
        self._rbcnamescount = len(self._names)

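    # On-disk format sketch for the names file: entries are NUL-separated,
    # which is why a well-formed append begins with the b'\0' written above,
    # e.g. (contents illustrative):
    #
    #     default\0stable\0my-branch
    #
    # If the file size no longer matches `_rbcsnameslen`, another writer
    # changed it, so both cache files are rebuilt from scratch.
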
    def _writerevs(self, repo, start):
        """write the new revs to revbranchcache"""
        revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
        with repo.cachevfs.open(_rbcrevs, b'ab') as f:
            if f.tell() != start:
                repo.ui.debug(
                    b"truncating cache/%s to %d\n" % (_rbcrevs, start)
                )
                f.seek(start)
                if f.tell() != start:
                    start = 0
                    f.seek(start)
                f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs.slice(start, end))
        self._rbcrevslen = revs
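
    # Edge-case sketch: if the revs file is not exactly `start` bytes long
    # (a concurrent writer, or a seek past EOF that did not take), the code
    # falls back to rewriting from offset 0 rather than risk a misaligned
    # stream; every record must stay `_rbcrecsize`-aligned for the offset
    # arithmetic in `branchinfo()` to hold.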