branchcache: drop the unused `_verifyclosed`...
marmoute
r52349:19b2736c default
@@ -1,1035 +1,1019 @@
# branchmap.py - logic to compute, maintain and store branchmap for local repo
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import struct

from .node import (
    bin,
    hex,
    nullrev,
)

from typing import (
    Callable,
    Dict,
    Iterable,
    List,
    Optional,
    Set,
    TYPE_CHECKING,
    Tuple,
    Union,
)

from . import (
    encoding,
    error,
    obsolete,
    scmutil,
    util,
)

from .utils import (
    repoviewutil,
    stringutil,
)

if TYPE_CHECKING:
    from . import localrepo

    assert [localrepo]

subsettable = repoviewutil.subsettable

calcsize = struct.calcsize
pack_into = struct.pack_into
unpack_from = struct.unpack_from

class BranchMapCache:
    """mapping of filtered views of repo with their branchcache"""

    def __init__(self):
        self._per_filter = {}

    def __getitem__(self, repo):
        self.updatecache(repo)
        bcache = self._per_filter[repo.filtername]
        assert bcache._filtername == repo.filtername, (
            bcache._filtername,
            repo.filtername,
        )
        return bcache

    def update_disk(self, repo):
        """ensure an up-to-date cache is (or will be) written on disk

        The cache for this repository view is updated if needed and written on
        disk.

        If a transaction is in progress, the write is scheduled to happen at
        transaction close. See the `BranchMapCache.write_delayed` method.

        This method exists independently of __getitem__ as it is sometimes
        useful to signal that we do not intend to use the data in memory yet.
        """
        self.updatecache(repo)
        bcache = self._per_filter[repo.filtername]
        assert bcache._filtername == repo.filtername, (
            bcache._filtername,
            repo.filtername,
        )
        bcache.write(repo)

    def updatecache(self, repo):
        """Update the cache for the given filtered view on a repository"""
        # This can trigger updates for the caches for subsets of the filtered
        # view, e.g. when there is no cache for this filtered view or the cache
        # is stale.

        cl = repo.changelog
        filtername = repo.filtername
        bcache = self._per_filter.get(filtername)
        if bcache is None or not bcache.validfor(repo):
            # cache object missing or cache object stale? Read from disk
            bcache = branchcache.fromfile(repo)

        revs = []
        if bcache is None:
            # no (fresh) cache available anymore, perhaps we can re-use
            # the cache for a subset, then extend that to add info on missing
            # revisions.
            subsetname = subsettable.get(filtername)
            if subsetname is not None:
                subset = repo.filtered(subsetname)
                bcache = self[subset].copy(repo)
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= bcache.tiprev)
            else:
                # nothing to fall back on, start empty.
                bcache = branchcache(repo)

        revs.extend(cl.revs(start=bcache.tiprev + 1))
        if revs:
            bcache.update(repo, revs)

        assert bcache.validfor(repo), filtername
        self._per_filter[repo.filtername] = bcache

    def replace(self, repo, remotebranchmap):
        """Replace the branchmap cache for a repo with a branch mapping.

        This is likely only called during clone with a branch map from a
        remote.

        """
        cl = repo.changelog
        clrev = cl.rev
        clbranchinfo = cl.branchinfo
        rbheads = []
        closed = set()
        for bheads in remotebranchmap.values():
            rbheads += bheads
            for h in bheads:
                r = clrev(h)
                b, c = clbranchinfo(r)
                if c:
                    closed.add(h)

        if rbheads:
            rtiprev = max((int(clrev(node)) for node in rbheads))
            cache = branchcache(
                repo,
                remotebranchmap,
                repo[rtiprev].node(),
                rtiprev,
                closednodes=closed,
            )

            # Try to stick it as low as possible
            # filters above "served" are unlikely to be fetched from a clone
            for candidate in (b'base', b'immutable', b'served'):
                rview = repo.filtered(candidate)
                if cache.validfor(rview):
                    cache = self._per_filter[candidate] = cache.copy(rview)
                    cache.write(rview)
                    return

    def clear(self):
        self._per_filter.clear()

    def write_delayed(self, repo):
        unfi = repo.unfiltered()
        for filtername, cache in self._per_filter.items():
            if cache._delayed:
                repo = unfi.filtered(filtername)
                cache.write(repo)

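For readers less familiar with repoviews: `updatecache` above recovers a missing cache by borrowing the cache of a broader filter level via `subsettable` and extending it. A minimal sketch of that fallback walk; the mapping here is a hypothetical stand-in, the real one lives in `mercurial.utils.repoviewutil`:

example_subsettable = {
    b'visible': b'served',
    b'served': b'immutable',
    b'immutable': b'base',
}

def fallback_chain(filtername, table):
    """Yield successively broader filter names whose cache could be reused."""
    while filtername in table:
        filtername = table[filtername]
        yield filtername

# list(fallback_chain(b'visible', example_subsettable))
# -> [b'served', b'immutable', b'base']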
def _unknownnode(node):
    """raises ValueError when branchcache finds a node which does not exist"""
    raise ValueError('node %s does not exist' % node.hex())


def _branchcachedesc(repo):
    if repo.filtername is not None:
        return b'branch cache (%s)' % repo.filtername
    else:
        return b'branch cache'

class _BaseBranchCache:
    """A dict-like object that holds a cache of branch heads.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered and obsolete revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closed_nodes: Optional[Set[bytes]] = None,
    ) -> None:
        """hasnode is a function which can be used to verify whether changelog
        has a given node or not. If it's not provided, we assume that every node
        we have exists in changelog"""
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closed_nodes is None:
            closed_nodes = set()
        self._closednodes = set(closed_nodes)
        self._entries = dict(entries)

    def __iter__(self):
        return iter(self._entries)

    def __setitem__(self, key, value):
        self._entries[key] = value

    def __getitem__(self, key):
        return self._entries[key]

    def __contains__(self, key):
        return key in self._entries

    def iteritems(self):
        return self._entries.items()

    items = iteritems

    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        return label in self._entries

    def _branchtip(self, heads):
        """Return tuple with last open head in heads and false,
        otherwise return last closed head and true."""
        tip = heads[-1]
        closed = True
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        """Return the tipmost open head on branch, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch."""
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        heads = self._entries[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        for bn, heads in self.items():
            yield (bn, heads) + self._branchtip(heads)

    def iterheads(self):
        """returns all the heads"""
        return self._entries.values()

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        the missing heads, this function updates self to be correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        max_rev = -1
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))
            max_rev = max(max_rev, r)
        if max_rev < 0:
            max_rev = None

        # Delay fetching the topological heads until they are needed.
        # A repository without non-continuous branches can skip this part.
        topoheads = None

        # If a changeset is visible, its parents must be visible too, so
        # use the faster unfiltered parent accessor.
        parentrevs = repo.unfiltered().changelog.parentrevs

        # Faster than using ctx.obsolete()
        obsrevs = obsolete.getrevs(repo, b'obsolete')

        for branch, newheadrevs in newbranches.items():
            # For every branch, compute the new branchheads.
            # A branchhead is a revision such that no descendant is on
            # the same branch.
            #
            # The branchheads are computed iteratively in revision order.
            # This ensures topological order, i.e. parents are processed
            # before their children. Ancestors are inclusive here, i.e.
            # any revision is an ancestor of itself.
            #
            # Core observations:
            # - The current revision is always a branchhead for the
            #   repository up to that point.
            # - It is the first revision of the branch if and only if
            #   there was no branchhead before. In that case, it is the
            #   only branchhead as there are no possible ancestors on
            #   the same branch.
            # - If a parent is on the same branch, a branchhead can
            #   only be an ancestor of that parent, if it is parent
            #   itself. Otherwise it would have been removed as ancestor
            #   of that parent before.
            # - Therefore, if all parents are on the same branch, they
            #   can just be removed from the branchhead set.
            # - If one parent is on the same branch and the other is not
            #   and there was exactly one branchhead known, the existing
            #   branchhead can only be an ancestor if it is the parent.
            #   Otherwise it would have been removed as ancestor of
            #   the parent before. The other parent therefore can't have
            #   a branchhead as ancestor.
            # - In all other cases, the parents on different branches
            #   could have a branchhead as ancestor. Those parents are
            #   kept in the "uncertain" set. If all branchheads are also
            #   topological heads, they can't have descendants and further
            #   checks can be skipped. Otherwise, the ancestors of the
            #   "uncertain" set are removed from branchheads.
            #   This computation is heavy and avoided if at all possible.
            bheads = self._entries.get(branch, [])
            bheadset = {cl.rev(node) for node in bheads}
            uncertain = set()
            for newrev in sorted(newheadrevs):
                if newrev in obsrevs:
                    # We ignore obsolete changesets as they shouldn't be
                    # considered heads.
                    continue

                if not bheadset:
                    bheadset.add(newrev)
                    continue

                parents = [p for p in parentrevs(newrev) if p != nullrev]
                samebranch = set()
                otherbranch = set()
                obsparents = set()
                for p in parents:
                    if p in obsrevs:
                        # We ignored this obsolete changeset earlier, but now
                        # that it has non-ignored children, we need to make
                        # sure their ancestors are not considered heads. To
                        # achieve that, we will simply treat this obsolete
                        # changeset as a parent from other branch.
                        obsparents.add(p)
                    elif p in bheadset or getbranchinfo(p)[0] == branch:
                        samebranch.add(p)
                    else:
                        otherbranch.add(p)
                if not (len(bheadset) == len(samebranch) == 1):
                    uncertain.update(otherbranch)
                    uncertain.update(obsparents)
                bheadset.difference_update(samebranch)
                bheadset.add(newrev)

            if uncertain:
                if topoheads is None:
                    topoheads = set(cl.headrevs())
                if bheadset - topoheads:
                    floorrev = min(bheadset)
                    if floorrev <= max(uncertain):
                        ancestors = set(cl.ancestors(uncertain, floorrev))
                        bheadset -= ancestors
            if bheadset:
                self[branch] = [cl.node(rev) for rev in sorted(bheadset)]

        duration = util.timer() - starttime
        repo.ui.log(
            b'branchcache',
            b'updated %s in %.4f seconds\n',
            _branchcachedesc(repo),
            duration,
        )
        return max_rev

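The docstring above fully specifies the on-disk layout, and `branchcache.load`/`branchcache.write` below implement it. As a rough, self-contained illustration of parsing that format by hand (the hex nodes here are fabricated):

sample = (
    b'e5a9b1c0' + b'0' * 32 + b' 42\n'            # <tip node> <tip rev>
    + b'e5a9b1c0' + b'0' * 32 + b' o default\n'   # open head on "default"
    + b'9f3c2d1a' + b'0' * 32 + b' c stable\n'    # closed head on "stable"
)
entries, closed = {}, set()
lines = iter(sample.splitlines())
tipnode, tiprev = next(lines).split(b' ', 2)[:2]
for line in lines:
    node, state, label = line.split(b' ', 2)
    entries.setdefault(label, []).append(node)
    if state == b'c':
        closed.add(node)
# tiprev == b'42', sorted(entries) == [b'default', b'stable']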
class branchcache(_BaseBranchCache):
    """Branchmap info for a local repo or repoview"""

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        tipnode: Optional[bytes] = None,
        tiprev: Optional[int] = nullrev,
        filteredhash: Optional[bytes] = None,
        closednodes: Optional[Set[bytes]] = None,
        hasnode: Optional[Callable[[bytes], bool]] = None,
        verify_node: bool = False,
    ) -> None:
        """hasnode is a function which can be used to verify whether changelog
        has a given node or not. If it's not provided, we assume that every node
        we have exists in changelog"""
        self._filtername = repo.filtername
        self._delayed = False
        if tipnode is None:
            self.tipnode = repo.nullid
        else:
            self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash

        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.

        # Do we need to verify branch at all?
        self._verify_node = verify_node
-        # whether closed nodes are verified or not
-        self._closedverified = False
        # branches for which nodes are verified
        self._verifiedbranches = set()
        self._hasnode = None
        if self._verify_node:
            self._hasnode = repo.changelog.hasnode

    def validfor(self, repo):
        """check that cache contents are valid for (a subset of) this repo

        - False when the order of changesets changed or if we detect a strip.
        - True when cache is up-to-date for the current repo or its subset."""
        try:
            node = repo.changelog.node(self.tiprev)
        except IndexError:
            # changesets were stripped and now we don't even have enough to
            # find tiprev
            return False
        if self.tipnode != node:
            # tiprev doesn't correspond to tipnode: repo was stripped, or this
            # repo has a different order of changesets
            return False
        tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
        # hashes don't match if this repo view has a different set of filtered
        # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
        # history was rewritten)
        return self.filteredhash == tiphash

    @classmethod
    def fromfile(cls, repo):
        f = None
        try:
            f = repo.cachevfs(cls._filename(repo))
            lineiter = iter(f)
            cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
            last, lrev = cachekey[:2]
            last, lrev = bin(last), int(lrev)
            filteredhash = None
            if len(cachekey) > 2:
                filteredhash = bin(cachekey[2])
            bcache = cls(
                repo,
                tipnode=last,
                tiprev=lrev,
                filteredhash=filteredhash,
                verify_node=True,
            )
            if not bcache.validfor(repo):
                # invalidate the cache
                raise ValueError('tip differs')
            bcache.load(repo, lineiter)
        except (IOError, OSError):
            return None

        except Exception as inst:
            if repo.ui.debugflag:
                msg = b'invalid %s: %s\n'
                repo.ui.debug(
                    msg
                    % (
                        _branchcachedesc(repo),
                        stringutil.forcebytestr(inst),
                    )
                )
            bcache = None

        finally:
            if f:
                f.close()

        return bcache

    def load(self, repo, lineiter):
        """fully loads the branchcache by reading from the file using the line
        iterator passed"""
        for line in lineiter:
            line = line.rstrip(b'\n')
            if not line:
                continue
            node, state, label = line.split(b" ", 2)
            if state not in b'oc':
                raise ValueError('invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            self._entries.setdefault(label, []).append(node)
            if state == b'c':
                self._closednodes.add(node)

    @staticmethod
    def _filename(repo):
        """name of a branchcache file for a given repo or repoview"""
        filename = b"branch2"
        if repo.filtername:
            filename = b'%s-%s' % (filename, repo.filtername)
        return filename

    def copy(self, repo):
        """return a deep copy of the branchcache object"""
        other = type(self)(
            repo=repo,
            # we always do a shallow copy of self._entries, and the values
            # are always replaced, so no need to deepcopy as long as the
            # above remains true.
            entries=self._entries,
            tipnode=self.tipnode,
            tiprev=self.tiprev,
            filteredhash=self.filteredhash,
            closednodes=set(self._closednodes),
            verify_node=self._verify_node,
        )
        # the copy will likely schedule a write anyway, but it does not seem
        # to hurt to overschedule
        other._delayed = self._delayed
        # also copy information about the current verification state
-        other._closedverified = self._closedverified
        other._verifiedbranches = set(self._verifiedbranches)
        return other

    def write(self, repo):
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        tr = repo.currenttransaction()
        if not getattr(tr, 'finalized', True):
            # Avoid premature writing.
            #
            # (The cache warming setup by localrepo will update the file later.)
            self._delayed = True
            return
        try:
            filename = self._filename(repo)
            with repo.cachevfs(filename, b"w", atomictemp=True) as f:
                cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
                if self.filteredhash is not None:
                    cachekey.append(hex(self.filteredhash))
                f.write(b" ".join(cachekey) + b'\n')
                nodecount = 0
                for label, nodes in sorted(self._entries.items()):
                    label = encoding.fromlocal(label)
                    for node in nodes:
                        nodecount += 1
                        if node in self._closednodes:
                            state = b'c'
                        else:
                            state = b'o'
                        f.write(b"%s %s %s\n" % (hex(node), state, label))
            repo.ui.log(
                b'branchcache',
                b'wrote %s with %d labels and %d nodes\n',
                _branchcachedesc(repo),
                len(self._entries),
                nodecount,
            )
            self._delayed = False
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug(
                b"couldn't write branch cache: %s\n"
                % stringutil.forcebytestr(inst)
            )

-    def _verifyclosed(self):
-        """verify the closed nodes we have"""
-        if not self._verify_node:
-            return
-        if self._closedverified:
-            return
-        assert self._hasnode is not None
-        for node in self._closednodes:
-            if not self._hasnode(node):
-                _unknownnode(node)
-
-        self._closedverified = True
-
    def _verifybranch(self, branch):
        """verify head nodes for the given branch."""
        if not self._verify_node:
            return
        if branch not in self._entries or branch in self._verifiedbranches:
            return
        assert self._hasnode is not None
        for n in self._entries[branch]:
            if not self._hasnode(n):
                _unknownnode(n)

        self._verifiedbranches.add(branch)

    def _verifyall(self):
        """verifies nodes of all the branches"""
        for b in self._entries.keys():
            if b not in self._verifiedbranches:
                self._verifybranch(b)

    def __getitem__(self, key):
        self._verifybranch(key)
        return super().__getitem__(key)

    def __contains__(self, key):
        self._verifybranch(key)
        return super().__contains__(key)

    def iteritems(self):
        self._verifyall()
        return super().iteritems()

    items = iteritems

    def iterheads(self):
        """returns all the heads"""
        self._verifyall()
        return super().iterheads()

    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        self._verifybranch(label)
        return super().hasbranch(label)

    def branchheads(self, branch, closed=False):
        self._verifybranch(branch)
        return super().branchheads(branch, closed=closed)

    def update(self, repo, revgen):
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        cl = repo.changelog
        max_rev = super().update(repo, revgen)
        # new tip revision which we found after iterating items from new
        # branches
        if max_rev is not None and max_rev > self.tiprev:
            self.tiprev = max_rev
            self.tipnode = cl.node(max_rev)

        if not self.validfor(repo):
            # old cache key is now invalid for the repo, but we've just updated
            # the cache and we assume it's valid, so let's make the cache key
            # valid as well by recomputing it from the cached data
            self.tipnode = repo.nullid
            self.tiprev = nullrev
            for heads in self.iterheads():
                if not heads:
                    # all revisions on a branch are obsolete
                    continue
                # note: tiprev is not necessarily the tip revision of repo,
                # because the tip could be obsolete (i.e. not a head)
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(
            repo, self.tiprev, needobsolete=True
        )

        self.write(repo)

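With `_verifyclosed` dropped, verification is purely per-branch and memoized through `_verifiedbranches`; closed nodes are no longer checked wholesale. A stripped-down sketch of that memoization pattern (toy data, `known` stands in for `changelog.hasnode`):

known = {b'n1', b'n2'}

class LazyVerify:
    def __init__(self, entries):
        self._entries = entries
        self._verifiedbranches = set()

    def verifybranch(self, branch):
        if branch not in self._entries or branch in self._verifiedbranches:
            return
        for n in self._entries[branch]:
            if n not in known:
                raise ValueError('node %r does not exist' % n)
        self._verifiedbranches.add(branch)  # verified once, skipped after

c = LazyVerify({b'default': [b'n1'], b'stable': [b'n2']})
c.verifybranch(b'default')  # verifies and memoizes
c.verifybranch(b'default')  # no-op on the second call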
class remotebranchcache(_BaseBranchCache):
    """Branchmap info for a remote connection, should not write locally"""

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closednodes: Optional[Set[bytes]] = None,
    ) -> None:
        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)


# Revision branch info cache

_rbcversion = b'-v1'
_rbcnames = b'rbc-names' + _rbcversion
_rbcrevs = b'rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit indicating closed]
_rbcrecfmt = b'>4sI'
_rbcrecsize = calcsize(_rbcrecfmt)
_rbcmininc = 64 * _rbcrecsize
_rbcnodelen = 4
_rbcbranchidxmask = 0x7FFFFFFF
_rbccloseflag = 0x80000000

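The record format above is easy to poke at directly with the standard struct module; a small sketch round-tripping one record with the same layout and flag values (the node prefix and index are invented):

import struct

recfmt = b'>4sI'         # same layout as _rbcrecfmt above
closeflag = 0x80000000   # _rbccloseflag
idxmask = 0x7FFFFFFF     # _rbcbranchidxmask

# Pack a record for a branch-closing revision: 4-byte node prefix plus the
# branch name index with the close bit set.
rec = struct.pack(recfmt, b'\xde\xad\xbe\xef', 5 | closeflag)
prefix, field = struct.unpack(recfmt, rec)
# prefix == b'\xde\xad\xbe\xef'
# field & idxmask == 5
# bool(field & closeflag) is True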
class rbcrevs:
    """a byte string consisting of an immutable prefix followed by a mutable suffix"""

    def __init__(self, revs):
        self._prefix = revs
        self._rest = bytearray()

    def __len__(self):
        return len(self._prefix) + len(self._rest)

    def unpack_record(self, rbcrevidx):
        if rbcrevidx < len(self._prefix):
            return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
        else:
            return unpack_from(
                _rbcrecfmt,
                util.buffer(self._rest),
                rbcrevidx - len(self._prefix),
            )

    def make_mutable(self):
        if len(self._prefix) > 0:
            entirety = bytearray()
            entirety[:] = self._prefix
            entirety.extend(self._rest)
            self._rest = entirety
            self._prefix = bytearray()

    def truncate(self, pos):
        self.make_mutable()
        del self._rest[pos:]

    def pack_into(self, rbcrevidx, node, branchidx):
        if rbcrevidx < len(self._prefix):
            self.make_mutable()
        buf = self._rest
        start_offset = rbcrevidx - len(self._prefix)
        end_offset = start_offset + _rbcrecsize

        if len(self._rest) < end_offset:
            # bytearray doesn't allocate extra space at least in Python 3.7.
            # When multiple changesets are added in a row, precise resize would
            # result in quadratic complexity. Overallocate to compensate by
            # using the classic doubling technique for dynamic arrays instead.
            # If there was a gap in the map before, less space will be reserved.
            self._rest.extend(b'\0' * end_offset)
        return pack_into(
            _rbcrecfmt,
            buf,
            start_offset,
            node,
            branchidx,
        )

    def extend(self, extension):
        return self._rest.extend(extension)

    def slice(self, begin, end):
        if begin < len(self._prefix):
            acc = bytearray()
            acc[:] = self._prefix[begin:end]
            acc.extend(
                self._rest[begin - len(self._prefix) : end - len(self._prefix)]
            )
            return acc
        return self._rest[begin - len(self._prefix) : end - len(self._prefix)]

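Given the class above, a record written past the immutable prefix can be read straight back. A quick usage sketch, assuming this module's helpers (`util.buffer`, `_rbcrecfmt`, `pack_into`, `unpack_from`) are in scope:

# Round trip through rbcrevs: start from an empty immutable prefix,
# write one record at index 0, read it back.
revs = rbcrevs(bytearray())
revs.pack_into(0, b'\x01\x02\x03\x04', 7)
# revs.unpack_record(0) -> (b'\x01\x02\x03\x04', 7)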
796 class revbranchcache:
780 class revbranchcache:
797 """Persistent cache, mapping from revision number to branch name and close.
781 """Persistent cache, mapping from revision number to branch name and close.
798 This is a low level cache, independent of filtering.
782 This is a low level cache, independent of filtering.
799
783
800 Branch names are stored in rbc-names in internal encoding separated by 0.
784 Branch names are stored in rbc-names in internal encoding separated by 0.
801 rbc-names is append-only, and each branch name is only stored once and will
785 rbc-names is append-only, and each branch name is only stored once and will
802 thus have a unique index.
786 thus have a unique index.
803
787
804 The branch info for each revision is stored in rbc-revs as constant size
788 The branch info for each revision is stored in rbc-revs as constant size
805 records. The whole file is read into memory, but it is only 'parsed' on
789 records. The whole file is read into memory, but it is only 'parsed' on
806 demand. The file is usually append-only but will be truncated if repo
790 demand. The file is usually append-only but will be truncated if repo
807 modification is detected.
791 modification is detected.
808 The record for each revision contains the first 4 bytes of the
792 The record for each revision contains the first 4 bytes of the
809 corresponding node hash, and the record is only used if it still matches.
793 corresponding node hash, and the record is only used if it still matches.
810 Even a completely trashed rbc-revs fill thus still give the right result
794 Even a completely trashed rbc-revs fill thus still give the right result
811 while converging towards full recovery ... assuming no incorrectly matching
795 while converging towards full recovery ... assuming no incorrectly matching
812 node hashes.
796 node hashes.
813 The record also contains 4 bytes where 31 bits contains the index of the
797 The record also contains 4 bytes where 31 bits contains the index of the
814 branch and the last bit indicate that it is a branch close commit.
798 branch and the last bit indicate that it is a branch close commit.
815 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
799 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
816 and will grow with it but be 1/8th of its size.
800 and will grow with it but be 1/8th of its size.
817 """
801 """
818
802
819 def __init__(self, repo, readonly=True):
803 def __init__(self, repo, readonly=True):
820 assert repo.filtername is None
804 assert repo.filtername is None
821 self._repo = repo
805 self._repo = repo
822 self._names = [] # branch names in local encoding with static index
806 self._names = [] # branch names in local encoding with static index
823 self._rbcrevs = rbcrevs(bytearray())
807 self._rbcrevs = rbcrevs(bytearray())
824 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
808 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
825 try:
809 try:
826 bndata = repo.cachevfs.read(_rbcnames)
810 bndata = repo.cachevfs.read(_rbcnames)
827 self._rbcsnameslen = len(bndata) # for verification before writing
811 self._rbcsnameslen = len(bndata) # for verification before writing
828 if bndata:
812 if bndata:
829 self._names = [
813 self._names = [
830 encoding.tolocal(bn) for bn in bndata.split(b'\0')
814 encoding.tolocal(bn) for bn in bndata.split(b'\0')
831 ]
815 ]
832 except (IOError, OSError):
816 except (IOError, OSError):
833 if readonly:
817 if readonly:
834 # don't try to use cache - fall back to the slow path
818 # don't try to use cache - fall back to the slow path
835 self.branchinfo = self._branchinfo
819 self.branchinfo = self._branchinfo
836
820
837 if self._names:
821 if self._names:
838 try:
822 try:
839 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
823 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
840 with repo.cachevfs(_rbcrevs) as fp:
824 with repo.cachevfs(_rbcrevs) as fp:
841 data = util.buffer(util.mmapread(fp))
825 data = util.buffer(util.mmapread(fp))
842 else:
826 else:
843 data = repo.cachevfs.read(_rbcrevs)
827 data = repo.cachevfs.read(_rbcrevs)
844 self._rbcrevs = rbcrevs(data)
828 self._rbcrevs = rbcrevs(data)
845 except (IOError, OSError) as inst:
829 except (IOError, OSError) as inst:
846 repo.ui.debug(
830 repo.ui.debug(
847 b"couldn't read revision branch cache: %s\n"
831 b"couldn't read revision branch cache: %s\n"
848 % stringutil.forcebytestr(inst)
832 % stringutil.forcebytestr(inst)
849 )
833 )
850 # remember number of good records on disk
834 # remember number of good records on disk
851 self._rbcrevslen = min(
835 self._rbcrevslen = min(
852 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
836 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
853 )
837 )
854 if self._rbcrevslen == 0:
838 if self._rbcrevslen == 0:
855 self._names = []
839 self._names = []
856 self._rbcnamescount = len(self._names) # number of names read at
840 self._rbcnamescount = len(self._names) # number of names read at
857 # _rbcsnameslen
841 # _rbcsnameslen
858
842
859 def _clear(self):
843 def _clear(self):
860 self._rbcsnameslen = 0
844 self._rbcsnameslen = 0
861 del self._names[:]
845 del self._names[:]
862 self._rbcnamescount = 0
846 self._rbcnamescount = 0
863 self._rbcrevslen = len(self._repo.changelog)
847 self._rbcrevslen = len(self._repo.changelog)
864 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
848 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
865 util.clearcachedproperty(self, b'_namesreverse')
849 util.clearcachedproperty(self, b'_namesreverse')
866
850
867 @util.propertycache
851 @util.propertycache
868 def _namesreverse(self):
852 def _namesreverse(self):
869 return {b: r for r, b in enumerate(self._names)}
853 return {b: r for r, b in enumerate(self._names)}
870
854
    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == b'\0\0\0\0':
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug(
                    b"referenced branch names not found"
                    b" - rebuilding revision branch cache from scratch\n"
                )
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug(
                b"history modification detected - truncating "
                b"revision branch cache to revision %d\n" % rev
            )
            truncate = rbcrevidx + _rbcrecsize
            self._rbcrevs.truncate(truncate)
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

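    # Slow path: read straight from the changelog, then backfill the record
    # so the next lookup of this rev can take the fast path above.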
    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

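    # setdata() feeds the cache from an already-parsed changelogrevision, so
    # code that just added a revision can populate the record without
    # re-parsing the revision text.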
    def setdata(self, rev, changelogrevision):
        """add new branch data to the cache"""
        branch, close = changelogrevision.branchinfo

        if branch in self._namesreverse:
            branchidx = self._namesreverse[branch]
        else:
            branchidx = len(self._names)
            self._names.append(branch)
            self._namesreverse[branch] = branchidx
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
        # If no cache data were readable (file missing, bad permissions,
        # etc.), the cache was bypassing itself by setting:
        #
        #   self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypass.
        if 'branchinfo' in vars(self):
            del self.branchinfo

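    # _setcachedata clamps _rbcrevslen down to `rev`, marking this record and
    # everything after it as not yet persisted; write() uses that watermark
    # to decide where the on-disk revs file needs appending or truncating.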
    def _setcachedata(self, rev, node, branchidx):
        """Write the node's branch data to the in-memory cache."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize(b'write-revbranchcache', self.write)

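    # Writing is best effort: the wlock is taken without waiting, and any
    # I/O or lock error below only produces a debug message; the cache can
    # always be rebuilt from the changelog later.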
    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = b''
        try:
            # write the new names
            if self._rbcnamescount < len(self._names):
                wlock = repo.wlock(wait=False)
                step = b' names'
                self._writenames(repo)

            # write the new revs
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = b''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                self._writerevs(repo, start)

        except (IOError, OSError, error.Abort, error.LockError) as inst:
            repo.ui.debug(
                b"couldn't write revision branch cache%s: %s\n"
                % (step, stringutil.forcebytestr(inst))
            )
        finally:
            if wlock is not None:
                wlock.release()

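    # The names file is append-only as long as it still matches what was
    # read (tracked by _rbcsnameslen); on any mismatch it is rewritten from
    # scratch, and the revs file is unlinked first because its records index
    # into the names list.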
    def _writenames(self, repo):
        """write the new branch names to revbranchcache"""
        if self._rbcnamescount != 0:
            f = repo.cachevfs.open(_rbcnames, b'ab')
            if f.tell() == self._rbcsnameslen:
                f.write(b'\0')
            else:
                f.close()
                repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
                self._rbcnamescount = 0
                self._rbcrevslen = 0
        if self._rbcnamescount == 0:
            # before rewriting names, make sure references are removed
            repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
            f = repo.cachevfs.open(_rbcnames, b'wb')
            f.write(
                b'\0'.join(
                    encoding.fromlocal(b)
                    for b in self._names[self._rbcnamescount :]
                )
            )
            self._rbcsnameslen = f.tell()
            f.close()
        self._rbcnamescount = len(self._names)

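    # Records are appended at offset `start`; if the on-disk file is not the
    # expected length, it is truncated back to `start` (or, failing that,
    # rewritten from offset 0).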
    def _writerevs(self, repo, start):
        """write the new revs to revbranchcache"""
        revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
        with repo.cachevfs.open(_rbcrevs, b'ab') as f:
            if f.tell() != start:
                repo.ui.debug(
                    b"truncating cache/%s to %d\n" % (_rbcrevs, start)
                )
                f.seek(start)
                if f.tell() != start:
                    start = 0
                    f.seek(start)
                f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs.slice(start, end))
        self._rbcrevslen = revs
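
# Illustrative use (a sketch, assuming the `revbranchcache()` accessor that
# localrepo exposes for this class; not part of this module):
#
#     rbc = repo.revbranchcache()
#     name, closed = rbc.branchinfo(rev)  # fast path when the record is cached
#     rbc.write()                         # best-effort flush of dirty records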