branchcache: move header writing into a `_write_header` method...
marmoute
r52358:9007387a default
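The change is a straightforward extract-method refactoring: the cache-key header that `branchcache.write()` previously assembled inline is moved into a new `_write_header()` method, symmetric with the existing `_write_heads()`. A minimal standalone sketch of the resulting write path and of the two-part `branch2` layout it produces (toy stand-ins, not Mercurial's actual classes):

```python
from io import BytesIO


class ToyBranchCache:
    """Illustrative stand-in that writes the same two-part layout as branch2."""

    def __init__(self, tipnode_hex, tiprev, heads):
        self.tipnode_hex = tipnode_hex  # hex of the tip node
        self.tiprev = tiprev            # integer tip revision
        self.heads = heads              # list of (hex_node, b'o' or b'c', branch)

    def write(self, fp):
        # after the refactoring, write() only composes the two helpers
        self._write_header(fp)
        return self._write_heads(fp)

    def _write_header(self, fp):
        # first line: cache key (tip node, tip rev)
        fp.write(b"%s %d\n" % (self.tipnode_hex, self.tiprev))

    def _write_heads(self, fp):
        # one "<node> <state> <branch>" line per head, returning the count
        count = 0
        for node, state, branch in self.heads:
            fp.write(b"%s %s %s\n" % (node, state, branch))
            count += 1
        return count


# hypothetical values, for illustration only
buf = BytesIO()
cache = ToyBranchCache(b"9007387a" * 5, 52358, [(b"9007387a" * 5, b"o", b"default")])
assert cache.write(buf) == 1
```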
@@ -1,1038 +1,1042 @@
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import struct
9 import struct
10
10
11 from .node import (
11 from .node import (
12 bin,
12 bin,
13 hex,
13 hex,
14 nullrev,
14 nullrev,
15 )
15 )
16
16
17 from typing import (
17 from typing import (
18 Any,
18 Any,
19 Callable,
19 Callable,
20 Dict,
20 Dict,
21 Iterable,
21 Iterable,
22 List,
22 List,
23 Optional,
23 Optional,
24 Set,
24 Set,
25 TYPE_CHECKING,
25 TYPE_CHECKING,
26 Tuple,
26 Tuple,
27 Union,
27 Union,
28 )
28 )
29
29
30 from . import (
30 from . import (
31 encoding,
31 encoding,
32 error,
32 error,
33 obsolete,
33 obsolete,
34 scmutil,
34 scmutil,
35 util,
35 util,
36 )
36 )
37
37
38 from .utils import (
38 from .utils import (
39 repoviewutil,
39 repoviewutil,
40 stringutil,
40 stringutil,
41 )
41 )
42
42
43 if TYPE_CHECKING:
43 if TYPE_CHECKING:
44 from . import localrepo
44 from . import localrepo
45
45
46 assert [localrepo]
46 assert [localrepo]
47
47
48 subsettable = repoviewutil.subsettable
48 subsettable = repoviewutil.subsettable
49
49
50 calcsize = struct.calcsize
50 calcsize = struct.calcsize
51 pack_into = struct.pack_into
51 pack_into = struct.pack_into
52 unpack_from = struct.unpack_from
52 unpack_from = struct.unpack_from
53
53
54
54
55 class BranchMapCache:
55 class BranchMapCache:
56 """mapping of filtered views of repo with their branchcache"""
56 """mapping of filtered views of repo with their branchcache"""
57
57
58 def __init__(self):
58 def __init__(self):
59 self._per_filter = {}
59 self._per_filter = {}
60
60
61 def __getitem__(self, repo):
61 def __getitem__(self, repo):
62 self.updatecache(repo)
62 self.updatecache(repo)
63 bcache = self._per_filter[repo.filtername]
63 bcache = self._per_filter[repo.filtername]
64 assert bcache._filtername == repo.filtername, (
64 assert bcache._filtername == repo.filtername, (
65 bcache._filtername,
65 bcache._filtername,
66 repo.filtername,
66 repo.filtername,
67 )
67 )
68 return bcache
68 return bcache
69
69
70 def update_disk(self, repo):
70 def update_disk(self, repo):
71 """ensure and up-to-date cache is (or will be) written on disk
71 """ensure and up-to-date cache is (or will be) written on disk
72
72
73 The cache for this repository view is updated if needed and written on
73 The cache for this repository view is updated if needed and written on
74 disk.
74 disk.
75
75
76 If a transaction is in progress, the writing is schedule to transaction
76 If a transaction is in progress, the writing is schedule to transaction
77 close. See the `BranchMapCache.write_delayed` method.
77 close. See the `BranchMapCache.write_delayed` method.
78
78
79 This method exist independently of __getitem__ as it is sometime useful
79 This method exist independently of __getitem__ as it is sometime useful
80 to signal that we have no intend to use the data in memory yet.
80 to signal that we have no intend to use the data in memory yet.
81 """
81 """
82 self.updatecache(repo)
82 self.updatecache(repo)
83 bcache = self._per_filter[repo.filtername]
83 bcache = self._per_filter[repo.filtername]
84 assert bcache._filtername == repo.filtername, (
84 assert bcache._filtername == repo.filtername, (
85 bcache._filtername,
85 bcache._filtername,
86 repo.filtername,
86 repo.filtername,
87 )
87 )
88 bcache.write(repo)
88 bcache.write(repo)
89
89
90 def updatecache(self, repo):
90 def updatecache(self, repo):
91 """Update the cache for the given filtered view on a repository"""
91 """Update the cache for the given filtered view on a repository"""
92 # This can trigger updates for the caches for subsets of the filtered
92 # This can trigger updates for the caches for subsets of the filtered
93 # view, e.g. when there is no cache for this filtered view or the cache
93 # view, e.g. when there is no cache for this filtered view or the cache
94 # is stale.
94 # is stale.
95
95
96 cl = repo.changelog
96 cl = repo.changelog
97 filtername = repo.filtername
97 filtername = repo.filtername
98 bcache = self._per_filter.get(filtername)
98 bcache = self._per_filter.get(filtername)
99 if bcache is None or not bcache.validfor(repo):
99 if bcache is None or not bcache.validfor(repo):
100 # cache object missing or cache object stale? Read from disk
100 # cache object missing or cache object stale? Read from disk
101 bcache = branchcache.fromfile(repo)
101 bcache = branchcache.fromfile(repo)
102
102
103 revs = []
103 revs = []
104 if bcache is None:
104 if bcache is None:
105 # no (fresh) cache available anymore, perhaps we can re-use
105 # no (fresh) cache available anymore, perhaps we can re-use
106 # the cache for a subset, then extend that to add info on missing
106 # the cache for a subset, then extend that to add info on missing
107 # revisions.
107 # revisions.
108 subsetname = subsettable.get(filtername)
108 subsetname = subsettable.get(filtername)
109 if subsetname is not None:
109 if subsetname is not None:
110 subset = repo.filtered(subsetname)
110 subset = repo.filtered(subsetname)
111 bcache = self[subset].copy(repo)
111 bcache = self[subset].copy(repo)
112 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
112 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
113 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
113 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
114 else:
114 else:
115 # nothing to fall back on, start empty.
115 # nothing to fall back on, start empty.
116 bcache = branchcache(repo)
116 bcache = branchcache(repo)
117
117
118 revs.extend(cl.revs(start=bcache.tiprev + 1))
118 revs.extend(cl.revs(start=bcache.tiprev + 1))
119 if revs:
119 if revs:
120 bcache.update(repo, revs)
120 bcache.update(repo, revs)
121
121
122 assert bcache.validfor(repo), filtername
122 assert bcache.validfor(repo), filtername
123 self._per_filter[repo.filtername] = bcache
123 self._per_filter[repo.filtername] = bcache
124
124
125 def replace(self, repo, remotebranchmap):
125 def replace(self, repo, remotebranchmap):
126 """Replace the branchmap cache for a repo with a branch mapping.
126 """Replace the branchmap cache for a repo with a branch mapping.
127
127
128 This is likely only called during clone with a branch map from a
128 This is likely only called during clone with a branch map from a
129 remote.
129 remote.
130
130
131 """
131 """
132 cl = repo.changelog
132 cl = repo.changelog
133 clrev = cl.rev
133 clrev = cl.rev
134 clbranchinfo = cl.branchinfo
134 clbranchinfo = cl.branchinfo
135 rbheads = []
135 rbheads = []
136 closed = set()
136 closed = set()
137 for bheads in remotebranchmap.values():
137 for bheads in remotebranchmap.values():
138 rbheads += bheads
138 rbheads += bheads
139 for h in bheads:
139 for h in bheads:
140 r = clrev(h)
140 r = clrev(h)
141 b, c = clbranchinfo(r)
141 b, c = clbranchinfo(r)
142 if c:
142 if c:
143 closed.add(h)
143 closed.add(h)
144
144
145 if rbheads:
145 if rbheads:
146 rtiprev = max((int(clrev(node)) for node in rbheads))
146 rtiprev = max((int(clrev(node)) for node in rbheads))
147 cache = branchcache(
147 cache = branchcache(
148 repo,
148 repo,
149 remotebranchmap,
149 remotebranchmap,
150 repo[rtiprev].node(),
150 repo[rtiprev].node(),
151 rtiprev,
151 rtiprev,
152 closednodes=closed,
152 closednodes=closed,
153 )
153 )
154
154
155 # Try to stick it as low as possible
155 # Try to stick it as low as possible
156 # filter above served are unlikely to be fetch from a clone
156 # filter above served are unlikely to be fetch from a clone
157 for candidate in (b'base', b'immutable', b'served'):
157 for candidate in (b'base', b'immutable', b'served'):
158 rview = repo.filtered(candidate)
158 rview = repo.filtered(candidate)
159 if cache.validfor(rview):
159 if cache.validfor(rview):
160 cache = self._per_filter[candidate] = cache.copy(rview)
160 cache = self._per_filter[candidate] = cache.copy(rview)
161 cache.write(rview)
161 cache.write(rview)
162 return
162 return
163
163
164 def clear(self):
164 def clear(self):
165 self._per_filter.clear()
165 self._per_filter.clear()
166
166
167 def write_delayed(self, repo):
167 def write_delayed(self, repo):
168 unfi = repo.unfiltered()
168 unfi = repo.unfiltered()
169 for filtername, cache in self._per_filter.items():
169 for filtername, cache in self._per_filter.items():
170 if cache._delayed:
170 if cache._delayed:
171 repo = unfi.filtered(filtername)
171 repo = unfi.filtered(filtername)
172 cache.write(repo)
172 cache.write(repo)
173
173
174
174
175 def _unknownnode(node):
175 def _unknownnode(node):
176 """raises ValueError when branchcache found a node which does not exists"""
176 """raises ValueError when branchcache found a node which does not exists"""
177 raise ValueError('node %s does not exist' % node.hex())
177 raise ValueError('node %s does not exist' % node.hex())
178
178
179
179
180 def _branchcachedesc(repo):
180 def _branchcachedesc(repo):
181 if repo.filtername is not None:
181 if repo.filtername is not None:
182 return b'branch cache (%s)' % repo.filtername
182 return b'branch cache (%s)' % repo.filtername
183 else:
183 else:
184 return b'branch cache'
184 return b'branch cache'
185
185
186
186
187 class _BaseBranchCache:
187 class _BaseBranchCache:
188 """A dict like object that hold branches heads cache.
188 """A dict like object that hold branches heads cache.
189
189
190 This cache is used to avoid costly computations to determine all the
190 This cache is used to avoid costly computations to determine all the
191 branch heads of a repo.
191 branch heads of a repo.
192
192
193 The cache is serialized on disk in the following format:
193 The cache is serialized on disk in the following format:
194
194
195 <tip hex node> <tip rev number> [optional filtered repo hex hash]
195 <tip hex node> <tip rev number> [optional filtered repo hex hash]
196 <branch head hex node> <open/closed state> <branch name>
196 <branch head hex node> <open/closed state> <branch name>
197 <branch head hex node> <open/closed state> <branch name>
197 <branch head hex node> <open/closed state> <branch name>
198 ...
198 ...
199
199
200 The first line is used to check if the cache is still valid. If the
200 The first line is used to check if the cache is still valid. If the
201 branch cache is for a filtered repo view, an optional third hash is
201 branch cache is for a filtered repo view, an optional third hash is
202 included that hashes the hashes of all filtered and obsolete revisions.
202 included that hashes the hashes of all filtered and obsolete revisions.
203
203
204 The open/closed state is represented by a single letter 'o' or 'c'.
204 The open/closed state is represented by a single letter 'o' or 'c'.
205 This field can be used to avoid changelog reads when determining if a
205 This field can be used to avoid changelog reads when determining if a
206 branch head closes a branch or not.
206 branch head closes a branch or not.
207 """
207 """
208
208
209 def __init__(
209 def __init__(
210 self,
210 self,
211 repo: "localrepo.localrepository",
211 repo: "localrepo.localrepository",
212 entries: Union[
212 entries: Union[
213 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
213 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
214 ] = (),
214 ] = (),
215 closed_nodes: Optional[Set[bytes]] = None,
215 closed_nodes: Optional[Set[bytes]] = None,
216 ) -> None:
216 ) -> None:
217 """hasnode is a function which can be used to verify whether changelog
217 """hasnode is a function which can be used to verify whether changelog
218 has a given node or not. If it's not provided, we assume that every node
218 has a given node or not. If it's not provided, we assume that every node
219 we have exists in changelog"""
219 we have exists in changelog"""
220 # closednodes is a set of nodes that close their branch. If the branch
220 # closednodes is a set of nodes that close their branch. If the branch
221 # cache has been updated, it may contain nodes that are no longer
221 # cache has been updated, it may contain nodes that are no longer
222 # heads.
222 # heads.
223 if closed_nodes is None:
223 if closed_nodes is None:
224 closed_nodes = set()
224 closed_nodes = set()
225 self._closednodes = set(closed_nodes)
225 self._closednodes = set(closed_nodes)
226 self._entries = dict(entries)
226 self._entries = dict(entries)
227
227
228 def __iter__(self):
228 def __iter__(self):
229 return iter(self._entries)
229 return iter(self._entries)
230
230
231 def __setitem__(self, key, value):
231 def __setitem__(self, key, value):
232 self._entries[key] = value
232 self._entries[key] = value
233
233
234 def __getitem__(self, key):
234 def __getitem__(self, key):
235 return self._entries[key]
235 return self._entries[key]
236
236
237 def __contains__(self, key):
237 def __contains__(self, key):
238 return key in self._entries
238 return key in self._entries
239
239
240 def iteritems(self):
240 def iteritems(self):
241 return self._entries.items()
241 return self._entries.items()
242
242
243 items = iteritems
243 items = iteritems
244
244
245 def hasbranch(self, label):
245 def hasbranch(self, label):
246 """checks whether a branch of this name exists or not"""
246 """checks whether a branch of this name exists or not"""
247 return label in self._entries
247 return label in self._entries
248
248
249 def _branchtip(self, heads):
249 def _branchtip(self, heads):
250 """Return tuple with last open head in heads and false,
250 """Return tuple with last open head in heads and false,
251 otherwise return last closed head and true."""
251 otherwise return last closed head and true."""
252 tip = heads[-1]
252 tip = heads[-1]
253 closed = True
253 closed = True
254 for h in reversed(heads):
254 for h in reversed(heads):
255 if h not in self._closednodes:
255 if h not in self._closednodes:
256 tip = h
256 tip = h
257 closed = False
257 closed = False
258 break
258 break
259 return tip, closed
259 return tip, closed
260
260
261 def branchtip(self, branch):
261 def branchtip(self, branch):
262 """Return the tipmost open head on branch head, otherwise return the
262 """Return the tipmost open head on branch head, otherwise return the
263 tipmost closed head on branch.
263 tipmost closed head on branch.
264 Raise KeyError for unknown branch."""
264 Raise KeyError for unknown branch."""
265 return self._branchtip(self[branch])[0]
265 return self._branchtip(self[branch])[0]
266
266
267 def iteropen(self, nodes):
267 def iteropen(self, nodes):
268 return (n for n in nodes if n not in self._closednodes)
268 return (n for n in nodes if n not in self._closednodes)
269
269
270 def branchheads(self, branch, closed=False):
270 def branchheads(self, branch, closed=False):
271 heads = self._entries[branch]
271 heads = self._entries[branch]
272 if not closed:
272 if not closed:
273 heads = list(self.iteropen(heads))
273 heads = list(self.iteropen(heads))
274 return heads
274 return heads
275
275
276 def iterbranches(self):
276 def iterbranches(self):
277 for bn, heads in self.items():
277 for bn, heads in self.items():
278 yield (bn, heads) + self._branchtip(heads)
278 yield (bn, heads) + self._branchtip(heads)
279
279
280 def iterheads(self):
280 def iterheads(self):
281 """returns all the heads"""
281 """returns all the heads"""
282 return self._entries.values()
282 return self._entries.values()
283
283
284 def update(self, repo, revgen):
284 def update(self, repo, revgen):
285 """Given a branchhead cache, self, that may have extra nodes or be
285 """Given a branchhead cache, self, that may have extra nodes or be
286 missing heads, and a generator of nodes that are strictly a superset of
286 missing heads, and a generator of nodes that are strictly a superset of
287 heads missing, this function updates self to be correct.
287 heads missing, this function updates self to be correct.
288 """
288 """
289 starttime = util.timer()
289 starttime = util.timer()
290 cl = repo.changelog
290 cl = repo.changelog
291 # collect new branch entries
291 # collect new branch entries
292 newbranches = {}
292 newbranches = {}
293 getbranchinfo = repo.revbranchcache().branchinfo
293 getbranchinfo = repo.revbranchcache().branchinfo
294 max_rev = -1
294 max_rev = -1
295 for r in revgen:
295 for r in revgen:
296 branch, closesbranch = getbranchinfo(r)
296 branch, closesbranch = getbranchinfo(r)
297 newbranches.setdefault(branch, []).append(r)
297 newbranches.setdefault(branch, []).append(r)
298 if closesbranch:
298 if closesbranch:
299 self._closednodes.add(cl.node(r))
299 self._closednodes.add(cl.node(r))
300 max_rev = max(max_rev, r)
300 max_rev = max(max_rev, r)
301 if max_rev < 0:
301 if max_rev < 0:
302 max_rev = None
302 max_rev = None
303
303
304 # Delay fetching the topological heads until they are needed.
304 # Delay fetching the topological heads until they are needed.
305 # A repository without non-continous branches can skip this part.
305 # A repository without non-continous branches can skip this part.
306 topoheads = None
306 topoheads = None
307
307
308 # If a changeset is visible, its parents must be visible too, so
308 # If a changeset is visible, its parents must be visible too, so
309 # use the faster unfiltered parent accessor.
309 # use the faster unfiltered parent accessor.
310 parentrevs = repo.unfiltered().changelog.parentrevs
310 parentrevs = repo.unfiltered().changelog.parentrevs
311
311
312 # Faster than using ctx.obsolete()
312 # Faster than using ctx.obsolete()
313 obsrevs = obsolete.getrevs(repo, b'obsolete')
313 obsrevs = obsolete.getrevs(repo, b'obsolete')
314
314
315 for branch, newheadrevs in newbranches.items():
315 for branch, newheadrevs in newbranches.items():
316 # For every branch, compute the new branchheads.
316 # For every branch, compute the new branchheads.
317 # A branchhead is a revision such that no descendant is on
317 # A branchhead is a revision such that no descendant is on
318 # the same branch.
318 # the same branch.
319 #
319 #
320 # The branchheads are computed iteratively in revision order.
320 # The branchheads are computed iteratively in revision order.
321 # This ensures topological order, i.e. parents are processed
321 # This ensures topological order, i.e. parents are processed
322 # before their children. Ancestors are inclusive here, i.e.
322 # before their children. Ancestors are inclusive here, i.e.
323 # any revision is an ancestor of itself.
323 # any revision is an ancestor of itself.
324 #
324 #
325 # Core observations:
325 # Core observations:
326 # - The current revision is always a branchhead for the
326 # - The current revision is always a branchhead for the
327 # repository up to that point.
327 # repository up to that point.
328 # - It is the first revision of the branch if and only if
328 # - It is the first revision of the branch if and only if
329 # there was no branchhead before. In that case, it is the
329 # there was no branchhead before. In that case, it is the
330 # only branchhead as there are no possible ancestors on
330 # only branchhead as there are no possible ancestors on
331 # the same branch.
331 # the same branch.
332 # - If a parent is on the same branch, a branchhead can
332 # - If a parent is on the same branch, a branchhead can
333 # only be an ancestor of that parent, if it is parent
333 # only be an ancestor of that parent, if it is parent
334 # itself. Otherwise it would have been removed as ancestor
334 # itself. Otherwise it would have been removed as ancestor
335 # of that parent before.
335 # of that parent before.
336 # - Therefore, if all parents are on the same branch, they
336 # - Therefore, if all parents are on the same branch, they
337 # can just be removed from the branchhead set.
337 # can just be removed from the branchhead set.
338 # - If one parent is on the same branch and the other is not
338 # - If one parent is on the same branch and the other is not
339 # and there was exactly one branchhead known, the existing
339 # and there was exactly one branchhead known, the existing
340 # branchhead can only be an ancestor if it is the parent.
340 # branchhead can only be an ancestor if it is the parent.
341 # Otherwise it would have been removed as ancestor of
341 # Otherwise it would have been removed as ancestor of
342 # the parent before. The other parent therefore can't have
342 # the parent before. The other parent therefore can't have
343 # a branchhead as ancestor.
343 # a branchhead as ancestor.
344 # - In all other cases, the parents on different branches
344 # - In all other cases, the parents on different branches
345 # could have a branchhead as ancestor. Those parents are
345 # could have a branchhead as ancestor. Those parents are
346 # kept in the "uncertain" set. If all branchheads are also
346 # kept in the "uncertain" set. If all branchheads are also
347 # topological heads, they can't have descendants and further
347 # topological heads, they can't have descendants and further
348 # checks can be skipped. Otherwise, the ancestors of the
348 # checks can be skipped. Otherwise, the ancestors of the
349 # "uncertain" set are removed from branchheads.
349 # "uncertain" set are removed from branchheads.
350 # This computation is heavy and avoided if at all possible.
350 # This computation is heavy and avoided if at all possible.
351 bheads = self._entries.get(branch, [])
351 bheads = self._entries.get(branch, [])
352 bheadset = {cl.rev(node) for node in bheads}
352 bheadset = {cl.rev(node) for node in bheads}
353 uncertain = set()
353 uncertain = set()
354 for newrev in sorted(newheadrevs):
354 for newrev in sorted(newheadrevs):
355 if newrev in obsrevs:
355 if newrev in obsrevs:
356 # We ignore obsolete changesets as they shouldn't be
356 # We ignore obsolete changesets as they shouldn't be
357 # considered heads.
357 # considered heads.
358 continue
358 continue
359
359
360 if not bheadset:
360 if not bheadset:
361 bheadset.add(newrev)
361 bheadset.add(newrev)
362 continue
362 continue
363
363
364 parents = [p for p in parentrevs(newrev) if p != nullrev]
364 parents = [p for p in parentrevs(newrev) if p != nullrev]
365 samebranch = set()
365 samebranch = set()
366 otherbranch = set()
366 otherbranch = set()
367 obsparents = set()
367 obsparents = set()
368 for p in parents:
368 for p in parents:
369 if p in obsrevs:
369 if p in obsrevs:
370 # We ignored this obsolete changeset earlier, but now
370 # We ignored this obsolete changeset earlier, but now
371 # that it has non-ignored children, we need to make
371 # that it has non-ignored children, we need to make
372 # sure their ancestors are not considered heads. To
372 # sure their ancestors are not considered heads. To
373 # achieve that, we will simply treat this obsolete
373 # achieve that, we will simply treat this obsolete
374 # changeset as a parent from other branch.
374 # changeset as a parent from other branch.
375 obsparents.add(p)
375 obsparents.add(p)
376 elif p in bheadset or getbranchinfo(p)[0] == branch:
376 elif p in bheadset or getbranchinfo(p)[0] == branch:
377 samebranch.add(p)
377 samebranch.add(p)
378 else:
378 else:
379 otherbranch.add(p)
379 otherbranch.add(p)
380 if not (len(bheadset) == len(samebranch) == 1):
380 if not (len(bheadset) == len(samebranch) == 1):
381 uncertain.update(otherbranch)
381 uncertain.update(otherbranch)
382 uncertain.update(obsparents)
382 uncertain.update(obsparents)
383 bheadset.difference_update(samebranch)
383 bheadset.difference_update(samebranch)
384 bheadset.add(newrev)
384 bheadset.add(newrev)
385
385
386 if uncertain:
386 if uncertain:
387 if topoheads is None:
387 if topoheads is None:
388 topoheads = set(cl.headrevs())
388 topoheads = set(cl.headrevs())
389 if bheadset - topoheads:
389 if bheadset - topoheads:
390 floorrev = min(bheadset)
390 floorrev = min(bheadset)
391 if floorrev <= max(uncertain):
391 if floorrev <= max(uncertain):
392 ancestors = set(cl.ancestors(uncertain, floorrev))
392 ancestors = set(cl.ancestors(uncertain, floorrev))
393 bheadset -= ancestors
393 bheadset -= ancestors
394 if bheadset:
394 if bheadset:
395 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
395 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
396
396
397 duration = util.timer() - starttime
397 duration = util.timer() - starttime
398 repo.ui.log(
398 repo.ui.log(
399 b'branchcache',
399 b'branchcache',
400 b'updated %s in %.4f seconds\n',
400 b'updated %s in %.4f seconds\n',
401 _branchcachedesc(repo),
401 _branchcachedesc(repo),
402 duration,
402 duration,
403 )
403 )
404 return max_rev
404 return max_rev
405
405
406
406
407 class branchcache(_BaseBranchCache):
407 class branchcache(_BaseBranchCache):
408 """Branchmap info for a local repo or repoview"""
408 """Branchmap info for a local repo or repoview"""
409
409
410 _base_filename = b"branch2"
410 _base_filename = b"branch2"
411
411
412 def __init__(
412 def __init__(
413 self,
413 self,
414 repo: "localrepo.localrepository",
414 repo: "localrepo.localrepository",
415 entries: Union[
415 entries: Union[
416 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
416 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
417 ] = (),
417 ] = (),
418 tipnode: Optional[bytes] = None,
418 tipnode: Optional[bytes] = None,
419 tiprev: Optional[int] = nullrev,
419 tiprev: Optional[int] = nullrev,
420 filteredhash: Optional[bytes] = None,
420 filteredhash: Optional[bytes] = None,
421 closednodes: Optional[Set[bytes]] = None,
421 closednodes: Optional[Set[bytes]] = None,
422 hasnode: Optional[Callable[[bytes], bool]] = None,
422 hasnode: Optional[Callable[[bytes], bool]] = None,
423 verify_node: bool = False,
423 verify_node: bool = False,
424 ) -> None:
424 ) -> None:
425 """hasnode is a function which can be used to verify whether changelog
425 """hasnode is a function which can be used to verify whether changelog
426 has a given node or not. If it's not provided, we assume that every node
426 has a given node or not. If it's not provided, we assume that every node
427 we have exists in changelog"""
427 we have exists in changelog"""
428 self._filtername = repo.filtername
428 self._filtername = repo.filtername
429 self._delayed = False
429 self._delayed = False
430 if tipnode is None:
430 if tipnode is None:
431 self.tipnode = repo.nullid
431 self.tipnode = repo.nullid
432 else:
432 else:
433 self.tipnode = tipnode
433 self.tipnode = tipnode
434 self.tiprev = tiprev
434 self.tiprev = tiprev
435 self.filteredhash = filteredhash
435 self.filteredhash = filteredhash
436
436
437 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
437 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
438 # closednodes is a set of nodes that close their branch. If the branch
438 # closednodes is a set of nodes that close their branch. If the branch
439 # cache has been updated, it may contain nodes that are no longer
439 # cache has been updated, it may contain nodes that are no longer
440 # heads.
440 # heads.
441
441
442 # Do we need to verify branch at all ?
442 # Do we need to verify branch at all ?
443 self._verify_node = verify_node
443 self._verify_node = verify_node
444 # branches for which nodes are verified
444 # branches for which nodes are verified
445 self._verifiedbranches = set()
445 self._verifiedbranches = set()
446 self._hasnode = None
446 self._hasnode = None
447 if self._verify_node:
447 if self._verify_node:
448 self._hasnode = repo.changelog.hasnode
448 self._hasnode = repo.changelog.hasnode
449
449
450 def validfor(self, repo):
450 def validfor(self, repo):
451 """check that cache contents are valid for (a subset of) this repo
451 """check that cache contents are valid for (a subset of) this repo
452
452
453 - False when the order of changesets changed or if we detect a strip.
453 - False when the order of changesets changed or if we detect a strip.
454 - True when cache is up-to-date for the current repo or its subset."""
454 - True when cache is up-to-date for the current repo or its subset."""
455 try:
455 try:
456 node = repo.changelog.node(self.tiprev)
456 node = repo.changelog.node(self.tiprev)
457 except IndexError:
457 except IndexError:
458 # changesets were stripped and now we don't even have enough to
458 # changesets were stripped and now we don't even have enough to
459 # find tiprev
459 # find tiprev
460 return False
460 return False
461 if self.tipnode != node:
461 if self.tipnode != node:
462 # tiprev doesn't correspond to tipnode: repo was stripped, or this
462 # tiprev doesn't correspond to tipnode: repo was stripped, or this
463 # repo has a different order of changesets
463 # repo has a different order of changesets
464 return False
464 return False
465 tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
465 tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
466 # hashes don't match if this repo view has a different set of filtered
466 # hashes don't match if this repo view has a different set of filtered
467 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
467 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
468 # history was rewritten)
468 # history was rewritten)
469 return self.filteredhash == tiphash
469 return self.filteredhash == tiphash
470
470
471 @classmethod
471 @classmethod
472 def fromfile(cls, repo):
472 def fromfile(cls, repo):
473 f = None
473 f = None
474 try:
474 try:
475 f = repo.cachevfs(cls._filename(repo))
475 f = repo.cachevfs(cls._filename(repo))
476 lineiter = iter(f)
476 lineiter = iter(f)
477 init_kwargs = cls._load_header(repo, lineiter)
477 init_kwargs = cls._load_header(repo, lineiter)
478 bcache = cls(
478 bcache = cls(
479 repo,
479 repo,
480 verify_node=True,
480 verify_node=True,
481 **init_kwargs,
481 **init_kwargs,
482 )
482 )
483 if not bcache.validfor(repo):
483 if not bcache.validfor(repo):
484 # invalidate the cache
484 # invalidate the cache
485 raise ValueError('tip differs')
485 raise ValueError('tip differs')
486 bcache._load_heads(repo, lineiter)
486 bcache._load_heads(repo, lineiter)
487 except (IOError, OSError):
487 except (IOError, OSError):
488 return None
488 return None
489
489
490 except Exception as inst:
490 except Exception as inst:
491 if repo.ui.debugflag:
491 if repo.ui.debugflag:
492 msg = b'invalid %s: %s\n'
492 msg = b'invalid %s: %s\n'
493 msg %= (
493 msg %= (
494 _branchcachedesc(repo),
494 _branchcachedesc(repo),
495 stringutil.forcebytestr(inst),
495 stringutil.forcebytestr(inst),
496 )
496 )
497 repo.ui.debug(msg)
497 repo.ui.debug(msg)
498 bcache = None
498 bcache = None
499
499
500 finally:
500 finally:
501 if f:
501 if f:
502 f.close()
502 f.close()
503
503
504 return bcache
504 return bcache
505
505
506 @classmethod
506 @classmethod
507 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
507 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
508 """parse the head of a branchmap file
508 """parse the head of a branchmap file
509
509
510 return parameters to pass to a newly created class instance.
510 return parameters to pass to a newly created class instance.
511 """
511 """
512 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
512 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
513 last, lrev = cachekey[:2]
513 last, lrev = cachekey[:2]
514 last, lrev = bin(last), int(lrev)
514 last, lrev = bin(last), int(lrev)
515 filteredhash = None
515 filteredhash = None
516 if len(cachekey) > 2:
516 if len(cachekey) > 2:
517 filteredhash = bin(cachekey[2])
517 filteredhash = bin(cachekey[2])
518 return {
518 return {
519 "tipnode": last,
519 "tipnode": last,
520 "tiprev": lrev,
520 "tiprev": lrev,
521 "filteredhash": filteredhash,
521 "filteredhash": filteredhash,
522 }
522 }
523
523
524 def _load_heads(self, repo, lineiter):
524 def _load_heads(self, repo, lineiter):
525 """fully loads the branchcache by reading from the file using the line
525 """fully loads the branchcache by reading from the file using the line
526 iterator passed"""
526 iterator passed"""
527 for line in lineiter:
527 for line in lineiter:
528 line = line.rstrip(b'\n')
528 line = line.rstrip(b'\n')
529 if not line:
529 if not line:
530 continue
530 continue
531 node, state, label = line.split(b" ", 2)
531 node, state, label = line.split(b" ", 2)
532 if state not in b'oc':
532 if state not in b'oc':
533 raise ValueError('invalid branch state')
533 raise ValueError('invalid branch state')
534 label = encoding.tolocal(label.strip())
534 label = encoding.tolocal(label.strip())
535 node = bin(node)
535 node = bin(node)
536 self._entries.setdefault(label, []).append(node)
536 self._entries.setdefault(label, []).append(node)
537 if state == b'c':
537 if state == b'c':
538 self._closednodes.add(node)
538 self._closednodes.add(node)
539
539
540 @classmethod
540 @classmethod
541 def _filename(cls, repo):
541 def _filename(cls, repo):
542 """name of a branchcache file for a given repo or repoview"""
542 """name of a branchcache file for a given repo or repoview"""
543 filename = cls._base_filename
543 filename = cls._base_filename
544 if repo.filtername:
544 if repo.filtername:
545 filename = b'%s-%s' % (filename, repo.filtername)
545 filename = b'%s-%s' % (filename, repo.filtername)
546 return filename
546 return filename
547
547
548 def copy(self, repo):
548 def copy(self, repo):
549 """return a deep copy of the branchcache object"""
549 """return a deep copy of the branchcache object"""
550 other = type(self)(
550 other = type(self)(
551 repo=repo,
551 repo=repo,
552 # we always do a shally copy of self._entries, and the values is
552 # we always do a shally copy of self._entries, and the values is
553 # always replaced, so no need to deepcopy until the above remains
553 # always replaced, so no need to deepcopy until the above remains
554 # true.
554 # true.
555 entries=self._entries,
555 entries=self._entries,
556 tipnode=self.tipnode,
556 tipnode=self.tipnode,
557 tiprev=self.tiprev,
557 tiprev=self.tiprev,
558 filteredhash=self.filteredhash,
558 filteredhash=self.filteredhash,
559 closednodes=set(self._closednodes),
559 closednodes=set(self._closednodes),
560 verify_node=self._verify_node,
560 verify_node=self._verify_node,
561 )
561 )
562 # we copy will likely schedule a write anyway, but that does not seems
562 # we copy will likely schedule a write anyway, but that does not seems
563 # to hurt to overschedule
563 # to hurt to overschedule
564 other._delayed = self._delayed
564 other._delayed = self._delayed
565 # also copy information about the current verification state
565 # also copy information about the current verification state
566 other._verifiedbranches = set(self._verifiedbranches)
566 other._verifiedbranches = set(self._verifiedbranches)
567 return other
567 return other
568
568
569 def write(self, repo):
569 def write(self, repo):
570 assert self._filtername == repo.filtername, (
570 assert self._filtername == repo.filtername, (
571 self._filtername,
571 self._filtername,
572 repo.filtername,
572 repo.filtername,
573 )
573 )
574 tr = repo.currenttransaction()
574 tr = repo.currenttransaction()
575 if not getattr(tr, 'finalized', True):
575 if not getattr(tr, 'finalized', True):
576 # Avoid premature writing.
576 # Avoid premature writing.
577 #
577 #
578 # (The cache warming setup by localrepo will update the file later.)
578 # (The cache warming setup by localrepo will update the file later.)
579 self._delayed = True
579 self._delayed = True
580 return
580 return
581 try:
581 try:
582 filename = self._filename(repo)
582 filename = self._filename(repo)
583 with repo.cachevfs(filename, b"w", atomictemp=True) as f:
583 with repo.cachevfs(filename, b"w", atomictemp=True) as f:
584 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
584 self._write_header(f)
585 if self.filteredhash is not None:
586 cachekey.append(hex(self.filteredhash))
587 f.write(b" ".join(cachekey) + b'\n')
588 nodecount = self._write_heads(f)
585 nodecount = self._write_heads(f)
589 repo.ui.log(
586 repo.ui.log(
590 b'branchcache',
587 b'branchcache',
591 b'wrote %s with %d labels and %d nodes\n',
588 b'wrote %s with %d labels and %d nodes\n',
592 _branchcachedesc(repo),
589 _branchcachedesc(repo),
593 len(self._entries),
590 len(self._entries),
594 nodecount,
591 nodecount,
595 )
592 )
596 self._delayed = False
593 self._delayed = False
597 except (IOError, OSError, error.Abort) as inst:
594 except (IOError, OSError, error.Abort) as inst:
598 # Abort may be raised by read only opener, so log and continue
595 # Abort may be raised by read only opener, so log and continue
599 repo.ui.debug(
596 repo.ui.debug(
600 b"couldn't write branch cache: %s\n"
597 b"couldn't write branch cache: %s\n"
601 % stringutil.forcebytestr(inst)
598 % stringutil.forcebytestr(inst)
602 )
599 )
603
600
601 def _write_header(self, fp) -> None:
602 """write the branch cache header to a file"""
603 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
604 if self.filteredhash is not None:
605 cachekey.append(hex(self.filteredhash))
606 fp.write(b" ".join(cachekey) + b'\n')
607
604 def _write_heads(self, fp) -> int:
608 def _write_heads(self, fp) -> int:
605 """write list of heads to a file
609 """write list of heads to a file
606
610
607 Return the number of heads written."""
611 Return the number of heads written."""
608 nodecount = 0
612 nodecount = 0
609 for label, nodes in sorted(self._entries.items()):
613 for label, nodes in sorted(self._entries.items()):
610 label = encoding.fromlocal(label)
614 label = encoding.fromlocal(label)
611 for node in nodes:
615 for node in nodes:
612 nodecount += 1
616 nodecount += 1
613 if node in self._closednodes:
617 if node in self._closednodes:
614 state = b'c'
618 state = b'c'
615 else:
619 else:
616 state = b'o'
620 state = b'o'
617 fp.write(b"%s %s %s\n" % (hex(node), state, label))
621 fp.write(b"%s %s %s\n" % (hex(node), state, label))
618 return nodecount
622 return nodecount
619
623
620 def _verifybranch(self, branch):
624 def _verifybranch(self, branch):
621 """verify head nodes for the given branch."""
625 """verify head nodes for the given branch."""
622 if not self._verify_node:
626 if not self._verify_node:
623 return
627 return
624 if branch not in self._entries or branch in self._verifiedbranches:
628 if branch not in self._entries or branch in self._verifiedbranches:
625 return
629 return
626 assert self._hasnode is not None
630 assert self._hasnode is not None
627 for n in self._entries[branch]:
631 for n in self._entries[branch]:
628 if not self._hasnode(n):
632 if not self._hasnode(n):
629 _unknownnode(n)
633 _unknownnode(n)
630
634
631 self._verifiedbranches.add(branch)
635 self._verifiedbranches.add(branch)
632
636
633 def _verifyall(self):
637 def _verifyall(self):
634 """verifies nodes of all the branches"""
638 """verifies nodes of all the branches"""
635 for b in self._entries.keys():
639 for b in self._entries.keys():
636 if b not in self._verifiedbranches:
640 if b not in self._verifiedbranches:
637 self._verifybranch(b)
641 self._verifybranch(b)
638
642
639 def __getitem__(self, key):
643 def __getitem__(self, key):
640 self._verifybranch(key)
644 self._verifybranch(key)
641 return super().__getitem__(key)
645 return super().__getitem__(key)
642
646
643 def __contains__(self, key):
647 def __contains__(self, key):
644 self._verifybranch(key)
648 self._verifybranch(key)
645 return super().__contains__(key)
649 return super().__contains__(key)
646
650
647 def iteritems(self):
651 def iteritems(self):
648 self._verifyall()
652 self._verifyall()
649 return super().iteritems()
653 return super().iteritems()
650
654
651 items = iteritems
655 items = iteritems
652
656
653 def iterheads(self):
657 def iterheads(self):
654 """returns all the heads"""
658 """returns all the heads"""
655 self._verifyall()
659 self._verifyall()
656 return super().iterheads()
660 return super().iterheads()
657
661
658 def hasbranch(self, label):
662 def hasbranch(self, label):
659 """checks whether a branch of this name exists or not"""
663 """checks whether a branch of this name exists or not"""
660 self._verifybranch(label)
664 self._verifybranch(label)
661 return super().hasbranch(label)
665 return super().hasbranch(label)
662
666
663 def branchheads(self, branch, closed=False):
667 def branchheads(self, branch, closed=False):
664 self._verifybranch(branch)
668 self._verifybranch(branch)
665 return super().branchheads(branch, closed=closed)
669 return super().branchheads(branch, closed=closed)
666
670
667 def update(self, repo, revgen):
671 def update(self, repo, revgen):
668 assert self._filtername == repo.filtername, (
672 assert self._filtername == repo.filtername, (
669 self._filtername,
673 self._filtername,
670 repo.filtername,
674 repo.filtername,
671 )
675 )
672 cl = repo.changelog
676 cl = repo.changelog
673 max_rev = super().update(repo, revgen)
677 max_rev = super().update(repo, revgen)
674 # new tip revision which we found after iterating items from new
678 # new tip revision which we found after iterating items from new
675 # branches
679 # branches
676 if max_rev is not None and max_rev > self.tiprev:
680 if max_rev is not None and max_rev > self.tiprev:
677 self.tiprev = max_rev
681 self.tiprev = max_rev
678 self.tipnode = cl.node(max_rev)
682 self.tipnode = cl.node(max_rev)
679
683
680 if not self.validfor(repo):
684 if not self.validfor(repo):
681 # old cache key is now invalid for the repo, but we've just updated
685 # old cache key is now invalid for the repo, but we've just updated
682 # the cache and we assume it's valid, so let's make the cache key
686 # the cache and we assume it's valid, so let's make the cache key
683 # valid as well by recomputing it from the cached data
687 # valid as well by recomputing it from the cached data
684 self.tipnode = repo.nullid
688 self.tipnode = repo.nullid
685 self.tiprev = nullrev
689 self.tiprev = nullrev
686 for heads in self.iterheads():
690 for heads in self.iterheads():
687 if not heads:
691 if not heads:
688 # all revisions on a branch are obsolete
692 # all revisions on a branch are obsolete
689 continue
693 continue
690 # note: tiprev is not necessarily the tip revision of repo,
694 # note: tiprev is not necessarily the tip revision of repo,
691 # because the tip could be obsolete (i.e. not a head)
695 # because the tip could be obsolete (i.e. not a head)
692 tiprev = max(cl.rev(node) for node in heads)
696 tiprev = max(cl.rev(node) for node in heads)
693 if tiprev > self.tiprev:
697 if tiprev > self.tiprev:
694 self.tipnode = cl.node(tiprev)
698 self.tipnode = cl.node(tiprev)
695 self.tiprev = tiprev
699 self.tiprev = tiprev
696 self.filteredhash = scmutil.filteredhash(
700 self.filteredhash = scmutil.filteredhash(
697 repo, self.tiprev, needobsolete=True
701 repo, self.tiprev, needobsolete=True
698 )
702 )
699
703
700 self.write(repo)
704 self.write(repo)
701
705
702
706
703 class remotebranchcache(_BaseBranchCache):
707 class remotebranchcache(_BaseBranchCache):
704 """Branchmap info for a remote connection, should not write locally"""
708 """Branchmap info for a remote connection, should not write locally"""
705
709
706 def __init__(
710 def __init__(
707 self,
711 self,
708 repo: "localrepo.localrepository",
712 repo: "localrepo.localrepository",
709 entries: Union[
713 entries: Union[
710 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
714 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
711 ] = (),
715 ] = (),
712 closednodes: Optional[Set[bytes]] = None,
716 closednodes: Optional[Set[bytes]] = None,
713 ) -> None:
717 ) -> None:
714 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
718 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
715
719
716
720
717 # Revision branch info cache
721 # Revision branch info cache
718
722
719 _rbcversion = b'-v1'
723 _rbcversion = b'-v1'
720 _rbcnames = b'rbc-names' + _rbcversion
724 _rbcnames = b'rbc-names' + _rbcversion
721 _rbcrevs = b'rbc-revs' + _rbcversion
725 _rbcrevs = b'rbc-revs' + _rbcversion
722 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
726 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
723 _rbcrecfmt = b'>4sI'
727 _rbcrecfmt = b'>4sI'
724 _rbcrecsize = calcsize(_rbcrecfmt)
728 _rbcrecsize = calcsize(_rbcrecfmt)
725 _rbcmininc = 64 * _rbcrecsize
729 _rbcmininc = 64 * _rbcrecsize
726 _rbcnodelen = 4
730 _rbcnodelen = 4
727 _rbcbranchidxmask = 0x7FFFFFFF
731 _rbcbranchidxmask = 0x7FFFFFFF
728 _rbccloseflag = 0x80000000
732 _rbccloseflag = 0x80000000
729
733
730
734
731 class rbcrevs:
735 class rbcrevs:
732 """a byte string consisting of an immutable prefix followed by a mutable suffix"""
736 """a byte string consisting of an immutable prefix followed by a mutable suffix"""
733
737
734 def __init__(self, revs):
738 def __init__(self, revs):
735 self._prefix = revs
739 self._prefix = revs
736 self._rest = bytearray()
740 self._rest = bytearray()
737
741
738 def __len__(self):
742 def __len__(self):
739 return len(self._prefix) + len(self._rest)
743 return len(self._prefix) + len(self._rest)
740
744
741 def unpack_record(self, rbcrevidx):
745 def unpack_record(self, rbcrevidx):
742 if rbcrevidx < len(self._prefix):
746 if rbcrevidx < len(self._prefix):
743 return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
747 return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
744 else:
748 else:
745 return unpack_from(
749 return unpack_from(
746 _rbcrecfmt,
750 _rbcrecfmt,
747 util.buffer(self._rest),
751 util.buffer(self._rest),
748 rbcrevidx - len(self._prefix),
752 rbcrevidx - len(self._prefix),
749 )
753 )
750
754
751 def make_mutable(self):
755 def make_mutable(self):
752 if len(self._prefix) > 0:
756 if len(self._prefix) > 0:
753 entirety = bytearray()
757 entirety = bytearray()
754 entirety[:] = self._prefix
758 entirety[:] = self._prefix
755 entirety.extend(self._rest)
759 entirety.extend(self._rest)
756 self._rest = entirety
760 self._rest = entirety
757 self._prefix = bytearray()
761 self._prefix = bytearray()
758
762
759 def truncate(self, pos):
763 def truncate(self, pos):
760 self.make_mutable()
764 self.make_mutable()
761 del self._rest[pos:]
765 del self._rest[pos:]
762
766
763 def pack_into(self, rbcrevidx, node, branchidx):
767 def pack_into(self, rbcrevidx, node, branchidx):
764 if rbcrevidx < len(self._prefix):
768 if rbcrevidx < len(self._prefix):
765 self.make_mutable()
769 self.make_mutable()
766 buf = self._rest
770 buf = self._rest
767 start_offset = rbcrevidx - len(self._prefix)
771 start_offset = rbcrevidx - len(self._prefix)
768 end_offset = start_offset + _rbcrecsize
772 end_offset = start_offset + _rbcrecsize
769
773
770 if len(self._rest) < end_offset:
774 if len(self._rest) < end_offset:
771 # bytearray doesn't allocate extra space at least in Python 3.7.
775 # bytearray doesn't allocate extra space at least in Python 3.7.
772 # When multiple changesets are added in a row, precise resize would
776 # When multiple changesets are added in a row, precise resize would
773 # result in quadratic complexity. Overallocate to compensate by
777 # result in quadratic complexity. Overallocate to compensate by
774 # using the classic doubling technique for dynamic arrays instead.
778 # using the classic doubling technique for dynamic arrays instead.
775 # If there was a gap in the map before, less space will be reserved.
779 # If there was a gap in the map before, less space will be reserved.
776 self._rest.extend(b'\0' * end_offset)
780 self._rest.extend(b'\0' * end_offset)
777 return pack_into(
781 return pack_into(
778 _rbcrecfmt,
782 _rbcrecfmt,
779 buf,
783 buf,
780 start_offset,
784 start_offset,
781 node,
785 node,
782 branchidx,
786 branchidx,
783 )
787 )
784
788
785 def extend(self, extension):
789 def extend(self, extension):
786 return self._rest.extend(extension)
790 return self._rest.extend(extension)
787
791
788 def slice(self, begin, end):
792 def slice(self, begin, end):
789 if begin < len(self._prefix):
793 if begin < len(self._prefix):
790 acc = bytearray()
794 acc = bytearray()
791 acc[:] = self._prefix[begin:end]
795 acc[:] = self._prefix[begin:end]
792 acc.extend(
796 acc.extend(
793 self._rest[begin - len(self._prefix) : end - len(self._prefix)]
797 self._rest[begin - len(self._prefix) : end - len(self._prefix)]
794 )
798 )
795 return acc
799 return acc
796 return self._rest[begin - len(self._prefix) : end - len(self._prefix)]
800 return self._rest[begin - len(self._prefix) : end - len(self._prefix)]
797
801
798
802
799 class revbranchcache:
803 class revbranchcache:
800 """Persistent cache, mapping from revision number to branch name and close.
804 """Persistent cache, mapping from revision number to branch name and close.
801 This is a low level cache, independent of filtering.
805 This is a low level cache, independent of filtering.
802
806
803 Branch names are stored in rbc-names in internal encoding separated by 0.
807 Branch names are stored in rbc-names in internal encoding separated by 0.
804 rbc-names is append-only, and each branch name is only stored once and will
808 rbc-names is append-only, and each branch name is only stored once and will
805 thus have a unique index.
809 thus have a unique index.
806
810
807 The branch info for each revision is stored in rbc-revs as constant size
811 The branch info for each revision is stored in rbc-revs as constant size
808 records. The whole file is read into memory, but it is only 'parsed' on
812 records. The whole file is read into memory, but it is only 'parsed' on
809 demand. The file is usually append-only but will be truncated if repo
813 demand. The file is usually append-only but will be truncated if repo
810 modification is detected.
814 modification is detected.
811 The record for each revision contains the first 4 bytes of the
815 The record for each revision contains the first 4 bytes of the
812 corresponding node hash, and the record is only used if it still matches.
816 corresponding node hash, and the record is only used if it still matches.
813 Even a completely trashed rbc-revs fill thus still give the right result
817 Even a completely trashed rbc-revs fill thus still give the right result
814 while converging towards full recovery ... assuming no incorrectly matching
818 while converging towards full recovery ... assuming no incorrectly matching
815 node hashes.
819 node hashes.
816 The record also contains 4 bytes where 31 bits contains the index of the
820 The record also contains 4 bytes where 31 bits contains the index of the
817 branch and the last bit indicate that it is a branch close commit.
821 branch and the last bit indicate that it is a branch close commit.
818 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
822 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
819 and will grow with it but be 1/8th of its size.
823 and will grow with it but be 1/8th of its size.
820 """
824 """
821
825
822 def __init__(self, repo, readonly=True):
826 def __init__(self, repo, readonly=True):
823 assert repo.filtername is None
827 assert repo.filtername is None
824 self._repo = repo
828 self._repo = repo
825 self._names = [] # branch names in local encoding with static index
829 self._names = [] # branch names in local encoding with static index
826 self._rbcrevs = rbcrevs(bytearray())
830 self._rbcrevs = rbcrevs(bytearray())
827 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
831 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
828 try:
832 try:
829 bndata = repo.cachevfs.read(_rbcnames)
833 bndata = repo.cachevfs.read(_rbcnames)
830 self._rbcsnameslen = len(bndata) # for verification before writing
834 self._rbcsnameslen = len(bndata) # for verification before writing
831 if bndata:
835 if bndata:
832 self._names = [
836 self._names = [
833 encoding.tolocal(bn) for bn in bndata.split(b'\0')
837 encoding.tolocal(bn) for bn in bndata.split(b'\0')
834 ]
838 ]
835 except (IOError, OSError):
839 except (IOError, OSError):
836 if readonly:
840 if readonly:
837 # don't try to use cache - fall back to the slow path
841 # don't try to use cache - fall back to the slow path
838 self.branchinfo = self._branchinfo
842 self.branchinfo = self._branchinfo
839
843
840 if self._names:
844 if self._names:
841 try:
845 try:
842 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
846 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
843 with repo.cachevfs(_rbcrevs) as fp:
847 with repo.cachevfs(_rbcrevs) as fp:
844 data = util.buffer(util.mmapread(fp))
848 data = util.buffer(util.mmapread(fp))
845 else:
849 else:
846 data = repo.cachevfs.read(_rbcrevs)
850 data = repo.cachevfs.read(_rbcrevs)
847 self._rbcrevs = rbcrevs(data)
851 self._rbcrevs = rbcrevs(data)
848 except (IOError, OSError) as inst:
852 except (IOError, OSError) as inst:
849 repo.ui.debug(
853 repo.ui.debug(
850 b"couldn't read revision branch cache: %s\n"
854 b"couldn't read revision branch cache: %s\n"
851 % stringutil.forcebytestr(inst)
855 % stringutil.forcebytestr(inst)
852 )
856 )
853 # remember number of good records on disk
857 # remember number of good records on disk
854 self._rbcrevslen = min(
858 self._rbcrevslen = min(
855 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
859 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
856 )
860 )
857 if self._rbcrevslen == 0:
861 if self._rbcrevslen == 0:
858 self._names = []
862 self._names = []
859 self._rbcnamescount = len(self._names) # number of names read at
863 self._rbcnamescount = len(self._names) # number of names read at
860 # _rbcsnameslen
864 # _rbcsnameslen
861
865
862 def _clear(self):
866 def _clear(self):
863 self._rbcsnameslen = 0
867 self._rbcsnameslen = 0
864 del self._names[:]
868 del self._names[:]
865 self._rbcnamescount = 0
869 self._rbcnamescount = 0
866 self._rbcrevslen = len(self._repo.changelog)
870 self._rbcrevslen = len(self._repo.changelog)
867 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
871 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
868 util.clearcachedproperty(self, b'_namesreverse')
872 util.clearcachedproperty(self, b'_namesreverse')
869
873
870 @util.propertycache
874 @util.propertycache
871 def _namesreverse(self):
875 def _namesreverse(self):
872 return {b: r for r, b in enumerate(self._names)}
876 return {b: r for r, b in enumerate(self._names)}
873
877
874 def branchinfo(self, rev):
878 def branchinfo(self, rev):
875 """Return branch name and close flag for rev, using and updating
879 """Return branch name and close flag for rev, using and updating
876 persistent cache."""
880 persistent cache."""
877 changelog = self._repo.changelog
881 changelog = self._repo.changelog
878 rbcrevidx = rev * _rbcrecsize
882 rbcrevidx = rev * _rbcrecsize
879
883
880 # avoid negative index, changelog.read(nullrev) is fast without cache
884 # avoid negative index, changelog.read(nullrev) is fast without cache
881 if rev == nullrev:
885 if rev == nullrev:
882 return changelog.branchinfo(rev)
886 return changelog.branchinfo(rev)
883
887
884 # if requested rev isn't allocated, grow and cache the rev info
888 # if requested rev isn't allocated, grow and cache the rev info
885 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
889 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
886 return self._branchinfo(rev)
890 return self._branchinfo(rev)
887
891
888 # fast path: extract data from cache, use it if node is matching
892 # fast path: extract data from cache, use it if node is matching
889 reponode = changelog.node(rev)[:_rbcnodelen]
893 reponode = changelog.node(rev)[:_rbcnodelen]
890 cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
894 cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
891 close = bool(branchidx & _rbccloseflag)
895 close = bool(branchidx & _rbccloseflag)
892 if close:
896 if close:
893 branchidx &= _rbcbranchidxmask
897 branchidx &= _rbcbranchidxmask
894 if cachenode == b'\0\0\0\0':
898 if cachenode == b'\0\0\0\0':
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug(
                    b"referenced branch names not found"
                    b" - rebuilding revision branch cache from scratch\n"
                )
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug(
                b"history modification detected - truncating "
                b"revision branch cache to revision %d\n" % rev
            )
            truncate = rbcrevidx + _rbcrecsize
            self._rbcrevs.truncate(truncate)
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def setdata(self, rev, changelogrevision):
        """add new branch data to the cache"""
        branch, close = changelogrevision.branchinfo

        if branch in self._namesreverse:
            branchidx = self._namesreverse[branch]
        else:
            branchidx = len(self._names)
            self._names.append(branch)
            self._namesreverse[branch] = branchidx
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
        # If no cache data were readable (file does not exist, bad
        # permissions, etc.), the cache was bypassing itself by setting:
        #
        #   self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypassing.
        if 'branchinfo' in vars(self):
            del self.branchinfo

    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
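        # records from this revision on are now dirty; write() flushes
        # everything starting at offset _rbcrevslen * _rbcrecsize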
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize(b'write-revbranchcache', self.write)

    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = b''
        try:
            # write the new names
            if self._rbcnamescount < len(self._names):
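                # the lock is taken non-blocking: if another process holds
                # it, the LockError is caught below and the write is skipped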
                wlock = repo.wlock(wait=False)
                step = b' names'
                self._writenames(repo)

            # write the new revs
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
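                # reset the step label: a failure from here on concerns the
                # revs file, not the names file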
                step = b''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                self._writerevs(repo, start)

        except (IOError, OSError, error.Abort, error.LockError) as inst:
            repo.ui.debug(
                b"couldn't write revision branch cache%s: %s\n"
                % (step, stringutil.forcebytestr(inst))
            )
        finally:
            if wlock is not None:
                wlock.release()

    def _writenames(self, repo):
        """write the new branch names to revbranchcache"""
        if self._rbcnamescount != 0:
            f = repo.cachevfs.open(_rbcnames, b'ab')
            if f.tell() == self._rbcsnameslen:
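                # the on-disk file still matches what we read at load time;
                # append a separator byte before adding the new names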
                f.write(b'\0')
            else:
                f.close()
                repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
                self._rbcnamescount = 0
                self._rbcrevslen = 0
        if self._rbcnamescount == 0:
            # before rewriting names, make sure references are removed
            repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
            f = repo.cachevfs.open(_rbcnames, b'wb')
        f.write(
            b'\0'.join(
                encoding.fromlocal(b)
                for b in self._names[self._rbcnamescount :]
            )
        )
        self._rbcsnameslen = f.tell()
        f.close()
        self._rbcnamescount = len(self._names)

    def _writerevs(self, repo, start):
        """write the new revs to revbranchcache"""
        revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
        with repo.cachevfs.open(_rbcrevs, b'ab') as f:
            if f.tell() != start:
                repo.ui.debug(
                    b"truncating cache/%s to %d\n" % (_rbcrevs, start)
                )
                f.seek(start)
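                # if the seek was not honored (e.g. an append-only stream),
                # fall back to rewriting the whole file from offset 0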
                if f.tell() != start:
                    start = 0
                    f.seek(start)
                f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs.slice(start, end))
        self._rbcrevslen = revs