branchcache: stop using `copy(…)` in `replace(…)`...
marmoute - r52382:659f7666 default
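The change below drops the `cache.copy(rview)` call in `BranchMapCache.replace`: instead of copying the freshly built cache for the candidate filter level, the cache object is retargeted in place, registered, and marked dirty before being written. A condensed before/after of the changed lines (the file below shows the resulting version):

    # before
    cache = self._per_filter[candidate] = cache.copy(rview)
    cache.write(rview)
    return

    # after
    cache._filtername = candidate
    self._per_filter[candidate] = cache
    cache._dirty = True
    cache.write(rview)
    return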
@@ -1,1048 +1,1050 b''
# branchmap.py - logic to compute, maintain and store branchmap for local repo
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import struct

from .node import (
    bin,
    hex,
    nullrev,
)

from typing import (
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Optional,
    Set,
    TYPE_CHECKING,
    Tuple,
    Union,
)

from . import (
    encoding,
    error,
    obsolete,
    scmutil,
    util,
)

from .utils import (
    repoviewutil,
    stringutil,
)

if TYPE_CHECKING:
    from . import localrepo

    assert [localrepo]

subsettable = repoviewutil.subsettable

calcsize = struct.calcsize
pack_into = struct.pack_into
unpack_from = struct.unpack_from


class BranchMapCache:
    """mapping of filtered views of repo with their branchcache"""

    def __init__(self):
        self._per_filter = {}

    def __getitem__(self, repo):
        self.updatecache(repo)
        bcache = self._per_filter[repo.filtername]
        assert bcache._filtername == repo.filtername, (
            bcache._filtername,
            repo.filtername,
        )
        return bcache

    def update_disk(self, repo):
        """ensure an up-to-date cache is (or will be) written on disk

        The cache for this repository view is updated if needed and written on
        disk.

        If a transaction is in progress, the writing is scheduled for
        transaction close. See the `BranchMapCache.write_dirty` method.

        This method exists independently of __getitem__ as it is sometimes
        useful to signal that we have no intent to use the data in memory yet.
        """
        self.updatecache(repo)
        bcache = self._per_filter[repo.filtername]
        assert bcache._filtername == repo.filtername, (
            bcache._filtername,
            repo.filtername,
        )
        bcache.write(repo)

    def updatecache(self, repo):
        """Update the cache for the given filtered view on a repository"""
        # This can trigger updates for the caches for subsets of the filtered
        # view, e.g. when there is no cache for this filtered view or the cache
        # is stale.

        cl = repo.changelog
        filtername = repo.filtername
        bcache = self._per_filter.get(filtername)
        if bcache is None or not bcache.validfor(repo):
            # cache object missing or cache object stale? Read from disk
            bcache = branchcache.fromfile(repo)

        revs = []
        if bcache is None:
            # no (fresh) cache available anymore, perhaps we can re-use
            # the cache for a subset, then extend that to add info on missing
            # revisions.
            subsetname = subsettable.get(filtername)
            if subsetname is not None:
                subset = repo.filtered(subsetname)
                bcache = self[subset].copy(repo)
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= bcache.tiprev)
            else:
                # nothing to fall back on, start empty.
                bcache = branchcache(repo)

        revs.extend(cl.revs(start=bcache.tiprev + 1))
        if revs:
            bcache.update(repo, revs)

        assert bcache.validfor(repo), filtername
        self._per_filter[repo.filtername] = bcache

    def replace(self, repo, remotebranchmap):
        """Replace the branchmap cache for a repo with a branch mapping.

        This is likely only called during clone with a branch map from a
        remote.

        """
        cl = repo.changelog
        clrev = cl.rev
        clbranchinfo = cl.branchinfo
        rbheads = []
        closed = set()
        for bheads in remotebranchmap.values():
            rbheads += bheads
            for h in bheads:
                r = clrev(h)
                b, c = clbranchinfo(r)
                if c:
                    closed.add(h)

        if rbheads:
            rtiprev = max((int(clrev(node)) for node in rbheads))
            cache = branchcache(
                repo,
                remotebranchmap,
                repo[rtiprev].node(),
                rtiprev,
                closednodes=closed,
            )

            # Try to stick it as low as possible
            # filters above served are unlikely to be fetched from a clone
            for candidate in (b'base', b'immutable', b'served'):
                rview = repo.filtered(candidate)
                if cache.validfor(rview):
                    cache._filtername = candidate
                    self._per_filter[candidate] = cache
                    cache._dirty = True
                    cache.write(rview)
                    return

    def clear(self):
        self._per_filter.clear()

    def write_dirty(self, repo):
        unfi = repo.unfiltered()
        for filtername in repoviewutil.get_ordered_subset():
            cache = self._per_filter.get(filtername)
            if cache is None:
                continue
            if cache._dirty:
                if filtername is None:
                    repo = unfi
                else:
                    repo = unfi.filtered(filtername)
                cache.write(repo)


def _unknownnode(node):
    """raises ValueError when branchcache found a node which does not exist"""
    raise ValueError('node %s does not exist' % node.hex())


def _branchcachedesc(repo):
    if repo.filtername is not None:
        return b'branch cache (%s)' % repo.filtername
    else:
        return b'branch cache'


class _BaseBranchCache:
    """A dict-like object that holds the branch heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered and obsolete revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closed_nodes: Optional[Set[bytes]] = None,
    ) -> None:
        """hasnode is a function which can be used to verify whether changelog
        has a given node or not. If it's not provided, we assume that every node
        we have exists in changelog"""
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closed_nodes is None:
            closed_nodes = set()
        self._closednodes = set(closed_nodes)
        self._entries = dict(entries)

    def __iter__(self):
        return iter(self._entries)

    def __setitem__(self, key, value):
        self._entries[key] = value

    def __getitem__(self, key):
        return self._entries[key]

    def __contains__(self, key):
        return key in self._entries

    def iteritems(self):
        return self._entries.items()

    items = iteritems

    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        return label in self._entries

    def _branchtip(self, heads):
        """Return tuple with last open head in heads and false,
        otherwise return last closed head and true."""
        tip = heads[-1]
        closed = True
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        """Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch."""
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        heads = self._entries[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        for bn, heads in self.items():
            yield (bn, heads) + self._branchtip(heads)

    def iterheads(self):
        """returns all the heads"""
        return self._entries.values()

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        heads missing, this function updates self to be correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        max_rev = -1
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))
            max_rev = max(max_rev, r)
        if max_rev < 0:
            msg = "running branchcache.update without revision to update"
            raise error.ProgrammingError(msg)

        # Delay fetching the topological heads until they are needed.
        # A repository without non-continuous branches can skip this part.
        topoheads = None

        # If a changeset is visible, its parents must be visible too, so
        # use the faster unfiltered parent accessor.
        parentrevs = repo.unfiltered().changelog.parentrevs

        # Faster than using ctx.obsolete()
        obsrevs = obsolete.getrevs(repo, b'obsolete')

        for branch, newheadrevs in newbranches.items():
            # For every branch, compute the new branchheads.
            # A branchhead is a revision such that no descendant is on
            # the same branch.
            #
            # The branchheads are computed iteratively in revision order.
            # This ensures topological order, i.e. parents are processed
            # before their children. Ancestors are inclusive here, i.e.
            # any revision is an ancestor of itself.
            #
            # Core observations:
            # - The current revision is always a branchhead for the
            #   repository up to that point.
            # - It is the first revision of the branch if and only if
            #   there was no branchhead before. In that case, it is the
            #   only branchhead as there are no possible ancestors on
            #   the same branch.
            # - If a parent is on the same branch, a branchhead can
            #   only be an ancestor of that parent if it is the parent
            #   itself. Otherwise it would have been removed as ancestor
            #   of that parent before.
            # - Therefore, if all parents are on the same branch, they
            #   can just be removed from the branchhead set.
            # - If one parent is on the same branch and the other is not
            #   and there was exactly one branchhead known, the existing
            #   branchhead can only be an ancestor if it is the parent.
            #   Otherwise it would have been removed as ancestor of
            #   the parent before. The other parent therefore can't have
            #   a branchhead as ancestor.
            # - In all other cases, the parents on different branches
            #   could have a branchhead as ancestor. Those parents are
            #   kept in the "uncertain" set. If all branchheads are also
            #   topological heads, they can't have descendants and further
            #   checks can be skipped. Otherwise, the ancestors of the
            #   "uncertain" set are removed from branchheads.
            #   This computation is heavy and avoided if at all possible.
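            #
            # A small worked example with hypothetical revision numbers:
            # if "default" currently has the single head {5} and we process
            # rev 7 whose only parent is 5 (also on "default"), then 5 is
            # removed and the head set becomes {7}. If 7's only parent were
            # on another branch instead, 7 would simply be added, giving
            # {5, 7}, unless the "uncertain" pass proves 5 to be an
            # ancestor of that parent.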
            bheads = self._entries.get(branch, [])
            bheadset = {cl.rev(node) for node in bheads}
            uncertain = set()
            for newrev in sorted(newheadrevs):
                if newrev in obsrevs:
                    # We ignore obsolete changesets as they shouldn't be
                    # considered heads.
                    continue

                if not bheadset:
                    bheadset.add(newrev)
                    continue

                parents = [p for p in parentrevs(newrev) if p != nullrev]
                samebranch = set()
                otherbranch = set()
                obsparents = set()
                for p in parents:
                    if p in obsrevs:
                        # We ignored this obsolete changeset earlier, but now
                        # that it has non-ignored children, we need to make
                        # sure their ancestors are not considered heads. To
                        # achieve that, we will simply treat this obsolete
                        # changeset as a parent from other branch.
                        obsparents.add(p)
                    elif p in bheadset or getbranchinfo(p)[0] == branch:
                        samebranch.add(p)
                    else:
                        otherbranch.add(p)
                if not (len(bheadset) == len(samebranch) == 1):
                    uncertain.update(otherbranch)
                    uncertain.update(obsparents)
                bheadset.difference_update(samebranch)
                bheadset.add(newrev)

            if uncertain:
                if topoheads is None:
                    topoheads = set(cl.headrevs())
                if bheadset - topoheads:
                    floorrev = min(bheadset)
                    if floorrev <= max(uncertain):
                        ancestors = set(cl.ancestors(uncertain, floorrev))
                        bheadset -= ancestors
            if bheadset:
                self[branch] = [cl.node(rev) for rev in sorted(bheadset)]

        duration = util.timer() - starttime
        repo.ui.log(
            b'branchcache',
            b'updated %s in %.4f seconds\n',
            _branchcachedesc(repo),
            duration,
        )
        return max_rev


class branchcache(_BaseBranchCache):
    """Branchmap info for a local repo or repoview"""

    _base_filename = b"branch2"

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        tipnode: Optional[bytes] = None,
        tiprev: Optional[int] = nullrev,
        filteredhash: Optional[bytes] = None,
        closednodes: Optional[Set[bytes]] = None,
        hasnode: Optional[Callable[[bytes], bool]] = None,
        verify_node: bool = False,
    ) -> None:
        """hasnode is a function which can be used to verify whether changelog
        has a given node or not. If it's not provided, we assume that every node
        we have exists in changelog"""
        self._filtername = repo.filtername
        if tipnode is None:
            self.tipnode = repo.nullid
        else:
            self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash
        self._dirty = False

        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.

        # Do we need to verify branches at all?
        self._verify_node = verify_node
        # branches for which nodes are verified
        self._verifiedbranches = set()
        self._hasnode = None
        if self._verify_node:
            self._hasnode = repo.changelog.hasnode

    def validfor(self, repo):
        """check that cache contents are valid for (a subset of) this repo

        - False when the order of changesets changed or if we detect a strip.
        - True when cache is up-to-date for the current repo or its subset."""
        try:
            node = repo.changelog.node(self.tiprev)
        except IndexError:
            # changesets were stripped and now we don't even have enough to
            # find tiprev
            return False
        if self.tipnode != node:
            # tiprev doesn't correspond to tipnode: repo was stripped, or this
            # repo has a different order of changesets
            return False
        tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
        # hashes don't match if this repo view has a different set of filtered
        # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
        # history was rewritten)
        return self.filteredhash == tiphash

    @classmethod
    def fromfile(cls, repo):
        f = None
        try:
            f = repo.cachevfs(cls._filename(repo))
            lineiter = iter(f)
            init_kwargs = cls._load_header(repo, lineiter)
            bcache = cls(
                repo,
                verify_node=True,
                **init_kwargs,
            )
            if not bcache.validfor(repo):
                # invalidate the cache
                raise ValueError('tip differs')
            bcache._load_heads(repo, lineiter)
        except (IOError, OSError):
            return None

        except Exception as inst:
            if repo.ui.debugflag:
                msg = b'invalid %s: %s\n'
                msg %= (
                    _branchcachedesc(repo),
                    stringutil.forcebytestr(inst),
                )
                repo.ui.debug(msg)
            bcache = None

        finally:
            if f:
                f.close()

        return bcache

    @classmethod
    def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
        """parse the header of a branchmap file

        return parameters to pass to a newly created class instance.
        """
        cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
        last, lrev = cachekey[:2]
        last, lrev = bin(last), int(lrev)
        filteredhash = None
        if len(cachekey) > 2:
            filteredhash = bin(cachekey[2])
        return {
            "tipnode": last,
            "tiprev": lrev,
            "filteredhash": filteredhash,
        }

    def _load_heads(self, repo, lineiter):
        """fully loads the branchcache by reading from the file using the line
        iterator passed"""
        for line in lineiter:
            line = line.rstrip(b'\n')
            if not line:
                continue
            node, state, label = line.split(b" ", 2)
            if state not in b'oc':
                raise ValueError('invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            self._entries.setdefault(label, []).append(node)
            if state == b'c':
                self._closednodes.add(node)

    @classmethod
    def _filename(cls, repo):
        """name of a branchcache file for a given repo or repoview"""
        filename = cls._base_filename
        if repo.filtername:
            filename = b'%s-%s' % (filename, repo.filtername)
        return filename

    def copy(self, repo):
        """return a deep copy of the branchcache object"""
        other = type(self)(
            repo=repo,
            # we always do a shallow copy of self._entries, and the values
            # are always replaced, so no need to deepcopy until the above
            # remains true.
            entries=self._entries,
            tipnode=self.tipnode,
            tiprev=self.tiprev,
            filteredhash=self.filteredhash,
            closednodes=set(self._closednodes),
            verify_node=self._verify_node,
        )
        # the copy will likely schedule a write anyway, but it does not seem
        # to hurt to overschedule
        other._dirty = self._dirty
        # also copy information about the current verification state
        other._verifiedbranches = set(self._verifiedbranches)
        return other

    def write(self, repo):
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        tr = repo.currenttransaction()
        if not getattr(tr, 'finalized', True):
            # Avoid premature writing.
            #
            # (The cache warming setup by localrepo will update the file later.)
            return
        try:
            filename = self._filename(repo)
            with repo.cachevfs(filename, b"w", atomictemp=True) as f:
                self._write_header(f)
                nodecount = self._write_heads(f)
            repo.ui.log(
                b'branchcache',
                b'wrote %s with %d labels and %d nodes\n',
                _branchcachedesc(repo),
                len(self._entries),
                nodecount,
            )
            self._dirty = False
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug(
                b"couldn't write branch cache: %s\n"
                % stringutil.forcebytestr(inst)
            )

    def _write_header(self, fp) -> None:
        """write the branch cache header to a file"""
        cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
        if self.filteredhash is not None:
            cachekey.append(hex(self.filteredhash))
        fp.write(b" ".join(cachekey) + b'\n')

    def _write_heads(self, fp) -> int:
        """write list of heads to a file

        Return the number of heads written."""
        nodecount = 0
        for label, nodes in sorted(self._entries.items()):
            label = encoding.fromlocal(label)
            for node in nodes:
                nodecount += 1
                if node in self._closednodes:
                    state = b'c'
                else:
                    state = b'o'
                fp.write(b"%s %s %s\n" % (hex(node), state, label))
        return nodecount

    def _verifybranch(self, branch):
        """verify head nodes for the given branch."""
        if not self._verify_node:
            return
        if branch not in self._entries or branch in self._verifiedbranches:
            return
        assert self._hasnode is not None
        for n in self._entries[branch]:
            if not self._hasnode(n):
                _unknownnode(n)

        self._verifiedbranches.add(branch)

    def _verifyall(self):
        """verifies nodes of all the branches"""
        for b in self._entries.keys():
            if b not in self._verifiedbranches:
                self._verifybranch(b)

    def __getitem__(self, key):
        self._verifybranch(key)
        return super().__getitem__(key)

    def __contains__(self, key):
        self._verifybranch(key)
        return super().__contains__(key)

    def iteritems(self):
        self._verifyall()
        return super().iteritems()

    items = iteritems

    def iterheads(self):
        """returns all the heads"""
        self._verifyall()
        return super().iterheads()

    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        self._verifybranch(label)
        return super().hasbranch(label)

    def branchheads(self, branch, closed=False):
        self._verifybranch(branch)
        return super().branchheads(branch, closed=closed)

    def update(self, repo, revgen):
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        cl = repo.changelog
        max_rev = super().update(repo, revgen)
        # new tip revision which we found after iterating items from new
        # branches
        if max_rev is not None and max_rev > self.tiprev:
            self.tiprev = max_rev
            self.tipnode = cl.node(max_rev)

        if not self.validfor(repo):
            # old cache key is now invalid for the repo, but we've just updated
            # the cache and we assume it's valid, so let's make the cache key
            # valid as well by recomputing it from the cached data
            self.tipnode = repo.nullid
            self.tiprev = nullrev
            for heads in self.iterheads():
                if not heads:
                    # all revisions on a branch are obsolete
                    continue
                # note: tiprev is not necessarily the tip revision of repo,
                # because the tip could be obsolete (i.e. not a head)
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(
            repo, self.tiprev, needobsolete=True
        )
        self._dirty = True
        self.write(repo)


class remotebranchcache(_BaseBranchCache):
    """Branchmap info for a remote connection, should not write locally"""

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closednodes: Optional[Set[bytes]] = None,
    ) -> None:
        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)


# Revision branch info cache

_rbcversion = b'-v1'
_rbcnames = b'rbc-names' + _rbcversion
_rbcrevs = b'rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit indicating a closed branch]
_rbcrecfmt = b'>4sI'
_rbcrecsize = calcsize(_rbcrecfmt)
_rbcmininc = 64 * _rbcrecsize
_rbcnodelen = 4
_rbcbranchidxmask = 0x7FFFFFFF
_rbccloseflag = 0x80000000
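# As an illustration (hypothetical values): the record for a revision whose
# node starts with b'\x8fK\x12\xaa', on the branch at index 3, would be
# written with pack_into(_rbcrecfmt, buf, offset, b'\x8fK\x12\xaa', 3);
# for a branch-closing revision the stored index would be
# 3 | _rbccloseflag.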


class rbcrevs:
    """a byte string consisting of an immutable prefix followed by a mutable suffix"""

    def __init__(self, revs):
        self._prefix = revs
        self._rest = bytearray()

    def __len__(self):
        return len(self._prefix) + len(self._rest)

    def unpack_record(self, rbcrevidx):
        if rbcrevidx < len(self._prefix):
            return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
        else:
            return unpack_from(
                _rbcrecfmt,
                util.buffer(self._rest),
                rbcrevidx - len(self._prefix),
            )

    def make_mutable(self):
        if len(self._prefix) > 0:
            entirety = bytearray()
            entirety[:] = self._prefix
            entirety.extend(self._rest)
            self._rest = entirety
            self._prefix = bytearray()

    def truncate(self, pos):
        self.make_mutable()
        del self._rest[pos:]

    def pack_into(self, rbcrevidx, node, branchidx):
        if rbcrevidx < len(self._prefix):
            self.make_mutable()
        buf = self._rest
        start_offset = rbcrevidx - len(self._prefix)
        end_offset = start_offset + _rbcrecsize

        if len(self._rest) < end_offset:
            # bytearray doesn't allocate extra space at least in Python 3.7.
            # When multiple changesets are added in a row, precise resize would
            # result in quadratic complexity. Overallocate to compensate by
            # using the classic doubling technique for dynamic arrays instead.
            # If there was a gap in the map before, less space will be reserved.
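            # (A sketch of the effect: when len(self._rest) < end_offset,
            # extending by end_offset bytes at least doubles the buffer,
            # which gives the amortized O(1) growth mentioned above.)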
            self._rest.extend(b'\0' * end_offset)
        return pack_into(
            _rbcrecfmt,
            buf,
            start_offset,
            node,
            branchidx,
        )

    def extend(self, extension):
        return self._rest.extend(extension)

    def slice(self, begin, end):
        if begin < len(self._prefix):
            acc = bytearray()
            acc[:] = self._prefix[begin:end]
            acc.extend(
                self._rest[begin - len(self._prefix) : end - len(self._prefix)]
            )
            return acc
        return self._rest[begin - len(self._prefix) : end - len(self._prefix)]


class revbranchcache:
    """Persistent cache, mapping from revision number to branch name and close.
    This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs file will thus still give the right
    result while converging towards full recovery ... assuming no incorrectly
    matching node hashes.
    The record also contains 4 bytes where 31 bits contain the index of the
    branch and the last bit indicates that it is a branch close commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """
831
833
832 def __init__(self, repo, readonly=True):
834 def __init__(self, repo, readonly=True):
833 assert repo.filtername is None
835 assert repo.filtername is None
834 self._repo = repo
836 self._repo = repo
835 self._names = [] # branch names in local encoding with static index
837 self._names = [] # branch names in local encoding with static index
836 self._rbcrevs = rbcrevs(bytearray())
838 self._rbcrevs = rbcrevs(bytearray())
837 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
839 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
838 try:
840 try:
839 bndata = repo.cachevfs.read(_rbcnames)
841 bndata = repo.cachevfs.read(_rbcnames)
840 self._rbcsnameslen = len(bndata) # for verification before writing
842 self._rbcsnameslen = len(bndata) # for verification before writing
841 if bndata:
843 if bndata:
842 self._names = [
844 self._names = [
843 encoding.tolocal(bn) for bn in bndata.split(b'\0')
845 encoding.tolocal(bn) for bn in bndata.split(b'\0')
844 ]
846 ]
845 except (IOError, OSError):
847 except (IOError, OSError):
846 if readonly:
848 if readonly:
847 # don't try to use cache - fall back to the slow path
849 # don't try to use cache - fall back to the slow path
848 self.branchinfo = self._branchinfo
850 self.branchinfo = self._branchinfo
849
851
850 if self._names:
852 if self._names:
851 try:
853 try:
852 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
854 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
853 with repo.cachevfs(_rbcrevs) as fp:
855 with repo.cachevfs(_rbcrevs) as fp:
854 data = util.buffer(util.mmapread(fp))
856 data = util.buffer(util.mmapread(fp))
855 else:
857 else:
856 data = repo.cachevfs.read(_rbcrevs)
858 data = repo.cachevfs.read(_rbcrevs)
857 self._rbcrevs = rbcrevs(data)
859 self._rbcrevs = rbcrevs(data)
858 except (IOError, OSError) as inst:
860 except (IOError, OSError) as inst:
859 repo.ui.debug(
861 repo.ui.debug(
860 b"couldn't read revision branch cache: %s\n"
862 b"couldn't read revision branch cache: %s\n"
861 % stringutil.forcebytestr(inst)
863 % stringutil.forcebytestr(inst)
862 )
864 )
863 # remember number of good records on disk
865 # remember number of good records on disk
864 self._rbcrevslen = min(
866 self._rbcrevslen = min(
865 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
867 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
866 )
868 )
867 if self._rbcrevslen == 0:
869 if self._rbcrevslen == 0:
868 self._names = []
870 self._names = []
869 self._rbcnamescount = len(self._names) # number of names read at
871 self._rbcnamescount = len(self._names) # number of names read at
870 # _rbcsnameslen
872 # _rbcsnameslen
871
873
872 def _clear(self):
874 def _clear(self):
873 self._rbcsnameslen = 0
875 self._rbcsnameslen = 0
874 del self._names[:]
876 del self._names[:]
875 self._rbcnamescount = 0
877 self._rbcnamescount = 0
876 self._rbcrevslen = len(self._repo.changelog)
878 self._rbcrevslen = len(self._repo.changelog)
877 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
879 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
878 util.clearcachedproperty(self, b'_namesreverse')
880 util.clearcachedproperty(self, b'_namesreverse')
879
881
880 @util.propertycache
882 @util.propertycache
881 def _namesreverse(self):
883 def _namesreverse(self):
882 return {b: r for r, b in enumerate(self._names)}
884 return {b: r for r, b in enumerate(self._names)}
883
885
    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == b'\0\0\0\0':
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug(
                    b"referenced branch names not found"
                    b" - rebuilding revision branch cache from scratch\n"
                )
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug(
                b"history modification detected - truncating "
                b"revision branch cache to revision %d\n" % rev
            )
            truncate = rbcrevidx + _rbcrecsize
            self._rbcrevs.truncate(truncate)
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

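    # The close-branch flag is stored in the high bit of the 32-bit branch
    # index, which is why branchinfo() masks it off before indexing into
    # self._names. A self-contained sketch of that decoding, using constants
    # that mirror what _rbccloseflag and _rbcbranchidxmask are assumed to be:
    #
    #     CLOSE_FLAG = 0x80000000    # high bit: branch head is closed
    #     IDX_MASK = 0x7FFFFFFF      # remaining 31 bits: index into names
    #     def _decode_idx(branchidx):
    #         return branchidx & IDX_MASK, bool(branchidx & CLOSE_FLAG)
    #
    #     assert _decode_idx(0x80000002) == (2, True)
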
    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def setdata(self, rev, changelogrevision):
        """add new data information to the cache"""
        branch, close = changelogrevision.branchinfo

        if branch in self._namesreverse:
            branchidx = self._namesreverse[branch]
        else:
            branchidx = len(self._names)
            self._names.append(branch)
            self._namesreverse[branch] = branchidx
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
        # If no cache data was readable (file missing, bad permissions, etc.),
        # the cache was bypassing itself by setting:
        #
        #   self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypassing.
        if 'branchinfo' in vars(self):
            del self.branchinfo

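    # The bypass dropped above relies on ordinary Python attribute lookup:
    # assigning self.branchinfo stores an instance attribute that shadows the
    # class-level branchinfo() method, and deleting it re-exposes the method.
    # A tiny standalone sketch of the same pattern (hypothetical class):
    #
    #     class Cache:
    #         def fast(self):
    #             return 'fast'
    #         def slow(self):
    #             return 'slow'
    #
    #     c = Cache()
    #     c.fast = c.slow            # instance attribute shadows the method
    #     assert c.fast() == 'slow'
    #     if 'fast' in vars(c):      # same check as setdata() above
    #         del c.fast             # remove the shadow
    #     assert c.fast() == 'fast'
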
    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize(b'write-revbranchcache', self.write)

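    # Scheduling self.write as a transaction finalizer means the cache hits
    # disk at most once per transaction instead of once per cached record.
    # A rough sketch of the same defer-until-close idea (a generic stand-in;
    # only the (category, callback) shape of addfinalize used above is taken
    # from this file):
    #
    #     pending = {}
    #     def addfinalize(category, callback):
    #         # re-registering under the same category replaces the callback
    #         pending[category] = callback
    #     def close_transaction():
    #         for cb in pending.values():
    #             cb()               # flush each cache exactly once
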
    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = b''
        try:
            # write the new names
            if self._rbcnamescount < len(self._names):
                wlock = repo.wlock(wait=False)
                step = b' names'
                self._writenames(repo)

            # write the new revs
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = b' revs'
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                self._writerevs(repo, start)

        except (IOError, OSError, error.Abort, error.LockError) as inst:
            repo.ui.debug(
                b"couldn't write revision branch cache%s: %s\n"
                % (step, stringutil.forcebytestr(inst))
            )
        finally:
            if wlock is not None:
                wlock.release()

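    # write() takes the wlock with wait=False so a busy repository never
    # blocks on a best-effort cache flush: if the lock is held elsewhere, the
    # LockError is swallowed by the except clause above and the cache is
    # simply written on a later attempt. The shape of that pattern, sketched
    # with a generic lock (illustration only, not the mercurial lock API):
    #
    #     import threading
    #     lock = threading.Lock()
    #     def best_effort_write(write_cb):
    #         if not lock.acquire(blocking=False):
    #             return             # someone else holds it; skip this round
    #         try:
    #             write_cb()
    #         finally:
    #             lock.release()
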
    def _writenames(self, repo):
        """write the new branch names to revbranchcache"""
        if self._rbcnamescount != 0:
            f = repo.cachevfs.open(_rbcnames, b'ab')
            if f.tell() == self._rbcsnameslen:
                f.write(b'\0')
            else:
                f.close()
                repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
                self._rbcnamescount = 0
                self._rbcrevslen = 0
        if self._rbcnamescount == 0:
            # before rewriting names, make sure references are removed
            repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
            f = repo.cachevfs.open(_rbcnames, b'wb')
        f.write(
            b'\0'.join(
                encoding.fromlocal(b)
                for b in self._names[self._rbcnamescount :]
            )
        )
        self._rbcsnameslen = f.tell()
        f.close()
        self._rbcnamescount = len(self._names)

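    # The names file written above is simply the branch names (converted from
    # local encoding via encoding.fromlocal()) joined by NUL bytes, so
    # appending new names only costs one separator plus the new tail. A small
    # round-trip sketch with hypothetical data:
    #
    #     names = [b'default', b'stable', b'my-feature']
    #     blob = b'\0'.join(names)
    #     assert blob.split(b'\0') == names
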
    def _writerevs(self, repo, start):
        """write the new revs to revbranchcache"""
        revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
        with repo.cachevfs.open(_rbcrevs, b'ab') as f:
            if f.tell() != start:
                repo.ui.debug(
                    b"truncating cache/%s to %d\n" % (_rbcrevs, start)
                )
                f.seek(start)
                if f.tell() != start:
                    start = 0
                    f.seek(start)
                f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs.slice(start, end))
        self._rbcrevslen = revs
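
    # _writerevs() appends only the records past `start`, but first checks
    # that the file on disk still ends where the last write left off; if not,
    # the file changed behind our back and is rewritten from the beginning.
    # A compact, simplified sketch of that append-or-rewrite decision
    # (illustration only; hypothetical helper):
    #
    #     def append_records(f, all_records, start):
    #         # f is opened in append mode, so tell() is the current size
    #         # and writes always land at end-of-file
    #         if f.tell() != start:
    #             start = 0          # unexpected size: rewrite from scratch
    #             f.seek(0)
    #             f.truncate()
    #         f.write(all_records[start:])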