##// END OF EJS Templates
branchcache: do not use `__getitem__` in updatecache...
marmoute -
r52387:54f0dd79 default
parent child Browse files
Show More
@@ -1,1072 +1,1073 b''
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import struct
9 import struct
10
10
11 from .node import (
11 from .node import (
12 bin,
12 bin,
13 hex,
13 hex,
14 nullrev,
14 nullrev,
15 )
15 )
16
16
17 from typing import (
17 from typing import (
18 Any,
18 Any,
19 Callable,
19 Callable,
20 Dict,
20 Dict,
21 Iterable,
21 Iterable,
22 List,
22 List,
23 Optional,
23 Optional,
24 Set,
24 Set,
25 TYPE_CHECKING,
25 TYPE_CHECKING,
26 Tuple,
26 Tuple,
27 Union,
27 Union,
28 )
28 )
29
29
30 from . import (
30 from . import (
31 encoding,
31 encoding,
32 error,
32 error,
33 obsolete,
33 obsolete,
34 scmutil,
34 scmutil,
35 util,
35 util,
36 )
36 )
37
37
38 from .utils import (
38 from .utils import (
39 repoviewutil,
39 repoviewutil,
40 stringutil,
40 stringutil,
41 )
41 )
42
42
43 if TYPE_CHECKING:
43 if TYPE_CHECKING:
44 from . import localrepo
44 from . import localrepo
45
45
46 assert [localrepo]
46 assert [localrepo]
47
47
48 subsettable = repoviewutil.subsettable
48 subsettable = repoviewutil.subsettable
49
49
50 calcsize = struct.calcsize
50 calcsize = struct.calcsize
51 pack_into = struct.pack_into
51 pack_into = struct.pack_into
52 unpack_from = struct.unpack_from
52 unpack_from = struct.unpack_from
53
53
54
54
class BranchMapCache:
    """mapping of filtered views of repo with their branchcache"""

    def __init__(self):
        # one branchcache instance per repoview filter name
        self._per_filter = {}

    def __getitem__(self, repo):
        """return an up-to-date branchcache for this repo view"""
        self.updatecache(repo)
        cache = self._per_filter[repo.filtername]
        assert cache._filtername == repo.filtername, (
            cache._filtername,
            repo.filtername,
        )
        return cache

    def update_disk(self, repo):
        """ensure an up-to-date cache is (or will be) written on disk

        The cache for this repository view is updated if needed and written on
        disk.

        If a transaction is in progress, the writing is scheduled at
        transaction close. See the `BranchMapCache.write_dirty` method.

        This method exists independently of __getitem__ as it is sometimes
        useful to signal that we have no intent to use the data in memory yet.
        """
        self.updatecache(repo)
        cache = self._per_filter[repo.filtername]
        assert cache._filtername == repo.filtername, (
            cache._filtername,
            repo.filtername,
        )
        cache.sync_disk(repo)

    def updatecache(self, repo):
        """Update the cache for the given filtered view on a repository"""
        # This can trigger updates for the caches of subsets of the filtered
        # view, e.g. when there is no cache for this filtered view or the
        # cache is stale.

        cl = repo.changelog
        filtername = repo.filtername
        cache = self._per_filter.get(filtername)
        if cache is None or not cache.validfor(repo):
            # cache object missing or stale: re-read it from disk
            cache = branchcache.fromfile(repo)

        revs = []
        if cache is None:
            # No (fresh) cache available anymore. Perhaps the cache for a
            # subset view can be reused, then extended with the missing
            # revisions.
            subsetname = subsettable.get(filtername)
            if subsetname is None:
                # nothing to fall back on, start empty.
                cache = branchcache(repo)
            else:
                subset = repo.filtered(subsetname)
                # NOTE: go through updatecache() + the explicit dict lookup
                # rather than `self[subset]` to avoid __getitem__'s extra
                # validation work on the intermediate subset cache.
                self.updatecache(subset)
                cache = self._per_filter[subset.filtername].inherit_for(repo)
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= cache.tiprev)

        # catch up with any revision added since the cached tip
        revs.extend(cl.revs(start=cache.tiprev + 1))
        if revs:
            cache.update(repo, revs)

        assert cache.validfor(repo), filtername
        self._per_filter[repo.filtername] = cache

    def replace(self, repo, remotebranchmap):
        """Replace the branchmap cache for a repo with a branch mapping.

        This is likely only called during clone with a branch map from a
        remote.
        """
        cl = repo.changelog
        clrev = cl.rev
        clbranchinfo = cl.branchinfo
        rbheads = []
        closed = set()
        for bheads in remotebranchmap.values():
            rbheads += bheads
            for h in bheads:
                r = clrev(h)
                b, c = clbranchinfo(r)
                if c:
                    closed.add(h)

        if not rbheads:
            return
        rtiprev = max(int(clrev(node)) for node in rbheads)
        cache = branchcache(
            repo,
            remotebranchmap,
            repo[rtiprev].node(),
            rtiprev,
            closednodes=closed,
        )

        # Try to stick it as low as possible
        # filters above "served" are unlikely to be fetched from a clone
        for candidate in (b'base', b'immutable', b'served'):
            rview = repo.filtered(candidate)
            if cache.validfor(rview):
                cache._filtername = candidate
                self._per_filter[candidate] = cache
                cache._state = STATE_DIRTY
                cache.write(rview)
                return

    def clear(self):
        """drop every cached branchmap"""
        self._per_filter.clear()

    def write_dirty(self, repo):
        """flush all caches with unwritten changes to disk"""
        unfi = repo.unfiltered()
        for filtername in repoviewutil.get_ordered_subset():
            cache = self._per_filter.get(filtername)
            if cache is None:
                continue
            if filtername is None:
                view = unfi
            else:
                view = unfi.filtered(filtername)
            cache.sync_disk(view)
180
181
181
182
182 def _unknownnode(node):
183 def _unknownnode(node):
183 """raises ValueError when branchcache found a node which does not exists"""
184 """raises ValueError when branchcache found a node which does not exists"""
184 raise ValueError('node %s does not exist' % node.hex())
185 raise ValueError('node %s does not exist' % node.hex())
185
186
186
187
187 def _branchcachedesc(repo):
188 def _branchcachedesc(repo):
188 if repo.filtername is not None:
189 if repo.filtername is not None:
189 return b'branch cache (%s)' % repo.filtername
190 return b'branch cache (%s)' % repo.filtername
190 else:
191 else:
191 return b'branch cache'
192 return b'branch cache'
192
193
193
194
194 class _BaseBranchCache:
195 class _BaseBranchCache:
195 """A dict like object that hold branches heads cache.
196 """A dict like object that hold branches heads cache.
196
197
197 This cache is used to avoid costly computations to determine all the
198 This cache is used to avoid costly computations to determine all the
198 branch heads of a repo.
199 branch heads of a repo.
199
200
200 The cache is serialized on disk in the following format:
201 The cache is serialized on disk in the following format:
201
202
202 <tip hex node> <tip rev number> [optional filtered repo hex hash]
203 <tip hex node> <tip rev number> [optional filtered repo hex hash]
203 <branch head hex node> <open/closed state> <branch name>
204 <branch head hex node> <open/closed state> <branch name>
204 <branch head hex node> <open/closed state> <branch name>
205 <branch head hex node> <open/closed state> <branch name>
205 ...
206 ...
206
207
207 The first line is used to check if the cache is still valid. If the
208 The first line is used to check if the cache is still valid. If the
208 branch cache is for a filtered repo view, an optional third hash is
209 branch cache is for a filtered repo view, an optional third hash is
209 included that hashes the hashes of all filtered and obsolete revisions.
210 included that hashes the hashes of all filtered and obsolete revisions.
210
211
211 The open/closed state is represented by a single letter 'o' or 'c'.
212 The open/closed state is represented by a single letter 'o' or 'c'.
212 This field can be used to avoid changelog reads when determining if a
213 This field can be used to avoid changelog reads when determining if a
213 branch head closes a branch or not.
214 branch head closes a branch or not.
214 """
215 """
215
216
216 def __init__(
217 def __init__(
217 self,
218 self,
218 repo: "localrepo.localrepository",
219 repo: "localrepo.localrepository",
219 entries: Union[
220 entries: Union[
220 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
221 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
221 ] = (),
222 ] = (),
222 closed_nodes: Optional[Set[bytes]] = None,
223 closed_nodes: Optional[Set[bytes]] = None,
223 ) -> None:
224 ) -> None:
224 """hasnode is a function which can be used to verify whether changelog
225 """hasnode is a function which can be used to verify whether changelog
225 has a given node or not. If it's not provided, we assume that every node
226 has a given node or not. If it's not provided, we assume that every node
226 we have exists in changelog"""
227 we have exists in changelog"""
227 # closednodes is a set of nodes that close their branch. If the branch
228 # closednodes is a set of nodes that close their branch. If the branch
228 # cache has been updated, it may contain nodes that are no longer
229 # cache has been updated, it may contain nodes that are no longer
229 # heads.
230 # heads.
230 if closed_nodes is None:
231 if closed_nodes is None:
231 closed_nodes = set()
232 closed_nodes = set()
232 self._closednodes = set(closed_nodes)
233 self._closednodes = set(closed_nodes)
233 self._entries = dict(entries)
234 self._entries = dict(entries)
234
235
235 def __iter__(self):
236 def __iter__(self):
236 return iter(self._entries)
237 return iter(self._entries)
237
238
238 def __setitem__(self, key, value):
239 def __setitem__(self, key, value):
239 self._entries[key] = value
240 self._entries[key] = value
240
241
241 def __getitem__(self, key):
242 def __getitem__(self, key):
242 return self._entries[key]
243 return self._entries[key]
243
244
244 def __contains__(self, key):
245 def __contains__(self, key):
245 return key in self._entries
246 return key in self._entries
246
247
247 def iteritems(self):
248 def iteritems(self):
248 return self._entries.items()
249 return self._entries.items()
249
250
250 items = iteritems
251 items = iteritems
251
252
252 def hasbranch(self, label):
253 def hasbranch(self, label):
253 """checks whether a branch of this name exists or not"""
254 """checks whether a branch of this name exists or not"""
254 return label in self._entries
255 return label in self._entries
255
256
256 def _branchtip(self, heads):
257 def _branchtip(self, heads):
257 """Return tuple with last open head in heads and false,
258 """Return tuple with last open head in heads and false,
258 otherwise return last closed head and true."""
259 otherwise return last closed head and true."""
259 tip = heads[-1]
260 tip = heads[-1]
260 closed = True
261 closed = True
261 for h in reversed(heads):
262 for h in reversed(heads):
262 if h not in self._closednodes:
263 if h not in self._closednodes:
263 tip = h
264 tip = h
264 closed = False
265 closed = False
265 break
266 break
266 return tip, closed
267 return tip, closed
267
268
268 def branchtip(self, branch):
269 def branchtip(self, branch):
269 """Return the tipmost open head on branch head, otherwise return the
270 """Return the tipmost open head on branch head, otherwise return the
270 tipmost closed head on branch.
271 tipmost closed head on branch.
271 Raise KeyError for unknown branch."""
272 Raise KeyError for unknown branch."""
272 return self._branchtip(self[branch])[0]
273 return self._branchtip(self[branch])[0]
273
274
274 def iteropen(self, nodes):
275 def iteropen(self, nodes):
275 return (n for n in nodes if n not in self._closednodes)
276 return (n for n in nodes if n not in self._closednodes)
276
277
277 def branchheads(self, branch, closed=False):
278 def branchheads(self, branch, closed=False):
278 heads = self._entries[branch]
279 heads = self._entries[branch]
279 if not closed:
280 if not closed:
280 heads = list(self.iteropen(heads))
281 heads = list(self.iteropen(heads))
281 return heads
282 return heads
282
283
283 def iterbranches(self):
284 def iterbranches(self):
284 for bn, heads in self.items():
285 for bn, heads in self.items():
285 yield (bn, heads) + self._branchtip(heads)
286 yield (bn, heads) + self._branchtip(heads)
286
287
287 def iterheads(self):
288 def iterheads(self):
288 """returns all the heads"""
289 """returns all the heads"""
289 return self._entries.values()
290 return self._entries.values()
290
291
291 def update(self, repo, revgen):
292 def update(self, repo, revgen):
292 """Given a branchhead cache, self, that may have extra nodes or be
293 """Given a branchhead cache, self, that may have extra nodes or be
293 missing heads, and a generator of nodes that are strictly a superset of
294 missing heads, and a generator of nodes that are strictly a superset of
294 heads missing, this function updates self to be correct.
295 heads missing, this function updates self to be correct.
295 """
296 """
296 starttime = util.timer()
297 starttime = util.timer()
297 cl = repo.changelog
298 cl = repo.changelog
298 # collect new branch entries
299 # collect new branch entries
299 newbranches = {}
300 newbranches = {}
300 getbranchinfo = repo.revbranchcache().branchinfo
301 getbranchinfo = repo.revbranchcache().branchinfo
301 max_rev = -1
302 max_rev = -1
302 for r in revgen:
303 for r in revgen:
303 branch, closesbranch = getbranchinfo(r)
304 branch, closesbranch = getbranchinfo(r)
304 newbranches.setdefault(branch, []).append(r)
305 newbranches.setdefault(branch, []).append(r)
305 if closesbranch:
306 if closesbranch:
306 self._closednodes.add(cl.node(r))
307 self._closednodes.add(cl.node(r))
307 max_rev = max(max_rev, r)
308 max_rev = max(max_rev, r)
308 if max_rev < 0:
309 if max_rev < 0:
309 msg = "running branchcache.update without revision to update"
310 msg = "running branchcache.update without revision to update"
310 raise error.ProgrammingError(msg)
311 raise error.ProgrammingError(msg)
311
312
312 # Delay fetching the topological heads until they are needed.
313 # Delay fetching the topological heads until they are needed.
313 # A repository without non-continous branches can skip this part.
314 # A repository without non-continous branches can skip this part.
314 topoheads = None
315 topoheads = None
315
316
316 # If a changeset is visible, its parents must be visible too, so
317 # If a changeset is visible, its parents must be visible too, so
317 # use the faster unfiltered parent accessor.
318 # use the faster unfiltered parent accessor.
318 parentrevs = repo.unfiltered().changelog.parentrevs
319 parentrevs = repo.unfiltered().changelog.parentrevs
319
320
320 # Faster than using ctx.obsolete()
321 # Faster than using ctx.obsolete()
321 obsrevs = obsolete.getrevs(repo, b'obsolete')
322 obsrevs = obsolete.getrevs(repo, b'obsolete')
322
323
323 for branch, newheadrevs in newbranches.items():
324 for branch, newheadrevs in newbranches.items():
324 # For every branch, compute the new branchheads.
325 # For every branch, compute the new branchheads.
325 # A branchhead is a revision such that no descendant is on
326 # A branchhead is a revision such that no descendant is on
326 # the same branch.
327 # the same branch.
327 #
328 #
328 # The branchheads are computed iteratively in revision order.
329 # The branchheads are computed iteratively in revision order.
329 # This ensures topological order, i.e. parents are processed
330 # This ensures topological order, i.e. parents are processed
330 # before their children. Ancestors are inclusive here, i.e.
331 # before their children. Ancestors are inclusive here, i.e.
331 # any revision is an ancestor of itself.
332 # any revision is an ancestor of itself.
332 #
333 #
333 # Core observations:
334 # Core observations:
334 # - The current revision is always a branchhead for the
335 # - The current revision is always a branchhead for the
335 # repository up to that point.
336 # repository up to that point.
336 # - It is the first revision of the branch if and only if
337 # - It is the first revision of the branch if and only if
337 # there was no branchhead before. In that case, it is the
338 # there was no branchhead before. In that case, it is the
338 # only branchhead as there are no possible ancestors on
339 # only branchhead as there are no possible ancestors on
339 # the same branch.
340 # the same branch.
340 # - If a parent is on the same branch, a branchhead can
341 # - If a parent is on the same branch, a branchhead can
341 # only be an ancestor of that parent, if it is parent
342 # only be an ancestor of that parent, if it is parent
342 # itself. Otherwise it would have been removed as ancestor
343 # itself. Otherwise it would have been removed as ancestor
343 # of that parent before.
344 # of that parent before.
344 # - Therefore, if all parents are on the same branch, they
345 # - Therefore, if all parents are on the same branch, they
345 # can just be removed from the branchhead set.
346 # can just be removed from the branchhead set.
346 # - If one parent is on the same branch and the other is not
347 # - If one parent is on the same branch and the other is not
347 # and there was exactly one branchhead known, the existing
348 # and there was exactly one branchhead known, the existing
348 # branchhead can only be an ancestor if it is the parent.
349 # branchhead can only be an ancestor if it is the parent.
349 # Otherwise it would have been removed as ancestor of
350 # Otherwise it would have been removed as ancestor of
350 # the parent before. The other parent therefore can't have
351 # the parent before. The other parent therefore can't have
351 # a branchhead as ancestor.
352 # a branchhead as ancestor.
352 # - In all other cases, the parents on different branches
353 # - In all other cases, the parents on different branches
353 # could have a branchhead as ancestor. Those parents are
354 # could have a branchhead as ancestor. Those parents are
354 # kept in the "uncertain" set. If all branchheads are also
355 # kept in the "uncertain" set. If all branchheads are also
355 # topological heads, they can't have descendants and further
356 # topological heads, they can't have descendants and further
356 # checks can be skipped. Otherwise, the ancestors of the
357 # checks can be skipped. Otherwise, the ancestors of the
357 # "uncertain" set are removed from branchheads.
358 # "uncertain" set are removed from branchheads.
358 # This computation is heavy and avoided if at all possible.
359 # This computation is heavy and avoided if at all possible.
359 bheads = self._entries.get(branch, [])
360 bheads = self._entries.get(branch, [])
360 bheadset = {cl.rev(node) for node in bheads}
361 bheadset = {cl.rev(node) for node in bheads}
361 uncertain = set()
362 uncertain = set()
362 for newrev in sorted(newheadrevs):
363 for newrev in sorted(newheadrevs):
363 if newrev in obsrevs:
364 if newrev in obsrevs:
364 # We ignore obsolete changesets as they shouldn't be
365 # We ignore obsolete changesets as they shouldn't be
365 # considered heads.
366 # considered heads.
366 continue
367 continue
367
368
368 if not bheadset:
369 if not bheadset:
369 bheadset.add(newrev)
370 bheadset.add(newrev)
370 continue
371 continue
371
372
372 parents = [p for p in parentrevs(newrev) if p != nullrev]
373 parents = [p for p in parentrevs(newrev) if p != nullrev]
373 samebranch = set()
374 samebranch = set()
374 otherbranch = set()
375 otherbranch = set()
375 obsparents = set()
376 obsparents = set()
376 for p in parents:
377 for p in parents:
377 if p in obsrevs:
378 if p in obsrevs:
378 # We ignored this obsolete changeset earlier, but now
379 # We ignored this obsolete changeset earlier, but now
379 # that it has non-ignored children, we need to make
380 # that it has non-ignored children, we need to make
380 # sure their ancestors are not considered heads. To
381 # sure their ancestors are not considered heads. To
381 # achieve that, we will simply treat this obsolete
382 # achieve that, we will simply treat this obsolete
382 # changeset as a parent from other branch.
383 # changeset as a parent from other branch.
383 obsparents.add(p)
384 obsparents.add(p)
384 elif p in bheadset or getbranchinfo(p)[0] == branch:
385 elif p in bheadset or getbranchinfo(p)[0] == branch:
385 samebranch.add(p)
386 samebranch.add(p)
386 else:
387 else:
387 otherbranch.add(p)
388 otherbranch.add(p)
388 if not (len(bheadset) == len(samebranch) == 1):
389 if not (len(bheadset) == len(samebranch) == 1):
389 uncertain.update(otherbranch)
390 uncertain.update(otherbranch)
390 uncertain.update(obsparents)
391 uncertain.update(obsparents)
391 bheadset.difference_update(samebranch)
392 bheadset.difference_update(samebranch)
392 bheadset.add(newrev)
393 bheadset.add(newrev)
393
394
394 if uncertain:
395 if uncertain:
395 if topoheads is None:
396 if topoheads is None:
396 topoheads = set(cl.headrevs())
397 topoheads = set(cl.headrevs())
397 if bheadset - topoheads:
398 if bheadset - topoheads:
398 floorrev = min(bheadset)
399 floorrev = min(bheadset)
399 if floorrev <= max(uncertain):
400 if floorrev <= max(uncertain):
400 ancestors = set(cl.ancestors(uncertain, floorrev))
401 ancestors = set(cl.ancestors(uncertain, floorrev))
401 bheadset -= ancestors
402 bheadset -= ancestors
402 if bheadset:
403 if bheadset:
403 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
404 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
404
405
405 duration = util.timer() - starttime
406 duration = util.timer() - starttime
406 repo.ui.log(
407 repo.ui.log(
407 b'branchcache',
408 b'branchcache',
408 b'updated %s in %.4f seconds\n',
409 b'updated %s in %.4f seconds\n',
409 _branchcachedesc(repo),
410 _branchcachedesc(repo),
410 duration,
411 duration,
411 )
412 )
412 return max_rev
413 return max_rev
413
414
414
415
# Lifecycle states stored in `branchcache._state`, describing how the
# in-memory cache relates to its on-disk file.
STATE_CLEAN = 1  # default set in branchcache.__init__; presumably in sync with disk
STATE_INHERITED = 2  # set by branchcache.__init__ when inherited=True
STATE_DIRTY = 3  # set in BranchMapCache.replace before writing; has unwritten changes
418
419
419
420
420 class branchcache(_BaseBranchCache):
421 class branchcache(_BaseBranchCache):
421 """Branchmap info for a local repo or repoview"""
422 """Branchmap info for a local repo or repoview"""
422
423
423 _base_filename = b"branch2"
424 _base_filename = b"branch2"
424
425
425 def __init__(
426 def __init__(
426 self,
427 self,
427 repo: "localrepo.localrepository",
428 repo: "localrepo.localrepository",
428 entries: Union[
429 entries: Union[
429 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
430 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
430 ] = (),
431 ] = (),
431 tipnode: Optional[bytes] = None,
432 tipnode: Optional[bytes] = None,
432 tiprev: Optional[int] = nullrev,
433 tiprev: Optional[int] = nullrev,
433 filteredhash: Optional[bytes] = None,
434 filteredhash: Optional[bytes] = None,
434 closednodes: Optional[Set[bytes]] = None,
435 closednodes: Optional[Set[bytes]] = None,
435 hasnode: Optional[Callable[[bytes], bool]] = None,
436 hasnode: Optional[Callable[[bytes], bool]] = None,
436 verify_node: bool = False,
437 verify_node: bool = False,
437 inherited: bool = False,
438 inherited: bool = False,
438 ) -> None:
439 ) -> None:
439 """hasnode is a function which can be used to verify whether changelog
440 """hasnode is a function which can be used to verify whether changelog
440 has a given node or not. If it's not provided, we assume that every node
441 has a given node or not. If it's not provided, we assume that every node
441 we have exists in changelog"""
442 we have exists in changelog"""
442 self._filtername = repo.filtername
443 self._filtername = repo.filtername
443 if tipnode is None:
444 if tipnode is None:
444 self.tipnode = repo.nullid
445 self.tipnode = repo.nullid
445 else:
446 else:
446 self.tipnode = tipnode
447 self.tipnode = tipnode
447 self.tiprev = tiprev
448 self.tiprev = tiprev
448 self.filteredhash = filteredhash
449 self.filteredhash = filteredhash
449 self._state = STATE_CLEAN
450 self._state = STATE_CLEAN
450 if inherited:
451 if inherited:
451 self._state = STATE_INHERITED
452 self._state = STATE_INHERITED
452
453
453 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
454 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
454 # closednodes is a set of nodes that close their branch. If the branch
455 # closednodes is a set of nodes that close their branch. If the branch
455 # cache has been updated, it may contain nodes that are no longer
456 # cache has been updated, it may contain nodes that are no longer
456 # heads.
457 # heads.
457
458
458 # Do we need to verify branch at all ?
459 # Do we need to verify branch at all ?
459 self._verify_node = verify_node
460 self._verify_node = verify_node
460 # branches for which nodes are verified
461 # branches for which nodes are verified
461 self._verifiedbranches = set()
462 self._verifiedbranches = set()
462 self._hasnode = None
463 self._hasnode = None
463 if self._verify_node:
464 if self._verify_node:
464 self._hasnode = repo.changelog.hasnode
465 self._hasnode = repo.changelog.hasnode
465
466
466 def validfor(self, repo):
467 def validfor(self, repo):
467 """check that cache contents are valid for (a subset of) this repo
468 """check that cache contents are valid for (a subset of) this repo
468
469
469 - False when the order of changesets changed or if we detect a strip.
470 - False when the order of changesets changed or if we detect a strip.
470 - True when cache is up-to-date for the current repo or its subset."""
471 - True when cache is up-to-date for the current repo or its subset."""
471 try:
472 try:
472 node = repo.changelog.node(self.tiprev)
473 node = repo.changelog.node(self.tiprev)
473 except IndexError:
474 except IndexError:
474 # changesets were stripped and now we don't even have enough to
475 # changesets were stripped and now we don't even have enough to
475 # find tiprev
476 # find tiprev
476 return False
477 return False
477 if self.tipnode != node:
478 if self.tipnode != node:
478 # tiprev doesn't correspond to tipnode: repo was stripped, or this
479 # tiprev doesn't correspond to tipnode: repo was stripped, or this
479 # repo has a different order of changesets
480 # repo has a different order of changesets
480 return False
481 return False
481 tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
482 tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
482 # hashes don't match if this repo view has a different set of filtered
483 # hashes don't match if this repo view has a different set of filtered
483 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
484 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
484 # history was rewritten)
485 # history was rewritten)
485 return self.filteredhash == tiphash
486 return self.filteredhash == tiphash
486
487
    @classmethod
    def fromfile(cls, repo):
        """Build a branchcache for ``repo`` from its on-disk cache file.

        Returns the loaded cache instance, or None when the file is absent,
        unreadable, or its content is invalid or stale for this repo view.
        """
        f = None
        try:
            f = repo.cachevfs(cls._filename(repo))
            lineiter = iter(f)
            # first line carries the cache key (tip node, tip rev, opt. hash)
            init_kwargs = cls._load_header(repo, lineiter)
            bcache = cls(
                repo,
                verify_node=True,
                **init_kwargs,
            )
            if not bcache.validfor(repo):
                # invalidate the cache
                raise ValueError('tip differs')
            bcache._load_heads(repo, lineiter)
        except (IOError, OSError):
            # no usable cache file on disk
            return None

        except Exception as inst:
            # any parse/validation error: report in debug mode and fall
            # through with no cache
            if repo.ui.debugflag:
                msg = b'invalid %s: %s\n'
                msg %= (
                    _branchcachedesc(repo),
                    stringutil.forcebytestr(inst),
                )
                repo.ui.debug(msg)
            bcache = None

        finally:
            if f:
                f.close()

        return bcache
521
522
522 @classmethod
523 @classmethod
523 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
524 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
524 """parse the head of a branchmap file
525 """parse the head of a branchmap file
525
526
526 return parameters to pass to a newly created class instance.
527 return parameters to pass to a newly created class instance.
527 """
528 """
528 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
529 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
529 last, lrev = cachekey[:2]
530 last, lrev = cachekey[:2]
530 last, lrev = bin(last), int(lrev)
531 last, lrev = bin(last), int(lrev)
531 filteredhash = None
532 filteredhash = None
532 if len(cachekey) > 2:
533 if len(cachekey) > 2:
533 filteredhash = bin(cachekey[2])
534 filteredhash = bin(cachekey[2])
534 return {
535 return {
535 "tipnode": last,
536 "tipnode": last,
536 "tiprev": lrev,
537 "tiprev": lrev,
537 "filteredhash": filteredhash,
538 "filteredhash": filteredhash,
538 }
539 }
539
540
540 def _load_heads(self, repo, lineiter):
541 def _load_heads(self, repo, lineiter):
541 """fully loads the branchcache by reading from the file using the line
542 """fully loads the branchcache by reading from the file using the line
542 iterator passed"""
543 iterator passed"""
543 for line in lineiter:
544 for line in lineiter:
544 line = line.rstrip(b'\n')
545 line = line.rstrip(b'\n')
545 if not line:
546 if not line:
546 continue
547 continue
547 node, state, label = line.split(b" ", 2)
548 node, state, label = line.split(b" ", 2)
548 if state not in b'oc':
549 if state not in b'oc':
549 raise ValueError('invalid branch state')
550 raise ValueError('invalid branch state')
550 label = encoding.tolocal(label.strip())
551 label = encoding.tolocal(label.strip())
551 node = bin(node)
552 node = bin(node)
552 self._entries.setdefault(label, []).append(node)
553 self._entries.setdefault(label, []).append(node)
553 if state == b'c':
554 if state == b'c':
554 self._closednodes.add(node)
555 self._closednodes.add(node)
555
556
556 @classmethod
557 @classmethod
557 def _filename(cls, repo):
558 def _filename(cls, repo):
558 """name of a branchcache file for a given repo or repoview"""
559 """name of a branchcache file for a given repo or repoview"""
559 filename = cls._base_filename
560 filename = cls._base_filename
560 if repo.filtername:
561 if repo.filtername:
561 filename = b'%s-%s' % (filename, repo.filtername)
562 filename = b'%s-%s' % (filename, repo.filtername)
562 return filename
563 return filename
563
564
564 def inherit_for(self, repo):
565 def inherit_for(self, repo):
565 """return a deep copy of the branchcache object"""
566 """return a deep copy of the branchcache object"""
566 assert repo.filtername != self._filtername
567 assert repo.filtername != self._filtername
567 other = type(self)(
568 other = type(self)(
568 repo=repo,
569 repo=repo,
569 # we always do a shally copy of self._entries, and the values is
570 # we always do a shally copy of self._entries, and the values is
570 # always replaced, so no need to deepcopy until the above remains
571 # always replaced, so no need to deepcopy until the above remains
571 # true.
572 # true.
572 entries=self._entries,
573 entries=self._entries,
573 tipnode=self.tipnode,
574 tipnode=self.tipnode,
574 tiprev=self.tiprev,
575 tiprev=self.tiprev,
575 filteredhash=self.filteredhash,
576 filteredhash=self.filteredhash,
576 closednodes=set(self._closednodes),
577 closednodes=set(self._closednodes),
577 verify_node=self._verify_node,
578 verify_node=self._verify_node,
578 inherited=True,
579 inherited=True,
579 )
580 )
580 # also copy information about the current verification state
581 # also copy information about the current verification state
581 other._verifiedbranches = set(self._verifiedbranches)
582 other._verifiedbranches = set(self._verifiedbranches)
582 return other
583 return other
583
584
584 def sync_disk(self, repo):
585 def sync_disk(self, repo):
585 """synchronise the on disk file with the cache state
586 """synchronise the on disk file with the cache state
586
587
587 If new value specific to this filter level need to be written, the file
588 If new value specific to this filter level need to be written, the file
588 will be updated, if the state of the branchcache is inherited from a
589 will be updated, if the state of the branchcache is inherited from a
589 subset, any stalled on disk file will be deleted.
590 subset, any stalled on disk file will be deleted.
590
591
591 That method does nothing if there is nothing to do.
592 That method does nothing if there is nothing to do.
592 """
593 """
593 if self._state == STATE_DIRTY:
594 if self._state == STATE_DIRTY:
594 self.write(repo)
595 self.write(repo)
595 elif self._state == STATE_INHERITED:
596 elif self._state == STATE_INHERITED:
596 filename = self._filename(repo)
597 filename = self._filename(repo)
597 repo.cachevfs.tryunlink(filename)
598 repo.cachevfs.tryunlink(filename)
598
599
    def write(self, repo):
        """Write the dirty branchcache to disk for ``repo``'s filter level.

        Writing is skipped while a transaction is still open (the cache
        warming hook set up by localrepo writes later); I/O errors are
        logged and swallowed since the cache can always be recomputed.
        """
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        assert self._state == STATE_DIRTY, self._state
        tr = repo.currenttransaction()
        if not getattr(tr, 'finalized', True):
            # Avoid premature writing.
            #
            # (The cache warming setup by localrepo will update the file later.)
            return
        try:
            filename = self._filename(repo)
            # atomictemp so readers never see a half-written cache file
            with repo.cachevfs(filename, b"w", atomictemp=True) as f:
                self._write_header(f)
                nodecount = self._write_heads(f)
                repo.ui.log(
                    b'branchcache',
                    b'wrote %s with %d labels and %d nodes\n',
                    _branchcachedesc(repo),
                    len(self._entries),
                    nodecount,
                )
            self._state = STATE_CLEAN
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug(
                b"couldn't write branch cache: %s\n"
                % stringutil.forcebytestr(inst)
            )
630
631
631 def _write_header(self, fp) -> None:
632 def _write_header(self, fp) -> None:
632 """write the branch cache header to a file"""
633 """write the branch cache header to a file"""
633 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
634 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
634 if self.filteredhash is not None:
635 if self.filteredhash is not None:
635 cachekey.append(hex(self.filteredhash))
636 cachekey.append(hex(self.filteredhash))
636 fp.write(b" ".join(cachekey) + b'\n')
637 fp.write(b" ".join(cachekey) + b'\n')
637
638
638 def _write_heads(self, fp) -> int:
639 def _write_heads(self, fp) -> int:
639 """write list of heads to a file
640 """write list of heads to a file
640
641
641 Return the number of heads written."""
642 Return the number of heads written."""
642 nodecount = 0
643 nodecount = 0
643 for label, nodes in sorted(self._entries.items()):
644 for label, nodes in sorted(self._entries.items()):
644 label = encoding.fromlocal(label)
645 label = encoding.fromlocal(label)
645 for node in nodes:
646 for node in nodes:
646 nodecount += 1
647 nodecount += 1
647 if node in self._closednodes:
648 if node in self._closednodes:
648 state = b'c'
649 state = b'c'
649 else:
650 else:
650 state = b'o'
651 state = b'o'
651 fp.write(b"%s %s %s\n" % (hex(node), state, label))
652 fp.write(b"%s %s %s\n" % (hex(node), state, label))
652 return nodecount
653 return nodecount
653
654
654 def _verifybranch(self, branch):
655 def _verifybranch(self, branch):
655 """verify head nodes for the given branch."""
656 """verify head nodes for the given branch."""
656 if not self._verify_node:
657 if not self._verify_node:
657 return
658 return
658 if branch not in self._entries or branch in self._verifiedbranches:
659 if branch not in self._entries or branch in self._verifiedbranches:
659 return
660 return
660 assert self._hasnode is not None
661 assert self._hasnode is not None
661 for n in self._entries[branch]:
662 for n in self._entries[branch]:
662 if not self._hasnode(n):
663 if not self._hasnode(n):
663 _unknownnode(n)
664 _unknownnode(n)
664
665
665 self._verifiedbranches.add(branch)
666 self._verifiedbranches.add(branch)
666
667
667 def _verifyall(self):
668 def _verifyall(self):
668 """verifies nodes of all the branches"""
669 """verifies nodes of all the branches"""
669 for b in self._entries.keys():
670 for b in self._entries.keys():
670 if b not in self._verifiedbranches:
671 if b not in self._verifiedbranches:
671 self._verifybranch(b)
672 self._verifybranch(b)
672
673
    def __getitem__(self, key):
        # lazily verify this branch's nodes before exposing its heads
        self._verifybranch(key)
        return super().__getitem__(key)
676
677
    def __contains__(self, key):
        # verify lazily so stale entries are caught on first membership test
        self._verifybranch(key)
        return super().__contains__(key)
680
681
    def iteritems(self):
        """Iterate over (branch, heads) pairs, verifying every branch first."""
        self._verifyall()
        return super().iteritems()

    # dict-style alias
    items = iteritems
686
687
    def iterheads(self):
        """returns all the heads"""
        # verify every branch before handing out head lists
        self._verifyall()
        return super().iterheads()
691
692
    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        # verification may drop nothing here, but keeps error reporting eager
        self._verifybranch(label)
        return super().hasbranch(label)
696
697
    def branchheads(self, branch, closed=False):
        # verify this branch before delegating to the base implementation
        self._verifybranch(branch)
        return super().branchheads(branch, closed=closed)
700
701
    def update(self, repo, revgen):
        """Incorporate the revisions from ``revgen`` and refresh the cache
        key, then mark the cache dirty and write it out."""
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        cl = repo.changelog
        max_rev = super().update(repo, revgen)
        # new tip revision which we found after iterating items from new
        # branches
        if max_rev is not None and max_rev > self.tiprev:
            self.tiprev = max_rev
            self.tipnode = cl.node(max_rev)

        if not self.validfor(repo):
            # old cache key is now invalid for the repo, but we've just updated
            # the cache and we assume it's valid, so let's make the cache key
            # valid as well by recomputing it from the cached data
            self.tipnode = repo.nullid
            self.tiprev = nullrev
            for heads in self.iterheads():
                if not heads:
                    # all revisions on a branch are obsolete
                    continue
                # note: tiprev is not necessarily the tip revision of repo,
                # because the tip could be obsolete (i.e. not a head)
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(
            repo, self.tiprev, needobsolete=True
        )
        self._state = STATE_DIRTY
        self.write(repo)
735
736
736
737
class remotebranchcache(_BaseBranchCache):
    """Branchmap info for a remote connection, should not write locally"""

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closednodes: Optional[Set[bytes]] = None,
    ) -> None:
        # plain pass-through to the base class: a remote cache keeps no
        # filter-level or on-disk state of its own
        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
749
750
750
751
# Revision branch info cache

_rbcversion = b'-v1'
_rbcnames = b'rbc-names' + _rbcversion
_rbcrevs = b'rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit flagging a
# branch-closing changeset (see _rbccloseflag below)]
_rbcrecfmt = b'>4sI'
# size in bytes of one fixed-width record in rbc-revs
_rbcrecsize = calcsize(_rbcrecfmt)
# 64 records; presumably a minimum growth increment — used outside this chunk
_rbcmininc = 64 * _rbcrecsize
# number of node-hash prefix bytes stored per record
_rbcnodelen = 4
# lower 31 bits of a record's second field: index into rbc-names
_rbcbranchidxmask = 0x7FFFFFFF
# high bit of the branch index marks a branch-closing changeset
_rbccloseflag = 0x80000000
763
764
764
765
class rbcrevs:
    """A byte string made of an immutable prefix plus a mutable suffix."""

    def __init__(self, revs):
        self._prefix = revs
        self._rest = bytearray()

    def __len__(self):
        return len(self._rest) + len(self._prefix)

    def unpack_record(self, rbcrevidx):
        """Decode and return the record starting at byte ``rbcrevidx``."""
        prefix_len = len(self._prefix)
        if rbcrevidx < prefix_len:
            return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
        return unpack_from(
            _rbcrecfmt,
            util.buffer(self._rest),
            rbcrevidx - prefix_len,
        )

    def make_mutable(self):
        """Fold the immutable prefix into the mutable suffix so in-place
        writes become possible anywhere in the data."""
        if len(self._prefix) > 0:
            merged = bytearray()
            merged[:] = self._prefix
            merged.extend(self._rest)
            self._rest = merged
            self._prefix = bytearray()

    def truncate(self, pos):
        """Discard everything from byte offset ``pos`` onwards."""
        self.make_mutable()
        del self._rest[pos:]

    def pack_into(self, rbcrevidx, node, branchidx):
        """Write one record at byte ``rbcrevidx``, growing the buffer as
        needed."""
        if rbcrevidx < len(self._prefix):
            self.make_mutable()
        buf = self._rest
        start_offset = rbcrevidx - len(self._prefix)
        end_offset = start_offset + _rbcrecsize

        if len(self._rest) < end_offset:
            # bytearray doesn't allocate extra space at least in Python 3.7.
            # When multiple changesets are added in a row, precise resize would
            # result in quadratic complexity. Overallocate to compensate by
            # using the classic doubling technique for dynamic arrays instead.
            # If there was a gap in the map before, less space will be reserved.
            self._rest.extend(b'\0' * end_offset)
        return pack_into(
            _rbcrecfmt,
            buf,
            start_offset,
            node,
            branchidx,
        )

    def extend(self, extension):
        return self._rest.extend(extension)

    def slice(self, begin, end):
        """Return the bytes in [begin, end), crossing the prefix/suffix
        boundary when necessary."""
        prefix_len = len(self._prefix)
        if begin < prefix_len:
            out = bytearray()
            out[:] = self._prefix[begin:end]
            out.extend(self._rest[begin - prefix_len : end - prefix_len])
            return out
        return self._rest[begin - prefix_len : end - prefix_len]
831
832
832
833
833 class revbranchcache:
834 class revbranchcache:
834 """Persistent cache, mapping from revision number to branch name and close.
835 """Persistent cache, mapping from revision number to branch name and close.
835 This is a low level cache, independent of filtering.
836 This is a low level cache, independent of filtering.
836
837
837 Branch names are stored in rbc-names in internal encoding separated by 0.
838 Branch names are stored in rbc-names in internal encoding separated by 0.
838 rbc-names is append-only, and each branch name is only stored once and will
839 rbc-names is append-only, and each branch name is only stored once and will
839 thus have a unique index.
840 thus have a unique index.
840
841
841 The branch info for each revision is stored in rbc-revs as constant size
842 The branch info for each revision is stored in rbc-revs as constant size
842 records. The whole file is read into memory, but it is only 'parsed' on
843 records. The whole file is read into memory, but it is only 'parsed' on
843 demand. The file is usually append-only but will be truncated if repo
844 demand. The file is usually append-only but will be truncated if repo
844 modification is detected.
845 modification is detected.
845 The record for each revision contains the first 4 bytes of the
846 The record for each revision contains the first 4 bytes of the
846 corresponding node hash, and the record is only used if it still matches.
847 corresponding node hash, and the record is only used if it still matches.
847 Even a completely trashed rbc-revs fill thus still give the right result
848 Even a completely trashed rbc-revs fill thus still give the right result
848 while converging towards full recovery ... assuming no incorrectly matching
849 while converging towards full recovery ... assuming no incorrectly matching
849 node hashes.
850 node hashes.
850 The record also contains 4 bytes where 31 bits contains the index of the
851 The record also contains 4 bytes where 31 bits contains the index of the
851 branch and the last bit indicate that it is a branch close commit.
852 branch and the last bit indicate that it is a branch close commit.
852 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
853 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
853 and will grow with it but be 1/8th of its size.
854 and will grow with it but be 1/8th of its size.
854 """
855 """
855
856
    def __init__(self, repo, readonly=True):
        """Read the rbc-names and rbc-revs cache files into memory.

        Any read failure degrades gracefully: in readonly mode branchinfo()
        is rebound to the uncached slow path; otherwise the in-memory cache
        simply starts empty.
        """
        assert repo.filtername is None
        self._repo = repo
        self._names = []  # branch names in local encoding with static index
        self._rbcrevs = rbcrevs(bytearray())
        self._rbcsnameslen = 0  # length of names read at _rbcsnameslen
        try:
            bndata = repo.cachevfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata)  # for verification before writing
            if bndata:
                self._names = [
                    encoding.tolocal(bn) for bn in bndata.split(b'\0')
                ]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        if self._names:
            try:
                # optionally mmap the records instead of reading them fully
                if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
                    with repo.cachevfs(_rbcrevs) as fp:
                        data = util.buffer(util.mmapread(fp))
                else:
                    data = repo.cachevfs.read(_rbcrevs)
                self._rbcrevs = rbcrevs(data)
            except (IOError, OSError) as inst:
                repo.ui.debug(
                    b"couldn't read revision branch cache: %s\n"
                    % stringutil.forcebytestr(inst)
                )
        # remember number of good records on disk
        self._rbcrevslen = min(
            len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
        )
        if self._rbcrevslen == 0:
            self._names = []
        self._rbcnamescount = len(self._names)  # number of names read at
        # _rbcsnameslen
895
896
    def _clear(self):
        """Drop all names and records, resizing the record array to the
        current changelog length (records all zeroed, i.e. invalid)."""
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._rbcrevslen = len(self._repo.changelog)
        self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
        # the name->index map must be rebuilt from the (now empty) name list
        util.clearcachedproperty(self, b'_namesreverse')
903
904
904 @util.propertycache
905 @util.propertycache
905 def _namesreverse(self):
906 def _namesreverse(self):
906 return {b: r for r, b in enumerate(self._names)}
907 return {b: r for r, b in enumerate(self._names)}
907
908
    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == b'\0\0\0\0':
            # all-zero record: slot was never written, fall through to the
            # slow path below
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug(
                    b"referenced branch names not found"
                    b" - rebuilding revision branch cache from scratch\n"
                )
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug(
                b"history modification detected - truncating "
                b"revision branch cache to revision %d\n" % rev
            )
            truncate = rbcrevidx + _rbcrecsize
            self._rbcrevs.truncate(truncate)
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)
952
953
953 def _branchinfo(self, rev):
954 def _branchinfo(self, rev):
954 """Retrieve branch info from changelog and update _rbcrevs"""
955 """Retrieve branch info from changelog and update _rbcrevs"""
955 changelog = self._repo.changelog
956 changelog = self._repo.changelog
956 b, close = changelog.branchinfo(rev)
957 b, close = changelog.branchinfo(rev)
957 if b in self._namesreverse:
958 if b in self._namesreverse:
958 branchidx = self._namesreverse[b]
959 branchidx = self._namesreverse[b]
959 else:
960 else:
960 branchidx = len(self._names)
961 branchidx = len(self._names)
961 self._names.append(b)
962 self._names.append(b)
962 self._namesreverse[b] = branchidx
963 self._namesreverse[b] = branchidx
963 reponode = changelog.node(rev)
964 reponode = changelog.node(rev)
964 if close:
965 if close:
965 branchidx |= _rbccloseflag
966 branchidx |= _rbccloseflag
966 self._setcachedata(rev, reponode, branchidx)
967 self._setcachedata(rev, reponode, branchidx)
967 return b, close
968 return b, close
968
969
    def setdata(self, rev, changelogrevision):
        """add new data information to the cache"""
        branch, close = changelogrevision.branchinfo

        if branch in self._namesreverse:
            branchidx = self._namesreverse[branch]
        else:
            # first time we see this branch: assign it the next index
            branchidx = len(self._names)
            self._names.append(branch)
            self._namesreverse[branch] = branchidx
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
        # If no cache data were readable (non exists, bad permission, etc)
        # the cache was bypassing itself by setting:
        #
        #   self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypassing.
        if 'branchinfo' in vars(self):
            del self.branchinfo
990
991
991 def _setcachedata(self, rev, node, branchidx):
992 def _setcachedata(self, rev, node, branchidx):
992 """Writes the node's branch data to the in-memory cache data."""
993 """Writes the node's branch data to the in-memory cache data."""
993 if rev == nullrev:
994 if rev == nullrev:
994 return
995 return
995 rbcrevidx = rev * _rbcrecsize
996 rbcrevidx = rev * _rbcrecsize
996 self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
997 self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
997 self._rbcrevslen = min(self._rbcrevslen, rev)
998 self._rbcrevslen = min(self._rbcrevslen, rev)
998
999
999 tr = self._repo.currenttransaction()
1000 tr = self._repo.currenttransaction()
1000 if tr:
1001 if tr:
1001 tr.addfinalize(b'write-revbranchcache', self.write)
1002 tr.addfinalize(b'write-revbranchcache', self.write)
1002
1003
    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        # remembered so the error message can say which step failed
        step = b''
        try:
            # write the new names
            if self._rbcnamescount < len(self._names):
                # wait=False: best effort, skip if the lock is busy
                wlock = repo.wlock(wait=False)
                step = b' names'
                self._writenames(repo)

            # write the new revs
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = b''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                self._writerevs(repo, start)

        except (IOError, OSError, error.Abort, error.LockError) as inst:
            # best effort: the cache can always be recomputed later
            repo.ui.debug(
                b"couldn't write revision branch cache%s: %s\n"
                % (step, stringutil.forcebytestr(inst))
            )
        finally:
            if wlock is not None:
                wlock.release()
1031
1032
1032 def _writenames(self, repo):
1033 def _writenames(self, repo):
1033 """write the new branch names to revbranchcache"""
1034 """write the new branch names to revbranchcache"""
1034 if self._rbcnamescount != 0:
1035 if self._rbcnamescount != 0:
1035 f = repo.cachevfs.open(_rbcnames, b'ab')
1036 f = repo.cachevfs.open(_rbcnames, b'ab')
1036 if f.tell() == self._rbcsnameslen:
1037 if f.tell() == self._rbcsnameslen:
1037 f.write(b'\0')
1038 f.write(b'\0')
1038 else:
1039 else:
1039 f.close()
1040 f.close()
1040 repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
1041 repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
1041 self._rbcnamescount = 0
1042 self._rbcnamescount = 0
1042 self._rbcrevslen = 0
1043 self._rbcrevslen = 0
1043 if self._rbcnamescount == 0:
1044 if self._rbcnamescount == 0:
1044 # before rewriting names, make sure references are removed
1045 # before rewriting names, make sure references are removed
1045 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
1046 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
1046 f = repo.cachevfs.open(_rbcnames, b'wb')
1047 f = repo.cachevfs.open(_rbcnames, b'wb')
1047 f.write(
1048 f.write(
1048 b'\0'.join(
1049 b'\0'.join(
1049 encoding.fromlocal(b)
1050 encoding.fromlocal(b)
1050 for b in self._names[self._rbcnamescount :]
1051 for b in self._names[self._rbcnamescount :]
1051 )
1052 )
1052 )
1053 )
1053 self._rbcsnameslen = f.tell()
1054 self._rbcsnameslen = f.tell()
1054 f.close()
1055 f.close()
1055 self._rbcnamescount = len(self._names)
1056 self._rbcnamescount = len(self._names)
1056
1057
1057 def _writerevs(self, repo, start):
1058 def _writerevs(self, repo, start):
1058 """write the new revs to revbranchcache"""
1059 """write the new revs to revbranchcache"""
1059 revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
1060 revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
1060 with repo.cachevfs.open(_rbcrevs, b'ab') as f:
1061 with repo.cachevfs.open(_rbcrevs, b'ab') as f:
1061 if f.tell() != start:
1062 if f.tell() != start:
1062 repo.ui.debug(
1063 repo.ui.debug(
1063 b"truncating cache/%s to %d\n" % (_rbcrevs, start)
1064 b"truncating cache/%s to %d\n" % (_rbcrevs, start)
1064 )
1065 )
1065 f.seek(start)
1066 f.seek(start)
1066 if f.tell() != start:
1067 if f.tell() != start:
1067 start = 0
1068 start = 0
1068 f.seek(start)
1069 f.seek(start)
1069 f.truncate()
1070 f.truncate()
1070 end = revs * _rbcrecsize
1071 end = revs * _rbcrecsize
1071 f.write(self._rbcrevs.slice(start, end))
1072 f.write(self._rbcrevs.slice(start, end))
1072 self._rbcrevslen = revs
1073 self._rbcrevslen = revs
General Comments 0
You need to be logged in to leave comments. Login now