branchcache: move the header loading in a `_load_header` class method...
marmoute
r52356:87b830e4 default
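The change itself is small: `fromfile` used to parse the cache-key line inline, and now delegates to a new `_load_header` classmethod that returns keyword arguments for the constructor, so a subclass can reinterpret the header without duplicating the rest of `fromfile`. A minimal sketch of that extension point, assuming the refactored `branchcache` in the diff below; `verbosebranchcache` and its file name are hypothetical, purely for illustration:

from mercurial.branchmap import branchcache


class verbosebranchcache(branchcache):
    """Hypothetical subclass showing the _load_header extension point."""

    # hypothetical cache file name, so the two formats don't collide on disk
    _base_filename = b"branch2-verbose"

    @classmethod
    def _load_header(cls, repo, lineiter):
        # reuse the stock parsing of '<tip node> <tip rev> [filtered hash]'
        init_kwargs = super()._load_header(repo, lineiter)
        # observe (without altering) what will be passed to cls(...)
        repo.ui.debug(b'branch cache header: tiprev=%d\n' % init_kwargs["tiprev"])
        return init_kwargs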
@@ -1,1019 +1,1031 @@
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import struct
9 import struct
10
10
11 from .node import (
11 from .node import (
12 bin,
12 bin,
13 hex,
13 hex,
14 nullrev,
14 nullrev,
15 )
15 )
16
16
17 from typing import (
17 from typing import (
18 Any,
18 Callable,
19 Callable,
19 Dict,
20 Dict,
20 Iterable,
21 Iterable,
21 List,
22 List,
22 Optional,
23 Optional,
23 Set,
24 Set,
24 TYPE_CHECKING,
25 TYPE_CHECKING,
25 Tuple,
26 Tuple,
26 Union,
27 Union,
27 )
28 )
28
29
29 from . import (
30 from . import (
30 encoding,
31 encoding,
31 error,
32 error,
32 obsolete,
33 obsolete,
33 scmutil,
34 scmutil,
34 util,
35 util,
35 )
36 )
36
37
37 from .utils import (
38 from .utils import (
38 repoviewutil,
39 repoviewutil,
39 stringutil,
40 stringutil,
40 )
41 )
41
42
42 if TYPE_CHECKING:
43 if TYPE_CHECKING:
43 from . import localrepo
44 from . import localrepo
44
45
45 assert [localrepo]
46 assert [localrepo]
46
47
47 subsettable = repoviewutil.subsettable
48 subsettable = repoviewutil.subsettable
48
49
49 calcsize = struct.calcsize
50 calcsize = struct.calcsize
50 pack_into = struct.pack_into
51 pack_into = struct.pack_into
51 unpack_from = struct.unpack_from
52 unpack_from = struct.unpack_from
52
53
53
54
54 class BranchMapCache:
55 class BranchMapCache:
55 """mapping of filtered views of repo with their branchcache"""
56 """mapping of filtered views of repo with their branchcache"""
56
57
57 def __init__(self):
58 def __init__(self):
58 self._per_filter = {}
59 self._per_filter = {}
59
60
60 def __getitem__(self, repo):
61 def __getitem__(self, repo):
61 self.updatecache(repo)
62 self.updatecache(repo)
62 bcache = self._per_filter[repo.filtername]
63 bcache = self._per_filter[repo.filtername]
63 assert bcache._filtername == repo.filtername, (
64 assert bcache._filtername == repo.filtername, (
64 bcache._filtername,
65 bcache._filtername,
65 repo.filtername,
66 repo.filtername,
66 )
67 )
67 return bcache
68 return bcache
68
69
69 def update_disk(self, repo):
70 def update_disk(self, repo):
70 """ensure and up-to-date cache is (or will be) written on disk
71 """ensure and up-to-date cache is (or will be) written on disk
71
72
72 The cache for this repository view is updated if needed and written on
73 The cache for this repository view is updated if needed and written on
73 disk.
74 disk.
74
75
75 If a transaction is in progress, the writing is schedule to transaction
76 If a transaction is in progress, the writing is schedule to transaction
76 close. See the `BranchMapCache.write_delayed` method.
77 close. See the `BranchMapCache.write_delayed` method.
77
78
78 This method exist independently of __getitem__ as it is sometime useful
79 This method exist independently of __getitem__ as it is sometime useful
79 to signal that we have no intend to use the data in memory yet.
80 to signal that we have no intend to use the data in memory yet.
80 """
81 """
81 self.updatecache(repo)
82 self.updatecache(repo)
82 bcache = self._per_filter[repo.filtername]
83 bcache = self._per_filter[repo.filtername]
83 assert bcache._filtername == repo.filtername, (
84 assert bcache._filtername == repo.filtername, (
84 bcache._filtername,
85 bcache._filtername,
85 repo.filtername,
86 repo.filtername,
86 )
87 )
87 bcache.write(repo)
88 bcache.write(repo)
88
89
89 def updatecache(self, repo):
90 def updatecache(self, repo):
90 """Update the cache for the given filtered view on a repository"""
91 """Update the cache for the given filtered view on a repository"""
91 # This can trigger updates for the caches for subsets of the filtered
92 # This can trigger updates for the caches for subsets of the filtered
92 # view, e.g. when there is no cache for this filtered view or the cache
93 # view, e.g. when there is no cache for this filtered view or the cache
93 # is stale.
94 # is stale.
94
95
95 cl = repo.changelog
96 cl = repo.changelog
96 filtername = repo.filtername
97 filtername = repo.filtername
97 bcache = self._per_filter.get(filtername)
98 bcache = self._per_filter.get(filtername)
98 if bcache is None or not bcache.validfor(repo):
99 if bcache is None or not bcache.validfor(repo):
99 # cache object missing or cache object stale? Read from disk
100 # cache object missing or cache object stale? Read from disk
100 bcache = branchcache.fromfile(repo)
101 bcache = branchcache.fromfile(repo)
101
102
102 revs = []
103 revs = []
103 if bcache is None:
104 if bcache is None:
104 # no (fresh) cache available anymore, perhaps we can re-use
105 # no (fresh) cache available anymore, perhaps we can re-use
105 # the cache for a subset, then extend that to add info on missing
106 # the cache for a subset, then extend that to add info on missing
106 # revisions.
107 # revisions.
107 subsetname = subsettable.get(filtername)
108 subsetname = subsettable.get(filtername)
108 if subsetname is not None:
109 if subsetname is not None:
109 subset = repo.filtered(subsetname)
110 subset = repo.filtered(subsetname)
110 bcache = self[subset].copy(repo)
111 bcache = self[subset].copy(repo)
111 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
112 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
112 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
113 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
113 else:
114 else:
114 # nothing to fall back on, start empty.
115 # nothing to fall back on, start empty.
115 bcache = branchcache(repo)
116 bcache = branchcache(repo)
116
117
117 revs.extend(cl.revs(start=bcache.tiprev + 1))
118 revs.extend(cl.revs(start=bcache.tiprev + 1))
118 if revs:
119 if revs:
119 bcache.update(repo, revs)
120 bcache.update(repo, revs)
120
121
121 assert bcache.validfor(repo), filtername
122 assert bcache.validfor(repo), filtername
122 self._per_filter[repo.filtername] = bcache
123 self._per_filter[repo.filtername] = bcache
123
124
124 def replace(self, repo, remotebranchmap):
125 def replace(self, repo, remotebranchmap):
125 """Replace the branchmap cache for a repo with a branch mapping.
126 """Replace the branchmap cache for a repo with a branch mapping.
126
127
127 This is likely only called during clone with a branch map from a
128 This is likely only called during clone with a branch map from a
128 remote.
129 remote.
129
130
130 """
131 """
131 cl = repo.changelog
132 cl = repo.changelog
132 clrev = cl.rev
133 clrev = cl.rev
133 clbranchinfo = cl.branchinfo
134 clbranchinfo = cl.branchinfo
134 rbheads = []
135 rbheads = []
135 closed = set()
136 closed = set()
136 for bheads in remotebranchmap.values():
137 for bheads in remotebranchmap.values():
137 rbheads += bheads
138 rbheads += bheads
138 for h in bheads:
139 for h in bheads:
139 r = clrev(h)
140 r = clrev(h)
140 b, c = clbranchinfo(r)
141 b, c = clbranchinfo(r)
141 if c:
142 if c:
142 closed.add(h)
143 closed.add(h)
143
144
144 if rbheads:
145 if rbheads:
145 rtiprev = max((int(clrev(node)) for node in rbheads))
146 rtiprev = max((int(clrev(node)) for node in rbheads))
146 cache = branchcache(
147 cache = branchcache(
147 repo,
148 repo,
148 remotebranchmap,
149 remotebranchmap,
149 repo[rtiprev].node(),
150 repo[rtiprev].node(),
150 rtiprev,
151 rtiprev,
151 closednodes=closed,
152 closednodes=closed,
152 )
153 )
153
154
154 # Try to stick it as low as possible
155 # Try to stick it as low as possible
155 # filter above served are unlikely to be fetch from a clone
156 # filter above served are unlikely to be fetch from a clone
156 for candidate in (b'base', b'immutable', b'served'):
157 for candidate in (b'base', b'immutable', b'served'):
157 rview = repo.filtered(candidate)
158 rview = repo.filtered(candidate)
158 if cache.validfor(rview):
159 if cache.validfor(rview):
159 cache = self._per_filter[candidate] = cache.copy(rview)
160 cache = self._per_filter[candidate] = cache.copy(rview)
160 cache.write(rview)
161 cache.write(rview)
161 return
162 return
162
163
163 def clear(self):
164 def clear(self):
164 self._per_filter.clear()
165 self._per_filter.clear()
165
166
166 def write_delayed(self, repo):
167 def write_delayed(self, repo):
167 unfi = repo.unfiltered()
168 unfi = repo.unfiltered()
168 for filtername, cache in self._per_filter.items():
169 for filtername, cache in self._per_filter.items():
169 if cache._delayed:
170 if cache._delayed:
170 repo = unfi.filtered(filtername)
171 repo = unfi.filtered(filtername)
171 cache.write(repo)
172 cache.write(repo)
172
173
173
174
174 def _unknownnode(node):
175 def _unknownnode(node):
175 """raises ValueError when branchcache found a node which does not exists"""
176 """raises ValueError when branchcache found a node which does not exists"""
176 raise ValueError('node %s does not exist' % node.hex())
177 raise ValueError('node %s does not exist' % node.hex())
177
178
178
179
179 def _branchcachedesc(repo):
180 def _branchcachedesc(repo):
180 if repo.filtername is not None:
181 if repo.filtername is not None:
181 return b'branch cache (%s)' % repo.filtername
182 return b'branch cache (%s)' % repo.filtername
182 else:
183 else:
183 return b'branch cache'
184 return b'branch cache'
184
185
185
186
186 class _BaseBranchCache:
187 class _BaseBranchCache:
187 """A dict like object that hold branches heads cache.
188 """A dict like object that hold branches heads cache.
188
189
189 This cache is used to avoid costly computations to determine all the
190 This cache is used to avoid costly computations to determine all the
190 branch heads of a repo.
191 branch heads of a repo.
191
192
192 The cache is serialized on disk in the following format:
193 The cache is serialized on disk in the following format:
193
194
194 <tip hex node> <tip rev number> [optional filtered repo hex hash]
195 <tip hex node> <tip rev number> [optional filtered repo hex hash]
195 <branch head hex node> <open/closed state> <branch name>
196 <branch head hex node> <open/closed state> <branch name>
196 <branch head hex node> <open/closed state> <branch name>
197 <branch head hex node> <open/closed state> <branch name>
197 ...
198 ...
198
199
199 The first line is used to check if the cache is still valid. If the
200 The first line is used to check if the cache is still valid. If the
200 branch cache is for a filtered repo view, an optional third hash is
201 branch cache is for a filtered repo view, an optional third hash is
201 included that hashes the hashes of all filtered and obsolete revisions.
202 included that hashes the hashes of all filtered and obsolete revisions.
202
203
203 The open/closed state is represented by a single letter 'o' or 'c'.
204 The open/closed state is represented by a single letter 'o' or 'c'.
204 This field can be used to avoid changelog reads when determining if a
205 This field can be used to avoid changelog reads when determining if a
205 branch head closes a branch or not.
206 branch head closes a branch or not.
206 """
207 """
207
208
208 def __init__(
209 def __init__(
209 self,
210 self,
210 repo: "localrepo.localrepository",
211 repo: "localrepo.localrepository",
211 entries: Union[
212 entries: Union[
212 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
213 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
213 ] = (),
214 ] = (),
214 closed_nodes: Optional[Set[bytes]] = None,
215 closed_nodes: Optional[Set[bytes]] = None,
215 ) -> None:
216 ) -> None:
216 """hasnode is a function which can be used to verify whether changelog
217 """hasnode is a function which can be used to verify whether changelog
217 has a given node or not. If it's not provided, we assume that every node
218 has a given node or not. If it's not provided, we assume that every node
218 we have exists in changelog"""
219 we have exists in changelog"""
219 # closednodes is a set of nodes that close their branch. If the branch
220 # closednodes is a set of nodes that close their branch. If the branch
220 # cache has been updated, it may contain nodes that are no longer
221 # cache has been updated, it may contain nodes that are no longer
221 # heads.
222 # heads.
222 if closed_nodes is None:
223 if closed_nodes is None:
223 closed_nodes = set()
224 closed_nodes = set()
224 self._closednodes = set(closed_nodes)
225 self._closednodes = set(closed_nodes)
225 self._entries = dict(entries)
226 self._entries = dict(entries)
226
227
227 def __iter__(self):
228 def __iter__(self):
228 return iter(self._entries)
229 return iter(self._entries)
229
230
230 def __setitem__(self, key, value):
231 def __setitem__(self, key, value):
231 self._entries[key] = value
232 self._entries[key] = value
232
233
233 def __getitem__(self, key):
234 def __getitem__(self, key):
234 return self._entries[key]
235 return self._entries[key]
235
236
236 def __contains__(self, key):
237 def __contains__(self, key):
237 return key in self._entries
238 return key in self._entries
238
239
239 def iteritems(self):
240 def iteritems(self):
240 return self._entries.items()
241 return self._entries.items()
241
242
242 items = iteritems
243 items = iteritems
243
244
244 def hasbranch(self, label):
245 def hasbranch(self, label):
245 """checks whether a branch of this name exists or not"""
246 """checks whether a branch of this name exists or not"""
246 return label in self._entries
247 return label in self._entries
247
248
248 def _branchtip(self, heads):
249 def _branchtip(self, heads):
249 """Return tuple with last open head in heads and false,
250 """Return tuple with last open head in heads and false,
250 otherwise return last closed head and true."""
251 otherwise return last closed head and true."""
251 tip = heads[-1]
252 tip = heads[-1]
252 closed = True
253 closed = True
253 for h in reversed(heads):
254 for h in reversed(heads):
254 if h not in self._closednodes:
255 if h not in self._closednodes:
255 tip = h
256 tip = h
256 closed = False
257 closed = False
257 break
258 break
258 return tip, closed
259 return tip, closed
259
260
260 def branchtip(self, branch):
261 def branchtip(self, branch):
261 """Return the tipmost open head on branch head, otherwise return the
262 """Return the tipmost open head on branch head, otherwise return the
262 tipmost closed head on branch.
263 tipmost closed head on branch.
263 Raise KeyError for unknown branch."""
264 Raise KeyError for unknown branch."""
264 return self._branchtip(self[branch])[0]
265 return self._branchtip(self[branch])[0]
265
266
266 def iteropen(self, nodes):
267 def iteropen(self, nodes):
267 return (n for n in nodes if n not in self._closednodes)
268 return (n for n in nodes if n not in self._closednodes)
268
269
269 def branchheads(self, branch, closed=False):
270 def branchheads(self, branch, closed=False):
270 heads = self._entries[branch]
271 heads = self._entries[branch]
271 if not closed:
272 if not closed:
272 heads = list(self.iteropen(heads))
273 heads = list(self.iteropen(heads))
273 return heads
274 return heads
274
275
275 def iterbranches(self):
276 def iterbranches(self):
276 for bn, heads in self.items():
277 for bn, heads in self.items():
277 yield (bn, heads) + self._branchtip(heads)
278 yield (bn, heads) + self._branchtip(heads)
278
279
279 def iterheads(self):
280 def iterheads(self):
280 """returns all the heads"""
281 """returns all the heads"""
281 return self._entries.values()
282 return self._entries.values()
282
283
283 def update(self, repo, revgen):
284 def update(self, repo, revgen):
284 """Given a branchhead cache, self, that may have extra nodes or be
285 """Given a branchhead cache, self, that may have extra nodes or be
285 missing heads, and a generator of nodes that are strictly a superset of
286 missing heads, and a generator of nodes that are strictly a superset of
286 heads missing, this function updates self to be correct.
287 heads missing, this function updates self to be correct.
287 """
288 """
288 starttime = util.timer()
289 starttime = util.timer()
289 cl = repo.changelog
290 cl = repo.changelog
290 # collect new branch entries
291 # collect new branch entries
291 newbranches = {}
292 newbranches = {}
292 getbranchinfo = repo.revbranchcache().branchinfo
293 getbranchinfo = repo.revbranchcache().branchinfo
293 max_rev = -1
294 max_rev = -1
294 for r in revgen:
295 for r in revgen:
295 branch, closesbranch = getbranchinfo(r)
296 branch, closesbranch = getbranchinfo(r)
296 newbranches.setdefault(branch, []).append(r)
297 newbranches.setdefault(branch, []).append(r)
297 if closesbranch:
298 if closesbranch:
298 self._closednodes.add(cl.node(r))
299 self._closednodes.add(cl.node(r))
299 max_rev = max(max_rev, r)
300 max_rev = max(max_rev, r)
300 if max_rev < 0:
301 if max_rev < 0:
301 max_rev = None
302 max_rev = None
302
303
303 # Delay fetching the topological heads until they are needed.
304 # Delay fetching the topological heads until they are needed.
304 # A repository without non-continous branches can skip this part.
305 # A repository without non-continous branches can skip this part.
305 topoheads = None
306 topoheads = None
306
307
307 # If a changeset is visible, its parents must be visible too, so
308 # If a changeset is visible, its parents must be visible too, so
308 # use the faster unfiltered parent accessor.
309 # use the faster unfiltered parent accessor.
309 parentrevs = repo.unfiltered().changelog.parentrevs
310 parentrevs = repo.unfiltered().changelog.parentrevs
310
311
311 # Faster than using ctx.obsolete()
312 # Faster than using ctx.obsolete()
312 obsrevs = obsolete.getrevs(repo, b'obsolete')
313 obsrevs = obsolete.getrevs(repo, b'obsolete')
313
314
314 for branch, newheadrevs in newbranches.items():
315 for branch, newheadrevs in newbranches.items():
315 # For every branch, compute the new branchheads.
316 # For every branch, compute the new branchheads.
316 # A branchhead is a revision such that no descendant is on
317 # A branchhead is a revision such that no descendant is on
317 # the same branch.
318 # the same branch.
318 #
319 #
319 # The branchheads are computed iteratively in revision order.
320 # The branchheads are computed iteratively in revision order.
320 # This ensures topological order, i.e. parents are processed
321 # This ensures topological order, i.e. parents are processed
321 # before their children. Ancestors are inclusive here, i.e.
322 # before their children. Ancestors are inclusive here, i.e.
322 # any revision is an ancestor of itself.
323 # any revision is an ancestor of itself.
323 #
324 #
324 # Core observations:
325 # Core observations:
325 # - The current revision is always a branchhead for the
326 # - The current revision is always a branchhead for the
326 # repository up to that point.
327 # repository up to that point.
327 # - It is the first revision of the branch if and only if
328 # - It is the first revision of the branch if and only if
328 # there was no branchhead before. In that case, it is the
329 # there was no branchhead before. In that case, it is the
329 # only branchhead as there are no possible ancestors on
330 # only branchhead as there are no possible ancestors on
330 # the same branch.
331 # the same branch.
331 # - If a parent is on the same branch, a branchhead can
332 # - If a parent is on the same branch, a branchhead can
332 # only be an ancestor of that parent, if it is parent
333 # only be an ancestor of that parent, if it is parent
333 # itself. Otherwise it would have been removed as ancestor
334 # itself. Otherwise it would have been removed as ancestor
334 # of that parent before.
335 # of that parent before.
335 # - Therefore, if all parents are on the same branch, they
336 # - Therefore, if all parents are on the same branch, they
336 # can just be removed from the branchhead set.
337 # can just be removed from the branchhead set.
337 # - If one parent is on the same branch and the other is not
338 # - If one parent is on the same branch and the other is not
338 # and there was exactly one branchhead known, the existing
339 # and there was exactly one branchhead known, the existing
339 # branchhead can only be an ancestor if it is the parent.
340 # branchhead can only be an ancestor if it is the parent.
340 # Otherwise it would have been removed as ancestor of
341 # Otherwise it would have been removed as ancestor of
341 # the parent before. The other parent therefore can't have
342 # the parent before. The other parent therefore can't have
342 # a branchhead as ancestor.
343 # a branchhead as ancestor.
343 # - In all other cases, the parents on different branches
344 # - In all other cases, the parents on different branches
344 # could have a branchhead as ancestor. Those parents are
345 # could have a branchhead as ancestor. Those parents are
345 # kept in the "uncertain" set. If all branchheads are also
346 # kept in the "uncertain" set. If all branchheads are also
346 # topological heads, they can't have descendants and further
347 # topological heads, they can't have descendants and further
347 # checks can be skipped. Otherwise, the ancestors of the
348 # checks can be skipped. Otherwise, the ancestors of the
348 # "uncertain" set are removed from branchheads.
349 # "uncertain" set are removed from branchheads.
349 # This computation is heavy and avoided if at all possible.
350 # This computation is heavy and avoided if at all possible.
350 bheads = self._entries.get(branch, [])
351 bheads = self._entries.get(branch, [])
351 bheadset = {cl.rev(node) for node in bheads}
352 bheadset = {cl.rev(node) for node in bheads}
352 uncertain = set()
353 uncertain = set()
353 for newrev in sorted(newheadrevs):
354 for newrev in sorted(newheadrevs):
354 if newrev in obsrevs:
355 if newrev in obsrevs:
355 # We ignore obsolete changesets as they shouldn't be
356 # We ignore obsolete changesets as they shouldn't be
356 # considered heads.
357 # considered heads.
357 continue
358 continue
358
359
359 if not bheadset:
360 if not bheadset:
360 bheadset.add(newrev)
361 bheadset.add(newrev)
361 continue
362 continue
362
363
363 parents = [p for p in parentrevs(newrev) if p != nullrev]
364 parents = [p for p in parentrevs(newrev) if p != nullrev]
364 samebranch = set()
365 samebranch = set()
365 otherbranch = set()
366 otherbranch = set()
366 obsparents = set()
367 obsparents = set()
367 for p in parents:
368 for p in parents:
368 if p in obsrevs:
369 if p in obsrevs:
369 # We ignored this obsolete changeset earlier, but now
370 # We ignored this obsolete changeset earlier, but now
370 # that it has non-ignored children, we need to make
371 # that it has non-ignored children, we need to make
371 # sure their ancestors are not considered heads. To
372 # sure their ancestors are not considered heads. To
372 # achieve that, we will simply treat this obsolete
373 # achieve that, we will simply treat this obsolete
373 # changeset as a parent from other branch.
374 # changeset as a parent from other branch.
374 obsparents.add(p)
375 obsparents.add(p)
375 elif p in bheadset or getbranchinfo(p)[0] == branch:
376 elif p in bheadset or getbranchinfo(p)[0] == branch:
376 samebranch.add(p)
377 samebranch.add(p)
377 else:
378 else:
378 otherbranch.add(p)
379 otherbranch.add(p)
379 if not (len(bheadset) == len(samebranch) == 1):
380 if not (len(bheadset) == len(samebranch) == 1):
380 uncertain.update(otherbranch)
381 uncertain.update(otherbranch)
381 uncertain.update(obsparents)
382 uncertain.update(obsparents)
382 bheadset.difference_update(samebranch)
383 bheadset.difference_update(samebranch)
383 bheadset.add(newrev)
384 bheadset.add(newrev)
384
385
385 if uncertain:
386 if uncertain:
386 if topoheads is None:
387 if topoheads is None:
387 topoheads = set(cl.headrevs())
388 topoheads = set(cl.headrevs())
388 if bheadset - topoheads:
389 if bheadset - topoheads:
389 floorrev = min(bheadset)
390 floorrev = min(bheadset)
390 if floorrev <= max(uncertain):
391 if floorrev <= max(uncertain):
391 ancestors = set(cl.ancestors(uncertain, floorrev))
392 ancestors = set(cl.ancestors(uncertain, floorrev))
392 bheadset -= ancestors
393 bheadset -= ancestors
393 if bheadset:
394 if bheadset:
394 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
395 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
395
396
396 duration = util.timer() - starttime
397 duration = util.timer() - starttime
397 repo.ui.log(
398 repo.ui.log(
398 b'branchcache',
399 b'branchcache',
399 b'updated %s in %.4f seconds\n',
400 b'updated %s in %.4f seconds\n',
400 _branchcachedesc(repo),
401 _branchcachedesc(repo),
401 duration,
402 duration,
402 )
403 )
403 return max_rev
404 return max_rev
404
405
405
406
406 class branchcache(_BaseBranchCache):
407 class branchcache(_BaseBranchCache):
407 """Branchmap info for a local repo or repoview"""
408 """Branchmap info for a local repo or repoview"""
408
409
409 _base_filename = b"branch2"
410 _base_filename = b"branch2"
410
411
411 def __init__(
412 def __init__(
412 self,
413 self,
413 repo: "localrepo.localrepository",
414 repo: "localrepo.localrepository",
414 entries: Union[
415 entries: Union[
415 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
416 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
416 ] = (),
417 ] = (),
417 tipnode: Optional[bytes] = None,
418 tipnode: Optional[bytes] = None,
418 tiprev: Optional[int] = nullrev,
419 tiprev: Optional[int] = nullrev,
419 filteredhash: Optional[bytes] = None,
420 filteredhash: Optional[bytes] = None,
420 closednodes: Optional[Set[bytes]] = None,
421 closednodes: Optional[Set[bytes]] = None,
421 hasnode: Optional[Callable[[bytes], bool]] = None,
422 hasnode: Optional[Callable[[bytes], bool]] = None,
422 verify_node: bool = False,
423 verify_node: bool = False,
423 ) -> None:
424 ) -> None:
424 """hasnode is a function which can be used to verify whether changelog
425 """hasnode is a function which can be used to verify whether changelog
425 has a given node or not. If it's not provided, we assume that every node
426 has a given node or not. If it's not provided, we assume that every node
426 we have exists in changelog"""
427 we have exists in changelog"""
427 self._filtername = repo.filtername
428 self._filtername = repo.filtername
428 self._delayed = False
429 self._delayed = False
429 if tipnode is None:
430 if tipnode is None:
430 self.tipnode = repo.nullid
431 self.tipnode = repo.nullid
431 else:
432 else:
432 self.tipnode = tipnode
433 self.tipnode = tipnode
433 self.tiprev = tiprev
434 self.tiprev = tiprev
434 self.filteredhash = filteredhash
435 self.filteredhash = filteredhash
435
436
436 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
437 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
437 # closednodes is a set of nodes that close their branch. If the branch
438 # closednodes is a set of nodes that close their branch. If the branch
438 # cache has been updated, it may contain nodes that are no longer
439 # cache has been updated, it may contain nodes that are no longer
439 # heads.
440 # heads.
440
441
441 # Do we need to verify branch at all ?
442 # Do we need to verify branch at all ?
442 self._verify_node = verify_node
443 self._verify_node = verify_node
443 # branches for which nodes are verified
444 # branches for which nodes are verified
444 self._verifiedbranches = set()
445 self._verifiedbranches = set()
445 self._hasnode = None
446 self._hasnode = None
446 if self._verify_node:
447 if self._verify_node:
447 self._hasnode = repo.changelog.hasnode
448 self._hasnode = repo.changelog.hasnode
448
449
449 def validfor(self, repo):
450 def validfor(self, repo):
450 """check that cache contents are valid for (a subset of) this repo
451 """check that cache contents are valid for (a subset of) this repo
451
452
452 - False when the order of changesets changed or if we detect a strip.
453 - False when the order of changesets changed or if we detect a strip.
453 - True when cache is up-to-date for the current repo or its subset."""
454 - True when cache is up-to-date for the current repo or its subset."""
454 try:
455 try:
455 node = repo.changelog.node(self.tiprev)
456 node = repo.changelog.node(self.tiprev)
456 except IndexError:
457 except IndexError:
457 # changesets were stripped and now we don't even have enough to
458 # changesets were stripped and now we don't even have enough to
458 # find tiprev
459 # find tiprev
459 return False
460 return False
460 if self.tipnode != node:
461 if self.tipnode != node:
461 # tiprev doesn't correspond to tipnode: repo was stripped, or this
462 # tiprev doesn't correspond to tipnode: repo was stripped, or this
462 # repo has a different order of changesets
463 # repo has a different order of changesets
463 return False
464 return False
464 tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
465 tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
465 # hashes don't match if this repo view has a different set of filtered
466 # hashes don't match if this repo view has a different set of filtered
466 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
467 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
467 # history was rewritten)
468 # history was rewritten)
468 return self.filteredhash == tiphash
469 return self.filteredhash == tiphash
469
470
470 @classmethod
471 @classmethod
471 def fromfile(cls, repo):
472 def fromfile(cls, repo):
472 f = None
473 f = None
473 try:
474 try:
474 f = repo.cachevfs(cls._filename(repo))
475 f = repo.cachevfs(cls._filename(repo))
475 lineiter = iter(f)
476 lineiter = iter(f)
476 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
477 init_kwargs = cls._load_header(repo, lineiter)
477 last, lrev = cachekey[:2]
478 last, lrev = bin(last), int(lrev)
479 filteredhash = None
480 if len(cachekey) > 2:
481 filteredhash = bin(cachekey[2])
482 bcache = cls(
478 bcache = cls(
483 repo,
479 repo,
484 tipnode=last,
485 tiprev=lrev,
486 filteredhash=filteredhash,
487 verify_node=True,
480 verify_node=True,
481 **init_kwargs,
488 )
482 )
489 if not bcache.validfor(repo):
483 if not bcache.validfor(repo):
490 # invalidate the cache
484 # invalidate the cache
491 raise ValueError('tip differs')
485 raise ValueError('tip differs')
492 bcache._load_heads(repo, lineiter)
486 bcache._load_heads(repo, lineiter)
493 except (IOError, OSError):
487 except (IOError, OSError):
494 return None
488 return None
495
489
496 except Exception as inst:
490 except Exception as inst:
497 if repo.ui.debugflag:
491 if repo.ui.debugflag:
498 msg = b'invalid %s: %s\n'
492 msg = b'invalid %s: %s\n'
499 msg %= (
493 msg %= (
500 _branchcachedesc(repo),
494 _branchcachedesc(repo),
501 stringutil.forcebytestr(inst),
495 stringutil.forcebytestr(inst),
502 )
496 )
503 repo.ui.debug(msg)
497 repo.ui.debug(msg)
504 bcache = None
498 bcache = None
505
499
506 finally:
500 finally:
507 if f:
501 if f:
508 f.close()
502 f.close()
509
503
510 return bcache
504 return bcache
511
505
506 @classmethod
507 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
508 """parse the head of a branchmap file
509
510 return parameters to pass to a newly created class instance.
511 """
512 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
513 last, lrev = cachekey[:2]
514 last, lrev = bin(last), int(lrev)
515 filteredhash = None
516 if len(cachekey) > 2:
517 filteredhash = bin(cachekey[2])
518 return {
519 "tipnode": last,
520 "tiprev": lrev,
521 "filteredhash": filteredhash,
522 }
523
512 def _load_heads(self, repo, lineiter):
524 def _load_heads(self, repo, lineiter):
513 """fully loads the branchcache by reading from the file using the line
525 """fully loads the branchcache by reading from the file using the line
514 iterator passed"""
526 iterator passed"""
515 for line in lineiter:
527 for line in lineiter:
516 line = line.rstrip(b'\n')
528 line = line.rstrip(b'\n')
517 if not line:
529 if not line:
518 continue
530 continue
519 node, state, label = line.split(b" ", 2)
531 node, state, label = line.split(b" ", 2)
520 if state not in b'oc':
532 if state not in b'oc':
521 raise ValueError('invalid branch state')
533 raise ValueError('invalid branch state')
522 label = encoding.tolocal(label.strip())
534 label = encoding.tolocal(label.strip())
523 node = bin(node)
535 node = bin(node)
524 self._entries.setdefault(label, []).append(node)
536 self._entries.setdefault(label, []).append(node)
525 if state == b'c':
537 if state == b'c':
526 self._closednodes.add(node)
538 self._closednodes.add(node)
527
539
528 @classmethod
540 @classmethod
529 def _filename(cls, repo):
541 def _filename(cls, repo):
530 """name of a branchcache file for a given repo or repoview"""
542 """name of a branchcache file for a given repo or repoview"""
531 filename = cls._base_filename
543 filename = cls._base_filename
532 if repo.filtername:
544 if repo.filtername:
533 filename = b'%s-%s' % (filename, repo.filtername)
545 filename = b'%s-%s' % (filename, repo.filtername)
534 return filename
546 return filename
535
547
536 def copy(self, repo):
548 def copy(self, repo):
537 """return a deep copy of the branchcache object"""
549 """return a deep copy of the branchcache object"""
538 other = type(self)(
550 other = type(self)(
539 repo=repo,
551 repo=repo,
540 # we always do a shally copy of self._entries, and the values is
552 # we always do a shally copy of self._entries, and the values is
541 # always replaced, so no need to deepcopy until the above remains
553 # always replaced, so no need to deepcopy until the above remains
542 # true.
554 # true.
543 entries=self._entries,
555 entries=self._entries,
544 tipnode=self.tipnode,
556 tipnode=self.tipnode,
545 tiprev=self.tiprev,
557 tiprev=self.tiprev,
546 filteredhash=self.filteredhash,
558 filteredhash=self.filteredhash,
547 closednodes=set(self._closednodes),
559 closednodes=set(self._closednodes),
548 verify_node=self._verify_node,
560 verify_node=self._verify_node,
549 )
561 )
550 # we copy will likely schedule a write anyway, but that does not seems
562 # we copy will likely schedule a write anyway, but that does not seems
551 # to hurt to overschedule
563 # to hurt to overschedule
552 other._delayed = self._delayed
564 other._delayed = self._delayed
553 # also copy information about the current verification state
565 # also copy information about the current verification state
554 other._verifiedbranches = set(self._verifiedbranches)
566 other._verifiedbranches = set(self._verifiedbranches)
555 return other
567 return other
556
568
557 def write(self, repo):
569 def write(self, repo):
558 assert self._filtername == repo.filtername, (
570 assert self._filtername == repo.filtername, (
559 self._filtername,
571 self._filtername,
560 repo.filtername,
572 repo.filtername,
561 )
573 )
562 tr = repo.currenttransaction()
574 tr = repo.currenttransaction()
563 if not getattr(tr, 'finalized', True):
575 if not getattr(tr, 'finalized', True):
564 # Avoid premature writing.
576 # Avoid premature writing.
565 #
577 #
566 # (The cache warming setup by localrepo will update the file later.)
578 # (The cache warming setup by localrepo will update the file later.)
567 self._delayed = True
579 self._delayed = True
568 return
580 return
569 try:
581 try:
570 filename = self._filename(repo)
582 filename = self._filename(repo)
571 with repo.cachevfs(filename, b"w", atomictemp=True) as f:
583 with repo.cachevfs(filename, b"w", atomictemp=True) as f:
572 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
584 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
573 if self.filteredhash is not None:
585 if self.filteredhash is not None:
574 cachekey.append(hex(self.filteredhash))
586 cachekey.append(hex(self.filteredhash))
575 f.write(b" ".join(cachekey) + b'\n')
587 f.write(b" ".join(cachekey) + b'\n')
576 nodecount = 0
588 nodecount = 0
577 for label, nodes in sorted(self._entries.items()):
589 for label, nodes in sorted(self._entries.items()):
578 label = encoding.fromlocal(label)
590 label = encoding.fromlocal(label)
579 for node in nodes:
591 for node in nodes:
580 nodecount += 1
592 nodecount += 1
581 if node in self._closednodes:
593 if node in self._closednodes:
582 state = b'c'
594 state = b'c'
583 else:
595 else:
584 state = b'o'
596 state = b'o'
585 f.write(b"%s %s %s\n" % (hex(node), state, label))
597 f.write(b"%s %s %s\n" % (hex(node), state, label))
586 repo.ui.log(
598 repo.ui.log(
587 b'branchcache',
599 b'branchcache',
588 b'wrote %s with %d labels and %d nodes\n',
600 b'wrote %s with %d labels and %d nodes\n',
589 _branchcachedesc(repo),
601 _branchcachedesc(repo),
590 len(self._entries),
602 len(self._entries),
591 nodecount,
603 nodecount,
592 )
604 )
593 self._delayed = False
605 self._delayed = False
594 except (IOError, OSError, error.Abort) as inst:
606 except (IOError, OSError, error.Abort) as inst:
595 # Abort may be raised by read only opener, so log and continue
607 # Abort may be raised by read only opener, so log and continue
596 repo.ui.debug(
608 repo.ui.debug(
597 b"couldn't write branch cache: %s\n"
609 b"couldn't write branch cache: %s\n"
598 % stringutil.forcebytestr(inst)
610 % stringutil.forcebytestr(inst)
599 )
611 )
600
612
601 def _verifybranch(self, branch):
613 def _verifybranch(self, branch):
602 """verify head nodes for the given branch."""
614 """verify head nodes for the given branch."""
603 if not self._verify_node:
615 if not self._verify_node:
604 return
616 return
605 if branch not in self._entries or branch in self._verifiedbranches:
617 if branch not in self._entries or branch in self._verifiedbranches:
606 return
618 return
607 assert self._hasnode is not None
619 assert self._hasnode is not None
608 for n in self._entries[branch]:
620 for n in self._entries[branch]:
609 if not self._hasnode(n):
621 if not self._hasnode(n):
610 _unknownnode(n)
622 _unknownnode(n)
611
623
612 self._verifiedbranches.add(branch)
624 self._verifiedbranches.add(branch)
613
625
614 def _verifyall(self):
626 def _verifyall(self):
615 """verifies nodes of all the branches"""
627 """verifies nodes of all the branches"""
616 for b in self._entries.keys():
628 for b in self._entries.keys():
617 if b not in self._verifiedbranches:
629 if b not in self._verifiedbranches:
618 self._verifybranch(b)
630 self._verifybranch(b)
619
631
620 def __getitem__(self, key):
632 def __getitem__(self, key):
621 self._verifybranch(key)
633 self._verifybranch(key)
622 return super().__getitem__(key)
634 return super().__getitem__(key)
623
635
624 def __contains__(self, key):
636 def __contains__(self, key):
625 self._verifybranch(key)
637 self._verifybranch(key)
626 return super().__contains__(key)
638 return super().__contains__(key)
627
639
628 def iteritems(self):
640 def iteritems(self):
629 self._verifyall()
641 self._verifyall()
630 return super().iteritems()
642 return super().iteritems()
631
643
632 items = iteritems
644 items = iteritems
633
645
634 def iterheads(self):
646 def iterheads(self):
635 """returns all the heads"""
647 """returns all the heads"""
636 self._verifyall()
648 self._verifyall()
637 return super().iterheads()
649 return super().iterheads()
638
650
639 def hasbranch(self, label):
651 def hasbranch(self, label):
640 """checks whether a branch of this name exists or not"""
652 """checks whether a branch of this name exists or not"""
641 self._verifybranch(label)
653 self._verifybranch(label)
642 return super().hasbranch(label)
654 return super().hasbranch(label)
643
655
644 def branchheads(self, branch, closed=False):
656 def branchheads(self, branch, closed=False):
645 self._verifybranch(branch)
657 self._verifybranch(branch)
646 return super().branchheads(branch, closed=closed)
658 return super().branchheads(branch, closed=closed)
647
659
648 def update(self, repo, revgen):
660 def update(self, repo, revgen):
649 assert self._filtername == repo.filtername, (
661 assert self._filtername == repo.filtername, (
650 self._filtername,
662 self._filtername,
651 repo.filtername,
663 repo.filtername,
652 )
664 )
653 cl = repo.changelog
665 cl = repo.changelog
654 max_rev = super().update(repo, revgen)
666 max_rev = super().update(repo, revgen)
655 # new tip revision which we found after iterating items from new
667 # new tip revision which we found after iterating items from new
656 # branches
668 # branches
657 if max_rev is not None and max_rev > self.tiprev:
669 if max_rev is not None and max_rev > self.tiprev:
658 self.tiprev = max_rev
670 self.tiprev = max_rev
659 self.tipnode = cl.node(max_rev)
671 self.tipnode = cl.node(max_rev)
660
672
661 if not self.validfor(repo):
673 if not self.validfor(repo):
662 # old cache key is now invalid for the repo, but we've just updated
674 # old cache key is now invalid for the repo, but we've just updated
663 # the cache and we assume it's valid, so let's make the cache key
675 # the cache and we assume it's valid, so let's make the cache key
664 # valid as well by recomputing it from the cached data
676 # valid as well by recomputing it from the cached data
665 self.tipnode = repo.nullid
677 self.tipnode = repo.nullid
666 self.tiprev = nullrev
678 self.tiprev = nullrev
667 for heads in self.iterheads():
679 for heads in self.iterheads():
668 if not heads:
680 if not heads:
669 # all revisions on a branch are obsolete
681 # all revisions on a branch are obsolete
670 continue
682 continue
671 # note: tiprev is not necessarily the tip revision of repo,
683 # note: tiprev is not necessarily the tip revision of repo,
672 # because the tip could be obsolete (i.e. not a head)
684 # because the tip could be obsolete (i.e. not a head)
673 tiprev = max(cl.rev(node) for node in heads)
685 tiprev = max(cl.rev(node) for node in heads)
674 if tiprev > self.tiprev:
686 if tiprev > self.tiprev:
675 self.tipnode = cl.node(tiprev)
687 self.tipnode = cl.node(tiprev)
676 self.tiprev = tiprev
688 self.tiprev = tiprev
677 self.filteredhash = scmutil.filteredhash(
689 self.filteredhash = scmutil.filteredhash(
678 repo, self.tiprev, needobsolete=True
690 repo, self.tiprev, needobsolete=True
679 )
691 )
680
692
681 self.write(repo)
693 self.write(repo)
682
694
683
695
684 class remotebranchcache(_BaseBranchCache):
696 class remotebranchcache(_BaseBranchCache):
685 """Branchmap info for a remote connection, should not write locally"""
697 """Branchmap info for a remote connection, should not write locally"""
686
698
687 def __init__(
699 def __init__(
688 self,
700 self,
689 repo: "localrepo.localrepository",
701 repo: "localrepo.localrepository",
690 entries: Union[
702 entries: Union[
691 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
703 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
692 ] = (),
704 ] = (),
693 closednodes: Optional[Set[bytes]] = None,
705 closednodes: Optional[Set[bytes]] = None,
694 ) -> None:
706 ) -> None:
695 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
707 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
696
708
697
709
698 # Revision branch info cache
710 # Revision branch info cache
699
711
700 _rbcversion = b'-v1'
712 _rbcversion = b'-v1'
701 _rbcnames = b'rbc-names' + _rbcversion
713 _rbcnames = b'rbc-names' + _rbcversion
702 _rbcrevs = b'rbc-revs' + _rbcversion
714 _rbcrevs = b'rbc-revs' + _rbcversion
703 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
715 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
704 _rbcrecfmt = b'>4sI'
716 _rbcrecfmt = b'>4sI'
705 _rbcrecsize = calcsize(_rbcrecfmt)
717 _rbcrecsize = calcsize(_rbcrecfmt)
706 _rbcmininc = 64 * _rbcrecsize
718 _rbcmininc = 64 * _rbcrecsize
707 _rbcnodelen = 4
719 _rbcnodelen = 4
708 _rbcbranchidxmask = 0x7FFFFFFF
720 _rbcbranchidxmask = 0x7FFFFFFF
709 _rbccloseflag = 0x80000000
721 _rbccloseflag = 0x80000000
710
722
711
723
712 class rbcrevs:
724 class rbcrevs:
713 """a byte string consisting of an immutable prefix followed by a mutable suffix"""
725 """a byte string consisting of an immutable prefix followed by a mutable suffix"""
714
726
715 def __init__(self, revs):
727 def __init__(self, revs):
716 self._prefix = revs
728 self._prefix = revs
717 self._rest = bytearray()
729 self._rest = bytearray()
718
730
719 def __len__(self):
731 def __len__(self):
720 return len(self._prefix) + len(self._rest)
732 return len(self._prefix) + len(self._rest)
721
733
722 def unpack_record(self, rbcrevidx):
734 def unpack_record(self, rbcrevidx):
723 if rbcrevidx < len(self._prefix):
735 if rbcrevidx < len(self._prefix):
724 return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
736 return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
725 else:
737 else:
726 return unpack_from(
738 return unpack_from(
727 _rbcrecfmt,
739 _rbcrecfmt,
728 util.buffer(self._rest),
740 util.buffer(self._rest),
729 rbcrevidx - len(self._prefix),
741 rbcrevidx - len(self._prefix),
730 )
742 )
731
743
732 def make_mutable(self):
744 def make_mutable(self):
733 if len(self._prefix) > 0:
745 if len(self._prefix) > 0:
734 entirety = bytearray()
746 entirety = bytearray()
735 entirety[:] = self._prefix
747 entirety[:] = self._prefix
736 entirety.extend(self._rest)
748 entirety.extend(self._rest)
737 self._rest = entirety
749 self._rest = entirety
738 self._prefix = bytearray()
750 self._prefix = bytearray()
739
751
740 def truncate(self, pos):
752 def truncate(self, pos):
741 self.make_mutable()
753 self.make_mutable()
742 del self._rest[pos:]
754 del self._rest[pos:]
743
755
744 def pack_into(self, rbcrevidx, node, branchidx):
756 def pack_into(self, rbcrevidx, node, branchidx):
745 if rbcrevidx < len(self._prefix):
757 if rbcrevidx < len(self._prefix):
746 self.make_mutable()
758 self.make_mutable()
747 buf = self._rest
759 buf = self._rest
748 start_offset = rbcrevidx - len(self._prefix)
760 start_offset = rbcrevidx - len(self._prefix)
749 end_offset = start_offset + _rbcrecsize
761 end_offset = start_offset + _rbcrecsize
750
762
751 if len(self._rest) < end_offset:
763 if len(self._rest) < end_offset:
752 # bytearray doesn't allocate extra space at least in Python 3.7.
764 # bytearray doesn't allocate extra space at least in Python 3.7.
753 # When multiple changesets are added in a row, precise resize would
765 # When multiple changesets are added in a row, precise resize would
754 # result in quadratic complexity. Overallocate to compensate by
766 # result in quadratic complexity. Overallocate to compensate by
755 # using the classic doubling technique for dynamic arrays instead.
767 # using the classic doubling technique for dynamic arrays instead.
756 # If there was a gap in the map before, less space will be reserved.
768 # If there was a gap in the map before, less space will be reserved.
757 self._rest.extend(b'\0' * end_offset)
769 self._rest.extend(b'\0' * end_offset)
758 return pack_into(
770 return pack_into(
759 _rbcrecfmt,
771 _rbcrecfmt,
760 buf,
772 buf,
761 start_offset,
773 start_offset,
762 node,
774 node,
763 branchidx,
775 branchidx,
764 )
776 )
765
777
766 def extend(self, extension):
778 def extend(self, extension):
767 return self._rest.extend(extension)
779 return self._rest.extend(extension)
768
780
769 def slice(self, begin, end):
781 def slice(self, begin, end):
770 if begin < len(self._prefix):
782 if begin < len(self._prefix):
771 acc = bytearray()
783 acc = bytearray()
772 acc[:] = self._prefix[begin:end]
784 acc[:] = self._prefix[begin:end]
773 acc.extend(
785 acc.extend(
774 self._rest[begin - len(self._prefix) : end - len(self._prefix)]
786 self._rest[begin - len(self._prefix) : end - len(self._prefix)]
775 )
787 )
776 return acc
788 return acc
777 return self._rest[begin - len(self._prefix) : end - len(self._prefix)]
789 return self._rest[begin - len(self._prefix) : end - len(self._prefix)]
778
790
779
791
780 class revbranchcache:
792 class revbranchcache:
781 """Persistent cache, mapping from revision number to branch name and close.
793 """Persistent cache, mapping from revision number to branch name and close.
782 This is a low level cache, independent of filtering.
794 This is a low level cache, independent of filtering.
783
795
784 Branch names are stored in rbc-names in internal encoding separated by 0.
796 Branch names are stored in rbc-names in internal encoding separated by 0.
785 rbc-names is append-only, and each branch name is only stored once and will
797 rbc-names is append-only, and each branch name is only stored once and will
786 thus have a unique index.
798 thus have a unique index.
787
799
788 The branch info for each revision is stored in rbc-revs as constant size
800 The branch info for each revision is stored in rbc-revs as constant size
789 records. The whole file is read into memory, but it is only 'parsed' on
801 records. The whole file is read into memory, but it is only 'parsed' on
790 demand. The file is usually append-only but will be truncated if repo
802 demand. The file is usually append-only but will be truncated if repo
791 modification is detected.
803 modification is detected.
792 The record for each revision contains the first 4 bytes of the
804 The record for each revision contains the first 4 bytes of the
793 corresponding node hash, and the record is only used if it still matches.
805 corresponding node hash, and the record is only used if it still matches.
794 Even a completely trashed rbc-revs fill thus still give the right result
806 Even a completely trashed rbc-revs fill thus still give the right result
795 while converging towards full recovery ... assuming no incorrectly matching
807 while converging towards full recovery ... assuming no incorrectly matching
796 node hashes.
808 node hashes.
797 The record also contains 4 bytes where 31 bits contains the index of the
809 The record also contains 4 bytes where 31 bits contains the index of the
798 branch and the last bit indicate that it is a branch close commit.
810 branch and the last bit indicate that it is a branch close commit.
799 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
811 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
800 and will grow with it but be 1/8th of its size.
812 and will grow with it but be 1/8th of its size.
801 """
813 """
802
814
803 def __init__(self, repo, readonly=True):
815 def __init__(self, repo, readonly=True):
804 assert repo.filtername is None
816 assert repo.filtername is None
805 self._repo = repo
817 self._repo = repo
806 self._names = [] # branch names in local encoding with static index
818 self._names = [] # branch names in local encoding with static index
807 self._rbcrevs = rbcrevs(bytearray())
819 self._rbcrevs = rbcrevs(bytearray())
808 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
820 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
809 try:
821 try:
810 bndata = repo.cachevfs.read(_rbcnames)
822 bndata = repo.cachevfs.read(_rbcnames)
811 self._rbcsnameslen = len(bndata) # for verification before writing
823 self._rbcsnameslen = len(bndata) # for verification before writing
812 if bndata:
824 if bndata:
813 self._names = [
825 self._names = [
814 encoding.tolocal(bn) for bn in bndata.split(b'\0')
826 encoding.tolocal(bn) for bn in bndata.split(b'\0')
815 ]
827 ]
816 except (IOError, OSError):
828 except (IOError, OSError):
817 if readonly:
829 if readonly:
818 # don't try to use cache - fall back to the slow path
830 # don't try to use cache - fall back to the slow path
819 self.branchinfo = self._branchinfo
831 self.branchinfo = self._branchinfo
820
832
821 if self._names:
833 if self._names:
822 try:
834 try:
823 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
835 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
824 with repo.cachevfs(_rbcrevs) as fp:
836 with repo.cachevfs(_rbcrevs) as fp:
825 data = util.buffer(util.mmapread(fp))
837 data = util.buffer(util.mmapread(fp))
826 else:
838 else:
827 data = repo.cachevfs.read(_rbcrevs)
839 data = repo.cachevfs.read(_rbcrevs)
828 self._rbcrevs = rbcrevs(data)
840 self._rbcrevs = rbcrevs(data)
829 except (IOError, OSError) as inst:
841 except (IOError, OSError) as inst:
830 repo.ui.debug(
842 repo.ui.debug(
831 b"couldn't read revision branch cache: %s\n"
843 b"couldn't read revision branch cache: %s\n"
832 % stringutil.forcebytestr(inst)
844 % stringutil.forcebytestr(inst)
833 )
845 )
834 # remember number of good records on disk
846 # remember number of good records on disk
835 self._rbcrevslen = min(
847 self._rbcrevslen = min(
836 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
848 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
837 )
849 )
838 if self._rbcrevslen == 0:
850 if self._rbcrevslen == 0:
839 self._names = []
851 self._names = []
        self._rbcnamescount = len(self._names)  # number of names read at
        # _rbcsnameslen

    def _clear(self):
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._rbcrevslen = len(self._repo.changelog)
        self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
        util.clearcachedproperty(self, b'_namesreverse')

    @util.propertycache
    def _namesreverse(self):
        return {b: r for r, b in enumerate(self._names)}

    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == b'\0\0\0\0':
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug(
                    b"referenced branch names not found"
                    b" - rebuilding revision branch cache from scratch\n"
                )
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug(
                b"history modification detected - truncating "
                b"revision branch cache to revision %d\n" % rev
            )
            truncate = rbcrevidx + _rbcrecsize
            self._rbcrevs.truncate(truncate)
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

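# --- Illustrative sketch (standalone; not part of branchmap.py) ---
# The fast path above decodes a fixed-size record: a 4-byte node-hash
# prefix plus a big-endian 32-bit branch index whose top bit carries the
# "closed" flag. This mirrors the module's `>4sI` record format; all
# names below are hypothetical.
import struct

SKETCH_RECFMT = b'>4sI'
SKETCH_CLOSEFLAG = 0x80000000  # high bit of the index marks a closed branch
SKETCH_IDXMASK = 0x7FFFFFFF    # remaining 31 bits index the names list

def sketch_unpack_record(record):
    node, idx = struct.unpack(SKETCH_RECFMT, record)
    return node, idx & SKETCH_IDXMASK, bool(idx & SKETCH_CLOSEFLAG)

rec = struct.pack(SKETCH_RECFMT, b'\xca\xfe\xba\xbe', SKETCH_CLOSEFLAG | 7)
assert sketch_unpack_record(rec) == (b'\xca\xfe\xba\xbe', 7, True)
# --- end sketch ---
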
    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def setdata(self, rev, changelogrevision):
        """add new branch information to the cache"""
        branch, close = changelogrevision.branchinfo

        if branch in self._namesreverse:
            branchidx = self._namesreverse[branch]
        else:
            branchidx = len(self._names)
            self._names.append(branch)
            self._namesreverse[branch] = branchidx
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
        # If no cache data were readable (none exists, bad permissions,
        # etc.), the cache was bypassing itself by setting:
        #
        #   self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypassing.
        if 'branchinfo' in vars(self):
            del self.branchinfo

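# --- Illustrative sketch (standalone; not part of branchmap.py) ---
# The bypass that setdata() drops works by ordinary attribute shadowing:
# an instance attribute hides the class-level method until it is deleted.
# All names here are hypothetical.
class SketchCache:
    def lookup(self):
        return b'from cache'

    def _slowpath(self):
        return b'from changelog'

c = SketchCache()
c.lookup = c._slowpath        # bypass installed: calls take the slow path
assert c.lookup() == b'from changelog'
assert 'lookup' in vars(c)    # the same check setdata() performs
del c.lookup                  # bypass dropped once cache data exists
assert c.lookup() == b'from cache'
# --- end sketch ---
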
    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize(b'write-revbranchcache', self.write)

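# --- Illustrative sketch (standalone; not part of branchmap.py) ---
# _setcachedata() delegates to the rbcrevs helper's pack_into; the sketch
# below shows one plausible shape for that operation (an assumption, not
# the actual helper): write a record at a byte offset into a growable
# bytearray, extending it in bulk so repeated appends stay cheap.
import struct

def sketch_pack_into(buf, offset, node, branchidx,
                     recfmt=b'>4sI', mininc=64 * 8):
    recsize = struct.calcsize(recfmt)
    if offset + recsize > len(buf):
        # grow in chunks to amortize reallocation across many records
        buf.extend(b'\0' * (mininc + offset + recsize - len(buf)))
    struct.pack_into(recfmt, buf, offset, node, branchidx)

buf = bytearray()
sketch_pack_into(buf, 0, b'\xde\xad\xbe\xef', 5)
assert buf[:8] == struct.pack(b'>4sI', b'\xde\xad\xbe\xef', 5)
# --- end sketch ---
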
    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = b''
        try:
            # write the new names
            if self._rbcnamescount < len(self._names):
                wlock = repo.wlock(wait=False)
                step = b' names'
                self._writenames(repo)

            # write the new revs
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = b''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                self._writerevs(repo, start)

        except (IOError, OSError, error.Abort, error.LockError) as inst:
            repo.ui.debug(
                b"couldn't write revision branch cache%s: %s\n"
                % (step, stringutil.forcebytestr(inst))
            )
        finally:
            if wlock is not None:
                wlock.release()

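# --- Illustrative sketch (standalone; not part of branchmap.py) ---
# write() only appends the tail of the revs buffer that is not yet on
# disk: `start = persisted records * record size`, so a clean cache
# writes nothing at all. Hypothetical names throughout.
def sketch_dirty_range(persisted_recs, buf_len, recsize=8):
    start = persisted_recs * recsize
    return None if start == buf_len else (start, buf_len)

assert sketch_dirty_range(3, 24) is None      # fully persisted: no-op
assert sketch_dirty_range(3, 40) == (24, 40)  # two new records to append
# --- end sketch ---
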
    def _writenames(self, repo):
        """write the new branch names to revbranchcache"""
        if self._rbcnamescount != 0:
            f = repo.cachevfs.open(_rbcnames, b'ab')
            if f.tell() == self._rbcsnameslen:
                f.write(b'\0')
            else:
                f.close()
                repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
                self._rbcnamescount = 0
                self._rbcrevslen = 0
        if self._rbcnamescount == 0:
            # before rewriting names, make sure references are removed
            repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
            f = repo.cachevfs.open(_rbcnames, b'wb')
        f.write(
            b'\0'.join(
                encoding.fromlocal(b)
                for b in self._names[self._rbcnamescount :]
            )
        )
        self._rbcsnameslen = f.tell()
        f.close()
        self._rbcnamescount = len(self._names)

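# --- Illustrative sketch (standalone; not part of branchmap.py) ---
# _writenames() treats the names file as NUL-separated branch names, which
# is why an append starts with b'\0' and why existing branch indexes stay
# stable across appends. A minimal round-trip under that assumption:
def sketch_decode_names(data):
    return data.split(b'\0') if data else []

blob = b'\0'.join([b'default', b'stable'])
blob += b'\0' + b'feature-x'          # an append, as _writenames would do
assert sketch_decode_names(blob) == [b'default', b'stable', b'feature-x']
# --- end sketch ---
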
    def _writerevs(self, repo, start):
        """write the new revs to revbranchcache"""
        revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
        with repo.cachevfs.open(_rbcrevs, b'ab') as f:
            if f.tell() != start:
                repo.ui.debug(
                    b"truncating cache/%s to %d\n" % (_rbcrevs, start)
                )
                f.seek(start)
                if f.tell() != start:
                    start = 0
                    f.seek(start)
                f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs.slice(start, end))
        self._rbcrevslen = revs
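
# --- Illustrative usage sketch (hypothetical; not part of this file) ---
# A caller consults the cache per revision; the cache persists itself at
# transaction close (via the finalizer registered in _setcachedata) or on
# an explicit write(). Something along these lines, assuming a `repo`
# object and an in-range `rev`:
#
#     rbc = revbranchcache(repo)
#     name, closed = rbc.branchinfo(rev)
#     rbc.write()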