branchcache: move the filename to a class attribute...
marmoute - r52353:cebd96de default
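The patch stops hardcoding the cache filename inside branchcache._filename and
instead reads it from a new _base_filename class attribute, presumably so that
subclasses can override the name. A minimal sketch of the before/after
pattern, reduced from the diff below:

    # before: the filename is hardcoded in a static method
    @staticmethod
    def _filename(repo):
        filename = b"branch2"
        ...

    # after: the filename comes from an overridable class attribute
    _base_filename = b"branch2"

    @classmethod
    def _filename(cls, repo):
        filename = cls._base_filename
        ...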
@@ -1,1019 +1,1021 @@
# branchmap.py - logic to compute, maintain and store the branchmap for a local repo
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import struct

from .node import (
    bin,
    hex,
    nullrev,
)

from typing import (
    Callable,
    Dict,
    Iterable,
    List,
    Optional,
    Set,
    TYPE_CHECKING,
    Tuple,
    Union,
)

from . import (
    encoding,
    error,
    obsolete,
    scmutil,
    util,
)

from .utils import (
    repoviewutil,
    stringutil,
)

if TYPE_CHECKING:
    from . import localrepo

    assert [localrepo]

subsettable = repoviewutil.subsettable

calcsize = struct.calcsize
pack_into = struct.pack_into
unpack_from = struct.unpack_from


class BranchMapCache:
    """mapping of filtered views of repo with their branchcache"""

    def __init__(self):
        self._per_filter = {}

    def __getitem__(self, repo):
        self.updatecache(repo)
        bcache = self._per_filter[repo.filtername]
        assert bcache._filtername == repo.filtername, (
            bcache._filtername,
            repo.filtername,
        )
        return bcache
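    # Usage note (assumption, based on how localrepo wires this up; not part
    # of this file): the repository object keeps one shared instance as
    # repo._branchcaches, so repo.branchmap() resolves to
    # repo._branchcaches[repo], updating the per-view cache on demand via
    # __getitem__ above.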

    def update_disk(self, repo):
70 """ensure and up-to-date cache is (or will be) written on disk
70 """ensure and up-to-date cache is (or will be) written on disk
71
71
72 The cache for this repository view is updated if needed and written on
72 The cache for this repository view is updated if needed and written on
73 disk.
73 disk.
74
74
75 If a transaction is in progress, the writing is schedule to transaction
75 If a transaction is in progress, the writing is schedule to transaction
76 close. See the `BranchMapCache.write_delayed` method.
76 close. See the `BranchMapCache.write_delayed` method.
77
77
78 This method exist independently of __getitem__ as it is sometime useful
78 This method exist independently of __getitem__ as it is sometime useful
79 to signal that we have no intend to use the data in memory yet.
79 to signal that we have no intend to use the data in memory yet.
80 """
80 """
        self.updatecache(repo)
        bcache = self._per_filter[repo.filtername]
        assert bcache._filtername == repo.filtername, (
            bcache._filtername,
            repo.filtername,
        )
        bcache.write(repo)

    def updatecache(self, repo):
        """Update the cache for the given filtered view on a repository"""
        # This can trigger updates for the caches for subsets of the filtered
        # view, e.g. when there is no cache for this filtered view or the cache
        # is stale.

        cl = repo.changelog
        filtername = repo.filtername
        bcache = self._per_filter.get(filtername)
        if bcache is None or not bcache.validfor(repo):
            # cache object missing or cache object stale? Read from disk
            bcache = branchcache.fromfile(repo)

        revs = []
        if bcache is None:
            # no (fresh) cache available anymore, perhaps we can re-use
            # the cache for a subset, then extend that to add info on missing
            # revisions.
            subsetname = subsettable.get(filtername)
            if subsetname is not None:
                subset = repo.filtered(subsetname)
                bcache = self[subset].copy(repo)
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= bcache.tiprev)
            else:
                # nothing to fall back on, start empty.
                bcache = branchcache(repo)

        revs.extend(cl.revs(start=bcache.tiprev + 1))
        if revs:
            bcache.update(repo, revs)

        assert bcache.validfor(repo), filtername
        self._per_filter[repo.filtername] = bcache

    def replace(self, repo, remotebranchmap):
        """Replace the branchmap cache for a repo with a branch mapping.

        This is likely only called during clone with a branch map from a
        remote.

        """
        cl = repo.changelog
        clrev = cl.rev
        clbranchinfo = cl.branchinfo
        rbheads = []
        closed = set()
        for bheads in remotebranchmap.values():
            rbheads += bheads
            for h in bheads:
                r = clrev(h)
                b, c = clbranchinfo(r)
                if c:
                    closed.add(h)

        if rbheads:
            rtiprev = max((int(clrev(node)) for node in rbheads))
            cache = branchcache(
                repo,
                remotebranchmap,
                repo[rtiprev].node(),
                rtiprev,
                closednodes=closed,
            )

            # Try to stick it as low as possible
            # filters above served are unlikely to be fetched from a clone
            for candidate in (b'base', b'immutable', b'served'):
                rview = repo.filtered(candidate)
                if cache.validfor(rview):
                    cache = self._per_filter[candidate] = cache.copy(rview)
                    cache.write(rview)
                    return

    def clear(self):
        self._per_filter.clear()

    def write_delayed(self, repo):
        unfi = repo.unfiltered()
        for filtername, cache in self._per_filter.items():
            if cache._delayed:
                repo = unfi.filtered(filtername)
                cache.write(repo)


def _unknownnode(node):
    """raises ValueError when branchcache finds a node which does not exist"""
    raise ValueError('node %s does not exist' % node.hex())


def _branchcachedesc(repo):
    if repo.filtername is not None:
        return b'branch cache (%s)' % repo.filtername
    else:
        return b'branch cache'


class _BaseBranchCache:
    """A dict-like object that holds the branch heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered and obsolete revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closed_nodes: Optional[Set[bytes]] = None,
    ) -> None:
        """hasnode is a function which can be used to verify whether changelog
        has a given node or not. If it's not provided, we assume that every node
        we have exists in changelog"""
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closed_nodes is None:
            closed_nodes = set()
        self._closednodes = set(closed_nodes)
        self._entries = dict(entries)

    def __iter__(self):
        return iter(self._entries)

    def __setitem__(self, key, value):
        self._entries[key] = value

    def __getitem__(self, key):
        return self._entries[key]

    def __contains__(self, key):
        return key in self._entries

    def iteritems(self):
        return self._entries.items()

    items = iteritems

    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        return label in self._entries

    def _branchtip(self, heads):
        """Return tuple with last open head in heads and false,
        otherwise return last closed head and true."""
        tip = heads[-1]
        closed = True
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed
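    # Illustration (not in the upstream file): with heads == [n1, n2, n3]
    # where n3 is in _closednodes but n2 is not, the reversed scan above
    # returns (n2, False); if every head were closed, it would return
    # (n3, True).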

    def branchtip(self, branch):
        """Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch."""
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        heads = self._entries[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        for bn, heads in self.items():
            yield (bn, heads) + self._branchtip(heads)

    def iterheads(self):
        """returns all the heads"""
        return self._entries.values()

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        heads missing, this function updates self to be correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        max_rev = -1
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))
            max_rev = max(max_rev, r)
        if max_rev < 0:
            max_rev = None

        # Delay fetching the topological heads until they are needed.
        # A repository without non-continuous branches can skip this part.
        topoheads = None

        # If a changeset is visible, its parents must be visible too, so
        # use the faster unfiltered parent accessor.
        parentrevs = repo.unfiltered().changelog.parentrevs

        # Faster than using ctx.obsolete()
        obsrevs = obsolete.getrevs(repo, b'obsolete')

        for branch, newheadrevs in newbranches.items():
            # For every branch, compute the new branchheads.
            # A branchhead is a revision such that no descendant is on
            # the same branch.
            #
            # The branchheads are computed iteratively in revision order.
            # This ensures topological order, i.e. parents are processed
            # before their children. Ancestors are inclusive here, i.e.
            # any revision is an ancestor of itself.
            #
            # Core observations:
            # - The current revision is always a branchhead for the
            #   repository up to that point.
            # - It is the first revision of the branch if and only if
            #   there was no branchhead before. In that case, it is the
            #   only branchhead as there are no possible ancestors on
            #   the same branch.
            # - If a parent is on the same branch, a branchhead can
            #   only be an ancestor of that parent if it is the parent
            #   itself. Otherwise it would have been removed as ancestor
            #   of that parent before.
            # - Therefore, if all parents are on the same branch, they
            #   can just be removed from the branchhead set.
            # - If one parent is on the same branch and the other is not
            #   and there was exactly one branchhead known, the existing
            #   branchhead can only be an ancestor if it is the parent.
            #   Otherwise it would have been removed as ancestor of
            #   the parent before. The other parent therefore can't have
            #   a branchhead as ancestor.
            # - In all other cases, the parents on different branches
            #   could have a branchhead as ancestor. Those parents are
            #   kept in the "uncertain" set. If all branchheads are also
            #   topological heads, they can't have descendants and further
            #   checks can be skipped. Otherwise, the ancestors of the
            #   "uncertain" set are removed from branchheads.
            #   This computation is heavy and avoided if at all possible.
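            #
            # Worked example (illustrative, not in the upstream file): for a
            # linear history entirely on one branch, each new revision's only
            # parent ends up in `samebranch`, so the parent is dropped from
            # the branchhead set and the new revision becomes the sole head.
            # The "uncertain" set stays empty and the expensive ancestor
            # computation below never runs.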
            bheads = self._entries.get(branch, [])
            bheadset = {cl.rev(node) for node in bheads}
            uncertain = set()
            for newrev in sorted(newheadrevs):
                if newrev in obsrevs:
                    # We ignore obsolete changesets as they shouldn't be
                    # considered heads.
                    continue

                if not bheadset:
                    bheadset.add(newrev)
                    continue

                parents = [p for p in parentrevs(newrev) if p != nullrev]
                samebranch = set()
                otherbranch = set()
                obsparents = set()
                for p in parents:
                    if p in obsrevs:
                        # We ignored this obsolete changeset earlier, but now
                        # that it has non-ignored children, we need to make
                        # sure their ancestors are not considered heads. To
                        # achieve that, we will simply treat this obsolete
                        # changeset as a parent from other branch.
                        obsparents.add(p)
                    elif p in bheadset or getbranchinfo(p)[0] == branch:
                        samebranch.add(p)
                    else:
                        otherbranch.add(p)
                if not (len(bheadset) == len(samebranch) == 1):
                    uncertain.update(otherbranch)
                    uncertain.update(obsparents)
                bheadset.difference_update(samebranch)
                bheadset.add(newrev)

            if uncertain:
                if topoheads is None:
                    topoheads = set(cl.headrevs())
                if bheadset - topoheads:
                    floorrev = min(bheadset)
                    if floorrev <= max(uncertain):
                        ancestors = set(cl.ancestors(uncertain, floorrev))
                        bheadset -= ancestors
            if bheadset:
                self[branch] = [cl.node(rev) for rev in sorted(bheadset)]

        duration = util.timer() - starttime
        repo.ui.log(
            b'branchcache',
            b'updated %s in %.4f seconds\n',
            _branchcachedesc(repo),
            duration,
        )
        return max_rev


class branchcache(_BaseBranchCache):
    """Branchmap info for a local repo or repoview"""

+    _base_filename = b"branch2"
+
    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        tipnode: Optional[bytes] = None,
        tiprev: Optional[int] = nullrev,
        filteredhash: Optional[bytes] = None,
        closednodes: Optional[Set[bytes]] = None,
        hasnode: Optional[Callable[[bytes], bool]] = None,
        verify_node: bool = False,
    ) -> None:
        """hasnode is a function which can be used to verify whether changelog
        has a given node or not. If it's not provided, we assume that every node
        we have exists in changelog"""
        self._filtername = repo.filtername
        self._delayed = False
        if tipnode is None:
            self.tipnode = repo.nullid
        else:
            self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash

        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.

        # Do we need to verify branch at all ?
        self._verify_node = verify_node
        # branches for which nodes are verified
        self._verifiedbranches = set()
        self._hasnode = None
        if self._verify_node:
            self._hasnode = repo.changelog.hasnode

    def validfor(self, repo):
        """check that cache contents are valid for (a subset of) this repo

        - False when the order of changesets changed or if we detect a strip.
        - True when cache is up-to-date for the current repo or its subset."""
        try:
            node = repo.changelog.node(self.tiprev)
        except IndexError:
            # changesets were stripped and now we don't even have enough to
            # find tiprev
            return False
        if self.tipnode != node:
            # tiprev doesn't correspond to tipnode: repo was stripped, or this
            # repo has a different order of changesets
            return False
        tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
        # hashes don't match if this repo view has a different set of filtered
        # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
        # history was rewritten)
        return self.filteredhash == tiphash

    @classmethod
    def fromfile(cls, repo):
        f = None
        try:
            f = repo.cachevfs(cls._filename(repo))
            lineiter = iter(f)
            cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
            last, lrev = cachekey[:2]
            last, lrev = bin(last), int(lrev)
            filteredhash = None
            if len(cachekey) > 2:
                filteredhash = bin(cachekey[2])
            bcache = cls(
                repo,
                tipnode=last,
                tiprev=lrev,
                filteredhash=filteredhash,
                verify_node=True,
            )
            if not bcache.validfor(repo):
                # invalidate the cache
                raise ValueError('tip differs')
            bcache.load(repo, lineiter)
        except (IOError, OSError):
            return None

        except Exception as inst:
            if repo.ui.debugflag:
                msg = b'invalid %s: %s\n'
                repo.ui.debug(
                    msg
                    % (
                        _branchcachedesc(repo),
                        stringutil.forcebytestr(inst),
                    )
                )
            bcache = None

        finally:
            if f:
                f.close()

        return bcache

    def load(self, repo, lineiter):
        """fully loads the branchcache by reading from the file using the line
        iterator passed"""
        for line in lineiter:
            line = line.rstrip(b'\n')
            if not line:
                continue
            node, state, label = line.split(b" ", 2)
            if state not in b'oc':
                raise ValueError('invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            self._entries.setdefault(label, []).append(node)
            if state == b'c':
                self._closednodes.add(node)

-    @staticmethod
-    def _filename(repo):
+    @classmethod
+    def _filename(cls, repo):
        """name of a branchcache file for a given repo or repoview"""
-        filename = b"branch2"
+        filename = cls._base_filename
        if repo.filtername:
            filename = b'%s-%s' % (filename, repo.filtername)
        return filename

    def copy(self, repo):
        """return a deep copy of the branchcache object"""
        other = type(self)(
            repo=repo,
            # we always do a shallow copy of self._entries, and the values
            # are always replaced, so there is no need to deepcopy as long
            # as the above remains true.
            entries=self._entries,
            tipnode=self.tipnode,
            tiprev=self.tiprev,
            filteredhash=self.filteredhash,
            closednodes=set(self._closednodes),
            verify_node=self._verify_node,
        )
        # the copy will likely schedule a write anyway, but it does not seem
        # to hurt to overschedule
        other._delayed = self._delayed
        # also copy information about the current verification state
        other._verifiedbranches = set(self._verifiedbranches)
        return other

    def write(self, repo):
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        tr = repo.currenttransaction()
        if not getattr(tr, 'finalized', True):
            # Avoid premature writing.
            #
            # (The cache warming setup by localrepo will update the file later.)
            self._delayed = True
            return
        try:
            filename = self._filename(repo)
            with repo.cachevfs(filename, b"w", atomictemp=True) as f:
                cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
                if self.filteredhash is not None:
                    cachekey.append(hex(self.filteredhash))
                f.write(b" ".join(cachekey) + b'\n')
                nodecount = 0
                for label, nodes in sorted(self._entries.items()):
                    label = encoding.fromlocal(label)
                    for node in nodes:
                        nodecount += 1
                        if node in self._closednodes:
                            state = b'c'
                        else:
                            state = b'o'
                        f.write(b"%s %s %s\n" % (hex(node), state, label))
                repo.ui.log(
                    b'branchcache',
                    b'wrote %s with %d labels and %d nodes\n',
                    _branchcachedesc(repo),
                    len(self._entries),
                    nodecount,
                )
            self._delayed = False
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug(
                b"couldn't write branch cache: %s\n"
                % stringutil.forcebytestr(inst)
            )

    def _verifybranch(self, branch):
        """verify head nodes for the given branch."""
        if not self._verify_node:
            return
        if branch not in self._entries or branch in self._verifiedbranches:
            return
        assert self._hasnode is not None
        for n in self._entries[branch]:
            if not self._hasnode(n):
                _unknownnode(n)

        self._verifiedbranches.add(branch)

    def _verifyall(self):
        """verifies nodes of all the branches"""
        for b in self._entries.keys():
            if b not in self._verifiedbranches:
                self._verifybranch(b)

    def __getitem__(self, key):
        self._verifybranch(key)
        return super().__getitem__(key)

    def __contains__(self, key):
        self._verifybranch(key)
        return super().__contains__(key)

    def iteritems(self):
        self._verifyall()
        return super().iteritems()

    items = iteritems

    def iterheads(self):
        """returns all the heads"""
        self._verifyall()
        return super().iterheads()

    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        self._verifybranch(label)
        return super().hasbranch(label)

    def branchheads(self, branch, closed=False):
        self._verifybranch(branch)
        return super().branchheads(branch, closed=closed)

    def update(self, repo, revgen):
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        cl = repo.changelog
        max_rev = super().update(repo, revgen)
        # new tip revision which we found after iterating items from new
        # branches
        if max_rev is not None and max_rev > self.tiprev:
            self.tiprev = max_rev
            self.tipnode = cl.node(max_rev)

        if not self.validfor(repo):
            # old cache key is now invalid for the repo, but we've just updated
            # the cache and we assume it's valid, so let's make the cache key
            # valid as well by recomputing it from the cached data
            self.tipnode = repo.nullid
            self.tiprev = nullrev
            for heads in self.iterheads():
                if not heads:
                    # all revisions on a branch are obsolete
                    continue
                # note: tiprev is not necessarily the tip revision of repo,
                # because the tip could be obsolete (i.e. not a head)
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
            self.filteredhash = scmutil.filteredhash(
                repo, self.tiprev, needobsolete=True
            )

        self.write(repo)


class remotebranchcache(_BaseBranchCache):
    """Branchmap info for a remote connection, should not write locally"""

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closednodes: Optional[Set[bytes]] = None,
    ) -> None:
        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)


# Revision branch info cache

_rbcversion = b'-v1'
_rbcnames = b'rbc-names' + _rbcversion
_rbcrevs = b'rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = b'>4sI'
_rbcrecsize = calcsize(_rbcrecfmt)
_rbcmininc = 64 * _rbcrecsize
_rbcnodelen = 4
_rbcbranchidxmask = 0x7FFFFFFF
_rbccloseflag = 0x80000000


class rbcrevs:
    """a byte string consisting of an immutable prefix followed by a mutable suffix"""

    def __init__(self, revs):
        self._prefix = revs
        self._rest = bytearray()

    def __len__(self):
        return len(self._prefix) + len(self._rest)

    def unpack_record(self, rbcrevidx):
        if rbcrevidx < len(self._prefix):
            return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
        else:
            return unpack_from(
                _rbcrecfmt,
                util.buffer(self._rest),
                rbcrevidx - len(self._prefix),
            )

    def make_mutable(self):
        if len(self._prefix) > 0:
            entirety = bytearray()
            entirety[:] = self._prefix
            entirety.extend(self._rest)
            self._rest = entirety
            self._prefix = bytearray()

    def truncate(self, pos):
        self.make_mutable()
        del self._rest[pos:]

    def pack_into(self, rbcrevidx, node, branchidx):
        if rbcrevidx < len(self._prefix):
            self.make_mutable()
        buf = self._rest
        start_offset = rbcrevidx - len(self._prefix)
        end_offset = start_offset + _rbcrecsize

        if len(self._rest) < end_offset:
            # bytearray doesn't allocate extra space at least in Python 3.7.
            # When multiple changesets are added in a row, precise resize would
            # result in quadratic complexity. Overallocate to compensate by
            # using the classic doubling technique for dynamic arrays instead.
            # If there was a gap in the map before, less space will be reserved.
            self._rest.extend(b'\0' * end_offset)
        return pack_into(
            _rbcrecfmt,
            buf,
            start_offset,
            node,
            branchidx,
        )

    def extend(self, extension):
        return self._rest.extend(extension)

    def slice(self, begin, end):
        if begin < len(self._prefix):
            acc = bytearray()
            acc[:] = self._prefix[begin:end]
            acc.extend(
                self._rest[begin - len(self._prefix) : end - len(self._prefix)]
            )
            return acc
        return self._rest[begin - len(self._prefix) : end - len(self._prefix)]


class revbranchcache:
    """Persistent cache, mapping from revision number to branch name and close.
    This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs file will thus still give the right
    result while converging towards full recovery ... assuming no incorrectly
    matching node hashes.
    The record also contains 4 bytes where 31 bits contain the index of the
    branch and the last bit indicates that it is a branch close commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """
802
804
803 def __init__(self, repo, readonly=True):
805 def __init__(self, repo, readonly=True):
804 assert repo.filtername is None
806 assert repo.filtername is None
805 self._repo = repo
807 self._repo = repo
806 self._names = [] # branch names in local encoding with static index
808 self._names = [] # branch names in local encoding with static index
807 self._rbcrevs = rbcrevs(bytearray())
809 self._rbcrevs = rbcrevs(bytearray())
808 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
810 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
809 try:
811 try:
810 bndata = repo.cachevfs.read(_rbcnames)
812 bndata = repo.cachevfs.read(_rbcnames)
811 self._rbcsnameslen = len(bndata) # for verification before writing
813 self._rbcsnameslen = len(bndata) # for verification before writing
812 if bndata:
814 if bndata:
813 self._names = [
815 self._names = [
814 encoding.tolocal(bn) for bn in bndata.split(b'\0')
816 encoding.tolocal(bn) for bn in bndata.split(b'\0')
815 ]
817 ]
816 except (IOError, OSError):
818 except (IOError, OSError):
817 if readonly:
819 if readonly:
818 # don't try to use cache - fall back to the slow path
820 # don't try to use cache - fall back to the slow path
819 self.branchinfo = self._branchinfo
821 self.branchinfo = self._branchinfo
820
822
821 if self._names:
823 if self._names:
822 try:
824 try:
823 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
825 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
824 with repo.cachevfs(_rbcrevs) as fp:
826 with repo.cachevfs(_rbcrevs) as fp:
825 data = util.buffer(util.mmapread(fp))
827 data = util.buffer(util.mmapread(fp))
826 else:
828 else:
827 data = repo.cachevfs.read(_rbcrevs)
829 data = repo.cachevfs.read(_rbcrevs)
828 self._rbcrevs = rbcrevs(data)
830 self._rbcrevs = rbcrevs(data)
829 except (IOError, OSError) as inst:
831 except (IOError, OSError) as inst:
830 repo.ui.debug(
832 repo.ui.debug(
831 b"couldn't read revision branch cache: %s\n"
833 b"couldn't read revision branch cache: %s\n"
832 % stringutil.forcebytestr(inst)
834 % stringutil.forcebytestr(inst)
833 )
835 )
834 # remember number of good records on disk
836 # remember number of good records on disk
835 self._rbcrevslen = min(
837 self._rbcrevslen = min(
836 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
838 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
837 )
839 )
838 if self._rbcrevslen == 0:
840 if self._rbcrevslen == 0:
839 self._names = []
841 self._names = []
840 self._rbcnamescount = len(self._names) # number of names read at
842 self._rbcnamescount = len(self._names) # number of names read at
841 # _rbcsnameslen
843 # _rbcsnameslen
842
844
843 def _clear(self):
845 def _clear(self):
844 self._rbcsnameslen = 0
846 self._rbcsnameslen = 0
845 del self._names[:]
847 del self._names[:]
846 self._rbcnamescount = 0
848 self._rbcnamescount = 0
847 self._rbcrevslen = len(self._repo.changelog)
849 self._rbcrevslen = len(self._repo.changelog)
848 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
850 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
849 util.clearcachedproperty(self, b'_namesreverse')
851 util.clearcachedproperty(self, b'_namesreverse')
850
852
851 @util.propertycache
853 @util.propertycache
852 def _namesreverse(self):
854 def _namesreverse(self):
853 return {b: r for r, b in enumerate(self._names)}
855 return {b: r for r, b in enumerate(self._names)}
854
856
    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from the cache, use it if the node matches
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == b'\0\0\0\0':
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug(
                    b"referenced branch names not found"
                    b" - rebuilding revision branch cache from scratch\n"
                )
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug(
                b"history modification detected - truncating "
                b"revision branch cache to revision %d\n" % rev
            )
            truncate = rbcrevidx + _rbcrecsize
            self._rbcrevs.truncate(truncate)
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

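    # A sketch of the fast-path record decoding above, assuming the b'>4sI'
    # record layout this module defines elsewhere (a 4-byte node prefix
    # followed by a big-endian uint32 whose top bit is the close flag); the
    # constant values shown are assumptions restated here for illustration:
    #
    #     import struct
    #     RECFMT = b'>4sI'            # assumed _rbcrecfmt
    #     CLOSEFLAG = 0x80000000      # assumed _rbccloseflag
    #     IDXMASK = 0x7FFFFFFF        # assumed _rbcbranchidxmask
    #     record = struct.pack(RECFMT, b'\xde\xad\xbe\xef', 1 | CLOSEFLAG)
    #     cachenode, branchidx = struct.unpack(RECFMT, record)
    #     close = bool(branchidx & CLOSEFLAG)   # True
    #     branchidx &= IDXMASK                  # back to index 1
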
    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def setdata(self, rev, changelogrevision):
        """add new branch information to the cache"""
        branch, close = changelogrevision.branchinfo

        if branch in self._namesreverse:
            branchidx = self._namesreverse[branch]
        else:
            branchidx = len(self._names)
            self._names.append(branch)
            self._namesreverse[branch] = branchidx
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
        # If no cache data were readable (file missing, bad permissions,
        # etc.), the cache was bypassing itself by setting:
        #
        #   self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypass.
        if 'branchinfo' in vars(self):
            del self.branchinfo

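    # The bypass dropped above relies on plain Python attribute lookup:
    # assigning an instance attribute shadows the class method of the same
    # name, and deleting it restores normal dispatch. A standalone sketch,
    # unrelated to this class's actual types:
    #
    #     class C:
    #         def fast(self):
    #             return b'fast'
    #         def slow(self):
    #             return b'slow'
    #     c = C()
    #     c.fast = c.slow          # instance attribute shadows the method
    #     assert c.fast() == b'slow'
    #     del c.fast               # class method is visible again
    #     assert c.fast() == b'fast'
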
    def _setcachedata(self, rev, node, branchidx):
        """Write the node's branch data to the in-memory cache."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize(b'write-revbranchcache', self.write)

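    # Record placement is pure arithmetic: revision `rev` always lives at
    # byte offset `rev * _rbcrecsize`, so lookups and updates are O(1) and
    # the cache needs no index of its own. A sketch with an assumed record
    # size (the real one comes from the struct format):
    #
    #     RECSIZE = 8                 # assumed: 4-byte node + 4-byte index
    #     def record_offset(rev):
    #         return rev * RECSIZE
    #     assert record_offset(0) == 0
    #     assert record_offset(100) == 800
    #
    # Scheduling `self.write` through `tr.addfinalize` means dirty data is
    # flushed once, at transaction close, instead of after every record.
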
    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = b''
        try:
            # write the new names
            if self._rbcnamescount < len(self._names):
                wlock = repo.wlock(wait=False)
                step = b' names'
                self._writenames(repo)

            # write the new revs
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = b''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                self._writerevs(repo, start)

        except (IOError, OSError, error.Abort, error.LockError) as inst:
            repo.ui.debug(
                b"couldn't write revision branch cache%s: %s\n"
                % (step, stringutil.forcebytestr(inst))
            )
        finally:
            if wlock is not None:
                wlock.release()

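    # A hedged usage sketch: callers normally reach this cache through the
    # repository (the `repo.revbranchcache()` accessor is an assumption
    # here, not shown in this excerpt) and only pay the changelog-parsing
    # cost on a miss; `write()` above is cheap when neither new names nor
    # new revs are dirty:
    #
    #     rbc = repo.revbranchcache()
    #     name, closed = rbc.branchinfo(rev)   # cached after first lookup
    #     rbc.write()                          # no-op if nothing changed
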
    def _writenames(self, repo):
        """write the new branch names to revbranchcache"""
        if self._rbcnamescount != 0:
            f = repo.cachevfs.open(_rbcnames, b'ab')
            if f.tell() == self._rbcsnameslen:
                f.write(b'\0')
            else:
                f.close()
                repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
                self._rbcnamescount = 0
                self._rbcrevslen = 0
        if self._rbcnamescount == 0:
            # before rewriting names, make sure references are removed
            repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
            f = repo.cachevfs.open(_rbcnames, b'wb')
            f.write(
                b'\0'.join(
                    encoding.fromlocal(b)
                    for b in self._names[self._rbcnamescount :]
                )
            )
            self._rbcsnameslen = f.tell()
            f.close()
        self._rbcnamescount = len(self._names)

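    # On disk the names file is simply every known branch name, converted
    # with `encoding.fromlocal`, joined by NUL bytes; new names can thus be
    # appended after a single b'\0' separator. A round-trip sketch with
    # hypothetical data:
    #
    #     payload = b'\0'.join([b'default', b'stable', b'my-feature'])
    #     names = payload.split(b'\0')
    #     assert names[2] == b'my-feature'
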
    def _writerevs(self, repo, start):
        """write the new revs to revbranchcache"""
        revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
        with repo.cachevfs.open(_rbcrevs, b'ab') as f:
            if f.tell() != start:
                repo.ui.debug(
                    b"truncating cache/%s to %d\n" % (_rbcrevs, start)
                )
                f.seek(start)
                if f.tell() != start:
                    start = 0
                    f.seek(start)
                f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs.slice(start, end))
        self._rbcrevslen = revs
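
    # The revs file mirrors the in-memory array: fixed-size records appended
    # in revision order, so a clean write is a single append starting at
    # `_rbcrevslen * _rbcrecsize`. When the file's actual size disagrees
    # with that offset, the code above truncates and rewrites from a safe
    # point rather than trusting stale bytes. A sketch of the append math,
    # assuming 8-byte records:
    #
    #     RECSIZE = 8
    #     rbcrevslen = 5                  # records already on disk
    #     start = rbcrevslen * RECSIZE    # append offset
    #     assert start == 40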