##// END OF EJS Templates
branchcache: simplify a long line...
marmoute -
r52355:de1bc7db default
parent child Browse files
Show More
@@ -1,1021 +1,1019 b''
# branchmap.py - logic to compute, maintain and store branchmap for local repo
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
7
7
8
8
import struct

from .node import (
    bin,
    hex,
    nullrev,
)

from typing import (
    Callable,
    Dict,
    Iterable,
    List,
    Optional,
    Set,
    TYPE_CHECKING,
    Tuple,
    Union,
)

from . import (
    encoding,
    error,
    obsolete,
    scmutil,
    util,
)

from .utils import (
    repoviewutil,
    stringutil,
)
41
41
42 if TYPE_CHECKING:
42 if TYPE_CHECKING:
43 from . import localrepo
43 from . import localrepo
44
44
45 assert [localrepo]
45 assert [localrepo]
46
46
47 subsettable = repoviewutil.subsettable
47 subsettable = repoviewutil.subsettable
48
48
49 calcsize = struct.calcsize
49 calcsize = struct.calcsize
50 pack_into = struct.pack_into
50 pack_into = struct.pack_into
51 unpack_from = struct.unpack_from
51 unpack_from = struct.unpack_from
52
52
53
53
54 class BranchMapCache:
54 class BranchMapCache:
55 """mapping of filtered views of repo with their branchcache"""
55 """mapping of filtered views of repo with their branchcache"""
56
56
57 def __init__(self):
57 def __init__(self):
58 self._per_filter = {}
58 self._per_filter = {}
59
59
60 def __getitem__(self, repo):
60 def __getitem__(self, repo):
61 self.updatecache(repo)
61 self.updatecache(repo)
62 bcache = self._per_filter[repo.filtername]
62 bcache = self._per_filter[repo.filtername]
63 assert bcache._filtername == repo.filtername, (
63 assert bcache._filtername == repo.filtername, (
64 bcache._filtername,
64 bcache._filtername,
65 repo.filtername,
65 repo.filtername,
66 )
66 )
67 return bcache
67 return bcache
68
68
69 def update_disk(self, repo):
69 def update_disk(self, repo):
70 """ensure and up-to-date cache is (or will be) written on disk
70 """ensure and up-to-date cache is (or will be) written on disk
71
71
72 The cache for this repository view is updated if needed and written on
72 The cache for this repository view is updated if needed and written on
73 disk.
73 disk.
74
74
75 If a transaction is in progress, the writing is schedule to transaction
75 If a transaction is in progress, the writing is schedule to transaction
76 close. See the `BranchMapCache.write_delayed` method.
76 close. See the `BranchMapCache.write_delayed` method.
77
77
78 This method exist independently of __getitem__ as it is sometime useful
78 This method exist independently of __getitem__ as it is sometime useful
79 to signal that we have no intend to use the data in memory yet.
79 to signal that we have no intend to use the data in memory yet.
80 """
80 """
81 self.updatecache(repo)
81 self.updatecache(repo)
82 bcache = self._per_filter[repo.filtername]
82 bcache = self._per_filter[repo.filtername]
83 assert bcache._filtername == repo.filtername, (
83 assert bcache._filtername == repo.filtername, (
84 bcache._filtername,
84 bcache._filtername,
85 repo.filtername,
85 repo.filtername,
86 )
86 )
87 bcache.write(repo)
87 bcache.write(repo)
88
88
89 def updatecache(self, repo):
89 def updatecache(self, repo):
90 """Update the cache for the given filtered view on a repository"""
90 """Update the cache for the given filtered view on a repository"""
91 # This can trigger updates for the caches for subsets of the filtered
91 # This can trigger updates for the caches for subsets of the filtered
92 # view, e.g. when there is no cache for this filtered view or the cache
92 # view, e.g. when there is no cache for this filtered view or the cache
93 # is stale.
93 # is stale.
94
94
95 cl = repo.changelog
95 cl = repo.changelog
96 filtername = repo.filtername
96 filtername = repo.filtername
97 bcache = self._per_filter.get(filtername)
97 bcache = self._per_filter.get(filtername)
98 if bcache is None or not bcache.validfor(repo):
98 if bcache is None or not bcache.validfor(repo):
99 # cache object missing or cache object stale? Read from disk
99 # cache object missing or cache object stale? Read from disk
100 bcache = branchcache.fromfile(repo)
100 bcache = branchcache.fromfile(repo)
101
101
102 revs = []
102 revs = []
103 if bcache is None:
103 if bcache is None:
104 # no (fresh) cache available anymore, perhaps we can re-use
104 # no (fresh) cache available anymore, perhaps we can re-use
105 # the cache for a subset, then extend that to add info on missing
105 # the cache for a subset, then extend that to add info on missing
106 # revisions.
106 # revisions.
107 subsetname = subsettable.get(filtername)
107 subsetname = subsettable.get(filtername)
108 if subsetname is not None:
108 if subsetname is not None:
109 subset = repo.filtered(subsetname)
109 subset = repo.filtered(subsetname)
110 bcache = self[subset].copy(repo)
110 bcache = self[subset].copy(repo)
111 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
111 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
112 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
112 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
113 else:
113 else:
114 # nothing to fall back on, start empty.
114 # nothing to fall back on, start empty.
115 bcache = branchcache(repo)
115 bcache = branchcache(repo)
116
116
117 revs.extend(cl.revs(start=bcache.tiprev + 1))
117 revs.extend(cl.revs(start=bcache.tiprev + 1))
118 if revs:
118 if revs:
119 bcache.update(repo, revs)
119 bcache.update(repo, revs)
120
120
121 assert bcache.validfor(repo), filtername
121 assert bcache.validfor(repo), filtername
122 self._per_filter[repo.filtername] = bcache
122 self._per_filter[repo.filtername] = bcache
123
123
124 def replace(self, repo, remotebranchmap):
124 def replace(self, repo, remotebranchmap):
125 """Replace the branchmap cache for a repo with a branch mapping.
125 """Replace the branchmap cache for a repo with a branch mapping.
126
126
127 This is likely only called during clone with a branch map from a
127 This is likely only called during clone with a branch map from a
128 remote.
128 remote.
129
129
130 """
130 """
131 cl = repo.changelog
131 cl = repo.changelog
132 clrev = cl.rev
132 clrev = cl.rev
133 clbranchinfo = cl.branchinfo
133 clbranchinfo = cl.branchinfo
134 rbheads = []
134 rbheads = []
135 closed = set()
135 closed = set()
136 for bheads in remotebranchmap.values():
136 for bheads in remotebranchmap.values():
137 rbheads += bheads
137 rbheads += bheads
138 for h in bheads:
138 for h in bheads:
139 r = clrev(h)
139 r = clrev(h)
140 b, c = clbranchinfo(r)
140 b, c = clbranchinfo(r)
141 if c:
141 if c:
142 closed.add(h)
142 closed.add(h)
143
143
144 if rbheads:
144 if rbheads:
145 rtiprev = max((int(clrev(node)) for node in rbheads))
145 rtiprev = max((int(clrev(node)) for node in rbheads))
146 cache = branchcache(
146 cache = branchcache(
147 repo,
147 repo,
148 remotebranchmap,
148 remotebranchmap,
149 repo[rtiprev].node(),
149 repo[rtiprev].node(),
150 rtiprev,
150 rtiprev,
151 closednodes=closed,
151 closednodes=closed,
152 )
152 )
153
153
154 # Try to stick it as low as possible
154 # Try to stick it as low as possible
155 # filter above served are unlikely to be fetch from a clone
155 # filter above served are unlikely to be fetch from a clone
156 for candidate in (b'base', b'immutable', b'served'):
156 for candidate in (b'base', b'immutable', b'served'):
157 rview = repo.filtered(candidate)
157 rview = repo.filtered(candidate)
158 if cache.validfor(rview):
158 if cache.validfor(rview):
159 cache = self._per_filter[candidate] = cache.copy(rview)
159 cache = self._per_filter[candidate] = cache.copy(rview)
160 cache.write(rview)
160 cache.write(rview)
161 return
161 return
162
162
163 def clear(self):
163 def clear(self):
164 self._per_filter.clear()
164 self._per_filter.clear()
165
165
166 def write_delayed(self, repo):
166 def write_delayed(self, repo):
167 unfi = repo.unfiltered()
167 unfi = repo.unfiltered()
168 for filtername, cache in self._per_filter.items():
168 for filtername, cache in self._per_filter.items():
169 if cache._delayed:
169 if cache._delayed:
170 repo = unfi.filtered(filtername)
170 repo = unfi.filtered(filtername)
171 cache.write(repo)
171 cache.write(repo)
172
172
173
173
174 def _unknownnode(node):
174 def _unknownnode(node):
175 """raises ValueError when branchcache found a node which does not exists"""
175 """raises ValueError when branchcache found a node which does not exists"""
176 raise ValueError('node %s does not exist' % node.hex())
176 raise ValueError('node %s does not exist' % node.hex())
177
177
178
178
179 def _branchcachedesc(repo):
179 def _branchcachedesc(repo):
180 if repo.filtername is not None:
180 if repo.filtername is not None:
181 return b'branch cache (%s)' % repo.filtername
181 return b'branch cache (%s)' % repo.filtername
182 else:
182 else:
183 return b'branch cache'
183 return b'branch cache'
184
184
185
185
186 class _BaseBranchCache:
186 class _BaseBranchCache:
187 """A dict like object that hold branches heads cache.
187 """A dict like object that hold branches heads cache.
188
188
189 This cache is used to avoid costly computations to determine all the
189 This cache is used to avoid costly computations to determine all the
190 branch heads of a repo.
190 branch heads of a repo.
191
191
192 The cache is serialized on disk in the following format:
192 The cache is serialized on disk in the following format:
193
193
194 <tip hex node> <tip rev number> [optional filtered repo hex hash]
194 <tip hex node> <tip rev number> [optional filtered repo hex hash]
195 <branch head hex node> <open/closed state> <branch name>
195 <branch head hex node> <open/closed state> <branch name>
196 <branch head hex node> <open/closed state> <branch name>
196 <branch head hex node> <open/closed state> <branch name>
197 ...
197 ...
198
198
199 The first line is used to check if the cache is still valid. If the
199 The first line is used to check if the cache is still valid. If the
200 branch cache is for a filtered repo view, an optional third hash is
200 branch cache is for a filtered repo view, an optional third hash is
201 included that hashes the hashes of all filtered and obsolete revisions.
201 included that hashes the hashes of all filtered and obsolete revisions.
202
202
203 The open/closed state is represented by a single letter 'o' or 'c'.
203 The open/closed state is represented by a single letter 'o' or 'c'.
204 This field can be used to avoid changelog reads when determining if a
204 This field can be used to avoid changelog reads when determining if a
205 branch head closes a branch or not.
205 branch head closes a branch or not.
206 """
206 """
207
207
208 def __init__(
208 def __init__(
209 self,
209 self,
210 repo: "localrepo.localrepository",
210 repo: "localrepo.localrepository",
211 entries: Union[
211 entries: Union[
212 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
212 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
213 ] = (),
213 ] = (),
214 closed_nodes: Optional[Set[bytes]] = None,
214 closed_nodes: Optional[Set[bytes]] = None,
215 ) -> None:
215 ) -> None:
216 """hasnode is a function which can be used to verify whether changelog
216 """hasnode is a function which can be used to verify whether changelog
217 has a given node or not. If it's not provided, we assume that every node
217 has a given node or not. If it's not provided, we assume that every node
218 we have exists in changelog"""
218 we have exists in changelog"""
219 # closednodes is a set of nodes that close their branch. If the branch
219 # closednodes is a set of nodes that close their branch. If the branch
220 # cache has been updated, it may contain nodes that are no longer
220 # cache has been updated, it may contain nodes that are no longer
221 # heads.
221 # heads.
222 if closed_nodes is None:
222 if closed_nodes is None:
223 closed_nodes = set()
223 closed_nodes = set()
224 self._closednodes = set(closed_nodes)
224 self._closednodes = set(closed_nodes)
225 self._entries = dict(entries)
225 self._entries = dict(entries)
226
226
227 def __iter__(self):
227 def __iter__(self):
228 return iter(self._entries)
228 return iter(self._entries)
229
229
230 def __setitem__(self, key, value):
230 def __setitem__(self, key, value):
231 self._entries[key] = value
231 self._entries[key] = value
232
232
233 def __getitem__(self, key):
233 def __getitem__(self, key):
234 return self._entries[key]
234 return self._entries[key]
235
235
236 def __contains__(self, key):
236 def __contains__(self, key):
237 return key in self._entries
237 return key in self._entries
238
238
239 def iteritems(self):
239 def iteritems(self):
240 return self._entries.items()
240 return self._entries.items()
241
241
242 items = iteritems
242 items = iteritems
243
243
244 def hasbranch(self, label):
244 def hasbranch(self, label):
245 """checks whether a branch of this name exists or not"""
245 """checks whether a branch of this name exists or not"""
246 return label in self._entries
246 return label in self._entries
247
247
248 def _branchtip(self, heads):
248 def _branchtip(self, heads):
249 """Return tuple with last open head in heads and false,
249 """Return tuple with last open head in heads and false,
250 otherwise return last closed head and true."""
250 otherwise return last closed head and true."""
251 tip = heads[-1]
251 tip = heads[-1]
252 closed = True
252 closed = True
253 for h in reversed(heads):
253 for h in reversed(heads):
254 if h not in self._closednodes:
254 if h not in self._closednodes:
255 tip = h
255 tip = h
256 closed = False
256 closed = False
257 break
257 break
258 return tip, closed
258 return tip, closed
259
259
260 def branchtip(self, branch):
260 def branchtip(self, branch):
261 """Return the tipmost open head on branch head, otherwise return the
261 """Return the tipmost open head on branch head, otherwise return the
262 tipmost closed head on branch.
262 tipmost closed head on branch.
263 Raise KeyError for unknown branch."""
263 Raise KeyError for unknown branch."""
264 return self._branchtip(self[branch])[0]
264 return self._branchtip(self[branch])[0]
265
265
266 def iteropen(self, nodes):
266 def iteropen(self, nodes):
267 return (n for n in nodes if n not in self._closednodes)
267 return (n for n in nodes if n not in self._closednodes)
268
268
269 def branchheads(self, branch, closed=False):
269 def branchheads(self, branch, closed=False):
270 heads = self._entries[branch]
270 heads = self._entries[branch]
271 if not closed:
271 if not closed:
272 heads = list(self.iteropen(heads))
272 heads = list(self.iteropen(heads))
273 return heads
273 return heads
274
274
275 def iterbranches(self):
275 def iterbranches(self):
276 for bn, heads in self.items():
276 for bn, heads in self.items():
277 yield (bn, heads) + self._branchtip(heads)
277 yield (bn, heads) + self._branchtip(heads)
278
278
279 def iterheads(self):
279 def iterheads(self):
280 """returns all the heads"""
280 """returns all the heads"""
281 return self._entries.values()
281 return self._entries.values()
282
282
283 def update(self, repo, revgen):
283 def update(self, repo, revgen):
284 """Given a branchhead cache, self, that may have extra nodes or be
284 """Given a branchhead cache, self, that may have extra nodes or be
285 missing heads, and a generator of nodes that are strictly a superset of
285 missing heads, and a generator of nodes that are strictly a superset of
286 heads missing, this function updates self to be correct.
286 heads missing, this function updates self to be correct.
287 """
287 """
288 starttime = util.timer()
288 starttime = util.timer()
289 cl = repo.changelog
289 cl = repo.changelog
290 # collect new branch entries
290 # collect new branch entries
291 newbranches = {}
291 newbranches = {}
292 getbranchinfo = repo.revbranchcache().branchinfo
292 getbranchinfo = repo.revbranchcache().branchinfo
293 max_rev = -1
293 max_rev = -1
294 for r in revgen:
294 for r in revgen:
295 branch, closesbranch = getbranchinfo(r)
295 branch, closesbranch = getbranchinfo(r)
296 newbranches.setdefault(branch, []).append(r)
296 newbranches.setdefault(branch, []).append(r)
297 if closesbranch:
297 if closesbranch:
298 self._closednodes.add(cl.node(r))
298 self._closednodes.add(cl.node(r))
299 max_rev = max(max_rev, r)
299 max_rev = max(max_rev, r)
300 if max_rev < 0:
300 if max_rev < 0:
301 max_rev = None
301 max_rev = None
302
302
303 # Delay fetching the topological heads until they are needed.
303 # Delay fetching the topological heads until they are needed.
304 # A repository without non-continous branches can skip this part.
304 # A repository without non-continous branches can skip this part.
305 topoheads = None
305 topoheads = None
306
306
307 # If a changeset is visible, its parents must be visible too, so
307 # If a changeset is visible, its parents must be visible too, so
308 # use the faster unfiltered parent accessor.
308 # use the faster unfiltered parent accessor.
309 parentrevs = repo.unfiltered().changelog.parentrevs
309 parentrevs = repo.unfiltered().changelog.parentrevs
310
310
311 # Faster than using ctx.obsolete()
311 # Faster than using ctx.obsolete()
312 obsrevs = obsolete.getrevs(repo, b'obsolete')
312 obsrevs = obsolete.getrevs(repo, b'obsolete')
313
313
314 for branch, newheadrevs in newbranches.items():
314 for branch, newheadrevs in newbranches.items():
315 # For every branch, compute the new branchheads.
315 # For every branch, compute the new branchheads.
316 # A branchhead is a revision such that no descendant is on
316 # A branchhead is a revision such that no descendant is on
317 # the same branch.
317 # the same branch.
318 #
318 #
319 # The branchheads are computed iteratively in revision order.
319 # The branchheads are computed iteratively in revision order.
320 # This ensures topological order, i.e. parents are processed
320 # This ensures topological order, i.e. parents are processed
321 # before their children. Ancestors are inclusive here, i.e.
321 # before their children. Ancestors are inclusive here, i.e.
322 # any revision is an ancestor of itself.
322 # any revision is an ancestor of itself.
323 #
323 #
324 # Core observations:
324 # Core observations:
325 # - The current revision is always a branchhead for the
325 # - The current revision is always a branchhead for the
326 # repository up to that point.
326 # repository up to that point.
327 # - It is the first revision of the branch if and only if
327 # - It is the first revision of the branch if and only if
328 # there was no branchhead before. In that case, it is the
328 # there was no branchhead before. In that case, it is the
329 # only branchhead as there are no possible ancestors on
329 # only branchhead as there are no possible ancestors on
330 # the same branch.
330 # the same branch.
331 # - If a parent is on the same branch, a branchhead can
331 # - If a parent is on the same branch, a branchhead can
332 # only be an ancestor of that parent, if it is parent
332 # only be an ancestor of that parent, if it is parent
333 # itself. Otherwise it would have been removed as ancestor
333 # itself. Otherwise it would have been removed as ancestor
334 # of that parent before.
334 # of that parent before.
335 # - Therefore, if all parents are on the same branch, they
335 # - Therefore, if all parents are on the same branch, they
336 # can just be removed from the branchhead set.
336 # can just be removed from the branchhead set.
337 # - If one parent is on the same branch and the other is not
337 # - If one parent is on the same branch and the other is not
338 # and there was exactly one branchhead known, the existing
338 # and there was exactly one branchhead known, the existing
339 # branchhead can only be an ancestor if it is the parent.
339 # branchhead can only be an ancestor if it is the parent.
340 # Otherwise it would have been removed as ancestor of
340 # Otherwise it would have been removed as ancestor of
341 # the parent before. The other parent therefore can't have
341 # the parent before. The other parent therefore can't have
342 # a branchhead as ancestor.
342 # a branchhead as ancestor.
343 # - In all other cases, the parents on different branches
343 # - In all other cases, the parents on different branches
344 # could have a branchhead as ancestor. Those parents are
344 # could have a branchhead as ancestor. Those parents are
345 # kept in the "uncertain" set. If all branchheads are also
345 # kept in the "uncertain" set. If all branchheads are also
346 # topological heads, they can't have descendants and further
346 # topological heads, they can't have descendants and further
347 # checks can be skipped. Otherwise, the ancestors of the
347 # checks can be skipped. Otherwise, the ancestors of the
348 # "uncertain" set are removed from branchheads.
348 # "uncertain" set are removed from branchheads.
349 # This computation is heavy and avoided if at all possible.
349 # This computation is heavy and avoided if at all possible.
350 bheads = self._entries.get(branch, [])
350 bheads = self._entries.get(branch, [])
351 bheadset = {cl.rev(node) for node in bheads}
351 bheadset = {cl.rev(node) for node in bheads}
352 uncertain = set()
352 uncertain = set()
353 for newrev in sorted(newheadrevs):
353 for newrev in sorted(newheadrevs):
354 if newrev in obsrevs:
354 if newrev in obsrevs:
355 # We ignore obsolete changesets as they shouldn't be
355 # We ignore obsolete changesets as they shouldn't be
356 # considered heads.
356 # considered heads.
357 continue
357 continue
358
358
359 if not bheadset:
359 if not bheadset:
360 bheadset.add(newrev)
360 bheadset.add(newrev)
361 continue
361 continue
362
362
363 parents = [p for p in parentrevs(newrev) if p != nullrev]
363 parents = [p for p in parentrevs(newrev) if p != nullrev]
364 samebranch = set()
364 samebranch = set()
365 otherbranch = set()
365 otherbranch = set()
366 obsparents = set()
366 obsparents = set()
367 for p in parents:
367 for p in parents:
368 if p in obsrevs:
368 if p in obsrevs:
369 # We ignored this obsolete changeset earlier, but now
369 # We ignored this obsolete changeset earlier, but now
370 # that it has non-ignored children, we need to make
370 # that it has non-ignored children, we need to make
371 # sure their ancestors are not considered heads. To
371 # sure their ancestors are not considered heads. To
372 # achieve that, we will simply treat this obsolete
372 # achieve that, we will simply treat this obsolete
373 # changeset as a parent from other branch.
373 # changeset as a parent from other branch.
374 obsparents.add(p)
374 obsparents.add(p)
375 elif p in bheadset or getbranchinfo(p)[0] == branch:
375 elif p in bheadset or getbranchinfo(p)[0] == branch:
376 samebranch.add(p)
376 samebranch.add(p)
377 else:
377 else:
378 otherbranch.add(p)
378 otherbranch.add(p)
379 if not (len(bheadset) == len(samebranch) == 1):
379 if not (len(bheadset) == len(samebranch) == 1):
380 uncertain.update(otherbranch)
380 uncertain.update(otherbranch)
381 uncertain.update(obsparents)
381 uncertain.update(obsparents)
382 bheadset.difference_update(samebranch)
382 bheadset.difference_update(samebranch)
383 bheadset.add(newrev)
383 bheadset.add(newrev)
384
384
385 if uncertain:
385 if uncertain:
386 if topoheads is None:
386 if topoheads is None:
387 topoheads = set(cl.headrevs())
387 topoheads = set(cl.headrevs())
388 if bheadset - topoheads:
388 if bheadset - topoheads:
389 floorrev = min(bheadset)
389 floorrev = min(bheadset)
390 if floorrev <= max(uncertain):
390 if floorrev <= max(uncertain):
391 ancestors = set(cl.ancestors(uncertain, floorrev))
391 ancestors = set(cl.ancestors(uncertain, floorrev))
392 bheadset -= ancestors
392 bheadset -= ancestors
393 if bheadset:
393 if bheadset:
394 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
394 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
395
395
396 duration = util.timer() - starttime
396 duration = util.timer() - starttime
397 repo.ui.log(
397 repo.ui.log(
398 b'branchcache',
398 b'branchcache',
399 b'updated %s in %.4f seconds\n',
399 b'updated %s in %.4f seconds\n',
400 _branchcachedesc(repo),
400 _branchcachedesc(repo),
401 duration,
401 duration,
402 )
402 )
403 return max_rev
403 return max_rev
404
404
405
405
406 class branchcache(_BaseBranchCache):
406 class branchcache(_BaseBranchCache):
407 """Branchmap info for a local repo or repoview"""
407 """Branchmap info for a local repo or repoview"""
408
408
409 _base_filename = b"branch2"
409 _base_filename = b"branch2"
410
410
411 def __init__(
411 def __init__(
412 self,
412 self,
413 repo: "localrepo.localrepository",
413 repo: "localrepo.localrepository",
414 entries: Union[
414 entries: Union[
415 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
415 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
416 ] = (),
416 ] = (),
417 tipnode: Optional[bytes] = None,
417 tipnode: Optional[bytes] = None,
418 tiprev: Optional[int] = nullrev,
418 tiprev: Optional[int] = nullrev,
419 filteredhash: Optional[bytes] = None,
419 filteredhash: Optional[bytes] = None,
420 closednodes: Optional[Set[bytes]] = None,
420 closednodes: Optional[Set[bytes]] = None,
421 hasnode: Optional[Callable[[bytes], bool]] = None,
421 hasnode: Optional[Callable[[bytes], bool]] = None,
422 verify_node: bool = False,
422 verify_node: bool = False,
423 ) -> None:
423 ) -> None:
424 """hasnode is a function which can be used to verify whether changelog
424 """hasnode is a function which can be used to verify whether changelog
425 has a given node or not. If it's not provided, we assume that every node
425 has a given node or not. If it's not provided, we assume that every node
426 we have exists in changelog"""
426 we have exists in changelog"""
427 self._filtername = repo.filtername
427 self._filtername = repo.filtername
428 self._delayed = False
428 self._delayed = False
429 if tipnode is None:
429 if tipnode is None:
430 self.tipnode = repo.nullid
430 self.tipnode = repo.nullid
431 else:
431 else:
432 self.tipnode = tipnode
432 self.tipnode = tipnode
433 self.tiprev = tiprev
433 self.tiprev = tiprev
434 self.filteredhash = filteredhash
434 self.filteredhash = filteredhash
435
435
436 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
436 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
437 # closednodes is a set of nodes that close their branch. If the branch
437 # closednodes is a set of nodes that close their branch. If the branch
438 # cache has been updated, it may contain nodes that are no longer
438 # cache has been updated, it may contain nodes that are no longer
439 # heads.
439 # heads.
440
440
441 # Do we need to verify branch at all ?
441 # Do we need to verify branch at all ?
442 self._verify_node = verify_node
442 self._verify_node = verify_node
443 # branches for which nodes are verified
443 # branches for which nodes are verified
444 self._verifiedbranches = set()
444 self._verifiedbranches = set()
445 self._hasnode = None
445 self._hasnode = None
446 if self._verify_node:
446 if self._verify_node:
447 self._hasnode = repo.changelog.hasnode
447 self._hasnode = repo.changelog.hasnode
448
448
449 def validfor(self, repo):
449 def validfor(self, repo):
450 """check that cache contents are valid for (a subset of) this repo
450 """check that cache contents are valid for (a subset of) this repo
451
451
452 - False when the order of changesets changed or if we detect a strip.
452 - False when the order of changesets changed or if we detect a strip.
453 - True when cache is up-to-date for the current repo or its subset."""
453 - True when cache is up-to-date for the current repo or its subset."""
454 try:
454 try:
455 node = repo.changelog.node(self.tiprev)
455 node = repo.changelog.node(self.tiprev)
456 except IndexError:
456 except IndexError:
457 # changesets were stripped and now we don't even have enough to
457 # changesets were stripped and now we don't even have enough to
458 # find tiprev
458 # find tiprev
459 return False
459 return False
460 if self.tipnode != node:
460 if self.tipnode != node:
461 # tiprev doesn't correspond to tipnode: repo was stripped, or this
461 # tiprev doesn't correspond to tipnode: repo was stripped, or this
462 # repo has a different order of changesets
462 # repo has a different order of changesets
463 return False
463 return False
464 tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
464 tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
465 # hashes don't match if this repo view has a different set of filtered
465 # hashes don't match if this repo view has a different set of filtered
466 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
466 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
467 # history was rewritten)
467 # history was rewritten)
468 return self.filteredhash == tiphash
468 return self.filteredhash == tiphash
469
469
470 @classmethod
470 @classmethod
471 def fromfile(cls, repo):
471 def fromfile(cls, repo):
472 f = None
472 f = None
473 try:
473 try:
474 f = repo.cachevfs(cls._filename(repo))
474 f = repo.cachevfs(cls._filename(repo))
475 lineiter = iter(f)
475 lineiter = iter(f)
476 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
476 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
477 last, lrev = cachekey[:2]
477 last, lrev = cachekey[:2]
478 last, lrev = bin(last), int(lrev)
478 last, lrev = bin(last), int(lrev)
479 filteredhash = None
479 filteredhash = None
480 if len(cachekey) > 2:
480 if len(cachekey) > 2:
481 filteredhash = bin(cachekey[2])
481 filteredhash = bin(cachekey[2])
482 bcache = cls(
482 bcache = cls(
483 repo,
483 repo,
484 tipnode=last,
484 tipnode=last,
485 tiprev=lrev,
485 tiprev=lrev,
486 filteredhash=filteredhash,
486 filteredhash=filteredhash,
487 verify_node=True,
487 verify_node=True,
488 )
488 )
489 if not bcache.validfor(repo):
489 if not bcache.validfor(repo):
490 # invalidate the cache
490 # invalidate the cache
491 raise ValueError('tip differs')
491 raise ValueError('tip differs')
492 bcache._load_heads(repo, lineiter)
492 bcache._load_heads(repo, lineiter)
493 except (IOError, OSError):
493 except (IOError, OSError):
494 return None
494 return None
495
495
496 except Exception as inst:
496 except Exception as inst:
497 if repo.ui.debugflag:
497 if repo.ui.debugflag:
498 msg = b'invalid %s: %s\n'
498 msg = b'invalid %s: %s\n'
499 repo.ui.debug(
499 msg %= (
500 msg
500 _branchcachedesc(repo),
501 % (
501 stringutil.forcebytestr(inst),
502 _branchcachedesc(repo),
503 stringutil.forcebytestr(inst),
504 )
505 )
502 )
503 repo.ui.debug(msg)
506 bcache = None
504 bcache = None
507
505
508 finally:
506 finally:
509 if f:
507 if f:
510 f.close()
508 f.close()
511
509
512 return bcache
510 return bcache
513
511
514 def _load_heads(self, repo, lineiter):
512 def _load_heads(self, repo, lineiter):
515 """fully loads the branchcache by reading from the file using the line
513 """fully loads the branchcache by reading from the file using the line
516 iterator passed"""
514 iterator passed"""
517 for line in lineiter:
515 for line in lineiter:
518 line = line.rstrip(b'\n')
516 line = line.rstrip(b'\n')
519 if not line:
517 if not line:
520 continue
518 continue
521 node, state, label = line.split(b" ", 2)
519 node, state, label = line.split(b" ", 2)
522 if state not in b'oc':
520 if state not in b'oc':
523 raise ValueError('invalid branch state')
521 raise ValueError('invalid branch state')
524 label = encoding.tolocal(label.strip())
522 label = encoding.tolocal(label.strip())
525 node = bin(node)
523 node = bin(node)
526 self._entries.setdefault(label, []).append(node)
524 self._entries.setdefault(label, []).append(node)
527 if state == b'c':
525 if state == b'c':
528 self._closednodes.add(node)
526 self._closednodes.add(node)
529
527
530 @classmethod
528 @classmethod
531 def _filename(cls, repo):
529 def _filename(cls, repo):
532 """name of a branchcache file for a given repo or repoview"""
530 """name of a branchcache file for a given repo or repoview"""
533 filename = cls._base_filename
531 filename = cls._base_filename
534 if repo.filtername:
532 if repo.filtername:
535 filename = b'%s-%s' % (filename, repo.filtername)
533 filename = b'%s-%s' % (filename, repo.filtername)
536 return filename
534 return filename
537
535
    def copy(self, repo):
        """return a deep copy of the branchcache object"""
        other = type(self)(
            repo=repo,
            # we always do a shallow copy of self._entries, and the values
            # are always replaced, so no need to deepcopy until the above
            # remains true.
            entries=self._entries,
            tipnode=self.tipnode,
            tiprev=self.tiprev,
            filteredhash=self.filteredhash,
            closednodes=set(self._closednodes),
            verify_node=self._verify_node,
        )
        # the copy will likely schedule a write anyway, but it does not seem
        # to hurt to overschedule
        other._delayed = self._delayed
        # also copy information about the current verification state
        other._verifiedbranches = set(self._verifiedbranches)
        return other
558
556
    def write(self, repo):
        """write the branch cache for ``repo`` to the cache vfs

        The write is delayed while a transaction is still in flight; write
        failures are logged and otherwise ignored, since the cache can always
        be recomputed.
        """
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        tr = repo.currenttransaction()
        if not getattr(tr, 'finalized', True):
            # Avoid premature writing.
            #
            # (The cache warming setup by localrepo will update the file later.)
            self._delayed = True
            return
        try:
            filename = self._filename(repo)
            with repo.cachevfs(filename, b"w", atomictemp=True) as f:
                # first line: cache key — tip node, tip rev, optional hash
                cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
                if self.filteredhash is not None:
                    cachekey.append(hex(self.filteredhash))
                f.write(b" ".join(cachekey) + b'\n')
                nodecount = 0
                # one line per head: "<hex node> <o|c> <branch label>"
                for label, nodes in sorted(self._entries.items()):
                    label = encoding.fromlocal(label)
                    for node in nodes:
                        nodecount += 1
                        if node in self._closednodes:
                            state = b'c'
                        else:
                            state = b'o'
                        f.write(b"%s %s %s\n" % (hex(node), state, label))
            repo.ui.log(
                b'branchcache',
                b'wrote %s with %d labels and %d nodes\n',
                _branchcachedesc(repo),
                len(self._entries),
                nodecount,
            )
            self._delayed = False
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug(
                b"couldn't write branch cache: %s\n"
                % stringutil.forcebytestr(inst)
            )
602
600
603 def _verifybranch(self, branch):
601 def _verifybranch(self, branch):
604 """verify head nodes for the given branch."""
602 """verify head nodes for the given branch."""
605 if not self._verify_node:
603 if not self._verify_node:
606 return
604 return
607 if branch not in self._entries or branch in self._verifiedbranches:
605 if branch not in self._entries or branch in self._verifiedbranches:
608 return
606 return
609 assert self._hasnode is not None
607 assert self._hasnode is not None
610 for n in self._entries[branch]:
608 for n in self._entries[branch]:
611 if not self._hasnode(n):
609 if not self._hasnode(n):
612 _unknownnode(n)
610 _unknownnode(n)
613
611
614 self._verifiedbranches.add(branch)
612 self._verifiedbranches.add(branch)
615
613
616 def _verifyall(self):
614 def _verifyall(self):
617 """verifies nodes of all the branches"""
615 """verifies nodes of all the branches"""
618 for b in self._entries.keys():
616 for b in self._entries.keys():
619 if b not in self._verifiedbranches:
617 if b not in self._verifiedbranches:
620 self._verifybranch(b)
618 self._verifybranch(b)
621
619
622 def __getitem__(self, key):
620 def __getitem__(self, key):
623 self._verifybranch(key)
621 self._verifybranch(key)
624 return super().__getitem__(key)
622 return super().__getitem__(key)
625
623
626 def __contains__(self, key):
624 def __contains__(self, key):
627 self._verifybranch(key)
625 self._verifybranch(key)
628 return super().__contains__(key)
626 return super().__contains__(key)
629
627
630 def iteritems(self):
628 def iteritems(self):
631 self._verifyall()
629 self._verifyall()
632 return super().iteritems()
630 return super().iteritems()
633
631
634 items = iteritems
632 items = iteritems
635
633
636 def iterheads(self):
634 def iterheads(self):
637 """returns all the heads"""
635 """returns all the heads"""
638 self._verifyall()
636 self._verifyall()
639 return super().iterheads()
637 return super().iterheads()
640
638
641 def hasbranch(self, label):
639 def hasbranch(self, label):
642 """checks whether a branch of this name exists or not"""
640 """checks whether a branch of this name exists or not"""
643 self._verifybranch(label)
641 self._verifybranch(label)
644 return super().hasbranch(label)
642 return super().hasbranch(label)
645
643
646 def branchheads(self, branch, closed=False):
644 def branchheads(self, branch, closed=False):
647 self._verifybranch(branch)
645 self._verifybranch(branch)
648 return super().branchheads(branch, closed=closed)
646 return super().branchheads(branch, closed=closed)
649
647
    def update(self, repo, revgen):
        """update the cache with the revisions from ``revgen``, then refresh
        the cache key and schedule a write"""
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        cl = repo.changelog
        max_rev = super().update(repo, revgen)
        # new tip revision which we found after iterating items from new
        # branches
        if max_rev is not None and max_rev > self.tiprev:
            self.tiprev = max_rev
            self.tipnode = cl.node(max_rev)

        if not self.validfor(repo):
            # old cache key is now invalid for the repo, but we've just updated
            # the cache and we assume it's valid, so let's make the cache key
            # valid as well by recomputing it from the cached data
            self.tipnode = repo.nullid
            self.tiprev = nullrev
            for heads in self.iterheads():
                if not heads:
                    # all revisions on a branch are obsolete
                    continue
                # note: tiprev is not necessarily the tip revision of repo,
                # because the tip could be obsolete (i.e. not a head)
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(
            repo, self.tiprev, needobsolete=True
        )

        self.write(repo)
684
682
685
683
class remotebranchcache(_BaseBranchCache):
    """Branchmap info for a remote connection, should not write locally"""

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closednodes: Optional[Set[bytes]] = None,
    ) -> None:
        # plain container: delegate everything to the base class, no
        # filtering/verification/write machinery is attached
        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
698
696
699
697
# Revision branch info cache

_rbcversion = b'-v1'
# NUL-separated, append-only list of branch names (internal encoding)
_rbcnames = b'rbc-names' + _rbcversion
# fixed-size per-revision records, format described by _rbcrecfmt below
_rbcrevs = b'rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = b'>4sI'
_rbcrecsize = calcsize(_rbcrecfmt)
_rbcmininc = 64 * _rbcrecsize
# number of node-hash bytes stored in each record for validation
_rbcnodelen = 4
# lower 31 bits of the second field: index into the branch-name list
_rbcbranchidxmask = 0x7FFFFFFF
# high bit: set when the revision closes its branch
_rbccloseflag = 0x80000000
712
710
713
711
class rbcrevs:
    """a byte string consisting of an immutable prefix followed by a mutable suffix"""

    def __init__(self, revs):
        # ``revs`` may be an immutable buffer (e.g. mmap'ed data) used as-is;
        # all mutation is accumulated in the ``_rest`` bytearray
        self._prefix = revs
        self._rest = bytearray()

    def __len__(self):
        # total logical length in bytes, across both parts
        return len(self._prefix) + len(self._rest)

    def unpack_record(self, rbcrevidx):
        """read the (node-prefix, branch-index) record at byte offset
        ``rbcrevidx``, from whichever part currently holds it"""
        if rbcrevidx < len(self._prefix):
            return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
        else:
            return unpack_from(
                _rbcrecfmt,
                util.buffer(self._rest),
                rbcrevidx - len(self._prefix),
            )

    def make_mutable(self):
        """fold the immutable prefix into the mutable suffix so that the
        entire content lives in ``_rest`` and the prefix becomes empty"""
        if len(self._prefix) > 0:
            entirety = bytearray()
            entirety[:] = self._prefix
            entirety.extend(self._rest)
            self._rest = entirety
            self._prefix = bytearray()

    def truncate(self, pos):
        # drop everything from byte offset ``pos`` onwards
        self.make_mutable()
        del self._rest[pos:]

    def pack_into(self, rbcrevidx, node, branchidx):
        """write a record at byte offset ``rbcrevidx``, growing the mutable
        buffer as needed"""
        if rbcrevidx < len(self._prefix):
            self.make_mutable()
        buf = self._rest
        start_offset = rbcrevidx - len(self._prefix)
        end_offset = start_offset + _rbcrecsize

        if len(self._rest) < end_offset:
            # bytearray doesn't allocate extra space at least in Python 3.7.
            # When multiple changesets are added in a row, precise resize would
            # result in quadratic complexity. Overallocate to compensate by
            # using the classic doubling technique for dynamic arrays instead.
            # If there was a gap in the map before, less space will be reserved.
            self._rest.extend(b'\0' * end_offset)
        return pack_into(
            _rbcrecfmt,
            buf,
            start_offset,
            node,
            branchidx,
        )

    def extend(self, extension):
        # append raw bytes to the mutable suffix
        return self._rest.extend(extension)

    def slice(self, begin, end):
        """return the bytes for [begin, end), stitching prefix and suffix
        together when the range straddles both parts"""
        if begin < len(self._prefix):
            acc = bytearray()
            acc[:] = self._prefix[begin:end]
            acc.extend(
                self._rest[begin - len(self._prefix) : end - len(self._prefix)]
            )
            return acc
        return self._rest[begin - len(self._prefix) : end - len(self._prefix)]
780
778
781
779
class revbranchcache:
    """Persistent cache, mapping from revision number to branch name and close.
    This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs will thus still give the right result
    while converging towards full recovery ... assuming no incorrectly matching
    node hashes.
    The record also contains 4 bytes where 31 bits contains the index of the
    branch and the last bit indicate that it is a branch close commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """

    def __init__(self, repo, readonly=True):
        assert repo.filtername is None
        self._repo = repo
        self._names = []  # branch names in local encoding with static index
        self._rbcrevs = rbcrevs(bytearray())
        self._rbcsnameslen = 0  # length of names read at _rbcsnameslen
        try:
            bndata = repo.cachevfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata)  # for verification before writing
            if bndata:
                self._names = [
                    encoding.tolocal(bn) for bn in bndata.split(b'\0')
                ]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        if self._names:
            try:
                # mmap the revs file when configured, avoiding a full read
                if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
                    with repo.cachevfs(_rbcrevs) as fp:
                        data = util.buffer(util.mmapread(fp))
                else:
                    data = repo.cachevfs.read(_rbcrevs)
                self._rbcrevs = rbcrevs(data)
            except (IOError, OSError) as inst:
                repo.ui.debug(
                    b"couldn't read revision branch cache: %s\n"
                    % stringutil.forcebytestr(inst)
                )
        # remember number of good records on disk
        self._rbcrevslen = min(
            len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
        )
        if self._rbcrevslen == 0:
            self._names = []
        self._rbcnamescount = len(self._names)  # number of names read at
        # _rbcsnameslen

    def _clear(self):
        # forget everything read so far and size the rev records to the
        # current changelog so the whole cache is rebuilt lazily
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._rbcrevslen = len(self._repo.changelog)
        self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
        util.clearcachedproperty(self, b'_namesreverse')

    @util.propertycache
    def _namesreverse(self):
        # lazily computed reverse mapping: branch name -> index in self._names
        return {b: r for r, b in enumerate(self._names)}

    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == b'\0\0\0\0':
            # all-zero record: never written, fall through to the slow path
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug(
                    b"referenced branch names not found"
                    b" - rebuilding revision branch cache from scratch\n"
                )
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug(
                b"history modification detected - truncating "
                b"revision branch cache to revision %d\n" % rev
            )
            truncate = rbcrevidx + _rbcrecsize
            self._rbcrevs.truncate(truncate)
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            # first time this branch name is seen: assign the next index
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def setdata(self, rev, changelogrevision):
        """add new data information to the cache"""
        branch, close = changelogrevision.branchinfo

        if branch in self._namesreverse:
            branchidx = self._namesreverse[branch]
        else:
            branchidx = len(self._names)
            self._names.append(branch)
            self._namesreverse[branch] = branchidx
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
        # If no cache data were readable (non exists, bad permission, etc)
        # the cache was bypassing itself by setting:
        #
        #   self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypassing.
        if 'branchinfo' in vars(self):
            del self.branchinfo

    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
        self._rbcrevslen = min(self._rbcrevslen, rev)

        # schedule a flush at the end of the current transaction, if any
        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize(b'write-revbranchcache', self.write)

    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = b''
        try:
            # write the new names
            if self._rbcnamescount < len(self._names):
                wlock = repo.wlock(wait=False)
                step = b' names'
                self._writenames(repo)

            # write the new revs
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = b''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                self._writerevs(repo, start)

        except (IOError, OSError, error.Abort, error.LockError) as inst:
            # best-effort cache: log the failure and carry on
            repo.ui.debug(
                b"couldn't write revision branch cache%s: %s\n"
                % (step, stringutil.forcebytestr(inst))
            )
        finally:
            if wlock is not None:
                wlock.release()

    def _writenames(self, repo):
        """write the new branch names to revbranchcache"""
        if self._rbcnamescount != 0:
            f = repo.cachevfs.open(_rbcnames, b'ab')
            if f.tell() == self._rbcsnameslen:
                f.write(b'\0')
            else:
                # on-disk names differ from what we read: rewrite from scratch
                f.close()
                repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
                self._rbcnamescount = 0
                self._rbcrevslen = 0
        if self._rbcnamescount == 0:
            # before rewriting names, make sure references are removed
            repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
            f = repo.cachevfs.open(_rbcnames, b'wb')
        f.write(
            b'\0'.join(
                encoding.fromlocal(b)
                for b in self._names[self._rbcnamescount :]
            )
        )
        self._rbcsnameslen = f.tell()
        f.close()
        self._rbcnamescount = len(self._names)

    def _writerevs(self, repo, start):
        """write the new revs to revbranchcache"""
        revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
        with repo.cachevfs.open(_rbcrevs, b'ab') as f:
            if f.tell() != start:
                # file is not where we expected it to end: truncate back
                repo.ui.debug(
                    b"truncating cache/%s to %d\n" % (_rbcrevs, start)
                )
                f.seek(start)
                if f.tell() != start:
                    # seek failed (e.g. short file): rewrite entirely
                    start = 0
                    f.seek(start)
                f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs.slice(start, end))
        self._rbcrevslen = revs
General Comments 0
You need to be logged in to leave comments. Login now