branchcache: avoid creating a `None` filter repoview when writing...
marmoute - r52361:4188a057 default
@@ -1,1042 +1,1045 b''
# branchmap.py - logic to compute, maintain and store the branchmap for a local repo
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import struct

from .node import (
    bin,
    hex,
    nullrev,
)

from typing import (
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Optional,
    Set,
    TYPE_CHECKING,
    Tuple,
    Union,
)

from . import (
    encoding,
    error,
    obsolete,
    scmutil,
    util,
)

from .utils import (
    repoviewutil,
    stringutil,
)

if TYPE_CHECKING:
    from . import localrepo

    assert [localrepo]

subsettable = repoviewutil.subsettable

calcsize = struct.calcsize
pack_into = struct.pack_into
unpack_from = struct.unpack_from


class BranchMapCache:
    """mapping of filtered views of repo with their branchcache"""

    def __init__(self):
        self._per_filter = {}

    def __getitem__(self, repo):
        self.updatecache(repo)
        bcache = self._per_filter[repo.filtername]
        assert bcache._filtername == repo.filtername, (
            bcache._filtername,
            repo.filtername,
        )
        return bcache

    def update_disk(self, repo):
        """ensure an up-to-date cache is (or will be) written on disk

        The cache for this repository view is updated if needed and written on
        disk.

        If a transaction is in progress, the write is scheduled at transaction
        close. See the `BranchMapCache.write_delayed` method.

        This method exists independently of __getitem__ as it is sometimes
        useful to signal that we have no intent to use the data in memory yet.
        """
        self.updatecache(repo)
        bcache = self._per_filter[repo.filtername]
        assert bcache._filtername == repo.filtername, (
            bcache._filtername,
            repo.filtername,
        )
        bcache.write(repo)

    def updatecache(self, repo):
        """Update the cache for the given filtered view on a repository"""
        # This can trigger updates for the caches for subsets of the filtered
        # view, e.g. when there is no cache for this filtered view or the cache
        # is stale.

        cl = repo.changelog
        filtername = repo.filtername
        bcache = self._per_filter.get(filtername)
        if bcache is None or not bcache.validfor(repo):
            # cache object missing or cache object stale? Read from disk
            bcache = branchcache.fromfile(repo)

        revs = []
        if bcache is None:
            # no (fresh) cache available anymore, perhaps we can re-use
            # the cache for a subset, then extend that to add info on missing
            # revisions.
            subsetname = subsettable.get(filtername)
            if subsetname is not None:
                subset = repo.filtered(subsetname)
                bcache = self[subset].copy(repo)
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= bcache.tiprev)
            else:
                # nothing to fall back on, start empty.
                bcache = branchcache(repo)

        revs.extend(cl.revs(start=bcache.tiprev + 1))
        if revs:
            bcache.update(repo, revs)

        assert bcache.validfor(repo), filtername
        self._per_filter[repo.filtername] = bcache

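    # For orientation: `updatecache` above walks repoviewutil.subsettable to
    # find a broader filter level whose cache can seed this one. A rough
    # sketch of that fallback chain (the authoritative mapping lives in
    # repoviewutil and has more entries than shown here):
    #
    #     {None: b'visible', b'visible': b'served',
    #      b'served': b'immutable', b'immutable': b'base'}
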
    def replace(self, repo, remotebranchmap):
        """Replace the branchmap cache for a repo with a branch mapping.

        This is likely only called during clone with a branch map from a
        remote.

        """
        cl = repo.changelog
        clrev = cl.rev
        clbranchinfo = cl.branchinfo
        rbheads = []
        closed = set()
        for bheads in remotebranchmap.values():
            rbheads += bheads
            for h in bheads:
                r = clrev(h)
                b, c = clbranchinfo(r)
                if c:
                    closed.add(h)

        if rbheads:
            rtiprev = max((int(clrev(node)) for node in rbheads))
            cache = branchcache(
                repo,
                remotebranchmap,
                repo[rtiprev].node(),
                rtiprev,
                closednodes=closed,
            )

            # Try to stick it as low as possible
            # filters above served are unlikely to be fetched from a clone
            for candidate in (b'base', b'immutable', b'served'):
                rview = repo.filtered(candidate)
                if cache.validfor(rview):
                    cache = self._per_filter[candidate] = cache.copy(rview)
                    cache.write(rview)
                    return

    def clear(self):
        self._per_filter.clear()

    def write_delayed(self, repo):
        unfi = repo.unfiltered()
        for filtername, cache in self._per_filter.items():
            if cache._delayed:
-                repo = unfi.filtered(filtername)
+                if filtername is None:
+                    repo = unfi
+                else:
+                    repo = unfi.filtered(filtername)
                cache.write(repo)


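# The hunk above is the substance of this revision: the unfiltered view has a
# filtername of None, and passing None to filtered() would manufacture a
# repoview with a `None` filter instead of reusing the unfiltered repository
# itself. The helper below is an illustrative sketch of the added guard, not
# part of the module:
def _view_for_write_sketch(unfi, filtername):
    """Return the repo (or repoview) a delayed cache write should use."""
    if filtername is None:
        return unfi  # unfiltered cache: reuse the repo as-is
    return unfi.filtered(filtername)  # named view: build the repoview

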
def _unknownnode(node):
    """raises ValueError when branchcache finds a node which does not exist"""
    raise ValueError('node %s does not exist' % node.hex())


def _branchcachedesc(repo):
    if repo.filtername is not None:
        return b'branch cache (%s)' % repo.filtername
    else:
        return b'branch cache'


class _BaseBranchCache:
    """A dict-like object that holds the branch heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered and obsolete revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closed_nodes: Optional[Set[bytes]] = None,
    ) -> None:
        """hasnode is a function which can be used to verify whether changelog
        has a given node or not. If it's not provided, we assume that every node
        we have exists in changelog"""
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closed_nodes is None:
            closed_nodes = set()
        self._closednodes = set(closed_nodes)
        self._entries = dict(entries)

    def __iter__(self):
        return iter(self._entries)

    def __setitem__(self, key, value):
        self._entries[key] = value

    def __getitem__(self, key):
        return self._entries[key]

    def __contains__(self, key):
        return key in self._entries

    def iteritems(self):
        return self._entries.items()

    items = iteritems

    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        return label in self._entries

    def _branchtip(self, heads):
        """Return a tuple of the last open head in heads and False;
        otherwise return the last closed head and True."""
        tip = heads[-1]
        closed = True
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        """Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch."""
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        heads = self._entries[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        for bn, heads in self.items():
            yield (bn, heads) + self._branchtip(heads)

    def iterheads(self):
        """returns all the heads"""
        return self._entries.values()

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        the missing heads, this function updates self to be correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        max_rev = -1
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))
            max_rev = max(max_rev, r)
        if max_rev < 0:
            max_rev = None

        # Delay fetching the topological heads until they are needed.
        # A repository without non-contiguous branches can skip this part.
        topoheads = None

        # If a changeset is visible, its parents must be visible too, so
        # use the faster unfiltered parent accessor.
        parentrevs = repo.unfiltered().changelog.parentrevs

        # Faster than using ctx.obsolete()
        obsrevs = obsolete.getrevs(repo, b'obsolete')

        for branch, newheadrevs in newbranches.items():
            # For every branch, compute the new branchheads.
            # A branchhead is a revision such that no descendant is on
            # the same branch.
            #
            # The branchheads are computed iteratively in revision order.
            # This ensures topological order, i.e. parents are processed
            # before their children. Ancestors are inclusive here, i.e.
            # any revision is an ancestor of itself.
            #
            # Core observations:
            # - The current revision is always a branchhead for the
            #   repository up to that point.
            # - It is the first revision of the branch if and only if
            #   there was no branchhead before. In that case, it is the
            #   only branchhead as there are no possible ancestors on
            #   the same branch.
            # - If a parent is on the same branch, a branchhead can
            #   only be an ancestor of that parent if it is the parent
            #   itself. Otherwise it would have been removed as ancestor
            #   of that parent before.
            # - Therefore, if all parents are on the same branch, they
            #   can just be removed from the branchhead set.
            # - If one parent is on the same branch and the other is not
            #   and there was exactly one branchhead known, the existing
            #   branchhead can only be an ancestor if it is the parent.
            #   Otherwise it would have been removed as ancestor of
            #   the parent before. The other parent therefore can't have
            #   a branchhead as ancestor.
            # - In all other cases, the parents on different branches
            #   could have a branchhead as ancestor. Those parents are
            #   kept in the "uncertain" set. If all branchheads are also
            #   topological heads, they can't have descendants and further
            #   checks can be skipped. Otherwise, the ancestors of the
            #   "uncertain" set are removed from branchheads.
            #   This computation is heavy and avoided if at all possible.
            bheads = self._entries.get(branch, [])
            bheadset = {cl.rev(node) for node in bheads}
            uncertain = set()
            for newrev in sorted(newheadrevs):
                if newrev in obsrevs:
                    # We ignore obsolete changesets as they shouldn't be
                    # considered heads.
                    continue

                if not bheadset:
                    bheadset.add(newrev)
                    continue

                parents = [p for p in parentrevs(newrev) if p != nullrev]
                samebranch = set()
                otherbranch = set()
                obsparents = set()
                for p in parents:
                    if p in obsrevs:
                        # We ignored this obsolete changeset earlier, but now
                        # that it has non-ignored children, we need to make
                        # sure their ancestors are not considered heads. To
                        # achieve that, we will simply treat this obsolete
                        # changeset as a parent from other branch.
                        obsparents.add(p)
                    elif p in bheadset or getbranchinfo(p)[0] == branch:
                        samebranch.add(p)
                    else:
                        otherbranch.add(p)
                if not (len(bheadset) == len(samebranch) == 1):
                    uncertain.update(otherbranch)
                    uncertain.update(obsparents)
                bheadset.difference_update(samebranch)
                bheadset.add(newrev)

            if uncertain:
                if topoheads is None:
                    topoheads = set(cl.headrevs())
                if bheadset - topoheads:
                    floorrev = min(bheadset)
                    if floorrev <= max(uncertain):
                        ancestors = set(cl.ancestors(uncertain, floorrev))
                        bheadset -= ancestors
            if bheadset:
                self[branch] = [cl.node(rev) for rev in sorted(bheadset)]

        duration = util.timer() - starttime
        repo.ui.log(
            b'branchcache',
            b'updated %s in %.4f seconds\n',
            _branchcachedesc(repo),
            duration,
        )
        return max_rev


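# A toy model of the head bookkeeping in _BaseBranchCache.update above. This
# is an illustrative simplification that ignores the obsolete and "uncertain"
# handling and is not part of the module: a parent on the same branch stops
# being a head once a child on that branch arrives.
def _add_rev_sketch(heads, newrev, parents, on_branch):
    heads = set(heads)
    heads -= {p for p in parents if p in on_branch}  # parents lose head status
    heads.add(newrev)  # the newest revision of a branch is always a head
    return heads


assert _add_rev_sketch({1}, 2, parents=[1], on_branch={1, 2}) == {2}
assert _add_rev_sketch({2}, 3, parents=[0], on_branch={2, 3}) == {2, 3}

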
class branchcache(_BaseBranchCache):
    """Branchmap info for a local repo or repoview"""

    _base_filename = b"branch2"

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        tipnode: Optional[bytes] = None,
        tiprev: Optional[int] = nullrev,
        filteredhash: Optional[bytes] = None,
        closednodes: Optional[Set[bytes]] = None,
        hasnode: Optional[Callable[[bytes], bool]] = None,
        verify_node: bool = False,
    ) -> None:
        """hasnode is a function which can be used to verify whether changelog
        has a given node or not. If it's not provided, we assume that every node
        we have exists in changelog"""
        self._filtername = repo.filtername
        self._delayed = False
        if tipnode is None:
            self.tipnode = repo.nullid
        else:
            self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash

        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.

        # Do we need to verify branches at all?
        self._verify_node = verify_node
        # branches for which nodes are verified
        self._verifiedbranches = set()
        self._hasnode = None
        if self._verify_node:
            self._hasnode = repo.changelog.hasnode

    def validfor(self, repo):
        """check that cache contents are valid for (a subset of) this repo

        - False when the order of changesets changed or if we detect a strip.
        - True when cache is up-to-date for the current repo or its subset."""
        try:
            node = repo.changelog.node(self.tiprev)
        except IndexError:
            # changesets were stripped and now we don't even have enough to
            # find tiprev
            return False
        if self.tipnode != node:
            # tiprev doesn't correspond to tipnode: repo was stripped, or this
            # repo has a different order of changesets
            return False
        tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
        # hashes don't match if this repo view has a different set of filtered
        # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
        # history was rewritten)
        return self.filteredhash == tiphash

    @classmethod
    def fromfile(cls, repo):
        f = None
        try:
            f = repo.cachevfs(cls._filename(repo))
            lineiter = iter(f)
            init_kwargs = cls._load_header(repo, lineiter)
            bcache = cls(
                repo,
                verify_node=True,
                **init_kwargs,
            )
            if not bcache.validfor(repo):
                # invalidate the cache
                raise ValueError('tip differs')
            bcache._load_heads(repo, lineiter)
        except (IOError, OSError):
            return None

        except Exception as inst:
            if repo.ui.debugflag:
                msg = b'invalid %s: %s\n'
                msg %= (
                    _branchcachedesc(repo),
                    stringutil.forcebytestr(inst),
                )
                repo.ui.debug(msg)
            bcache = None

        finally:
            if f:
                f.close()

        return bcache

    @classmethod
    def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
        """parse the head of a branchmap file

        return parameters to pass to a newly created class instance.
        """
        cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
        last, lrev = cachekey[:2]
        last, lrev = bin(last), int(lrev)
        filteredhash = None
        if len(cachekey) > 2:
            filteredhash = bin(cachekey[2])
        return {
            "tipnode": last,
            "tiprev": lrev,
            "filteredhash": filteredhash,
        }

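    # A concrete cache-key line as consumed by _load_header above (sample
    # values; the third field is optional and, per _write_header below, only
    # written when filteredhash is not None):
    #
    #     b'<40-hex tipnode> 1050 <40-hex filteredhash>\n'
    #
    # parses into {"tipnode": <20 bytes>, "tiprev": 1050,
    # "filteredhash": <20 bytes>}.
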
    def _load_heads(self, repo, lineiter):
        """fully loads the branchcache by reading from the file using the line
        iterator passed"""
        for line in lineiter:
            line = line.rstrip(b'\n')
            if not line:
                continue
            node, state, label = line.split(b" ", 2)
            if state not in b'oc':
                raise ValueError('invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            self._entries.setdefault(label, []).append(node)
            if state == b'c':
                self._closednodes.add(node)

    @classmethod
    def _filename(cls, repo):
        """name of a branchcache file for a given repo or repoview"""
        filename = cls._base_filename
        if repo.filtername:
            filename = b'%s-%s' % (filename, repo.filtername)
        return filename

    def copy(self, repo):
        """return a deep copy of the branchcache object"""
        other = type(self)(
            repo=repo,
            # we always do a shallow copy of self._entries, and the values
            # are always replaced, so no need to deepcopy as long as the
            # above remains true.
            entries=self._entries,
            tipnode=self.tipnode,
            tiprev=self.tiprev,
            filteredhash=self.filteredhash,
            closednodes=set(self._closednodes),
            verify_node=self._verify_node,
        )
        # the copy will likely schedule a write anyway, but it does not seem
        # to hurt to overschedule
        other._delayed = self._delayed
        # also copy information about the current verification state
        other._verifiedbranches = set(self._verifiedbranches)
        return other

    def write(self, repo):
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        tr = repo.currenttransaction()
        if not getattr(tr, 'finalized', True):
            # Avoid premature writing.
            #
            # (The cache warming setup by localrepo will update the file later.)
            self._delayed = True
            return
        try:
            filename = self._filename(repo)
            with repo.cachevfs(filename, b"w", atomictemp=True) as f:
                self._write_header(f)
                nodecount = self._write_heads(f)
            repo.ui.log(
                b'branchcache',
                b'wrote %s with %d labels and %d nodes\n',
                _branchcachedesc(repo),
                len(self._entries),
                nodecount,
            )
            self._delayed = False
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug(
                b"couldn't write branch cache: %s\n"
                % stringutil.forcebytestr(inst)
            )

    def _write_header(self, fp) -> None:
        """write the branch cache header to a file"""
        cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
        if self.filteredhash is not None:
            cachekey.append(hex(self.filteredhash))
        fp.write(b" ".join(cachekey) + b'\n')

    def _write_heads(self, fp) -> int:
        """write list of heads to a file

        Return the number of heads written."""
        nodecount = 0
        for label, nodes in sorted(self._entries.items()):
            label = encoding.fromlocal(label)
            for node in nodes:
                nodecount += 1
                if node in self._closednodes:
                    state = b'c'
                else:
                    state = b'o'
                fp.write(b"%s %s %s\n" % (hex(node), state, label))
        return nodecount

    def _verifybranch(self, branch):
        """verify head nodes for the given branch."""
        if not self._verify_node:
            return
        if branch not in self._entries or branch in self._verifiedbranches:
            return
        assert self._hasnode is not None
        for n in self._entries[branch]:
            if not self._hasnode(n):
                _unknownnode(n)

        self._verifiedbranches.add(branch)

    def _verifyall(self):
        """verifies nodes of all the branches"""
        for b in self._entries.keys():
            if b not in self._verifiedbranches:
                self._verifybranch(b)

    def __getitem__(self, key):
        self._verifybranch(key)
        return super().__getitem__(key)

    def __contains__(self, key):
        self._verifybranch(key)
        return super().__contains__(key)

    def iteritems(self):
        self._verifyall()
        return super().iteritems()

    items = iteritems

    def iterheads(self):
        """returns all the heads"""
        self._verifyall()
        return super().iterheads()

    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        self._verifybranch(label)
        return super().hasbranch(label)

    def branchheads(self, branch, closed=False):
        self._verifybranch(branch)
        return super().branchheads(branch, closed=closed)

    def update(self, repo, revgen):
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        cl = repo.changelog
        max_rev = super().update(repo, revgen)
        # new tip revision which we found after iterating items from new
        # branches
        if max_rev is not None and max_rev > self.tiprev:
            self.tiprev = max_rev
            self.tipnode = cl.node(max_rev)

        if not self.validfor(repo):
            # old cache key is now invalid for the repo, but we've just updated
            # the cache and we assume it's valid, so let's make the cache key
            # valid as well by recomputing it from the cached data
            self.tipnode = repo.nullid
            self.tiprev = nullrev
            for heads in self.iterheads():
                if not heads:
                    # all revisions on a branch are obsolete
                    continue
                # note: tiprev is not necessarily the tip revision of repo,
                # because the tip could be obsolete (i.e. not a head)
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(
            repo, self.tiprev, needobsolete=True
        )

        self.write(repo)


class remotebranchcache(_BaseBranchCache):
    """Branchmap info for a remote connection, should not write locally"""

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closednodes: Optional[Set[bytes]] = None,
    ) -> None:
        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)


# Revision branch info cache

_rbcversion = b'-v1'
_rbcnames = b'rbc-names' + _rbcversion
_rbcrevs = b'rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = b'>4sI'
_rbcrecsize = calcsize(_rbcrecfmt)
_rbcmininc = 64 * _rbcrecsize
_rbcnodelen = 4
_rbcbranchidxmask = 0x7FFFFFFF
_rbccloseflag = 0x80000000


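# A sketch of one fixed-size rbc-revs record built from the constants above
# (the sample node prefix and branch index are made up; this block is
# illustrative, not part of the module):
_record_sketch = struct.pack(_rbcrecfmt, b'\xde\xad\xbe\xef', 5 | _rbccloseflag)
assert len(_record_sketch) == _rbcrecsize == 8
_node_sketch, _idx_sketch = struct.unpack(_rbcrecfmt, _record_sketch)
assert bool(_idx_sketch & _rbccloseflag)
assert (_idx_sketch & _rbcbranchidxmask) == 5

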
class rbcrevs:
    """a byte string consisting of an immutable prefix followed by a mutable suffix"""

    def __init__(self, revs):
        self._prefix = revs
        self._rest = bytearray()

    def __len__(self):
        return len(self._prefix) + len(self._rest)

    def unpack_record(self, rbcrevidx):
        if rbcrevidx < len(self._prefix):
            return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
        else:
            return unpack_from(
                _rbcrecfmt,
                util.buffer(self._rest),
                rbcrevidx - len(self._prefix),
            )

    def make_mutable(self):
        if len(self._prefix) > 0:
            entirety = bytearray()
            entirety[:] = self._prefix
            entirety.extend(self._rest)
            self._rest = entirety
            self._prefix = bytearray()

    def truncate(self, pos):
        self.make_mutable()
        del self._rest[pos:]

    def pack_into(self, rbcrevidx, node, branchidx):
        if rbcrevidx < len(self._prefix):
            self.make_mutable()
        buf = self._rest
        start_offset = rbcrevidx - len(self._prefix)
        end_offset = start_offset + _rbcrecsize

        if len(self._rest) < end_offset:
            # bytearray doesn't allocate extra space at least in Python 3.7.
            # When multiple changesets are added in a row, precise resize would
            # result in quadratic complexity. Overallocate to compensate by
            # using the classic doubling technique for dynamic arrays instead.
            # If there was a gap in the map before, less space will be reserved.
            self._rest.extend(b'\0' * end_offset)
        return pack_into(
            _rbcrecfmt,
            buf,
            start_offset,
            node,
            branchidx,
        )

    def extend(self, extension):
        return self._rest.extend(extension)

    def slice(self, begin, end):
        if begin < len(self._prefix):
            acc = bytearray()
            acc[:] = self._prefix[begin:end]
            acc.extend(
                self._rest[begin - len(self._prefix) : end - len(self._prefix)]
            )
            return acc
        return self._rest[begin - len(self._prefix) : end - len(self._prefix)]


class revbranchcache:
    """Persistent cache, mapping from revision number to branch name and close.
    This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs file will thus still give the right
    result while converging towards full recovery ... assuming no incorrectly
    matching node hashes.
    The record also contains 4 bytes where 31 bits contain the index of the
    branch and the last bit indicates that it is a branch close commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """

    def __init__(self, repo, readonly=True):
        assert repo.filtername is None
        self._repo = repo
        self._names = []  # branch names in local encoding with static index
        self._rbcrevs = rbcrevs(bytearray())
        self._rbcsnameslen = 0  # length of names read at _rbcsnameslen
        try:
            bndata = repo.cachevfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata)  # for verification before writing
            if bndata:
                self._names = [
                    encoding.tolocal(bn) for bn in bndata.split(b'\0')
                ]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        if self._names:
            try:
                if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
                    with repo.cachevfs(_rbcrevs) as fp:
                        data = util.buffer(util.mmapread(fp))
                else:
                    data = repo.cachevfs.read(_rbcrevs)
                self._rbcrevs = rbcrevs(data)
            except (IOError, OSError) as inst:
                repo.ui.debug(
                    b"couldn't read revision branch cache: %s\n"
                    % stringutil.forcebytestr(inst)
                )
        # remember number of good records on disk
        self._rbcrevslen = min(
            len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
        )
        if self._rbcrevslen == 0:
            self._names = []
        self._rbcnamescount = len(self._names)  # number of names read at
        # _rbcsnameslen

    def _clear(self):
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._rbcrevslen = len(self._repo.changelog)
        self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
        util.clearcachedproperty(self, b'_namesreverse')

    @util.propertycache
    def _namesreverse(self):
        return {b: r for r, b in enumerate(self._names)}

    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == b'\0\0\0\0':
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug(
                    b"referenced branch names not found"
                    b" - rebuilding revision branch cache from scratch\n"
                )
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug(
                b"history modification detected - truncating "
                b"revision branch cache to revision %d\n" % rev
            )
            truncate = rbcrevidx + _rbcrecsize
            self._rbcrevs.truncate(truncate)
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

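    # Usage sketch (assumed access path, based on the wider codebase rather
    # than this excerpt): callers normally reach this through the repo's
    # cached instance,
    #
    #     rbc = repo.revbranchcache()
    #     name, closed = rbc.branchinfo(rev)
    #
    # so that loops over many revisions stay on the fast path above and
    # only hit the changelog for records that are missing or stale.
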
    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

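    # Sketch of the close-flag encoding used here (mask semantics assumed
    # from the constants earlier in this file, i.e. a high-bit flag):
    #
    #     stored = branchidx | _rbccloseflag        # mark the head closed
    #     closed = bool(stored & _rbccloseflag)     # -> True
    #     index = stored & _rbcbranchidxmask        # recover the name index
    #
    # _branchinfo() above performs the encoding half; branchinfo() earlier
    # performs the matching decode on its fast path.
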
    def setdata(self, rev, changelogrevision):
        """add new branch info to the cache for rev"""
        branch, close = changelogrevision.branchinfo

        if branch in self._namesreverse:
            branchidx = self._namesreverse[branch]
        else:
            branchidx = len(self._names)
            self._names.append(branch)
            self._namesreverse[branch] = branchidx
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
        # If no cache data was readable (file missing, bad permissions,
        # etc.), the cache bypassed itself by setting:
        #
        #   self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypass.
        if 'branchinfo' in vars(self):
            del self.branchinfo

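    # The vars(self) check above relies on instance attributes shadowing
    # class attributes. A minimal standalone illustration (plain Python,
    # hypothetical names):
    #
    #     class C:
    #         def m(self):
    #             return b'class method'
    #
    #     c = C()
    #     c.m = lambda: b'instance bypass'   # like self.branchinfo = ...
    #     del c.m                            # the class method wins again
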
    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize(b'write-revbranchcache', self.write)

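    # Record offsets are plain multiples of the record size, so with 8-byte
    # records (an assumption carried over from the format sketch above),
    # revision 5 lives at byte 40. Because the write is registered as a
    # transaction finalizer, a caller-side sketch like
    #
    #     with repo.transaction(b'example') as tr:   # illustrative only
    #         rbc.setdata(rev, changelogrevision)
    #
    # persists the cache automatically when the transaction closes.
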
    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = b''
        try:
            # write the new names
            if self._rbcnamescount < len(self._names):
                wlock = repo.wlock(wait=False)
                step = b' names'
                self._writenames(repo)

            # write the new revs
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = b' revs'
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                self._writerevs(repo, start)

        except (IOError, OSError, error.Abort, error.LockError) as inst:
            repo.ui.debug(
                b"couldn't write revision branch cache%s: %s\n"
                % (step, stringutil.forcebytestr(inst))
            )
        finally:
            if wlock is not None:
                wlock.release()

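    # Ordering note: names are written before revs because every rev record
    # refers to a name by index; the reverse order could leave dangling
    # indices if the second write failed. The non-blocking lock also means
    # a busy repository simply skips the write, e.g. (illustrative):
    #
    #     try:
    #         wlock = repo.wlock(wait=False)
    #     except error.LockError:
    #         pass  # another writer holds the lock; retry on the next call
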
    def _writenames(self, repo):
        """write the new branch names to revbranchcache"""
        if self._rbcnamescount != 0:
            f = repo.cachevfs.open(_rbcnames, b'ab')
            if f.tell() == self._rbcsnameslen:
                f.write(b'\0')
            else:
                f.close()
                repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
                self._rbcnamescount = 0
                self._rbcrevslen = 0
        if self._rbcnamescount == 0:
            # before rewriting names, make sure references are removed
            repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
            f = repo.cachevfs.open(_rbcnames, b'wb')
        f.write(
            b'\0'.join(
                encoding.fromlocal(b)
                for b in self._names[self._rbcnamescount :]
            )
        )
        self._rbcsnameslen = f.tell()
        f.close()
        self._rbcnamescount = len(self._names)

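    # File-format sketch (inferred from the writer above): the names file
    # is a NUL-separated list of encoding.fromlocal() byte strings, so two
    # branches serialize as
    #
    #     b'default\0stable'
    #
    # Appending b'\0' plus the new names keeps earlier offsets valid, which
    # is why the append path first checks f.tell() against _rbcsnameslen.
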
    def _writerevs(self, repo, start):
        """write the new revs to revbranchcache"""
        revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
        with repo.cachevfs.open(_rbcrevs, b'ab') as f:
            if f.tell() != start:
                repo.ui.debug(
                    b"truncating cache/%s to %d\n" % (_rbcrevs, start)
                )
                f.seek(start)
                if f.tell() != start:
                    start = 0
                    f.seek(start)
                f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs.slice(start, end))
        self._rbcrevslen = revs
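
# End-to-end sketch (illustrative; the constructor signature is assumed
# from context, not shown in this excerpt):
#
#     rbc = revbranchcache(repo)   # reads the cache files if present
#     name, closed = rbc.branchinfo(repo.changelog.tiprev())
#     rbc.write()                  # no-op unless something is dirty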