branchcache: write branchmap in subset inheritance order...
marmoute - r52380:1a9bdd0e default
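This changeset makes `BranchMapCache.write_delayed` walk the per-filter caches in subset inheritance order instead of arbitrary dict order, presumably so that a view's cache is only written after the subset cache it may have been seeded from in `updatecache`. Below is a minimal sketch, not the actual repoviewutil code, of what such an ordering can look like; the mapping mirrors `repoviewutil.subsettable` (filter name -> the subset view it inherits from), and the function body is an assumption for illustration only:

# Sketch of a subset-inheritance ordering (assumed stand-in for
# repoviewutil.get_ordered_subset, not the real implementation).
subsettable = {
    None: b'visible',
    b'visible-hidden': b'visible',
    b'visible': b'served',
    b'served.hidden': b'served',
    b'served': b'immutable',
    b'immutable': b'base',
}


def get_ordered_subset():
    """Return filter names so every subset comes before views built on it."""
    order = []
    seen = set()

    def visit(name):
        if name in seen:
            return
        seen.add(name)
        if name in subsettable:  # emit the subset this view inherits from first
            visit(subsettable[name])
        order.append(name)

    for name in subsettable:
        visit(name)
    return order


# [b'base', b'immutable', b'served', b'visible', None,
#  b'visible-hidden', b'served.hidden']
print(get_ordered_subset())

With such an order, `write_delayed` persists a subset's cache before any view that might have been copied from it.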
@@ -1,1046 +1,1049 @@
# branchmap.py - logic to compute, maintain and store the branchmap for a local repo
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import struct

from .node import (
    bin,
    hex,
    nullrev,
)

from typing import (
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Optional,
    Set,
    TYPE_CHECKING,
    Tuple,
    Union,
)

from . import (
    encoding,
    error,
    obsolete,
    scmutil,
    util,
)

from .utils import (
    repoviewutil,
    stringutil,
)

if TYPE_CHECKING:
    from . import localrepo

    assert [localrepo]

subsettable = repoviewutil.subsettable

calcsize = struct.calcsize
pack_into = struct.pack_into
unpack_from = struct.unpack_from


class BranchMapCache:
    """mapping of filtered views of repo with their branchcache"""

    def __init__(self):
        self._per_filter = {}

    def __getitem__(self, repo):
        self.updatecache(repo)
        bcache = self._per_filter[repo.filtername]
        assert bcache._filtername == repo.filtername, (
            bcache._filtername,
            repo.filtername,
        )
        return bcache

    def update_disk(self, repo):
        """ensure an up-to-date cache is (or will be) written on disk

        The cache for this repository view is updated if needed and written on
        disk.

        If a transaction is in progress, the write is scheduled at transaction
        close. See the `BranchMapCache.write_delayed` method.

        This method exists independently of __getitem__ as it is sometimes
        useful to signal that we have no intent to use the data in memory yet.
        """
        self.updatecache(repo)
        bcache = self._per_filter[repo.filtername]
        assert bcache._filtername == repo.filtername, (
            bcache._filtername,
            repo.filtername,
        )
        bcache.write(repo)

    def updatecache(self, repo):
        """Update the cache for the given filtered view on a repository"""
        # This can trigger updates for the caches for subsets of the filtered
        # view, e.g. when there is no cache for this filtered view or the cache
        # is stale.

        cl = repo.changelog
        filtername = repo.filtername
        bcache = self._per_filter.get(filtername)
        if bcache is None or not bcache.validfor(repo):
            # cache object missing or cache object stale? Read from disk
            bcache = branchcache.fromfile(repo)

        revs = []
        if bcache is None:
            # no (fresh) cache available anymore, perhaps we can re-use
            # the cache for a subset, then extend that to add info on missing
            # revisions.
            subsetname = subsettable.get(filtername)
            if subsetname is not None:
                subset = repo.filtered(subsetname)
                bcache = self[subset].copy(repo)
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= bcache.tiprev)
            else:
                # nothing to fall back on, start empty.
                bcache = branchcache(repo)

        revs.extend(cl.revs(start=bcache.tiprev + 1))
        if revs:
            bcache.update(repo, revs)

        assert bcache.validfor(repo), filtername
        self._per_filter[repo.filtername] = bcache
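        # Editor's sketch (not part of branchmap.py): with the stock
        # subsettable, asking for a missing b'visible' cache seeds it from the
        # b'served' view:
        #
        #   caches = BranchMapCache()
        #   bmap = caches[repo.filtered(b'visible')]   # recurses into b'served'
        #
        # The b'served' cache is copied, then extended with the revisions the
        # served view filters out but the visible view does not (e.g. secret
        # ones), plus everything above the copied tiprev.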

    def replace(self, repo, remotebranchmap):
        """Replace the branchmap cache for a repo with a branch mapping.

        This is likely only called during clone with a branch map from a
        remote.

        """
        cl = repo.changelog
        clrev = cl.rev
        clbranchinfo = cl.branchinfo
        rbheads = []
        closed = set()
        for bheads in remotebranchmap.values():
            rbheads += bheads
            for h in bheads:
                r = clrev(h)
                b, c = clbranchinfo(r)
                if c:
                    closed.add(h)

        if rbheads:
            rtiprev = max((int(clrev(node)) for node in rbheads))
            cache = branchcache(
                repo,
                remotebranchmap,
                repo[rtiprev].node(),
                rtiprev,
                closednodes=closed,
            )

            # Try to stick it as low as possible
            # filters above served are unlikely to be fetched from a clone
            for candidate in (b'base', b'immutable', b'served'):
                rview = repo.filtered(candidate)
                if cache.validfor(rview):
                    cache = self._per_filter[candidate] = cache.copy(rview)
                    cache.write(rview)
                    return
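            # Editor's note (sketch, not in the original): writing at the
            # lowest filter level that validates means higher views can later
            # re-derive their cache through updatecache's subset fallback, so
            # a clone typically persists a single file such as branch2-served.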

    def clear(self):
        self._per_filter.clear()

    def write_delayed(self, repo):
        unfi = repo.unfiltered()
-        for filtername, cache in self._per_filter.items():
+        for filtername in repoviewutil.get_ordered_subset():
+            cache = self._per_filter.get(filtername)
+            if cache is None:
+                continue
            if cache._delayed:
                if filtername is None:
                    repo = unfi
                else:
                    repo = unfi.filtered(filtername)
                cache.write(repo)


def _unknownnode(node):
    """raises ValueError when branchcache finds a node which does not exist"""
    raise ValueError('node %s does not exist' % node.hex())


def _branchcachedesc(repo):
    if repo.filtername is not None:
        return b'branch cache (%s)' % repo.filtername
    else:
        return b'branch cache'


class _BaseBranchCache:
    """A dict-like object that holds the branch heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered and obsolete revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closed_nodes: Optional[Set[bytes]] = None,
    ) -> None:
        """hasnode is a function which can be used to verify whether changelog
        has a given node or not. If it's not provided, we assume that every node
        we have exists in changelog"""
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closed_nodes is None:
            closed_nodes = set()
        self._closednodes = set(closed_nodes)
        self._entries = dict(entries)

    def __iter__(self):
        return iter(self._entries)

    def __setitem__(self, key, value):
        self._entries[key] = value

    def __getitem__(self, key):
        return self._entries[key]

    def __contains__(self, key):
        return key in self._entries

    def iteritems(self):
        return self._entries.items()

    items = iteritems

    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        return label in self._entries

    def _branchtip(self, heads):
        """Return tuple with last open head in heads and false,
        otherwise return last closed head and true."""
        tip = heads[-1]
        closed = True
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed
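    # Editor's example (not in the original): with heads == [n1, n2, n3] where
    # n3 is in self._closednodes and n2 is not, _branchtip(heads) returns
    # (n2, False); if every head is closed it returns (heads[-1], True).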

    def branchtip(self, branch):
        """Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch."""
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        heads = self._entries[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        for bn, heads in self.items():
            yield (bn, heads) + self._branchtip(heads)

    def iterheads(self):
        """returns all the heads"""
        return self._entries.values()

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset
        of the missing heads, this function updates self to be correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        max_rev = -1
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))
            max_rev = max(max_rev, r)
        if max_rev < 0:
            msg = "running branchcache.update without revision to update"
            raise error.ProgrammingError(msg)

        # Delay fetching the topological heads until they are needed.
        # A repository without non-continuous branches can skip this part.
        topoheads = None

        # If a changeset is visible, its parents must be visible too, so
        # use the faster unfiltered parent accessor.
        parentrevs = repo.unfiltered().changelog.parentrevs

        # Faster than using ctx.obsolete()
        obsrevs = obsolete.getrevs(repo, b'obsolete')

        for branch, newheadrevs in newbranches.items():
            # For every branch, compute the new branchheads.
            # A branchhead is a revision such that no descendant is on
            # the same branch.
            #
            # The branchheads are computed iteratively in revision order.
            # This ensures topological order, i.e. parents are processed
            # before their children. Ancestors are inclusive here, i.e.
            # any revision is an ancestor of itself.
            #
            # Core observations:
            # - The current revision is always a branchhead for the
            #   repository up to that point.
            # - It is the first revision of the branch if and only if
            #   there was no branchhead before. In that case, it is the
            #   only branchhead as there are no possible ancestors on
            #   the same branch.
            # - If a parent is on the same branch, a branchhead can
            #   only be an ancestor of that parent if it is the parent
            #   itself. Otherwise it would have been removed as ancestor
            #   of that parent before.
            # - Therefore, if all parents are on the same branch, they
            #   can just be removed from the branchhead set.
            # - If one parent is on the same branch and the other is not
            #   and there was exactly one branchhead known, the existing
            #   branchhead can only be an ancestor if it is the parent.
            #   Otherwise it would have been removed as ancestor of
            #   the parent before. The other parent therefore can't have
            #   a branchhead as ancestor.
            # - In all other cases, the parents on different branches
            #   could have a branchhead as ancestor. Those parents are
            #   kept in the "uncertain" set. If all branchheads are also
            #   topological heads, they can't have descendants and further
            #   checks can be skipped. Otherwise, the ancestors of the
            #   "uncertain" set are removed from branchheads.
            #   This computation is heavy and avoided if at all possible.
            bheads = self._entries.get(branch, [])
            bheadset = {cl.rev(node) for node in bheads}
            uncertain = set()
            for newrev in sorted(newheadrevs):
                if newrev in obsrevs:
                    # We ignore obsolete changesets as they shouldn't be
                    # considered heads.
                    continue

                if not bheadset:
                    bheadset.add(newrev)
                    continue

                parents = [p for p in parentrevs(newrev) if p != nullrev]
                samebranch = set()
                otherbranch = set()
                obsparents = set()
                for p in parents:
                    if p in obsrevs:
                        # We ignored this obsolete changeset earlier, but now
                        # that it has non-ignored children, we need to make
                        # sure their ancestors are not considered heads. To
                        # achieve that, we will simply treat this obsolete
                        # changeset as a parent from other branch.
                        obsparents.add(p)
                    elif p in bheadset or getbranchinfo(p)[0] == branch:
                        samebranch.add(p)
                    else:
                        otherbranch.add(p)
                if not (len(bheadset) == len(samebranch) == 1):
                    uncertain.update(otherbranch)
                    uncertain.update(obsparents)
                bheadset.difference_update(samebranch)
                bheadset.add(newrev)

            if uncertain:
                if topoheads is None:
                    topoheads = set(cl.headrevs())
                if bheadset - topoheads:
                    floorrev = min(bheadset)
                    if floorrev <= max(uncertain):
                        ancestors = set(cl.ancestors(uncertain, floorrev))
                        bheadset -= ancestors
            if bheadset:
                self[branch] = [cl.node(rev) for rev in sorted(bheadset)]

        duration = util.timer() - starttime
        repo.ui.log(
            b'branchcache',
            b'updated %s in %.4f seconds\n',
            _branchcachedesc(repo),
            duration,
        )
        return max_rev
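    # Editor's sketch of the common fast path above (not in the original):
    # a new revision r whose only parent p is the sole known head of the same
    # branch yields len(bheadset) == len(samebranch) == 1, so p is simply
    # replaced by r and the `uncertain` ancestry computation never runs.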


class branchcache(_BaseBranchCache):
    """Branchmap info for a local repo or repoview"""

    _base_filename = b"branch2"

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        tipnode: Optional[bytes] = None,
        tiprev: Optional[int] = nullrev,
        filteredhash: Optional[bytes] = None,
        closednodes: Optional[Set[bytes]] = None,
        hasnode: Optional[Callable[[bytes], bool]] = None,
        verify_node: bool = False,
    ) -> None:
        """hasnode is a function which can be used to verify whether changelog
        has a given node or not. If it's not provided, we assume that every node
        we have exists in changelog"""
        self._filtername = repo.filtername
        self._delayed = False
        if tipnode is None:
            self.tipnode = repo.nullid
        else:
            self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash

        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.

        # Do we need to verify branch at all ?
        self._verify_node = verify_node
        # branches for which nodes are verified
        self._verifiedbranches = set()
        self._hasnode = None
        if self._verify_node:
            self._hasnode = repo.changelog.hasnode

    def validfor(self, repo):
        """check that cache contents are valid for (a subset of) this repo

        - False when the order of changesets changed or if we detect a strip.
        - True when cache is up-to-date for the current repo or its subset."""
        try:
            node = repo.changelog.node(self.tiprev)
        except IndexError:
            # changesets were stripped and now we don't even have enough to
            # find tiprev
            return False
        if self.tipnode != node:
            # tiprev doesn't correspond to tipnode: repo was stripped, or this
            # repo has a different order of changesets
            return False
        tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
        # hashes don't match if this repo view has a different set of filtered
        # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
        # history was rewritten)
        return self.filteredhash == tiphash

    @classmethod
    def fromfile(cls, repo):
        f = None
        try:
            f = repo.cachevfs(cls._filename(repo))
            lineiter = iter(f)
            init_kwargs = cls._load_header(repo, lineiter)
            bcache = cls(
                repo,
                verify_node=True,
                **init_kwargs,
            )
            if not bcache.validfor(repo):
                # invalidate the cache
                raise ValueError('tip differs')
            bcache._load_heads(repo, lineiter)
        except (IOError, OSError):
            return None

        except Exception as inst:
            if repo.ui.debugflag:
                msg = b'invalid %s: %s\n'
                msg %= (
                    _branchcachedesc(repo),
                    stringutil.forcebytestr(inst),
                )
                repo.ui.debug(msg)
            bcache = None

        finally:
            if f:
                f.close()

        return bcache

    @classmethod
    def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
        """parse the head of a branchmap file

        return parameters to pass to a newly created class instance.
        """
        cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
        last, lrev = cachekey[:2]
        last, lrev = bin(last), int(lrev)
        filteredhash = None
        if len(cachekey) > 2:
            filteredhash = bin(cachekey[2])
        return {
            "tipnode": last,
            "tiprev": lrev,
            "filteredhash": filteredhash,
        }

    def _load_heads(self, repo, lineiter):
        """fully loads the branchcache by reading from the file using the line
        iterator passed"""
        for line in lineiter:
            line = line.rstrip(b'\n')
            if not line:
                continue
            node, state, label = line.split(b" ", 2)
            if state not in b'oc':
                raise ValueError('invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            self._entries.setdefault(label, []).append(node)
            if state == b'c':
                self._closednodes.add(node)

    @classmethod
    def _filename(cls, repo):
        """name of a branchcache file for a given repo or repoview"""
        filename = cls._base_filename
        if repo.filtername:
            filename = b'%s-%s' % (filename, repo.filtername)
        return filename

    def copy(self, repo):
        """return a deep copy of the branchcache object"""
        other = type(self)(
            repo=repo,
            # we always do a shallow copy of self._entries, and the values
            # are always replaced, so there is no need to deepcopy as long
            # as the above remains true.
            entries=self._entries,
            tipnode=self.tipnode,
            tiprev=self.tiprev,
            filteredhash=self.filteredhash,
            closednodes=set(self._closednodes),
            verify_node=self._verify_node,
        )
        # the copy will likely schedule a write anyway, but it does not seem
        # to hurt to overschedule
        other._delayed = self._delayed
        # also copy information about the current verification state
        other._verifiedbranches = set(self._verifiedbranches)
        return other

    def write(self, repo):
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        tr = repo.currenttransaction()
        if not getattr(tr, 'finalized', True):
            # Avoid premature writing.
            #
            # (The cache warming setup by localrepo will update the file later.)
            self._delayed = True
            return
        try:
            filename = self._filename(repo)
            with repo.cachevfs(filename, b"w", atomictemp=True) as f:
                self._write_header(f)
                nodecount = self._write_heads(f)
            repo.ui.log(
                b'branchcache',
                b'wrote %s with %d labels and %d nodes\n',
                _branchcachedesc(repo),
                len(self._entries),
                nodecount,
            )
            self._delayed = False
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug(
                b"couldn't write branch cache: %s\n"
                % stringutil.forcebytestr(inst)
            )

    def _write_header(self, fp) -> None:
        """write the branch cache header to a file"""
        cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
        if self.filteredhash is not None:
            cachekey.append(hex(self.filteredhash))
        fp.write(b" ".join(cachekey) + b'\n')

    def _write_heads(self, fp) -> int:
        """write list of heads to a file

        Return the number of heads written."""
        nodecount = 0
        for label, nodes in sorted(self._entries.items()):
            label = encoding.fromlocal(label)
            for node in nodes:
                nodecount += 1
                if node in self._closednodes:
                    state = b'c'
                else:
                    state = b'o'
                fp.write(b"%s %s %s\n" % (hex(node), state, label))
        return nodecount

    def _verifybranch(self, branch):
        """verify head nodes for the given branch."""
        if not self._verify_node:
            return
        if branch not in self._entries or branch in self._verifiedbranches:
            return
        assert self._hasnode is not None
        for n in self._entries[branch]:
            if not self._hasnode(n):
                _unknownnode(n)

        self._verifiedbranches.add(branch)

    def _verifyall(self):
        """verifies nodes of all the branches"""
        for b in self._entries.keys():
            if b not in self._verifiedbranches:
                self._verifybranch(b)

    def __getitem__(self, key):
        self._verifybranch(key)
        return super().__getitem__(key)

    def __contains__(self, key):
        self._verifybranch(key)
        return super().__contains__(key)

    def iteritems(self):
        self._verifyall()
        return super().iteritems()

    items = iteritems

    def iterheads(self):
        """returns all the heads"""
        self._verifyall()
        return super().iterheads()

    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        self._verifybranch(label)
        return super().hasbranch(label)

    def branchheads(self, branch, closed=False):
        self._verifybranch(branch)
        return super().branchheads(branch, closed=closed)

    def update(self, repo, revgen):
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        cl = repo.changelog
        max_rev = super().update(repo, revgen)
        # new tip revision which we found after iterating items from new
        # branches
        if max_rev is not None and max_rev > self.tiprev:
            self.tiprev = max_rev
            self.tipnode = cl.node(max_rev)

        if not self.validfor(repo):
            # old cache key is now invalid for the repo, but we've just updated
            # the cache and we assume it's valid, so let's make the cache key
            # valid as well by recomputing it from the cached data
            self.tipnode = repo.nullid
            self.tiprev = nullrev
            for heads in self.iterheads():
                if not heads:
                    # all revisions on a branch are obsolete
                    continue
                # note: tiprev is not necessarily the tip revision of repo,
                # because the tip could be obsolete (i.e. not a head)
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(
            repo, self.tiprev, needobsolete=True
        )

        self.write(repo)


class remotebranchcache(_BaseBranchCache):
    """Branchmap info for a remote connection, should not write locally"""

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closednodes: Optional[Set[bytes]] = None,
    ) -> None:
        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)


# Revision branch info cache

_rbcversion = b'-v1'
_rbcnames = b'rbc-names' + _rbcversion
_rbcrevs = b'rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = b'>4sI'
_rbcrecsize = calcsize(_rbcrecfmt)
_rbcmininc = 64 * _rbcrecsize
_rbcnodelen = 4
_rbcbranchidxmask = 0x7FFFFFFF
_rbccloseflag = 0x80000000
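# Editor's sketch (not in the original file): one rbc-revs record is
# calcsize(b'>4sI') == 8 bytes. Packing and unpacking a closed head of branch
# index 3 would look like:
#
#   rec = struct.pack(_rbcrecfmt, node[:_rbcnodelen], 3 | _rbccloseflag)
#   prefix, value = struct.unpack(_rbcrecfmt, rec)
#   close = bool(value & _rbccloseflag)       # True
#   branchidx = value & _rbcbranchidxmask     # 3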


class rbcrevs:
    """a byte string consisting of an immutable prefix followed by a mutable suffix"""

    def __init__(self, revs):
        self._prefix = revs
        self._rest = bytearray()

    def __len__(self):
        return len(self._prefix) + len(self._rest)

    def unpack_record(self, rbcrevidx):
        if rbcrevidx < len(self._prefix):
            return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
        else:
            return unpack_from(
                _rbcrecfmt,
                util.buffer(self._rest),
                rbcrevidx - len(self._prefix),
            )

    def make_mutable(self):
        if len(self._prefix) > 0:
            entirety = bytearray()
            entirety[:] = self._prefix
            entirety.extend(self._rest)
            self._rest = entirety
            self._prefix = bytearray()

    def truncate(self, pos):
        self.make_mutable()
        del self._rest[pos:]

    def pack_into(self, rbcrevidx, node, branchidx):
        if rbcrevidx < len(self._prefix):
            self.make_mutable()
        buf = self._rest
        start_offset = rbcrevidx - len(self._prefix)
        end_offset = start_offset + _rbcrecsize

        if len(self._rest) < end_offset:
            # bytearray doesn't allocate extra space at least in Python 3.7.
            # When multiple changesets are added in a row, precise resize would
            # result in quadratic complexity. Overallocate to compensate by
            # using the classic doubling technique for dynamic arrays instead.
            # If there was a gap in the map before, less space will be reserved.
            self._rest.extend(b'\0' * end_offset)
        return pack_into(
            _rbcrecfmt,
            buf,
            start_offset,
            node,
            branchidx,
        )

    def extend(self, extension):
        return self._rest.extend(extension)

    def slice(self, begin, end):
        if begin < len(self._prefix):
            acc = bytearray()
            acc[:] = self._prefix[begin:end]
            acc.extend(
                self._rest[begin - len(self._prefix) : end - len(self._prefix)]
            )
            return acc
        return self._rest[begin - len(self._prefix) : end - len(self._prefix)]


class revbranchcache:
    """Persistent cache, mapping from revision number to branch name and close.
    This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs file will thus still give the right
    result while converging towards full recovery ... assuming no incorrectly
    matching node hashes.
    The record also contains 4 bytes where 31 bits contain the index of the
    branch and the last bit indicates that it is a branch close commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """
829
832
830 def __init__(self, repo, readonly=True):
833 def __init__(self, repo, readonly=True):
831 assert repo.filtername is None
834 assert repo.filtername is None
832 self._repo = repo
835 self._repo = repo
833 self._names = [] # branch names in local encoding with static index
836 self._names = [] # branch names in local encoding with static index
834 self._rbcrevs = rbcrevs(bytearray())
837 self._rbcrevs = rbcrevs(bytearray())
835 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
838 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
836 try:
839 try:
837 bndata = repo.cachevfs.read(_rbcnames)
840 bndata = repo.cachevfs.read(_rbcnames)
838 self._rbcsnameslen = len(bndata) # for verification before writing
841 self._rbcsnameslen = len(bndata) # for verification before writing
839 if bndata:
842 if bndata:
840 self._names = [
843 self._names = [
841 encoding.tolocal(bn) for bn in bndata.split(b'\0')
844 encoding.tolocal(bn) for bn in bndata.split(b'\0')
842 ]
845 ]
843 except (IOError, OSError):
846 except (IOError, OSError):
844 if readonly:
847 if readonly:
845 # don't try to use cache - fall back to the slow path
848 # don't try to use cache - fall back to the slow path
846 self.branchinfo = self._branchinfo
849 self.branchinfo = self._branchinfo
847
850
848 if self._names:
851 if self._names:
849 try:
852 try:
850 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
853 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
851 with repo.cachevfs(_rbcrevs) as fp:
854 with repo.cachevfs(_rbcrevs) as fp:
852 data = util.buffer(util.mmapread(fp))
855 data = util.buffer(util.mmapread(fp))
853 else:
856 else:
854 data = repo.cachevfs.read(_rbcrevs)
857 data = repo.cachevfs.read(_rbcrevs)
855 self._rbcrevs = rbcrevs(data)
858 self._rbcrevs = rbcrevs(data)
856 except (IOError, OSError) as inst:
859 except (IOError, OSError) as inst:
857 repo.ui.debug(
860 repo.ui.debug(
858 b"couldn't read revision branch cache: %s\n"
861 b"couldn't read revision branch cache: %s\n"
859 % stringutil.forcebytestr(inst)
862 % stringutil.forcebytestr(inst)
860 )
863 )
861 # remember number of good records on disk
864 # remember number of good records on disk
862 self._rbcrevslen = min(
865 self._rbcrevslen = min(
863 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
866 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
864 )
867 )
865 if self._rbcrevslen == 0:
868 if self._rbcrevslen == 0:
866 self._names = []
869 self._names = []
867 self._rbcnamescount = len(self._names) # number of names read at
870 self._rbcnamescount = len(self._names) # number of names read at
868 # _rbcsnameslen
871 # _rbcsnameslen
869
872
870 def _clear(self):
873 def _clear(self):
871 self._rbcsnameslen = 0
874 self._rbcsnameslen = 0
872 del self._names[:]
875 del self._names[:]
873 self._rbcnamescount = 0
876 self._rbcnamescount = 0
874 self._rbcrevslen = len(self._repo.changelog)
877 self._rbcrevslen = len(self._repo.changelog)
875 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
878 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
876 util.clearcachedproperty(self, b'_namesreverse')
879 util.clearcachedproperty(self, b'_namesreverse')
877
880
878 @util.propertycache
881 @util.propertycache
879 def _namesreverse(self):
882 def _namesreverse(self):
880 return {b: r for r, b in enumerate(self._names)}
883 return {b: r for r, b in enumerate(self._names)}

    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == b'\0\0\0\0':
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug(
                    b"referenced branch names not found"
                    b" - rebuilding revision branch cache from scratch\n"
                )
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug(
                b"history modification detected - truncating "
                b"revision branch cache to revision %d\n" % rev
            )
            truncate = rbcrevidx + _rbcrecsize
            self._rbcrevs.truncate(truncate)
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)
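    # Illustrative sketch (editorial note, not part of the original file):
    # each cache record packs a 4-byte node prefix with a 4-byte branch index
    # whose top bit is the "closed" flag, assuming the b'>4sI' record format
    # used elsewhere in this module:
    #
    #   >>> import struct
    #   >>> rec = struct.pack(b'>4sI', b'\x12\x34\x56\x78', 5 | 0x80000000)
    #   >>> node, idx = struct.unpack(b'>4sI', rec)
    #   >>> bool(idx & 0x80000000), idx & 0x7fffffff
    #   (True, 5)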

    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def setdata(self, rev, changelogrevision):
        """add new data information to the cache"""
        branch, close = changelogrevision.branchinfo

        if branch in self._namesreverse:
            branchidx = self._namesreverse[branch]
        else:
            branchidx = len(self._names)
            self._names.append(branch)
            self._namesreverse[branch] = branchidx
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
        # If no cache data were readable (missing file, bad permissions,
        # etc.), the cache was bypassing itself by setting:
        #
        #   self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypassing.
        if 'branchinfo' in vars(self):
            del self.branchinfo
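    # Illustrative sketch (editorial note, not part of the original file): an
    # instance attribute shadows the class-level method of the same name, and
    # deleting it restores normal lookup -- which is all the bypass above is:
    #
    #   >>> class C:
    #   ...     def m(self):
    #   ...         return 'class'
    #   >>> c = C()
    #   >>> c.m = lambda: 'instance'
    #   >>> c.m()
    #   'instance'
    #   >>> del c.m
    #   >>> c.m()
    #   'class'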

    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize(b'write-revbranchcache', self.write)
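    # Editorial note (hedged): because the finalizer is registered under a
    # fixed category key, many _setcachedata calls within one transaction
    # should still schedule only a single write at transaction close, assuming
    # tr.addfinalize replaces an earlier callback registered under the same
    # category.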

    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = b''
        try:
            # write the new names
            if self._rbcnamescount < len(self._names):
                wlock = repo.wlock(wait=False)
                step = b' names'
                self._writenames(repo)

            # write the new revs
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = b' revs'
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                self._writerevs(repo, start)

        except (IOError, OSError, error.Abort, error.LockError) as inst:
            repo.ui.debug(
                b"couldn't write revision branch cache%s: %s\n"
                % (step, stringutil.forcebytestr(inst))
            )
        finally:
            if wlock is not None:
                wlock.release()

    def _writenames(self, repo):
        """write the new branch names to revbranchcache"""
        if self._rbcnamescount != 0:
            f = repo.cachevfs.open(_rbcnames, b'ab')
            if f.tell() == self._rbcsnameslen:
                f.write(b'\0')
            else:
                f.close()
                repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
                self._rbcnamescount = 0
                self._rbcrevslen = 0
        if self._rbcnamescount == 0:
            # before rewriting names, make sure references are removed
            repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
            f = repo.cachevfs.open(_rbcnames, b'wb')
        f.write(
            b'\0'.join(
                encoding.fromlocal(b)
                for b in self._names[self._rbcnamescount :]
            )
        )
        self._rbcsnameslen = f.tell()
        f.close()
        self._rbcnamescount = len(self._names)

    def _writerevs(self, repo, start):
        """write the new revs to revbranchcache"""
        revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
        with repo.cachevfs.open(_rbcrevs, b'ab') as f:
            if f.tell() != start:
                repo.ui.debug(
                    b"truncating cache/%s to %d\n" % (_rbcrevs, start)
                )
                f.seek(start)
                if f.tell() != start:
                    start = 0
                    f.seek(start)
                f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs.slice(start, end))
        self._rbcrevslen = revs
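    # Illustrative note (editorial, not part of the original file): because
    # every record has the same size, the revs file is a plain on-disk array.
    # Assuming the 8-byte record sketched earlier, the data for rev 1000
    # starts at byte offset 1000 * 8 = 8000, so a lookup is a single seek and
    # a clean write is an append starting at `start`.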
@@ -1,23 +1,51 @@
# repoviewutil.py - contains data relevant to repoview.py and other modules
#
# Copyright 2012 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
#                Logilab SA        <contact@logilab.fr>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from .. import error

### Nearest subset relation
# Nearest subset of filter X is a filter Y so that:
# * Y is included in X,
# * X - Y is as small as possible.
# This creates an ordering used for branchmap purposes.
# The ordering may be partial.
subsettable = {
    None: b'visible',
    b'visible-hidden': b'visible',
    b'visible': b'served',
    b'served.hidden': b'served',
    b'served': b'immutable',
    b'immutable': b'base',
}
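# Illustrative reading (editorial note, not part of the original file): each
# entry maps a filter to its nearest subset, e.g. subsettable[b'visible'] is
# b'served' because the served set is contained in the visible set and the
# difference between the two is as small as possible.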


def get_ordered_subset():
    """return a list of subset names from dependencies to dependents"""
    _unfinalized = set(subsettable.values())
    ordered = []

    # the subset table is expected to be small so we do the stupid N² version
    # of the algorithm
    while _unfinalized:
        this_level = []
        for candidate in _unfinalized:
            dependency = subsettable.get(candidate)
            if dependency not in _unfinalized:
                this_level.append(candidate)

        if not this_level:
            msg = "cyclic dependencies in repoview subset %r"
            msg %= subsettable
            raise error.ProgrammingError(msg)

        this_level.sort(key=lambda x: x if x is not None else b'')

        ordered.extend(this_level)
        _unfinalized.difference_update(this_level)

    return ordered
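# Illustrative walk-through (editorial note, not part of the original file),
# using the default subsettable above: the values form the dependency chain
# base <- immutable <- served <- visible, so each pass of the loop peels off
# one level and the result is:
#
#   >>> get_ordered_subset()
#   [b'base', b'immutable', b'served', b'visible']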