branchmap-v3: introduce a "stop_rev" argument to `headsrevs`...
marmoute
r52870:42a116f1 default
@@ -1,1088 +1,1087 @@
1 # branchmap.py - logic to compute, maintain and store the branchmap for a local repo
1 # branchmap.py - logic to compute, maintain and store the branchmap for a local repo
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import annotations
8 from __future__ import annotations
9
9
10 from .node import (
10 from .node import (
11 bin,
11 bin,
12 hex,
12 hex,
13 nullrev,
13 nullrev,
14 )
14 )
15
15
16 from typing import (
16 from typing import (
17 Any,
17 Any,
18 Callable,
18 Callable,
19 Dict,
19 Dict,
20 Iterable,
20 Iterable,
21 List,
21 List,
22 Optional,
22 Optional,
23 Set,
23 Set,
24 TYPE_CHECKING,
24 TYPE_CHECKING,
25 Tuple,
25 Tuple,
26 Union,
26 Union,
27 cast,
27 cast,
28 )
28 )
29
29
30 from . import (
30 from . import (
31 encoding,
31 encoding,
32 error,
32 error,
33 obsolete,
33 obsolete,
34 scmutil,
34 scmutil,
35 util,
35 util,
36 )
36 )
37
37
38 from .utils import (
38 from .utils import (
39 repoviewutil,
39 repoviewutil,
40 stringutil,
40 stringutil,
41 )
41 )
42
42
43 if TYPE_CHECKING:
43 if TYPE_CHECKING:
44 from . import localrepo
44 from . import localrepo
45
45
46 assert [localrepo]
46 assert [localrepo]
47
47
48 subsettable = repoviewutil.subsettable
48 subsettable = repoviewutil.subsettable
49
49
50
50
51 class BranchMapCache:
51 class BranchMapCache:
52 """mapping of filtered views of repo with their branchcache"""
52 """mapping of filtered views of repo with their branchcache"""
53
53
54 def __init__(self):
54 def __init__(self):
55 self._per_filter = {}
55 self._per_filter = {}
56
56
57 def __getitem__(self, repo):
57 def __getitem__(self, repo):
58 self.updatecache(repo)
58 self.updatecache(repo)
59 bcache = self._per_filter[repo.filtername]
59 bcache = self._per_filter[repo.filtername]
60 bcache._ensure_populated(repo)
60 bcache._ensure_populated(repo)
61 assert bcache._filtername == repo.filtername, (
61 assert bcache._filtername == repo.filtername, (
62 bcache._filtername,
62 bcache._filtername,
63 repo.filtername,
63 repo.filtername,
64 )
64 )
65 return bcache
65 return bcache
66
66
67 def update_disk(self, repo, detect_pure_topo=False):
67 def update_disk(self, repo, detect_pure_topo=False):
68 """ensure an up-to-date cache is (or will be) written on disk
68 """ensure an up-to-date cache is (or will be) written on disk
69
69
70 The cache for this repository view is updated if needed and written on
70 The cache for this repository view is updated if needed and written on
71 disk.
71 disk.
72
72
73 If a transaction is in progress, the write is scheduled at transaction
73 If a transaction is in progress, the write is scheduled at transaction
74 close. See the `BranchMapCache.write_dirty` method.
74 close. See the `BranchMapCache.write_dirty` method.
75
75
76 This method exists independently of __getitem__ as it is sometimes useful
76 This method exists independently of __getitem__ as it is sometimes useful
77 to signal that we have no intent to use the data in memory yet.
77 to signal that we have no intent to use the data in memory yet.
78 """
78 """
79 self.updatecache(repo)
79 self.updatecache(repo)
80 bcache = self._per_filter[repo.filtername]
80 bcache = self._per_filter[repo.filtername]
81 assert bcache._filtername == repo.filtername, (
81 assert bcache._filtername == repo.filtername, (
82 bcache._filtername,
82 bcache._filtername,
83 repo.filtername,
83 repo.filtername,
84 )
84 )
85 if detect_pure_topo:
85 if detect_pure_topo:
86 bcache._detect_pure_topo(repo)
86 bcache._detect_pure_topo(repo)
87 tr = repo.currenttransaction()
87 tr = repo.currenttransaction()
88 if getattr(tr, 'finalized', True):
88 if getattr(tr, 'finalized', True):
89 bcache.sync_disk(repo)
89 bcache.sync_disk(repo)
90
90
91 def updatecache(self, repo):
91 def updatecache(self, repo):
92 """Update the cache for the given filtered view on a repository"""
92 """Update the cache for the given filtered view on a repository"""
93 # This can trigger updates for the caches for subsets of the filtered
93 # This can trigger updates for the caches for subsets of the filtered
94 # view, e.g. when there is no cache for this filtered view or the cache
94 # view, e.g. when there is no cache for this filtered view or the cache
95 # is stale.
95 # is stale.
96
96
97 cl = repo.changelog
97 cl = repo.changelog
98 filtername = repo.filtername
98 filtername = repo.filtername
99 bcache = self._per_filter.get(filtername)
99 bcache = self._per_filter.get(filtername)
100 if bcache is None or not bcache.validfor(repo):
100 if bcache is None or not bcache.validfor(repo):
101 # cache object missing or cache object stale? Read from disk
101 # cache object missing or cache object stale? Read from disk
102 bcache = branch_cache_from_file(repo)
102 bcache = branch_cache_from_file(repo)
103
103
104 revs = []
104 revs = []
105 if bcache is None:
105 if bcache is None:
106 # no (fresh) cache available anymore, perhaps we can re-use
106 # no (fresh) cache available anymore, perhaps we can re-use
107 # the cache for a subset, then extend that to add info on missing
107 # the cache for a subset, then extend that to add info on missing
108 # revisions.
108 # revisions.
109 subsetname = subsettable.get(filtername)
109 subsetname = subsettable.get(filtername)
110 if subsetname is not None:
110 if subsetname is not None:
111 subset = repo.filtered(subsetname)
111 subset = repo.filtered(subsetname)
112 self.updatecache(subset)
112 self.updatecache(subset)
113 bcache = self._per_filter[subset.filtername].inherit_for(repo)
113 bcache = self._per_filter[subset.filtername].inherit_for(repo)
114 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
114 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
115 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
115 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
116 else:
116 else:
117 # nothing to fall back on, start empty.
117 # nothing to fall back on, start empty.
118 bcache = new_branch_cache(repo)
118 bcache = new_branch_cache(repo)
119
119
120 revs.extend(cl.revs(start=bcache.tiprev + 1))
120 revs.extend(cl.revs(start=bcache.tiprev + 1))
121 if revs:
121 if revs:
122 bcache.update(repo, revs)
122 bcache.update(repo, revs)
123
123
124 assert bcache.validfor(repo), filtername
124 assert bcache.validfor(repo), filtername
125 self._per_filter[repo.filtername] = bcache
125 self._per_filter[repo.filtername] = bcache
126
126
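Note: the fallback above reuses the cache of a broader repo view before starting from scratch. A rough illustration of the walk that results (names like `per_filter_caches` are made up for the sketch, and the exact chain is whatever `repoviewutil.subsettable` defines, roughly visible -> served -> immutable -> base):

    name = b'visible'
    while name is not None and name not in per_filter_caches:
        # each miss falls back to the next broader view, if any
        name = subsettable.get(name)
    # `name` is now the closest broader view with an existing cache, or None,
    # in which case updatecache() starts an empty cache and fills it.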
127 def replace(self, repo, remotebranchmap):
127 def replace(self, repo, remotebranchmap):
128 """Replace the branchmap cache for a repo with a branch mapping.
128 """Replace the branchmap cache for a repo with a branch mapping.
129
129
130 This is likely only called during clone with a branch map from a
130 This is likely only called during clone with a branch map from a
131 remote.
131 remote.
132
132
133 """
133 """
134 cl = repo.changelog
134 cl = repo.changelog
135 clrev = cl.rev
135 clrev = cl.rev
136 clbranchinfo = cl.branchinfo
136 clbranchinfo = cl.branchinfo
137 rbheads = []
137 rbheads = []
138 closed = set()
138 closed = set()
139 for bheads in remotebranchmap.values():
139 for bheads in remotebranchmap.values():
140 rbheads += bheads
140 rbheads += bheads
141 for h in bheads:
141 for h in bheads:
142 r = clrev(h)
142 r = clrev(h)
143 b, c = clbranchinfo(r)
143 b, c = clbranchinfo(r)
144 if c:
144 if c:
145 closed.add(h)
145 closed.add(h)
146
146
147 if rbheads:
147 if rbheads:
148 rtiprev = max((int(clrev(node)) for node in rbheads))
148 rtiprev = max((int(clrev(node)) for node in rbheads))
149 cache = new_branch_cache(
149 cache = new_branch_cache(
150 repo,
150 repo,
151 remotebranchmap,
151 remotebranchmap,
152 repo[rtiprev].node(),
152 repo[rtiprev].node(),
153 rtiprev,
153 rtiprev,
154 closednodes=closed,
154 closednodes=closed,
155 )
155 )
156
156
157 # Try to stick it as low as possible
157 # Try to stick it as low as possible
158 # filters above served are unlikely to be fetched from a clone
158 # filters above served are unlikely to be fetched from a clone
159 for candidate in (b'base', b'immutable', b'served'):
159 for candidate in (b'base', b'immutable', b'served'):
160 rview = repo.filtered(candidate)
160 rview = repo.filtered(candidate)
161 if cache.validfor(rview):
161 if cache.validfor(rview):
162 cache._filtername = candidate
162 cache._filtername = candidate
163 self._per_filter[candidate] = cache
163 self._per_filter[candidate] = cache
164 cache._state = STATE_DIRTY
164 cache._state = STATE_DIRTY
165 cache.write(rview)
165 cache.write(rview)
166 return
166 return
167
167
168 def clear(self):
168 def clear(self):
169 self._per_filter.clear()
169 self._per_filter.clear()
170
170
171 def write_dirty(self, repo):
171 def write_dirty(self, repo):
172 unfi = repo.unfiltered()
172 unfi = repo.unfiltered()
173 for filtername in repoviewutil.get_ordered_subset():
173 for filtername in repoviewutil.get_ordered_subset():
174 cache = self._per_filter.get(filtername)
174 cache = self._per_filter.get(filtername)
175 if cache is None:
175 if cache is None:
176 continue
176 continue
177 if filtername is None:
177 if filtername is None:
178 repo = unfi
178 repo = unfi
179 else:
179 else:
180 repo = unfi.filtered(filtername)
180 repo = unfi.filtered(filtername)
181 cache.sync_disk(repo)
181 cache.sync_disk(repo)
182
182
183
183
184 def _unknownnode(node):
184 def _unknownnode(node):
185 """raises ValueError when branchcache finds a node which does not exist"""
185 """raises ValueError when branchcache finds a node which does not exist"""
186 raise ValueError('node %s does not exist' % node.hex())
186 raise ValueError('node %s does not exist' % node.hex())
187
187
188
188
189 def _branchcachedesc(repo):
189 def _branchcachedesc(repo):
190 if repo.filtername is not None:
190 if repo.filtername is not None:
191 return b'branch cache (%s)' % repo.filtername
191 return b'branch cache (%s)' % repo.filtername
192 else:
192 else:
193 return b'branch cache'
193 return b'branch cache'
194
194
195
195
196 class _BaseBranchCache:
196 class _BaseBranchCache:
197 """A dict-like object that holds the branch heads cache.
197 """A dict-like object that holds the branch heads cache.
198
198
199 This cache is used to avoid costly computations to determine all the
199 This cache is used to avoid costly computations to determine all the
200 branch heads of a repo.
200 branch heads of a repo.
201 """
201 """
202
202
203 def __init__(
203 def __init__(
204 self,
204 self,
205 repo: "localrepo.localrepository",
205 repo: "localrepo.localrepository",
206 entries: Union[
206 entries: Union[
207 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
207 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
208 ] = (),
208 ] = (),
209 closed_nodes: Optional[Set[bytes]] = None,
209 closed_nodes: Optional[Set[bytes]] = None,
210 ) -> None:
210 ) -> None:
211 """hasnode is a function which can be used to verify whether changelog
211 """hasnode is a function which can be used to verify whether changelog
212 has a given node or not. If it's not provided, we assume that every node
212 has a given node or not. If it's not provided, we assume that every node
213 we have exists in changelog"""
213 we have exists in changelog"""
214 # closednodes is a set of nodes that close their branch. If the branch
214 # closednodes is a set of nodes that close their branch. If the branch
215 # cache has been updated, it may contain nodes that are no longer
215 # cache has been updated, it may contain nodes that are no longer
216 # heads.
216 # heads.
217 if closed_nodes is None:
217 if closed_nodes is None:
218 closed_nodes = set()
218 closed_nodes = set()
219 self._closednodes = set(closed_nodes)
219 self._closednodes = set(closed_nodes)
220 self._entries = dict(entries)
220 self._entries = dict(entries)
221
221
222 def __iter__(self):
222 def __iter__(self):
223 return iter(self._entries)
223 return iter(self._entries)
224
224
225 def __setitem__(self, key, value):
225 def __setitem__(self, key, value):
226 self._entries[key] = value
226 self._entries[key] = value
227
227
228 def __getitem__(self, key):
228 def __getitem__(self, key):
229 return self._entries[key]
229 return self._entries[key]
230
230
231 def __contains__(self, key):
231 def __contains__(self, key):
232 return key in self._entries
232 return key in self._entries
233
233
234 def iteritems(self):
234 def iteritems(self):
235 return self._entries.items()
235 return self._entries.items()
236
236
237 items = iteritems
237 items = iteritems
238
238
239 def hasbranch(self, label):
239 def hasbranch(self, label):
240 """checks whether a branch of this name exists or not"""
240 """checks whether a branch of this name exists or not"""
241 return label in self._entries
241 return label in self._entries
242
242
243 def _branchtip(self, heads):
243 def _branchtip(self, heads):
244 """Return tuple with last open head in heads and false,
244 """Return tuple with last open head in heads and false,
245 otherwise return last closed head and true."""
245 otherwise return last closed head and true."""
246 tip = heads[-1]
246 tip = heads[-1]
247 closed = True
247 closed = True
248 for h in reversed(heads):
248 for h in reversed(heads):
249 if h not in self._closednodes:
249 if h not in self._closednodes:
250 tip = h
250 tip = h
251 closed = False
251 closed = False
252 break
252 break
253 return tip, closed
253 return tip, closed
254
254
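A toy restatement of `_branchtip` (hypothetical node names): it scans the heads from the tip downwards and returns the first open one, falling back to the last, closed, head.

    def branchtip_demo():
        heads = [b'n1', b'n2', b'n3']
        closed = {b'n3'}          # n3 closes the branch
        for h in reversed(heads):
            if h not in closed:
                return h, False   # -> (b'n2', False)
        return heads[-1], True    # only reached when every head is closed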
255 def branchtip(self, branch):
255 def branchtip(self, branch):
256 """Return the tipmost open head on branch, otherwise return the
256 """Return the tipmost open head on branch, otherwise return the
257 tipmost closed head on branch.
257 tipmost closed head on branch.
258 Raise KeyError for unknown branch."""
258 Raise KeyError for unknown branch."""
259 return self._branchtip(self[branch])[0]
259 return self._branchtip(self[branch])[0]
260
260
261 def iteropen(self, nodes):
261 def iteropen(self, nodes):
262 return (n for n in nodes if n not in self._closednodes)
262 return (n for n in nodes if n not in self._closednodes)
263
263
264 def branchheads(self, branch, closed=False):
264 def branchheads(self, branch, closed=False):
265 heads = self._entries[branch]
265 heads = self._entries[branch]
266 if not closed:
266 if not closed:
267 heads = list(self.iteropen(heads))
267 heads = list(self.iteropen(heads))
268 return heads
268 return heads
269
269
270 def iterbranches(self):
270 def iterbranches(self):
271 for bn, heads in self.items():
271 for bn, heads in self.items():
272 yield (bn, heads) + self._branchtip(heads)
272 yield (bn, heads) + self._branchtip(heads)
273
273
274 def iterheads(self):
274 def iterheads(self):
275 """returns all the heads"""
275 """returns all the heads"""
276 return self._entries.values()
276 return self._entries.values()
277
277
278 def update(self, repo, revgen):
278 def update(self, repo, revgen):
279 """Given a branchhead cache, self, that may have extra nodes or be
279 """Given a branchhead cache, self, that may have extra nodes or be
280 missing heads, and a generator of nodes that are strictly a superset of
280 missing heads, and a generator of nodes that are strictly a superset of
281 the missing heads, this function updates self to be correct.
281 the missing heads, this function updates self to be correct.
282 """
282 """
283 starttime = util.timer()
283 starttime = util.timer()
284 cl = repo.changelog
284 cl = repo.changelog
285 # Faster than using ctx.obsolete()
285 # Faster than using ctx.obsolete()
286 obsrevs = obsolete.getrevs(repo, b'obsolete')
286 obsrevs = obsolete.getrevs(repo, b'obsolete')
287 # collect new branch entries
287 # collect new branch entries
288 newbranches = {}
288 newbranches = {}
289 new_closed = set()
289 new_closed = set()
290 obs_ignored = set()
290 obs_ignored = set()
291 getbranchinfo = repo.revbranchcache().branchinfo
291 getbranchinfo = repo.revbranchcache().branchinfo
292 max_rev = -1
292 max_rev = -1
293 for r in revgen:
293 for r in revgen:
294 max_rev = max(max_rev, r)
294 max_rev = max(max_rev, r)
295 if r in obsrevs:
295 if r in obsrevs:
296 # We ignore obsolete changesets as they shouldn't be
296 # We ignore obsolete changesets as they shouldn't be
297 # considered heads.
297 # considered heads.
298 obs_ignored.add(r)
298 obs_ignored.add(r)
299 continue
299 continue
300 branch, closesbranch = getbranchinfo(r)
300 branch, closesbranch = getbranchinfo(r)
301 newbranches.setdefault(branch, []).append(r)
301 newbranches.setdefault(branch, []).append(r)
302 if closesbranch:
302 if closesbranch:
303 new_closed.add(r)
303 new_closed.add(r)
304 if max_rev < 0:
304 if max_rev < 0:
305 msg = "running branchcache.update without revision to update"
305 msg = "running branchcache.update without revision to update"
306 raise error.ProgrammingError(msg)
306 raise error.ProgrammingError(msg)
307
307
308 self._process_new(
308 self._process_new(
309 repo,
309 repo,
310 newbranches,
310 newbranches,
311 new_closed,
311 new_closed,
312 obs_ignored,
312 obs_ignored,
313 max_rev,
313 max_rev,
314 )
314 )
315
315
316 self._closednodes.update(cl.node(rev) for rev in new_closed)
316 self._closednodes.update(cl.node(rev) for rev in new_closed)
317
317
318 duration = util.timer() - starttime
318 duration = util.timer() - starttime
319 repo.ui.log(
319 repo.ui.log(
320 b'branchcache',
320 b'branchcache',
321 b'updated %s in %.4f seconds\n',
321 b'updated %s in %.4f seconds\n',
322 _branchcachedesc(repo),
322 _branchcachedesc(repo),
323 duration,
323 duration,
324 )
324 )
325 return max_rev
325 return max_rev
326
326
327 def _process_new(
327 def _process_new(
328 self,
328 self,
329 repo,
329 repo,
330 newbranches,
330 newbranches,
331 new_closed,
331 new_closed,
332 obs_ignored,
332 obs_ignored,
333 max_rev,
333 max_rev,
334 ):
334 ):
335 """update the branchmap from a set of new information"""
335 """update the branchmap from a set of new information"""
336 # Delay fetching the topological heads until they are needed.
336 # Delay fetching the topological heads until they are needed.
337 # A repository without non-contiguous branches can skip this part.
337 # A repository without non-contiguous branches can skip this part.
338 topoheads = None
338 topoheads = None
339
339
340 cl = repo.changelog
340 cl = repo.changelog
341 getbranchinfo = repo.revbranchcache().branchinfo
341 getbranchinfo = repo.revbranchcache().branchinfo
342 # Faster than using ctx.obsolete()
342 # Faster than using ctx.obsolete()
343 obsrevs = obsolete.getrevs(repo, b'obsolete')
343 obsrevs = obsolete.getrevs(repo, b'obsolete')
344
344
345 # If a changeset is visible, its parents must be visible too, so
345 # If a changeset is visible, its parents must be visible too, so
346 # use the faster unfiltered parent accessor.
346 # use the faster unfiltered parent accessor.
347 parentrevs = cl._uncheckedparentrevs
347 parentrevs = cl._uncheckedparentrevs
348
348
349 for branch, newheadrevs in newbranches.items():
349 for branch, newheadrevs in newbranches.items():
350 # For every branch, compute the new branchheads.
350 # For every branch, compute the new branchheads.
351 # A branchhead is a revision such that no descendant is on
351 # A branchhead is a revision such that no descendant is on
352 # the same branch.
352 # the same branch.
353 #
353 #
354 # The branchheads are computed iteratively in revision order.
354 # The branchheads are computed iteratively in revision order.
355 # This ensures topological order, i.e. parents are processed
355 # This ensures topological order, i.e. parents are processed
356 # before their children. Ancestors are inclusive here, i.e.
356 # before their children. Ancestors are inclusive here, i.e.
357 # any revision is an ancestor of itself.
357 # any revision is an ancestor of itself.
358 #
358 #
359 # Core observations:
359 # Core observations:
360 # - The current revision is always a branchhead for the
360 # - The current revision is always a branchhead for the
361 # repository up to that point.
361 # repository up to that point.
362 # - It is the first revision of the branch if and only if
362 # - It is the first revision of the branch if and only if
363 # there was no branchhead before. In that case, it is the
363 # there was no branchhead before. In that case, it is the
364 # only branchhead as there are no possible ancestors on
364 # only branchhead as there are no possible ancestors on
365 # the same branch.
365 # the same branch.
366 # - If a parent is on the same branch, a branchhead can
366 # - If a parent is on the same branch, a branchhead can
367 # only be an ancestor of that parent if it is the parent
367 # only be an ancestor of that parent if it is the parent
368 # itself. Otherwise it would have been removed as an ancestor
368 # itself. Otherwise it would have been removed as an ancestor
369 # of that parent before.
369 # of that parent before.
370 # - Therefore, if all parents are on the same branch, they
370 # - Therefore, if all parents are on the same branch, they
371 # can just be removed from the branchhead set.
371 # can just be removed from the branchhead set.
372 # - If one parent is on the same branch and the other is not
372 # - If one parent is on the same branch and the other is not
373 # and there was exactly one branchhead known, the existing
373 # and there was exactly one branchhead known, the existing
374 # branchhead can only be an ancestor if it is the parent.
374 # branchhead can only be an ancestor if it is the parent.
375 # Otherwise it would have been removed as ancestor of
375 # Otherwise it would have been removed as ancestor of
376 # the parent before. The other parent therefore can't have
376 # the parent before. The other parent therefore can't have
377 # a branchhead as ancestor.
377 # a branchhead as ancestor.
378 # - In all other cases, the parents on different branches
378 # - In all other cases, the parents on different branches
379 # could have a branchhead as ancestor. Those parents are
379 # could have a branchhead as ancestor. Those parents are
380 # kept in the "uncertain" set. If all branchheads are also
380 # kept in the "uncertain" set. If all branchheads are also
381 # topological heads, they can't have descendants and further
381 # topological heads, they can't have descendants and further
382 # checks can be skipped. Otherwise, the ancestors of the
382 # checks can be skipped. Otherwise, the ancestors of the
383 # "uncertain" set are removed from branchheads.
383 # "uncertain" set are removed from branchheads.
384 # This computation is heavy and avoided if at all possible.
384 # This computation is heavy and avoided if at all possible.
385 bheads = self._entries.get(branch, [])
385 bheads = self._entries.get(branch, [])
386 bheadset = {cl.rev(node) for node in bheads}
386 bheadset = {cl.rev(node) for node in bheads}
387 uncertain = set()
387 uncertain = set()
388 for newrev in sorted(newheadrevs):
388 for newrev in sorted(newheadrevs):
389 if not bheadset:
389 if not bheadset:
390 bheadset.add(newrev)
390 bheadset.add(newrev)
391 continue
391 continue
392
392
393 parents = [p for p in parentrevs(newrev) if p != nullrev]
393 parents = [p for p in parentrevs(newrev) if p != nullrev]
394 samebranch = set()
394 samebranch = set()
395 otherbranch = set()
395 otherbranch = set()
396 obsparents = set()
396 obsparents = set()
397 for p in parents:
397 for p in parents:
398 if p in obsrevs:
398 if p in obsrevs:
399 # We ignored this obsolete changeset earlier, but now
399 # We ignored this obsolete changeset earlier, but now
400 # that it has non-ignored children, we need to make
400 # that it has non-ignored children, we need to make
401 # sure their ancestors are not considered heads. To
401 # sure their ancestors are not considered heads. To
402 # achieve that, we will simply treat this obsolete
402 # achieve that, we will simply treat this obsolete
403 # changeset as a parent from other branch.
403 # changeset as a parent from other branch.
404 obsparents.add(p)
404 obsparents.add(p)
405 elif p in bheadset or getbranchinfo(p)[0] == branch:
405 elif p in bheadset or getbranchinfo(p)[0] == branch:
406 samebranch.add(p)
406 samebranch.add(p)
407 else:
407 else:
408 otherbranch.add(p)
408 otherbranch.add(p)
409 if not (len(bheadset) == len(samebranch) == 1):
409 if not (len(bheadset) == len(samebranch) == 1):
410 uncertain.update(otherbranch)
410 uncertain.update(otherbranch)
411 uncertain.update(obsparents)
411 uncertain.update(obsparents)
412 bheadset.difference_update(samebranch)
412 bheadset.difference_update(samebranch)
413 bheadset.add(newrev)
413 bheadset.add(newrev)
414
414
415 if uncertain:
415 if uncertain:
416 if topoheads is None:
416 if topoheads is None:
417 topoheads = set(cl.headrevs())
417 topoheads = set(cl.headrevs())
418 if bheadset - topoheads:
418 if bheadset - topoheads:
419 floorrev = min(bheadset)
419 floorrev = min(bheadset)
420 if floorrev <= max(uncertain):
420 if floorrev <= max(uncertain):
421 ancestors = set(cl.ancestors(uncertain, floorrev))
421 ancestors = set(cl.ancestors(uncertain, floorrev))
422 bheadset -= ancestors
422 bheadset -= ancestors
423 if bheadset:
423 if bheadset:
424 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
424 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
425
425
426
426
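For readers skimming `_process_new`, this is a condensed sketch of the per-branch head update above, ignoring obsolete parents and the `uncertain`/topoheads pruning; `parents` and `branch_of` stand in for the changelog and revbranchcache accessors, so treat it as an illustration rather than the real algorithm.

    def update_branch_heads(bheadset, newrevs, parents, branch_of, branch):
        # bheadset: current head revs of `branch`; newrevs: new revs on it
        for newrev in sorted(newrevs):
            if not bheadset:
                bheadset.add(newrev)
                continue
            samebranch = {
                p
                for p in parents(newrev)
                if p != -1 and (p in bheadset or branch_of(p) == branch)
            }
            # parents on the same branch can no longer be heads
            bheadset.difference_update(samebranch)
            bheadset.add(newrev)
        return bheadset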
427 STATE_CLEAN = 1
427 STATE_CLEAN = 1
428 STATE_INHERITED = 2
428 STATE_INHERITED = 2
429 STATE_DIRTY = 3
429 STATE_DIRTY = 3
430
430
431
431
432 class _LocalBranchCache(_BaseBranchCache):
432 class _LocalBranchCache(_BaseBranchCache):
433 """base class of branch-map info for a local repo or repoview"""
433 """base class of branch-map info for a local repo or repoview"""
434
434
435 _base_filename = None
435 _base_filename = None
436 _default_key_hashes: Tuple[bytes] = cast(Tuple[bytes], ())
436 _default_key_hashes: Tuple[bytes] = cast(Tuple[bytes], ())
437
437
438 def __init__(
438 def __init__(
439 self,
439 self,
440 repo: "localrepo.localrepository",
440 repo: "localrepo.localrepository",
441 entries: Union[
441 entries: Union[
442 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
442 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
443 ] = (),
443 ] = (),
444 tipnode: Optional[bytes] = None,
444 tipnode: Optional[bytes] = None,
445 tiprev: Optional[int] = nullrev,
445 tiprev: Optional[int] = nullrev,
446 key_hashes: Optional[Tuple[bytes]] = None,
446 key_hashes: Optional[Tuple[bytes]] = None,
447 closednodes: Optional[Set[bytes]] = None,
447 closednodes: Optional[Set[bytes]] = None,
448 hasnode: Optional[Callable[[bytes], bool]] = None,
448 hasnode: Optional[Callable[[bytes], bool]] = None,
449 verify_node: bool = False,
449 verify_node: bool = False,
450 inherited: bool = False,
450 inherited: bool = False,
451 ) -> None:
451 ) -> None:
452 """hasnode is a function which can be used to verify whether changelog
452 """hasnode is a function which can be used to verify whether changelog
453 has a given node or not. If it's not provided, we assume that every node
453 has a given node or not. If it's not provided, we assume that every node
454 we have exists in changelog"""
454 we have exists in changelog"""
455 self._filtername = repo.filtername
455 self._filtername = repo.filtername
456 if tipnode is None:
456 if tipnode is None:
457 self.tipnode = repo.nullid
457 self.tipnode = repo.nullid
458 else:
458 else:
459 self.tipnode = tipnode
459 self.tipnode = tipnode
460 self.tiprev = tiprev
460 self.tiprev = tiprev
461 if key_hashes is None:
461 if key_hashes is None:
462 self.key_hashes = self._default_key_hashes
462 self.key_hashes = self._default_key_hashes
463 else:
463 else:
464 self.key_hashes = key_hashes
464 self.key_hashes = key_hashes
465 self._state = STATE_CLEAN
465 self._state = STATE_CLEAN
466 if inherited:
466 if inherited:
467 self._state = STATE_INHERITED
467 self._state = STATE_INHERITED
468
468
469 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
469 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
470 # closednodes is a set of nodes that close their branch. If the branch
470 # closednodes is a set of nodes that close their branch. If the branch
471 # cache has been updated, it may contain nodes that are no longer
471 # cache has been updated, it may contain nodes that are no longer
472 # heads.
472 # heads.
473
473
474 # Do we need to verify branch at all ?
474 # Do we need to verify branch at all ?
475 self._verify_node = verify_node
475 self._verify_node = verify_node
476 # branches for which nodes are verified
476 # branches for which nodes are verified
477 self._verifiedbranches = set()
477 self._verifiedbranches = set()
478 self._hasnode = None
478 self._hasnode = None
479 if self._verify_node:
479 if self._verify_node:
480 self._hasnode = repo.changelog.hasnode
480 self._hasnode = repo.changelog.hasnode
481
481
482 def _compute_key_hashes(self, repo) -> Tuple[bytes]:
482 def _compute_key_hashes(self, repo) -> Tuple[bytes]:
483 raise NotImplementedError
483 raise NotImplementedError
484
484
485 def _ensure_populated(self, repo):
485 def _ensure_populated(self, repo):
486 """make sure any lazily loaded values are fully populated"""
486 """make sure any lazily loaded values are fully populated"""
487
487
488 def _detect_pure_topo(self, repo) -> None:
488 def _detect_pure_topo(self, repo) -> None:
489 pass
489 pass
490
490
491 def validfor(self, repo):
491 def validfor(self, repo):
492 """check that cache contents are valid for (a subset of) this repo
492 """check that cache contents are valid for (a subset of) this repo
493
493
494 - False when the order of changesets changed or if we detect a strip.
494 - False when the order of changesets changed or if we detect a strip.
495 - True when cache is up-to-date for the current repo or its subset."""
495 - True when cache is up-to-date for the current repo or its subset."""
496 try:
496 try:
497 node = repo.changelog.node(self.tiprev)
497 node = repo.changelog.node(self.tiprev)
498 except IndexError:
498 except IndexError:
499 # changesets were stripped and now we don't even have enough to
499 # changesets were stripped and now we don't even have enough to
500 # find tiprev
500 # find tiprev
501 return False
501 return False
502 if self.tipnode != node:
502 if self.tipnode != node:
503 # tiprev doesn't correspond to tipnode: repo was stripped, or this
503 # tiprev doesn't correspond to tipnode: repo was stripped, or this
504 # repo has a different order of changesets
504 # repo has a different order of changesets
505 return False
505 return False
506 repo_key_hashes = self._compute_key_hashes(repo)
506 repo_key_hashes = self._compute_key_hashes(repo)
507 # hashes don't match if this repo view has a different set of filtered
507 # hashes don't match if this repo view has a different set of filtered
508 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
508 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
509 # history was rewritten)
509 # history was rewritten)
510 return self.key_hashes == repo_key_hashes
510 return self.key_hashes == repo_key_hashes
511
511
512 @classmethod
512 @classmethod
513 def fromfile(cls, repo):
513 def fromfile(cls, repo):
514 f = None
514 f = None
515 try:
515 try:
516 f = repo.cachevfs(cls._filename(repo))
516 f = repo.cachevfs(cls._filename(repo))
517 lineiter = iter(f)
517 lineiter = iter(f)
518 init_kwargs = cls._load_header(repo, lineiter)
518 init_kwargs = cls._load_header(repo, lineiter)
519 bcache = cls(
519 bcache = cls(
520 repo,
520 repo,
521 verify_node=True,
521 verify_node=True,
522 **init_kwargs,
522 **init_kwargs,
523 )
523 )
524 if not bcache.validfor(repo):
524 if not bcache.validfor(repo):
525 # invalidate the cache
525 # invalidate the cache
526 raise ValueError('tip differs')
526 raise ValueError('tip differs')
527 bcache._load_heads(repo, lineiter)
527 bcache._load_heads(repo, lineiter)
528 except (IOError, OSError):
528 except (IOError, OSError):
529 return None
529 return None
530
530
531 except Exception as inst:
531 except Exception as inst:
532 if repo.ui.debugflag:
532 if repo.ui.debugflag:
533 msg = b'invalid %s: %s\n'
533 msg = b'invalid %s: %s\n'
534 msg %= (
534 msg %= (
535 _branchcachedesc(repo),
535 _branchcachedesc(repo),
536 stringutil.forcebytestr(inst),
536 stringutil.forcebytestr(inst),
537 )
537 )
538 repo.ui.debug(msg)
538 repo.ui.debug(msg)
539 bcache = None
539 bcache = None
540
540
541 finally:
541 finally:
542 if f:
542 if f:
543 f.close()
543 f.close()
544
544
545 return bcache
545 return bcache
546
546
547 @classmethod
547 @classmethod
548 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
548 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
549 raise NotImplementedError
549 raise NotImplementedError
550
550
551 def _load_heads(self, repo, lineiter):
551 def _load_heads(self, repo, lineiter):
552 """fully loads the branchcache by reading from the file using the line
552 """fully loads the branchcache by reading from the file using the line
553 iterator passed"""
553 iterator passed"""
554 for line in lineiter:
554 for line in lineiter:
555 line = line.rstrip(b'\n')
555 line = line.rstrip(b'\n')
556 if not line:
556 if not line:
557 continue
557 continue
558 node, state, label = line.split(b" ", 2)
558 node, state, label = line.split(b" ", 2)
559 if state not in b'oc':
559 if state not in b'oc':
560 raise ValueError('invalid branch state')
560 raise ValueError('invalid branch state')
561 label = encoding.tolocal(label.strip())
561 label = encoding.tolocal(label.strip())
562 node = bin(node)
562 node = bin(node)
563 self._entries.setdefault(label, []).append(node)
563 self._entries.setdefault(label, []).append(node)
564 if state == b'c':
564 if state == b'c':
565 self._closednodes.add(node)
565 self._closednodes.add(node)
566
566
567 @classmethod
567 @classmethod
568 def _filename(cls, repo):
568 def _filename(cls, repo):
569 """name of a branchcache file for a given repo or repoview"""
569 """name of a branchcache file for a given repo or repoview"""
570 filename = cls._base_filename
570 filename = cls._base_filename
571 assert filename is not None
571 assert filename is not None
572 if repo.filtername:
572 if repo.filtername:
573 filename = b'%s-%s' % (filename, repo.filtername)
573 filename = b'%s-%s' % (filename, repo.filtername)
574 return filename
574 return filename
575
575
576 def inherit_for(self, repo):
576 def inherit_for(self, repo):
577 """return a deep copy of the branchcache object"""
577 """return a deep copy of the branchcache object"""
578 assert repo.filtername != self._filtername
578 assert repo.filtername != self._filtername
579 other = type(self)(
579 other = type(self)(
580 repo=repo,
580 repo=repo,
581 # we always do a shallow copy of self._entries, and the values are
581 # we always do a shallow copy of self._entries, and the values are
582 # always replaced, so no need to deepcopy as long as the above remains
582 # always replaced, so no need to deepcopy as long as the above remains
583 # true.
583 # true.
584 entries=self._entries,
584 entries=self._entries,
585 tipnode=self.tipnode,
585 tipnode=self.tipnode,
586 tiprev=self.tiprev,
586 tiprev=self.tiprev,
587 key_hashes=self.key_hashes,
587 key_hashes=self.key_hashes,
588 closednodes=set(self._closednodes),
588 closednodes=set(self._closednodes),
589 verify_node=self._verify_node,
589 verify_node=self._verify_node,
590 inherited=True,
590 inherited=True,
591 )
591 )
592 # also copy information about the current verification state
592 # also copy information about the current verification state
593 other._verifiedbranches = set(self._verifiedbranches)
593 other._verifiedbranches = set(self._verifiedbranches)
594 return other
594 return other
595
595
596 def sync_disk(self, repo):
596 def sync_disk(self, repo):
597 """synchronise the on disk file with the cache state
597 """synchronise the on disk file with the cache state
598
598
599 If new values specific to this filter level need to be written, the file
599 If new values specific to this filter level need to be written, the file
600 will be updated. If the state of the branchcache is inherited from a
600 will be updated. If the state of the branchcache is inherited from a
601 subset, any stale on-disk file will be deleted.
601 subset, any stale on-disk file will be deleted.
602
602
603 That method does nothing if there is nothing to do.
603 That method does nothing if there is nothing to do.
604 """
604 """
605 if self._state == STATE_DIRTY:
605 if self._state == STATE_DIRTY:
606 self.write(repo)
606 self.write(repo)
607 elif self._state == STATE_INHERITED:
607 elif self._state == STATE_INHERITED:
608 filename = self._filename(repo)
608 filename = self._filename(repo)
609 repo.cachevfs.tryunlink(filename)
609 repo.cachevfs.tryunlink(filename)
610
610
611 def write(self, repo):
611 def write(self, repo):
612 assert self._filtername == repo.filtername, (
612 assert self._filtername == repo.filtername, (
613 self._filtername,
613 self._filtername,
614 repo.filtername,
614 repo.filtername,
615 )
615 )
616 assert self._state == STATE_DIRTY, self._state
616 assert self._state == STATE_DIRTY, self._state
617 # This method should not be called during an open transaction
617 # This method should not be called during an open transaction
618 tr = repo.currenttransaction()
618 tr = repo.currenttransaction()
619 if not getattr(tr, 'finalized', True):
619 if not getattr(tr, 'finalized', True):
620 msg = "writing branchcache in the middle of a transaction"
620 msg = "writing branchcache in the middle of a transaction"
621 raise error.ProgrammingError(msg)
621 raise error.ProgrammingError(msg)
622 try:
622 try:
623 filename = self._filename(repo)
623 filename = self._filename(repo)
624 with repo.cachevfs(filename, b"w", atomictemp=True) as f:
624 with repo.cachevfs(filename, b"w", atomictemp=True) as f:
625 self._write_header(f)
625 self._write_header(f)
626 nodecount = self._write_heads(repo, f)
626 nodecount = self._write_heads(repo, f)
627 repo.ui.log(
627 repo.ui.log(
628 b'branchcache',
628 b'branchcache',
629 b'wrote %s with %d labels and %d nodes\n',
629 b'wrote %s with %d labels and %d nodes\n',
630 _branchcachedesc(repo),
630 _branchcachedesc(repo),
631 len(self._entries),
631 len(self._entries),
632 nodecount,
632 nodecount,
633 )
633 )
634 self._state = STATE_CLEAN
634 self._state = STATE_CLEAN
635 except (IOError, OSError, error.Abort) as inst:
635 except (IOError, OSError, error.Abort) as inst:
636 # Abort may be raised by read only opener, so log and continue
636 # Abort may be raised by read only opener, so log and continue
637 repo.ui.debug(
637 repo.ui.debug(
638 b"couldn't write branch cache: %s\n"
638 b"couldn't write branch cache: %s\n"
639 % stringutil.forcebytestr(inst)
639 % stringutil.forcebytestr(inst)
640 )
640 )
641
641
642 def _write_header(self, fp) -> None:
642 def _write_header(self, fp) -> None:
643 raise NotImplementedError
643 raise NotImplementedError
644
644
645 def _write_heads(self, repo, fp) -> int:
645 def _write_heads(self, repo, fp) -> int:
646 """write list of heads to a file
646 """write list of heads to a file
647
647
648 Return the number of heads written."""
648 Return the number of heads written."""
649 nodecount = 0
649 nodecount = 0
650 for label, nodes in sorted(self._entries.items()):
650 for label, nodes in sorted(self._entries.items()):
651 label = encoding.fromlocal(label)
651 label = encoding.fromlocal(label)
652 for node in nodes:
652 for node in nodes:
653 nodecount += 1
653 nodecount += 1
654 if node in self._closednodes:
654 if node in self._closednodes:
655 state = b'c'
655 state = b'c'
656 else:
656 else:
657 state = b'o'
657 state = b'o'
658 fp.write(b"%s %s %s\n" % (hex(node), state, label))
658 fp.write(b"%s %s %s\n" % (hex(node), state, label))
659 return nodecount
659 return nodecount
660
660
661 def _verifybranch(self, branch):
661 def _verifybranch(self, branch):
662 """verify head nodes for the given branch."""
662 """verify head nodes for the given branch."""
663 if not self._verify_node:
663 if not self._verify_node:
664 return
664 return
665 if branch not in self._entries or branch in self._verifiedbranches:
665 if branch not in self._entries or branch in self._verifiedbranches:
666 return
666 return
667 assert self._hasnode is not None
667 assert self._hasnode is not None
668 for n in self._entries[branch]:
668 for n in self._entries[branch]:
669 if not self._hasnode(n):
669 if not self._hasnode(n):
670 _unknownnode(n)
670 _unknownnode(n)
671
671
672 self._verifiedbranches.add(branch)
672 self._verifiedbranches.add(branch)
673
673
674 def _verifyall(self):
674 def _verifyall(self):
675 """verifies nodes of all the branches"""
675 """verifies nodes of all the branches"""
676 for b in self._entries.keys():
676 for b in self._entries.keys():
677 if b not in self._verifiedbranches:
677 if b not in self._verifiedbranches:
678 self._verifybranch(b)
678 self._verifybranch(b)
679
679
680 def __getitem__(self, key):
680 def __getitem__(self, key):
681 self._verifybranch(key)
681 self._verifybranch(key)
682 return super().__getitem__(key)
682 return super().__getitem__(key)
683
683
684 def __contains__(self, key):
684 def __contains__(self, key):
685 self._verifybranch(key)
685 self._verifybranch(key)
686 return super().__contains__(key)
686 return super().__contains__(key)
687
687
688 def iteritems(self):
688 def iteritems(self):
689 self._verifyall()
689 self._verifyall()
690 return super().iteritems()
690 return super().iteritems()
691
691
692 items = iteritems
692 items = iteritems
693
693
694 def iterheads(self):
694 def iterheads(self):
695 """returns all the heads"""
695 """returns all the heads"""
696 self._verifyall()
696 self._verifyall()
697 return super().iterheads()
697 return super().iterheads()
698
698
699 def hasbranch(self, label):
699 def hasbranch(self, label):
700 """checks whether a branch of this name exists or not"""
700 """checks whether a branch of this name exists or not"""
701 self._verifybranch(label)
701 self._verifybranch(label)
702 return super().hasbranch(label)
702 return super().hasbranch(label)
703
703
704 def branchheads(self, branch, closed=False):
704 def branchheads(self, branch, closed=False):
705 self._verifybranch(branch)
705 self._verifybranch(branch)
706 return super().branchheads(branch, closed=closed)
706 return super().branchheads(branch, closed=closed)
707
707
708 def update(self, repo, revgen):
708 def update(self, repo, revgen):
709 assert self._filtername == repo.filtername, (
709 assert self._filtername == repo.filtername, (
710 self._filtername,
710 self._filtername,
711 repo.filtername,
711 repo.filtername,
712 )
712 )
713 cl = repo.changelog
713 cl = repo.changelog
714 max_rev = super().update(repo, revgen)
714 max_rev = super().update(repo, revgen)
715 # new tip revision which we found after iterating items from new
715 # new tip revision which we found after iterating items from new
716 # branches
716 # branches
717 if max_rev is not None and max_rev > self.tiprev:
717 if max_rev is not None and max_rev > self.tiprev:
718 self.tiprev = max_rev
718 self.tiprev = max_rev
719 self.tipnode = cl.node(max_rev)
719 self.tipnode = cl.node(max_rev)
720 else:
720 else:
721 # We should not be here if this is false
721 # We should not be here if this is false
722 assert cl.node(self.tiprev) == self.tipnode
722 assert cl.node(self.tiprev) == self.tipnode
723
723
724 if not self.validfor(repo):
724 if not self.validfor(repo):
725 # the tiprev and tipnode should be aligned, so if the current repo
725 # the tiprev and tipnode should be aligned, so if the current repo
726 # is not seen as valid, it is because the old cache key is now
726 # is not seen as valid, it is because the old cache key is now
727 # invalid for the repo.
727 # invalid for the repo.
728 #
728 #
729 # However, we've just updated the cache and we assume it's valid,
729 # However, we've just updated the cache and we assume it's valid,
730 # so let's make the cache key valid as well by recomputing it from
730 # so let's make the cache key valid as well by recomputing it from
731 # the cached data
731 # the cached data
732 self.key_hashes = self._compute_key_hashes(repo)
732 self.key_hashes = self._compute_key_hashes(repo)
733 self.filteredhash = scmutil.combined_filtered_and_obsolete_hash(
733 self.filteredhash = scmutil.combined_filtered_and_obsolete_hash(
734 repo,
734 repo,
735 self.tiprev,
735 self.tiprev,
736 )
736 )
737
737
738 self._state = STATE_DIRTY
738 self._state = STATE_DIRTY
739 tr = repo.currenttransaction()
739 tr = repo.currenttransaction()
740 if getattr(tr, 'finalized', True):
740 if getattr(tr, 'finalized', True):
741 # Avoid premature writing.
741 # Avoid premature writing.
742 #
742 #
743 # (The cache warming setup by localrepo will update the file later.)
743 # (The cache warming setup by localrepo will update the file later.)
744 self.write(repo)
744 self.write(repo)
745
745
746
746
747 def branch_cache_from_file(repo) -> Optional[_LocalBranchCache]:
747 def branch_cache_from_file(repo) -> Optional[_LocalBranchCache]:
748 """Build a branch cache from on-disk data if possible
748 """Build a branch cache from on-disk data if possible
749
749
750 Return a branch cache of the right format depending on the repository.
750 Return a branch cache of the right format depending on the repository.
751 """
751 """
752 if repo.ui.configbool(b"experimental", b"branch-cache-v3"):
752 if repo.ui.configbool(b"experimental", b"branch-cache-v3"):
753 return BranchCacheV3.fromfile(repo)
753 return BranchCacheV3.fromfile(repo)
754 else:
754 else:
755 return BranchCacheV2.fromfile(repo)
755 return BranchCacheV2.fromfile(repo)
756
756
757
757
758 def new_branch_cache(repo, *args, **kwargs):
758 def new_branch_cache(repo, *args, **kwargs):
759 """Build a new branch cache from the given arguments
759 """Build a new branch cache from the given arguments
760
760
761 Return a branch cache of the right format depending on the repository.
761 Return a branch cache of the right format depending on the repository.
762 """
762 """
763 if repo.ui.configbool(b"experimental", b"branch-cache-v3"):
763 if repo.ui.configbool(b"experimental", b"branch-cache-v3"):
764 return BranchCacheV3(repo, *args, **kwargs)
764 return BranchCacheV3(repo, *args, **kwargs)
765 else:
765 else:
766 return BranchCacheV2(repo, *args, **kwargs)
766 return BranchCacheV2(repo, *args, **kwargs)
767
767
768
768
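The only switch between the two on-disk formats is the `experimental.branch-cache-v3` option read above; enabling it in a repository's hgrc along these lines should make both helpers return the v3 implementation (a sketch, not an officially documented recipe):

    [experimental]
    branch-cache-v3 = yes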
769 class BranchCacheV2(_LocalBranchCache):
769 class BranchCacheV2(_LocalBranchCache):
770 """a branch cache using version 2 of the format on disk
770 """a branch cache using version 2 of the format on disk
771
771
772 The cache is serialized on disk in the following format:
772 The cache is serialized on disk in the following format:
773
773
774 <tip hex node> <tip rev number> [optional filtered repo hex hash]
774 <tip hex node> <tip rev number> [optional filtered repo hex hash]
775 <branch head hex node> <open/closed state> <branch name>
775 <branch head hex node> <open/closed state> <branch name>
776 <branch head hex node> <open/closed state> <branch name>
776 <branch head hex node> <open/closed state> <branch name>
777 ...
777 ...
778
778
779 The first line is used to check if the cache is still valid. If the
779 The first line is used to check if the cache is still valid. If the
780 branch cache is for a filtered repo view, an optional third hash is
780 branch cache is for a filtered repo view, an optional third hash is
781 included that hashes the hashes of all filtered and obsolete revisions.
781 included that hashes the hashes of all filtered and obsolete revisions.
782
782
783 The open/closed state is represented by a single letter 'o' or 'c'.
783 The open/closed state is represented by a single letter 'o' or 'c'.
784 This field can be used to avoid changelog reads when determining if a
784 This field can be used to avoid changelog reads when determining if a
785 branch head closes a branch or not.
785 branch head closes a branch or not.
786 """
786 """
787
787
788 _base_filename = b"branch2"
788 _base_filename = b"branch2"
789
789
790 @classmethod
790 @classmethod
791 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
791 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
792 """parse the head of a branchmap file
792 """parse the head of a branchmap file
793
793
794 return parameters to pass to a newly created class instance.
794 return parameters to pass to a newly created class instance.
795 """
795 """
796 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
796 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
797 last, lrev = cachekey[:2]
797 last, lrev = cachekey[:2]
798 last, lrev = bin(last), int(lrev)
798 last, lrev = bin(last), int(lrev)
799 filteredhash = ()
799 filteredhash = ()
800 if len(cachekey) > 2:
800 if len(cachekey) > 2:
801 filteredhash = (bin(cachekey[2]),)
801 filteredhash = (bin(cachekey[2]),)
802 return {
802 return {
803 "tipnode": last,
803 "tipnode": last,
804 "tiprev": lrev,
804 "tiprev": lrev,
805 "key_hashes": filteredhash,
805 "key_hashes": filteredhash,
806 }
806 }
807
807
808 def _write_header(self, fp) -> None:
808 def _write_header(self, fp) -> None:
809 """write the branch cache header to a file"""
809 """write the branch cache header to a file"""
810 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
810 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
811 if self.key_hashes:
811 if self.key_hashes:
812 cachekey.append(hex(self.key_hashes[0]))
812 cachekey.append(hex(self.key_hashes[0]))
813 fp.write(b" ".join(cachekey) + b'\n')
813 fp.write(b" ".join(cachekey) + b'\n')
814
814
815 def _compute_key_hashes(self, repo) -> Tuple[bytes]:
815 def _compute_key_hashes(self, repo) -> Tuple[bytes]:
816 """return the cache key hashes that match this repoview state"""
816 """return the cache key hashes that match this repoview state"""
817 filtered_hash = scmutil.combined_filtered_and_obsolete_hash(
817 filtered_hash = scmutil.combined_filtered_and_obsolete_hash(
818 repo,
818 repo,
819 self.tiprev,
819 self.tiprev,
820 needobsolete=True,
820 needobsolete=True,
821 )
821 )
822 keys: Tuple[bytes] = cast(Tuple[bytes], ())
822 keys: Tuple[bytes] = cast(Tuple[bytes], ())
823 if filtered_hash is not None:
823 if filtered_hash is not None:
824 keys: Tuple[bytes] = (filtered_hash,)
824 keys: Tuple[bytes] = (filtered_hash,)
825 return keys
825 return keys
826
826
827
827
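A minimal parsing sketch for the `branch2` layout documented above; the real loading path is `_load_header`/`_load_heads`, and this illustration skips all the validation they perform.

    def parse_branch2(lines):
        # first line: "<tip hex node> <tip rev> [optional filtered hash]"
        cachekey = lines[0].rstrip(b'\n').split(b' ', 2)
        tipnode, tiprev = cachekey[0], int(cachekey[1])
        heads = {}
        closed = set()
        for line in lines[1:]:
            line = line.rstrip(b'\n')
            if not line:
                continue
            node, state, label = line.split(b' ', 2)
            heads.setdefault(label, []).append(node)
            if state == b'c':
                closed.add(node)
        return tipnode, tiprev, heads, closed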
828 class BranchCacheV3(_LocalBranchCache):
828 class BranchCacheV3(_LocalBranchCache):
829 """a branch cache using version 3 of the format on disk
829 """a branch cache using version 3 of the format on disk
830
830
831 This version is still EXPERIMENTAL and the format is subject to changes.
831 This version is still EXPERIMENTAL and the format is subject to changes.
832
832
833 The cache is serialized on disk in the following format:
833 The cache is serialized on disk in the following format:
834
834
835 <cache-key-xxx>=<xxx-value> <cache-key-yyy>=<yyy-value> […]
835 <cache-key-xxx>=<xxx-value> <cache-key-yyy>=<yyy-value> […]
836 <branch head hex node> <open/closed state> <branch name>
836 <branch head hex node> <open/closed state> <branch name>
837 <branch head hex node> <open/closed state> <branch name>
837 <branch head hex node> <open/closed state> <branch name>
838 ...
838 ...
839
839
840 The first line is used to check if the cache is still valid. It is a series
840 The first line is used to check if the cache is still valid. It is a series
841 of key-value pairs. The following keys are recognized:
841 of key-value pairs. The following keys are recognized:
842
842
843 - tip-rev: the rev-num of the tip-most revision seen by this cache
843 - tip-rev: the rev-num of the tip-most revision seen by this cache
844 - tip-node: the node-id of the tip-most revision seen by this cache
844 - tip-node: the node-id of the tip-most revision seen by this cache
845 - filtered-hash: the hash of all filtered revisions (before tip-rev)
845 - filtered-hash: the hash of all filtered revisions (before tip-rev)
846 ignored by this cache.
846 ignored by this cache.
847 - obsolete-hash: the hash of all non-filtered obsolete revisions (before
847 - obsolete-hash: the hash of all non-filtered obsolete revisions (before
848 tip-rev) ignored by this cache.
848 tip-rev) ignored by this cache.
849
849
850 The tip-rev is used to know how far behind the values in the file are
850 The tip-rev is used to know how far behind the values in the file are
851 compared to the current repository state.
851 compared to the current repository state.
852
852
853 The tip-node, filtered-hash and obsolete-hash are used to detect if this
853 The tip-node, filtered-hash and obsolete-hash are used to detect if this
854 cache can be used for this repository state at all.
854 cache can be used for this repository state at all.
855
855
856 The open/closed state is represented by a single letter 'o' or 'c'.
856 The open/closed state is represented by a single letter 'o' or 'c'.
857 This field can be used to avoid changelog reads when determining if a
857 This field can be used to avoid changelog reads when determining if a
858 branch head closes a branch or not.
858 branch head closes a branch or not.
859
859
860 Topological heads are not included in the listing and should be dispatched
860 Topological heads are not included in the listing and should be dispatched
861 on the right branch at read time. Obsolete topological heads should be
861 on the right branch at read time. Obsolete topological heads should be
862 ignored.
862 ignored.
863 """
863 """
864
864
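For concreteness, a hypothetical first line in this format, with shortened values and the keys in the sorted order `_write_header` produces, might look like the following; the second line only appears when `topo-mode=pure` and carries the name of the branch owning all topological heads.

    filtered-hash=a3f1... obsolete-hash=09bc... tip-node=9f8a7d... tip-rev=4242 topo-mode=pure
    default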
865 _base_filename = b"branch3-exp"
865 _base_filename = b"branch3-exp"
866 _default_key_hashes = (None, None)
866 _default_key_hashes = (None, None)
867
867
868 def __init__(self, *args, pure_topo_branch=None, **kwargs):
868 def __init__(self, *args, pure_topo_branch=None, **kwargs):
869 super().__init__(*args, **kwargs)
869 super().__init__(*args, **kwargs)
870 self._pure_topo_branch = pure_topo_branch
870 self._pure_topo_branch = pure_topo_branch
871 self._needs_populate = self._pure_topo_branch is not None
871 self._needs_populate = self._pure_topo_branch is not None
872
872
873 def inherit_for(self, repo):
873 def inherit_for(self, repo):
874 new = super().inherit_for(repo)
874 new = super().inherit_for(repo)
875 new._pure_topo_branch = self._pure_topo_branch
875 new._pure_topo_branch = self._pure_topo_branch
876 new._needs_populate = self._needs_populate
876 new._needs_populate = self._needs_populate
877 return new
877 return new
878
878
879 def _get_topo_heads(self, repo):
879 def _get_topo_heads(self, repo):
880 """returns the topological heads of a repoview's content up to self.tiprev"""
880 """returns the topological heads of a repoview's content up to self.tiprev"""
881 cl = repo.changelog
881 cl = repo.changelog
882 if self.tiprev == nullrev:
882 if self.tiprev == nullrev:
883 return []
883 return []
884 elif self.tiprev == cl.tiprev():
884 elif self.tiprev == cl.tiprev():
885 return cl.headrevs()
885 return cl.headrevs()
886 else:
886 else:
887 # XXX passing tiprev as ceiling of cl.headrevs could be faster
887 heads = cl.headrevs(stop_rev=self.tiprev + 1)
888 heads = cl.headrevs(cl.revs(stop=self.tiprev))
889 return heads
888 return heads
890
889
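This is the line the changeset is about: instead of materializing `cl.revs(stop=self.tiprev)` and feeding it to the heads computation, the ceiling is now passed directly as `stop_rev` (presumably exclusive, hence `tiprev + 1`), which resolves the old XXX comment. A rough sketch of what computing heads under a revision ceiling means, with `parentrevs` standing in for the changelog accessor (not the real implementation):

    def heads_up_to(parentrevs, stop_rev):
        # a rev below stop_rev is a head unless another rev below stop_rev
        # lists it as a parent
        is_head = [True] * stop_rev
        for r in range(stop_rev):
            for p in parentrevs(r):
                if 0 <= p < stop_rev:
                    is_head[p] = False
        return [r for r in range(stop_rev) if is_head[r]]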
891 def _write_header(self, fp) -> None:
890 def _write_header(self, fp) -> None:
892 cache_keys = {
891 cache_keys = {
893 b"tip-node": hex(self.tipnode),
892 b"tip-node": hex(self.tipnode),
894 b"tip-rev": b'%d' % self.tiprev,
893 b"tip-rev": b'%d' % self.tiprev,
895 }
894 }
896 if self.key_hashes:
895 if self.key_hashes:
897 if self.key_hashes[0] is not None:
896 if self.key_hashes[0] is not None:
898 cache_keys[b"filtered-hash"] = hex(self.key_hashes[0])
897 cache_keys[b"filtered-hash"] = hex(self.key_hashes[0])
899 if self.key_hashes[1] is not None:
898 if self.key_hashes[1] is not None:
900 cache_keys[b"obsolete-hash"] = hex(self.key_hashes[1])
899 cache_keys[b"obsolete-hash"] = hex(self.key_hashes[1])
901 if self._pure_topo_branch is not None:
900 if self._pure_topo_branch is not None:
902 cache_keys[b"topo-mode"] = b"pure"
901 cache_keys[b"topo-mode"] = b"pure"
903 pieces = (b"%s=%s" % i for i in sorted(cache_keys.items()))
902 pieces = (b"%s=%s" % i for i in sorted(cache_keys.items()))
904 fp.write(b" ".join(pieces) + b'\n')
903 fp.write(b" ".join(pieces) + b'\n')
905 if self._pure_topo_branch is not None:
904 if self._pure_topo_branch is not None:
906 label = encoding.fromlocal(self._pure_topo_branch)
905 label = encoding.fromlocal(self._pure_topo_branch)
907 fp.write(label + b'\n')
906 fp.write(label + b'\n')
908
907
909 def _write_heads(self, repo, fp) -> int:
908 def _write_heads(self, repo, fp) -> int:
910 """write list of heads to a file
909 """write list of heads to a file
911
910
912 Return the number of heads written."""
911 Return the number of heads written."""
913 to_node = repo.changelog.node
912 to_node = repo.changelog.node
914 nodecount = 0
913 nodecount = 0
915 topo_heads = None
914 topo_heads = None
916 if self._pure_topo_branch is None:
915 if self._pure_topo_branch is None:
917 # we match using nodes because it is faster to build the set of nodes
916 # we match using nodes because it is faster to build the set of nodes
918 # than to resolve node → rev later.
917 # than to resolve node → rev later.
919 topo_heads = set(to_node(r) for r in self._get_topo_heads(repo))
918 topo_heads = set(to_node(r) for r in self._get_topo_heads(repo))
920 for label, nodes in sorted(self._entries.items()):
919 for label, nodes in sorted(self._entries.items()):
921 if label == self._pure_topo_branch:
920 if label == self._pure_topo_branch:
922 # no need to write anything, the header took care of that
921 # no need to write anything, the header took care of that
923 continue
922 continue
924 label = encoding.fromlocal(label)
923 label = encoding.fromlocal(label)
925 for node in nodes:
924 for node in nodes:
926 if topo_heads is not None:
925 if topo_heads is not None:
927 if node in topo_heads:
926 if node in topo_heads:
928 continue
927 continue
929 if node in self._closednodes:
928 if node in self._closednodes:
930 state = b'c'
929 state = b'c'
931 else:
930 else:
932 state = b'o'
931 state = b'o'
933 nodecount += 1
932 nodecount += 1
934 fp.write(b"%s %s %s\n" % (hex(node), state, label))
933 fp.write(b"%s %s %s\n" % (hex(node), state, label))
935 return nodecount
934 return nodecount
936
935
937 @classmethod
936 @classmethod
938 def _load_header(cls, repo, lineiter):
937 def _load_header(cls, repo, lineiter):
939 header_line = next(lineiter)
938 header_line = next(lineiter)
940 pieces = header_line.rstrip(b'\n').split(b" ")
939 pieces = header_line.rstrip(b'\n').split(b" ")
941 for p in pieces:
940 for p in pieces:
942 if b'=' not in p:
941 if b'=' not in p:
943 msg = b"invalid header_line: %r" % header_line
942 msg = b"invalid header_line: %r" % header_line
944 raise ValueError(msg)
943 raise ValueError(msg)
945 cache_keys = dict(p.split(b'=', 1) for p in pieces)
944 cache_keys = dict(p.split(b'=', 1) for p in pieces)
946
945
947 args = {}
946 args = {}
948 filtered_hash = None
947 filtered_hash = None
949 obsolete_hash = None
948 obsolete_hash = None
950 has_pure_topo_heads = False
949 has_pure_topo_heads = False
951 for k, v in cache_keys.items():
950 for k, v in cache_keys.items():
952 if k == b"tip-rev":
951 if k == b"tip-rev":
953 args["tiprev"] = int(v)
952 args["tiprev"] = int(v)
954 elif k == b"tip-node":
953 elif k == b"tip-node":
955 args["tipnode"] = bin(v)
954 args["tipnode"] = bin(v)
956 elif k == b"filtered-hash":
955 elif k == b"filtered-hash":
957 filtered_hash = bin(v)
956 filtered_hash = bin(v)
958 elif k == b"obsolete-hash":
957 elif k == b"obsolete-hash":
959 obsolete_hash = bin(v)
958 obsolete_hash = bin(v)
960 elif k == b"topo-mode":
959 elif k == b"topo-mode":
961 if v == b"pure":
960 if v == b"pure":
962 has_pure_topo_heads = True
961 has_pure_topo_heads = True
963 else:
962 else:
964 msg = b"unknown topo-mode: %r" % v
963 msg = b"unknown topo-mode: %r" % v
965 raise ValueError(msg)
964 raise ValueError(msg)
966 else:
965 else:
967 msg = b"unknown cache key: %r" % k
966 msg = b"unknown cache key: %r" % k
968 raise ValueError(msg)
967 raise ValueError(msg)
969 args["key_hashes"] = (filtered_hash, obsolete_hash)
968 args["key_hashes"] = (filtered_hash, obsolete_hash)
970 if has_pure_topo_heads:
969 if has_pure_topo_heads:
971 pure_line = next(lineiter).rstrip(b'\n')
970 pure_line = next(lineiter).rstrip(b'\n')
972 args["pure_topo_branch"] = encoding.tolocal(pure_line)
971 args["pure_topo_branch"] = encoding.tolocal(pure_line)
973 return args
972 return args
974
973
975 def _load_heads(self, repo, lineiter):
974 def _load_heads(self, repo, lineiter):
976 """fully loads the branchcache by reading from the file using the line
975 """fully loads the branchcache by reading from the file using the line
977 iterator passed"""
976 iterator passed"""
978 super()._load_heads(repo, lineiter)
977 super()._load_heads(repo, lineiter)
979 if self._pure_topo_branch is not None:
978 if self._pure_topo_branch is not None:
980 # no need to read the repository heads, we know their value already.
979 # no need to read the repository heads, we know their value already.
981 return
980 return
982 cl = repo.changelog
981 cl = repo.changelog
983 getbranchinfo = repo.revbranchcache().branchinfo
982 getbranchinfo = repo.revbranchcache().branchinfo
984 obsrevs = obsolete.getrevs(repo, b'obsolete')
983 obsrevs = obsolete.getrevs(repo, b'obsolete')
985 to_node = cl.node
984 to_node = cl.node
986 touched_branch = set()
985 touched_branch = set()
987 for head in self._get_topo_heads(repo):
986 for head in self._get_topo_heads(repo):
988 if head in obsrevs:
987 if head in obsrevs:
989 continue
988 continue
990 node = to_node(head)
989 node = to_node(head)
991 branch, closed = getbranchinfo(head)
990 branch, closed = getbranchinfo(head)
992 self._entries.setdefault(branch, []).append(node)
991 self._entries.setdefault(branch, []).append(node)
993 if closed:
992 if closed:
994 self._closednodes.add(node)
993 self._closednodes.add(node)
995 touched_branch.add(branch)
994 touched_branch.add(branch)
996 to_rev = cl.index.rev
995 to_rev = cl.index.rev
997 for branch in touched_branch:
996 for branch in touched_branch:
998 self._entries[branch].sort(key=to_rev)
997 self._entries[branch].sort(key=to_rev)
999
998
1000 def _compute_key_hashes(self, repo) -> Tuple[bytes]:
999 def _compute_key_hashes(self, repo) -> Tuple[bytes]:
1001 """return the cache key hashes that match this repoview state"""
1000 """return the cache key hashes that match this repoview state"""
1002 return scmutil.filtered_and_obsolete_hash(
1001 return scmutil.filtered_and_obsolete_hash(
1003 repo,
1002 repo,
1004 self.tiprev,
1003 self.tiprev,
1005 )
1004 )
1006
1005
1007 def _process_new(
1006 def _process_new(
1008 self,
1007 self,
1009 repo,
1008 repo,
1010 newbranches,
1009 newbranches,
1011 new_closed,
1010 new_closed,
1012 obs_ignored,
1011 obs_ignored,
1013 max_rev,
1012 max_rev,
1014 ) -> None:
1013 ) -> None:
1015 if (
1014 if (
1016 # note: the check about `obs_ignored` is too strict, as the
1015 # note: the check about `obs_ignored` is too strict, as the
1017 # obsolete revisions could be non-topological, but let's keep
1016 # obsolete revisions could be non-topological, but let's keep
1018 # things simple for now
1017 # things simple for now
1019 #
1018 #
1020 # The same applies to `new_closed`: if the closed changesets are
1019 # The same applies to `new_closed`: if the closed changesets are
1021 # not heads, we don't care that they are closed, but let's keep
1020 # not heads, we don't care that they are closed, but let's keep
1022 # things simple here too.
1021 # things simple here too.
1023 not (obs_ignored or new_closed)
1022 not (obs_ignored or new_closed)
1024 and (
1023 and (
1025 not newbranches
1024 not newbranches
1026 or (
1025 or (
1027 len(newbranches) == 1
1026 len(newbranches) == 1
1028 and (
1027 and (
1029 self.tiprev == nullrev
1028 self.tiprev == nullrev
1030 or self._pure_topo_branch in newbranches
1029 or self._pure_topo_branch in newbranches
1031 )
1030 )
1032 )
1031 )
1033 )
1032 )
1034 ):
1033 ):
1035 if newbranches:
1034 if newbranches:
1036 assert len(newbranches) == 1
1035 assert len(newbranches) == 1
1037 self._pure_topo_branch = list(newbranches.keys())[0]
1036 self._pure_topo_branch = list(newbranches.keys())[0]
1038 self._needs_populate = True
1037 self._needs_populate = True
1039 self._entries.pop(self._pure_topo_branch, None)
1038 self._entries.pop(self._pure_topo_branch, None)
1040 return
1039 return
1041
1040
1042 self._ensure_populated(repo)
1041 self._ensure_populated(repo)
1043 self._pure_topo_branch = None
1042 self._pure_topo_branch = None
1044 super()._process_new(
1043 super()._process_new(
1045 repo,
1044 repo,
1046 newbranches,
1045 newbranches,
1047 new_closed,
1046 new_closed,
1048 obs_ignored,
1047 obs_ignored,
1049 max_rev,
1048 max_rev,
1050 )
1049 )
1051
1050
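# Editorial summary of _process_new above (not part of the change): the cache
# stays in "pure topological" mode only when, as far as can be told cheaply:
#   - none of the new revisions is obsolete or closes a branch, and
#   - the new changesets either introduce no branch entry at all, or touch a
#     single branch that is (or becomes, starting from an empty cache) the
#     pure-topo branch.
# In every other case the cache is fully populated first and the fast path is
# dropped by setting self._pure_topo_branch to None.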
1052 def _ensure_populated(self, repo):
1051 def _ensure_populated(self, repo):
1053 """make sure any lazily loaded values are fully populated"""
1052 """make sure any lazily loaded values are fully populated"""
1054 if self._needs_populate:
1053 if self._needs_populate:
1055 assert self._pure_topo_branch is not None
1054 assert self._pure_topo_branch is not None
1056 cl = repo.changelog
1055 cl = repo.changelog
1057 to_node = cl.node
1056 to_node = cl.node
1058 topo_heads = self._get_topo_heads(repo)
1057 topo_heads = self._get_topo_heads(repo)
1059 heads = [to_node(r) for r in topo_heads]
1058 heads = [to_node(r) for r in topo_heads]
1060 self._entries[self._pure_topo_branch] = heads
1059 self._entries[self._pure_topo_branch] = heads
1061 self._needs_populate = False
1060 self._needs_populate = False
1062
1061
1063 def _detect_pure_topo(self, repo) -> None:
1062 def _detect_pure_topo(self, repo) -> None:
1064 if self._pure_topo_branch is not None:
1063 if self._pure_topo_branch is not None:
1065 # we are pure topological already
1064 # we are pure topological already
1066 return
1065 return
1067 to_node = repo.changelog.node
1066 to_node = repo.changelog.node
1068 topo_heads = [to_node(r) for r in self._get_topo_heads(repo)]
1067 topo_heads = [to_node(r) for r in self._get_topo_heads(repo)]
1069 if any(n in self._closednodes for n in topo_heads):
1068 if any(n in self._closednodes for n in topo_heads):
1070 return
1069 return
1071 for branch, heads in self._entries.items():
1070 for branch, heads in self._entries.items():
1072 if heads == topo_heads:
1071 if heads == topo_heads:
1073 self._pure_topo_branch = branch
1072 self._pure_topo_branch = branch
1074 break
1073 break
1075
1074
1076
1075
1077 class remotebranchcache(_BaseBranchCache):
1076 class remotebranchcache(_BaseBranchCache):
1078 """Branchmap info for a remote connection, should not write locally"""
1077 """Branchmap info for a remote connection, should not write locally"""
1079
1078
1080 def __init__(
1079 def __init__(
1081 self,
1080 self,
1082 repo: "localrepo.localrepository",
1081 repo: "localrepo.localrepository",
1083 entries: Union[
1082 entries: Union[
1084 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
1083 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
1085 ] = (),
1084 ] = (),
1086 closednodes: Optional[Set[bytes]] = None,
1085 closednodes: Optional[Set[bytes]] = None,
1087 ) -> None:
1086 ) -> None:
1088 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
1087 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
@@ -1,485 +1,490
1 # repoview.py - Filtered view of a localrepo object
1 # repoview.py - Filtered view of a localrepo object
2 #
2 #
3 # Copyright 2012 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
3 # Copyright 2012 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
4 # Logilab SA <contact@logilab.fr>
4 # Logilab SA <contact@logilab.fr>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import annotations
9 from __future__ import annotations
10
10
11 import copy
11 import copy
12 import weakref
12 import weakref
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import (
15 from .node import (
16 hex,
16 hex,
17 nullrev,
17 nullrev,
18 )
18 )
19 from . import (
19 from . import (
20 error,
20 error,
21 obsolete,
21 obsolete,
22 phases,
22 phases,
23 pycompat,
23 pycompat,
24 tags as tagsmod,
24 tags as tagsmod,
25 util,
25 util,
26 )
26 )
27 from .utils import repoviewutil
27 from .utils import repoviewutil
28
28
29
29
30 def hideablerevs(repo):
30 def hideablerevs(repo):
31 """Revision candidates to be hidden
31 """Revision candidates to be hidden
32
32
33 This is a standalone function to allow extensions to wrap it.
33 This is a standalone function to allow extensions to wrap it.
34
34
35 Because we use the set of immutable changesets as a fallback subset in
35 Because we use the set of immutable changesets as a fallback subset in
36 branchmap (see mercurial.utils.repoviewutil.subsettable), you cannot set
36 branchmap (see mercurial.utils.repoviewutil.subsettable), you cannot set
37 "public" changesets as "hideable". Doing so would break multiple code
37 "public" changesets as "hideable". Doing so would break multiple code
38 assertions and lead to crashes."""
38 assertions and lead to crashes."""
39 obsoletes = obsolete.getrevs(repo, b'obsolete')
39 obsoletes = obsolete.getrevs(repo, b'obsolete')
40 internals = repo._phasecache.getrevset(repo, phases.localhiddenphases)
40 internals = repo._phasecache.getrevset(repo, phases.localhiddenphases)
41 internals = frozenset(internals)
41 internals = frozenset(internals)
42 return obsoletes | internals
42 return obsoletes | internals
43
43
44
44
45 def pinnedrevs(repo):
45 def pinnedrevs(repo):
46 """revisions blocking hidden changesets from being filtered"""
46 """revisions blocking hidden changesets from being filtered"""
47
47
48 cl = repo.changelog
48 cl = repo.changelog
49 pinned = set()
49 pinned = set()
50 pinned.update([par.rev() for par in repo[None].parents()])
50 pinned.update([par.rev() for par in repo[None].parents()])
51 pinned.update([cl.rev(bm) for bm in repo._bookmarks.values()])
51 pinned.update([cl.rev(bm) for bm in repo._bookmarks.values()])
52
52
53 tags = {}
53 tags = {}
54 tagsmod.readlocaltags(repo.ui, repo, tags, {})
54 tagsmod.readlocaltags(repo.ui, repo, tags, {})
55 if tags:
55 if tags:
56 rev = cl.index.get_rev
56 rev = cl.index.get_rev
57 pinned.update(rev(t[0]) for t in tags.values())
57 pinned.update(rev(t[0]) for t in tags.values())
58 pinned.discard(None)
58 pinned.discard(None)
59
59
60 # Avoid cycle: mercurial.filemerge -> mercurial.templater ->
60 # Avoid cycle: mercurial.filemerge -> mercurial.templater ->
61 # mercurial.templatefuncs -> mercurial.revset -> mercurial.repoview ->
61 # mercurial.templatefuncs -> mercurial.revset -> mercurial.repoview ->
62 # mercurial.mergestate -> mercurial.filemerge
62 # mercurial.mergestate -> mercurial.filemerge
63 from . import mergestate
63 from . import mergestate
64
64
65 ms = mergestate.mergestate.read(repo)
65 ms = mergestate.mergestate.read(repo)
66 if ms.active() and ms.unresolvedcount():
66 if ms.active() and ms.unresolvedcount():
67 for node in (ms.local, ms.other):
67 for node in (ms.local, ms.other):
68 rev = cl.index.get_rev(node)
68 rev = cl.index.get_rev(node)
69 if rev is not None:
69 if rev is not None:
70 pinned.add(rev)
70 pinned.add(rev)
71
71
72 return pinned
72 return pinned
73
73
74
74
75 def _revealancestors(pfunc, hidden, revs):
75 def _revealancestors(pfunc, hidden, revs):
76 """reveals contiguous chains of hidden ancestors of 'revs' by removing them
76 """reveals contiguous chains of hidden ancestors of 'revs' by removing them
77 from 'hidden'
77 from 'hidden'
78
78
79 - pfunc(r): a function returning the parents of 'r',
79 - pfunc(r): a function returning the parents of 'r',
80 - hidden: the (preliminary) hidden revisions, to be updated
80 - hidden: the (preliminary) hidden revisions, to be updated
81 - revs: iterable of revnum,
81 - revs: iterable of revnum,
82
82
83 (Ancestors are revealed exclusively, i.e. the elements in 'revs' are
83 (Ancestors are revealed exclusively, i.e. the elements in 'revs' are
84 *not* revealed)
84 *not* revealed)
85 """
85 """
86 stack = list(revs)
86 stack = list(revs)
87 while stack:
87 while stack:
88 for p in pfunc(stack.pop()):
88 for p in pfunc(stack.pop()):
89 if p != nullrev and p in hidden:
89 if p != nullrev and p in hidden:
90 hidden.remove(p)
90 hidden.remove(p)
91 stack.append(p)
91 stack.append(p)
92
92
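# Editorial example (illustrative, not part of the change): on a linear
# history 0 <- 1 <- 2 <- 3 <- 4 where revisions 2, 3 and 4 start out hidden,
# revealing the ancestors of {4} removes 3 and then 2 from the hidden set,
# while 4 itself stays hidden because ancestors are revealed exclusively:
#
#     def _toy_pfunc(r):
#         return [r - 1] if r > 0 else [nullrev]
#
#     hidden = {2, 3, 4}
#     _revealancestors(_toy_pfunc, hidden, {4})
#     assert hidden == {4}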
93
93
94 def computehidden(repo, visibilityexceptions=None):
94 def computehidden(repo, visibilityexceptions=None):
95 """compute the set of hidden revision to filter
95 """compute the set of hidden revision to filter
96
96
97 During most operation hidden should be filtered."""
97 During most operation hidden should be filtered."""
98 assert not repo.changelog.filteredrevs
98 assert not repo.changelog.filteredrevs
99
99
100 hidden = hideablerevs(repo)
100 hidden = hideablerevs(repo)
101 if hidden:
101 if hidden:
102 hidden = set(hidden - pinnedrevs(repo))
102 hidden = set(hidden - pinnedrevs(repo))
103 if visibilityexceptions:
103 if visibilityexceptions:
104 hidden -= visibilityexceptions
104 hidden -= visibilityexceptions
105 pfunc = repo.changelog.parentrevs
105 pfunc = repo.changelog.parentrevs
106 mutable = repo._phasecache.getrevset(repo, phases.mutablephases)
106 mutable = repo._phasecache.getrevset(repo, phases.mutablephases)
107
107
108 visible = mutable - hidden
108 visible = mutable - hidden
109 _revealancestors(pfunc, hidden, visible)
109 _revealancestors(pfunc, hidden, visible)
110 return frozenset(hidden)
110 return frozenset(hidden)
111
111
112
112
113 def computesecret(repo, visibilityexceptions=None):
113 def computesecret(repo, visibilityexceptions=None):
114 """compute the set of revision that can never be exposed through hgweb
114 """compute the set of revision that can never be exposed through hgweb
115
115
116 Changeset in the secret phase (or above) should stay unaccessible."""
116 Changeset in the secret phase (or above) should stay unaccessible."""
117 assert not repo.changelog.filteredrevs
117 assert not repo.changelog.filteredrevs
118 secrets = repo._phasecache.getrevset(repo, phases.remotehiddenphases)
118 secrets = repo._phasecache.getrevset(repo, phases.remotehiddenphases)
119 return frozenset(secrets)
119 return frozenset(secrets)
120
120
121
121
122 def computeunserved(repo, visibilityexceptions=None):
122 def computeunserved(repo, visibilityexceptions=None):
123 """compute the set of revision that should be filtered when used a server
123 """compute the set of revision that should be filtered when used a server
124
124
125 Secret and hidden changeset should not pretend to be here."""
125 Secret and hidden changeset should not pretend to be here."""
126 assert not repo.changelog.filteredrevs
126 assert not repo.changelog.filteredrevs
127 # fast path in simple case to avoid impact of non optimised code
127 # fast path in simple case to avoid impact of non optimised code
128 hiddens = filterrevs(repo, b'visible')
128 hiddens = filterrevs(repo, b'visible')
129 secrets = filterrevs(repo, b'served.hidden')
129 secrets = filterrevs(repo, b'served.hidden')
130 if secrets:
130 if secrets:
131 return frozenset(hiddens | secrets)
131 return frozenset(hiddens | secrets)
132 else:
132 else:
133 return hiddens
133 return hiddens
134
134
135
135
136 def computemutable(repo, visibilityexceptions=None):
136 def computemutable(repo, visibilityexceptions=None):
137 assert not repo.changelog.filteredrevs
137 assert not repo.changelog.filteredrevs
138 # fast check to avoid revset call on huge repo
138 # fast check to avoid revset call on huge repo
139 if repo._phasecache.hasnonpublicphases(repo):
139 if repo._phasecache.hasnonpublicphases(repo):
140 return frozenset(repo._phasecache.getrevset(repo, phases.mutablephases))
140 return frozenset(repo._phasecache.getrevset(repo, phases.mutablephases))
141 return frozenset()
141 return frozenset()
142
142
143
143
144 def computeimpactable(repo, visibilityexceptions=None):
144 def computeimpactable(repo, visibilityexceptions=None):
145 """Everything impactable by mutable revision
145 """Everything impactable by mutable revision
146
146
147 The immutable filter still has some chance of being invalidated. This will
147 The immutable filter still has some chance of being invalidated. This will
148 happen when:
148 happen when:
149
149
150 - you garbage collect hidden changeset,
150 - you garbage collect hidden changeset,
151 - public phase is moved backward,
151 - public phase is moved backward,
152 - something is changed in the filtering (this could be fixed)
152 - something is changed in the filtering (this could be fixed)
153
153
154 This filters out any mutable changeset and any public changeset that may be
154 This filters out any mutable changeset and any public changeset that may be
155 impacted by something happening to a mutable revision.
155 impacted by something happening to a mutable revision.
156
156
157 This is achieved by filtering everything with a revision number equal to or
157 This is achieved by filtering everything with a revision number equal to or
158 higher than that of the first mutable changeset."""
158 higher than that of the first mutable changeset."""
159 assert not repo.changelog.filteredrevs
159 assert not repo.changelog.filteredrevs
160 cl = repo.changelog
160 cl = repo.changelog
161 firstmutable = len(cl)
161 firstmutable = len(cl)
162 roots = repo._phasecache.nonpublicphaseroots(repo)
162 roots = repo._phasecache.nonpublicphaseroots(repo)
163 if roots:
163 if roots:
164 firstmutable = min(firstmutable, min(roots))
164 firstmutable = min(firstmutable, min(roots))
165 # protect from nullrev root
165 # protect from nullrev root
166 firstmutable = max(0, firstmutable)
166 firstmutable = max(0, firstmutable)
167 return frozenset(range(firstmutable, len(cl)))
167 return frozenset(range(firstmutable, len(cl)))
168
168
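# Editorial example (illustrative): in a repository with 10 revisions (0..9)
# whose first non-public phase root is rev 6, computeimpactable() returns
# frozenset(range(6, 10)) == frozenset({6, 7, 8, 9}): every revision at or
# above the first mutable one is filtered, because a change to a mutable
# changeset could affect anything that comes after it.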
169
169
170 # function to compute filtered set
170 # function to compute filtered set
171 #
171 #
172 # When adding a new filter you MUST update the table at:
172 # When adding a new filter you MUST update the table at:
173 # mercurial.utils.repoviewutil.subsettable
173 # mercurial.utils.repoviewutil.subsettable
174 # Otherwise your filter will have to recompute all its branches cache
174 # Otherwise your filter will have to recompute all its branches cache
175 # from scratch (very slow).
175 # from scratch (very slow).
176 filtertable = {
176 filtertable = {
177 b'visible': computehidden,
177 b'visible': computehidden,
178 b'visible-hidden': computehidden,
178 b'visible-hidden': computehidden,
179 b'served.hidden': computesecret,
179 b'served.hidden': computesecret,
180 b'served': computeunserved,
180 b'served': computeunserved,
181 b'immutable': computemutable,
181 b'immutable': computemutable,
182 b'base': computeimpactable,
182 b'base': computeimpactable,
183 }
183 }
184
184
185 # set of filter level that will include the working copy parent no matter what.
185 # set of filter level that will include the working copy parent no matter what.
186 filter_has_wc = {b'visible', b'visible-hidden'}
186 filter_has_wc = {b'visible', b'visible-hidden'}
187
187
188 _basefiltername = list(filtertable)
188 _basefiltername = list(filtertable)
189
189
190
190
191 def extrafilter(ui):
191 def extrafilter(ui):
192 """initialize extra filter and return its id
192 """initialize extra filter and return its id
193
193
194 If extra filtering is configured, we make sure the associated filtered views
194 If extra filtering is configured, we make sure the associated filtered views
195 are declared and return the associated id.
195 are declared and return the associated id.
196 """
196 """
197 frevs = ui.config(b'experimental', b'extra-filter-revs')
197 frevs = ui.config(b'experimental', b'extra-filter-revs')
198 if frevs is None:
198 if frevs is None:
199 return None
199 return None
200
200
201 fid = pycompat.sysbytes(util.DIGESTS[b'sha1'](frevs).hexdigest())[:12]
201 fid = pycompat.sysbytes(util.DIGESTS[b'sha1'](frevs).hexdigest())[:12]
202
202
203 combine = lambda fname: fname + b'%' + fid
203 combine = lambda fname: fname + b'%' + fid
204
204
205 subsettable = repoviewutil.subsettable
205 subsettable = repoviewutil.subsettable
206
206
207 if combine(b'base') not in filtertable:
207 if combine(b'base') not in filtertable:
208 for base_name in _basefiltername:
208 for base_name in _basefiltername:
209
209
210 def extrafilteredrevs(repo, *args, name=base_name, **kwargs):
210 def extrafilteredrevs(repo, *args, name=base_name, **kwargs):
211 baserevs = filtertable[name](repo, *args, **kwargs)
211 baserevs = filtertable[name](repo, *args, **kwargs)
212 extrarevs = frozenset(repo.revs(frevs))
212 extrarevs = frozenset(repo.revs(frevs))
213 return baserevs | extrarevs
213 return baserevs | extrarevs
214
214
215 filtertable[combine(base_name)] = extrafilteredrevs
215 filtertable[combine(base_name)] = extrafilteredrevs
216 if base_name in subsettable:
216 if base_name in subsettable:
217 subsettable[combine(base_name)] = combine(
217 subsettable[combine(base_name)] = combine(
218 subsettable[base_name]
218 subsettable[base_name]
219 )
219 )
220 return fid
220 return fid
221
221
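# Editorial example (illustrative, values made up): with a configuration such
# as
#
#     [experimental]
#     extra-filter-revs = not public()
#
# extrafilter() derives a 12-hex-character id from the config value and, for
# every base filter, registers a combined entry named b'<base>%<id>' (for
# instance b'visible%1a2b3c4d5e6f') whose revisions are the base filtered set
# plus frozenset(repo.revs(b'not public()')).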
222
222
223 def filterrevs(repo, filtername, visibilityexceptions=None):
223 def filterrevs(repo, filtername, visibilityexceptions=None):
224 """returns set of filtered revision for this filter name
224 """returns set of filtered revision for this filter name
225
225
226 visibilityexceptions is a set of revs which must are exceptions for
226 visibilityexceptions is a set of revs which must are exceptions for
227 hidden-state and must be visible. They are dynamic and hence we should not
227 hidden-state and must be visible. They are dynamic and hence we should not
228 cache it's result"""
228 cache it's result"""
229 if filtername not in repo.filteredrevcache:
229 if filtername not in repo.filteredrevcache:
230 if repo.ui.configbool(b'devel', b'debug.repo-filters'):
230 if repo.ui.configbool(b'devel', b'debug.repo-filters'):
231 msg = b'computing revision filter for "%s"'
231 msg = b'computing revision filter for "%s"'
232 msg %= filtername
232 msg %= filtername
233 if repo.ui.tracebackflag and repo.ui.debugflag:
233 if repo.ui.tracebackflag and repo.ui.debugflag:
234 # XXX use ui.write_err
234 # XXX use ui.write_err
235 util.debugstacktrace(
235 util.debugstacktrace(
236 msg,
236 msg,
237 f=repo.ui._fout,
237 f=repo.ui._fout,
238 otherf=repo.ui._ferr,
238 otherf=repo.ui._ferr,
239 prefix=b'debug.filters: ',
239 prefix=b'debug.filters: ',
240 )
240 )
241 else:
241 else:
242 repo.ui.debug(b'debug.filters: %s\n' % msg)
242 repo.ui.debug(b'debug.filters: %s\n' % msg)
243 func = filtertable[filtername]
243 func = filtertable[filtername]
244 if visibilityexceptions:
244 if visibilityexceptions:
245 return func(repo.unfiltered, visibilityexceptions)
245 return func(repo.unfiltered, visibilityexceptions)
246 repo.filteredrevcache[filtername] = func(repo.unfiltered())
246 repo.filteredrevcache[filtername] = func(repo.unfiltered())
247 return repo.filteredrevcache[filtername]
247 return repo.filteredrevcache[filtername]
248
248
249
249
250 def wrapchangelog(unfichangelog, filteredrevs):
250 def wrapchangelog(unfichangelog, filteredrevs):
251 cl = copy.copy(unfichangelog)
251 cl = copy.copy(unfichangelog)
252 cl.filteredrevs = filteredrevs
252 cl.filteredrevs = filteredrevs
253
253
254 class filteredchangelog(filteredchangelogmixin, cl.__class__):
254 class filteredchangelog(filteredchangelogmixin, cl.__class__):
255 pass
255 pass
256
256
257 cl.__class__ = filteredchangelog
257 cl.__class__ = filteredchangelog
258
258
259 return cl
259 return cl
260
260
261
261
262 class filteredchangelogmixin:
262 class filteredchangelogmixin:
263 def tiprev(self):
263 def tiprev(self):
264 """filtered version of revlog.tiprev"""
264 """filtered version of revlog.tiprev"""
265 for i in range(len(self) - 1, -2, -1):
265 for i in range(len(self) - 1, -2, -1):
266 if i not in self.filteredrevs:
266 if i not in self.filteredrevs:
267 return i
267 return i
268
268
269 def __contains__(self, rev):
269 def __contains__(self, rev):
270 """filtered version of revlog.__contains__"""
270 """filtered version of revlog.__contains__"""
271 return 0 <= rev < len(self) and rev not in self.filteredrevs
271 return 0 <= rev < len(self) and rev not in self.filteredrevs
272
272
273 def __iter__(self):
273 def __iter__(self):
274 """filtered version of revlog.__iter__"""
274 """filtered version of revlog.__iter__"""
275
275
276 def filterediter():
276 def filterediter():
277 for i in range(len(self)):
277 for i in range(len(self)):
278 if i not in self.filteredrevs:
278 if i not in self.filteredrevs:
279 yield i
279 yield i
280
280
281 return filterediter()
281 return filterediter()
282
282
283 def revs(self, start=0, stop=None):
283 def revs(self, start=0, stop=None):
284 """filtered version of revlog.revs"""
284 """filtered version of revlog.revs"""
285 for i in super(filteredchangelogmixin, self).revs(start, stop):
285 for i in super(filteredchangelogmixin, self).revs(start, stop):
286 if i not in self.filteredrevs:
286 if i not in self.filteredrevs:
287 yield i
287 yield i
288
288
289 def _checknofilteredinrevs(self, revs):
289 def _checknofilteredinrevs(self, revs):
290 """raise the appropriate error if 'revs' contains a filtered revision
290 """raise the appropriate error if 'revs' contains a filtered revision
291
291
292 This returns a version of 'revs' to be used thereafter by the caller.
292 This returns a version of 'revs' to be used thereafter by the caller.
293 In particular, if revs is an iterator, it is converted into a set.
293 In particular, if revs is an iterator, it is converted into a set.
294 """
294 """
295 if hasattr(revs, '__next__'):
295 if hasattr(revs, '__next__'):
296 # Note that inspect.isgenerator() is not true for iterators,
296 # Note that inspect.isgenerator() is not true for iterators,
297 revs = set(revs)
297 revs = set(revs)
298
298
299 filteredrevs = self.filteredrevs
299 filteredrevs = self.filteredrevs
300 if hasattr(revs, 'first'): # smartset
300 if hasattr(revs, 'first'): # smartset
301 offenders = revs & filteredrevs
301 offenders = revs & filteredrevs
302 else:
302 else:
303 offenders = filteredrevs.intersection(revs)
303 offenders = filteredrevs.intersection(revs)
304
304
305 for rev in offenders:
305 for rev in offenders:
306 raise error.FilteredIndexError(rev)
306 raise error.FilteredIndexError(rev)
307 return revs
307 return revs
308
308
309 def _head_node_ids(self):
309 def _head_node_ids(self):
310 # no Rust fast path implemented yet, so just loop in Python
310 # no Rust fast path implemented yet, so just loop in Python
311 return [self.node(r) for r in self.headrevs()]
311 return [self.node(r) for r in self.headrevs()]
312
312
313 def headrevs(self, revs=None):
313 def headrevs(self, revs=None, stop_rev=None):
314 if revs is None:
314 if revs is None:
315 return self.index.headrevs(self.filteredrevs)
315 filtered = self.filteredrevs
316 if stop_rev is not None and stop_rev < len(self.index):
317 filtered = set(self.filteredrevs)
318 filtered.update(range(stop_rev, len(self.index)))
319 return self.index.headrevs(filtered)
320 assert stop_rev is None
316
321
317 revs = self._checknofilteredinrevs(revs)
322 revs = self._checknofilteredinrevs(revs)
318 return super(filteredchangelogmixin, self).headrevs(revs)
323 return super(filteredchangelogmixin, self).headrevs(revs)
319
324
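# Editorial note on the change above (illustrative): for the filtered
# changelog, `stop_rev` is honoured by treating every revision >= stop_rev as
# if it were filtered, so the index computes heads over the remaining prefix
# of history only. Callers such as branchmap can therefore ask a repoview's
# changelog for the heads of a truncated history without building a rev list
# first, e.g.:
#
#     cl = repo.changelog
#     heads = cl.headrevs(stop_rev=tiprev + 1)  # heads among revs <= tiprev
#
# The stop_rev fast path only applies when `revs` is None; the assertion
# keeps the combination of explicit `revs` and `stop_rev` unsupported for now.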
320 def strip(self, *args, **kwargs):
325 def strip(self, *args, **kwargs):
321 # XXX make something better than assert
326 # XXX make something better than assert
322 # We can't expect proper strip behavior if we are filtered.
327 # We can't expect proper strip behavior if we are filtered.
323 assert not self.filteredrevs
328 assert not self.filteredrevs
324 super(filteredchangelogmixin, self).strip(*args, **kwargs)
329 super(filteredchangelogmixin, self).strip(*args, **kwargs)
325
330
326 def rev(self, node):
331 def rev(self, node):
327 """filtered version of revlog.rev"""
332 """filtered version of revlog.rev"""
328 r = super(filteredchangelogmixin, self).rev(node)
333 r = super(filteredchangelogmixin, self).rev(node)
329 if r in self.filteredrevs:
334 if r in self.filteredrevs:
330 raise error.FilteredLookupError(
335 raise error.FilteredLookupError(
331 hex(node), self.display_id, _(b'filtered node')
336 hex(node), self.display_id, _(b'filtered node')
332 )
337 )
333 return r
338 return r
334
339
335 def node(self, rev):
340 def node(self, rev):
336 """filtered version of revlog.node"""
341 """filtered version of revlog.node"""
337 if rev in self.filteredrevs:
342 if rev in self.filteredrevs:
338 raise error.FilteredIndexError(rev)
343 raise error.FilteredIndexError(rev)
339 return super(filteredchangelogmixin, self).node(rev)
344 return super(filteredchangelogmixin, self).node(rev)
340
345
341 def linkrev(self, rev):
346 def linkrev(self, rev):
342 """filtered version of revlog.linkrev"""
347 """filtered version of revlog.linkrev"""
343 if rev in self.filteredrevs:
348 if rev in self.filteredrevs:
344 raise error.FilteredIndexError(rev)
349 raise error.FilteredIndexError(rev)
345 return super(filteredchangelogmixin, self).linkrev(rev)
350 return super(filteredchangelogmixin, self).linkrev(rev)
346
351
347 def parentrevs(self, rev):
352 def parentrevs(self, rev):
348 """filtered version of revlog.parentrevs"""
353 """filtered version of revlog.parentrevs"""
349 if rev in self.filteredrevs:
354 if rev in self.filteredrevs:
350 raise error.FilteredIndexError(rev)
355 raise error.FilteredIndexError(rev)
351 return super(filteredchangelogmixin, self).parentrevs(rev)
356 return super(filteredchangelogmixin, self).parentrevs(rev)
352
357
353 def flags(self, rev):
358 def flags(self, rev):
354 """filtered version of revlog.flags"""
359 """filtered version of revlog.flags"""
355 if rev in self.filteredrevs:
360 if rev in self.filteredrevs:
356 raise error.FilteredIndexError(rev)
361 raise error.FilteredIndexError(rev)
357 return super(filteredchangelogmixin, self).flags(rev)
362 return super(filteredchangelogmixin, self).flags(rev)
358
363
359
364
360 class repoview:
365 class repoview:
361 """Provide a read/write view of a repo through a filtered changelog
366 """Provide a read/write view of a repo through a filtered changelog
362
367
363 This object is used to access a filtered version of a repository without
368 This object is used to access a filtered version of a repository without
364 altering the original repository object itself. We cannot alter the
369 altering the original repository object itself. We cannot alter the
365 original object for two main reasons:
370 original object for two main reasons:
366 - It prevents the use of a repo with multiple filters at the same time. In
371 - It prevents the use of a repo with multiple filters at the same time. In
367 particular when multiple threads are involved.
372 particular when multiple threads are involved.
368 - It makes scope of the filtering harder to control.
373 - It makes scope of the filtering harder to control.
369
374
370 This object behaves very closely to the original repository. All attribute
375 This object behaves very closely to the original repository. All attribute
371 operations are done on the original repository:
376 operations are done on the original repository:
372 - An access to `repoview.someattr` actually returns `repo.someattr`,
377 - An access to `repoview.someattr` actually returns `repo.someattr`,
373 - A write to `repoview.someattr` actually sets value of `repo.someattr`,
378 - A write to `repoview.someattr` actually sets value of `repo.someattr`,
374 - A deletion of `repoview.someattr` actually drops `someattr`
379 - A deletion of `repoview.someattr` actually drops `someattr`
375 from `repo.__dict__`.
380 from `repo.__dict__`.
376
381
377 The only exception is the `changelog` property. It is overridden to return
382 The only exception is the `changelog` property. It is overridden to return
378 a (surface) copy of `repo.changelog` with some revisions filtered. The
383 a (surface) copy of `repo.changelog` with some revisions filtered. The
379 `filtername` attribute of the view controls the revisions that need to be
384 `filtername` attribute of the view controls the revisions that need to be
380 filtered. (the fact the changelog is copied is an implementation detail).
385 filtered. (the fact the changelog is copied is an implementation detail).
381
386
382 Unlike attributes, this object intercepts all method calls. This means that
387 Unlike attributes, this object intercepts all method calls. This means that
383 all methods are run on the `repoview` object with the filtered `changelog`
388 all methods are run on the `repoview` object with the filtered `changelog`
384 property. For this purpose the simple `repoview` class must be mixed with
389 property. For this purpose the simple `repoview` class must be mixed with
385 the actual class of the repository. This ensures that the resulting
390 the actual class of the repository. This ensures that the resulting
386 `repoview` object has the very same methods as the repo object. This
391 `repoview` object has the very same methods as the repo object. This
387 leads to the property below.
392 leads to the property below.
388
393
389 repoview.method() --> repo.__class__.method(repoview)
394 repoview.method() --> repo.__class__.method(repoview)
390
395
391 The inheritance has to be done dynamically because `repo` can be of any
396 The inheritance has to be done dynamically because `repo` can be of any
392 subclass of `localrepo`, e.g. `bundlerepo` or `statichttprepo`.
397 subclass of `localrepo`, e.g. `bundlerepo` or `statichttprepo`.
393 """
398 """
394
399
395 def __init__(self, repo, filtername, visibilityexceptions=None):
400 def __init__(self, repo, filtername, visibilityexceptions=None):
396 if filtername is None:
401 if filtername is None:
397 msg = "repoview should have a non-None filtername"
402 msg = "repoview should have a non-None filtername"
398 raise error.ProgrammingError(msg)
403 raise error.ProgrammingError(msg)
399 object.__setattr__(self, '_unfilteredrepo', repo)
404 object.__setattr__(self, '_unfilteredrepo', repo)
400 object.__setattr__(self, 'filtername', filtername)
405 object.__setattr__(self, 'filtername', filtername)
401 object.__setattr__(self, '_clcachekey', None)
406 object.__setattr__(self, '_clcachekey', None)
402 object.__setattr__(self, '_clcache', None)
407 object.__setattr__(self, '_clcache', None)
403 # revs which are exceptions and must not be hidden
408 # revs which are exceptions and must not be hidden
404 object.__setattr__(self, '_visibilityexceptions', visibilityexceptions)
409 object.__setattr__(self, '_visibilityexceptions', visibilityexceptions)
405
410
406 # not a propertycache on purpose; we shall implement a proper cache later
411 # not a propertycache on purpose; we shall implement a proper cache later
407 @property
412 @property
408 def changelog(self):
413 def changelog(self):
409 """return a filtered version of the changeset
414 """return a filtered version of the changeset
410
415
411 this changelog must not be used for writing"""
416 this changelog must not be used for writing"""
412 # some cache may be implemented later
417 # some cache may be implemented later
413 unfi = self._unfilteredrepo
418 unfi = self._unfilteredrepo
414 unfichangelog = unfi.changelog
419 unfichangelog = unfi.changelog
415 # bypass call to changelog.method
420 # bypass call to changelog.method
416 unfiindex = unfichangelog.index
421 unfiindex = unfichangelog.index
417 unfilen = len(unfiindex)
422 unfilen = len(unfiindex)
418 unfinode = unfiindex[unfilen - 1][7]
423 unfinode = unfiindex[unfilen - 1][7]
419 with util.timedcm('repo filter for %s', self.filtername):
424 with util.timedcm('repo filter for %s', self.filtername):
420 revs = filterrevs(unfi, self.filtername, self._visibilityexceptions)
425 revs = filterrevs(unfi, self.filtername, self._visibilityexceptions)
421 cl = self._clcache
426 cl = self._clcache
422 newkey = (unfilen, unfinode, hash(revs), unfichangelog.is_delaying)
427 newkey = (unfilen, unfinode, hash(revs), unfichangelog.is_delaying)
423 # if cl.index is not unfiindex, unfi.changelog would be
428 # if cl.index is not unfiindex, unfi.changelog would be
424 # recreated, and our clcache refers to garbage object
429 # recreated, and our clcache refers to garbage object
425 if cl is not None and (
430 if cl is not None and (
426 cl.index is not unfiindex or newkey != self._clcachekey
431 cl.index is not unfiindex or newkey != self._clcachekey
427 ):
432 ):
428 cl = None
433 cl = None
429 # could have been made None by the previous if
434 # could have been made None by the previous if
430 if cl is None:
435 if cl is None:
431 # Only filter if there's something to filter
436 # Only filter if there's something to filter
432 cl = wrapchangelog(unfichangelog, revs) if revs else unfichangelog
437 cl = wrapchangelog(unfichangelog, revs) if revs else unfichangelog
433 object.__setattr__(self, '_clcache', cl)
438 object.__setattr__(self, '_clcache', cl)
434 object.__setattr__(self, '_clcachekey', newkey)
439 object.__setattr__(self, '_clcachekey', newkey)
435 return cl
440 return cl
436
441
437 def unfiltered(self):
442 def unfiltered(self):
438 """Return an unfiltered version of a repo"""
443 """Return an unfiltered version of a repo"""
439 return self._unfilteredrepo
444 return self._unfilteredrepo
440
445
441 def filtered(self, name, visibilityexceptions=None):
446 def filtered(self, name, visibilityexceptions=None):
442 """Return a filtered version of a repository"""
447 """Return a filtered version of a repository"""
443 if name == self.filtername and not visibilityexceptions:
448 if name == self.filtername and not visibilityexceptions:
444 return self
449 return self
445 return self.unfiltered().filtered(name, visibilityexceptions)
450 return self.unfiltered().filtered(name, visibilityexceptions)
446
451
447 def __repr__(self):
452 def __repr__(self):
448 return '<%s:%s %r>' % (
453 return '<%s:%s %r>' % (
449 self.__class__.__name__,
454 self.__class__.__name__,
450 pycompat.sysstr(self.filtername),
455 pycompat.sysstr(self.filtername),
451 self.unfiltered(),
456 self.unfiltered(),
452 )
457 )
453
458
454 # every attribute access is forwarded to the proxied repo
459 # every attribute access is forwarded to the proxied repo
455 def __getattr__(self, attr):
460 def __getattr__(self, attr):
456 return getattr(self._unfilteredrepo, attr)
461 return getattr(self._unfilteredrepo, attr)
457
462
458 def __setattr__(self, attr, value):
463 def __setattr__(self, attr, value):
459 return setattr(self._unfilteredrepo, attr, value)
464 return setattr(self._unfilteredrepo, attr, value)
460
465
461 def __delattr__(self, attr):
466 def __delattr__(self, attr):
462 return delattr(self._unfilteredrepo, attr)
467 return delattr(self._unfilteredrepo, attr)
463
468
464
469
465 # Dynamically created classes introduce memory cycles via __mro__. See
470 # Dynamically created classes introduce memory cycles via __mro__. See
466 # https://bugs.python.org/issue17950.
471 # https://bugs.python.org/issue17950.
467 # This need for the garbage collector can turn into a memory leak in
472 # This need for the garbage collector can turn into a memory leak in
468 # Python <3.4, which is the first version released with PEP 442.
473 # Python <3.4, which is the first version released with PEP 442.
469 _filteredrepotypes = weakref.WeakKeyDictionary()
474 _filteredrepotypes = weakref.WeakKeyDictionary()
470
475
471
476
472 def newtype(base):
477 def newtype(base):
473 """Create a new type with the repoview mixin and the given base class"""
478 """Create a new type with the repoview mixin and the given base class"""
474 ref = _filteredrepotypes.get(base)
479 ref = _filteredrepotypes.get(base)
475 if ref is not None:
480 if ref is not None:
476 cls = ref()
481 cls = ref()
477 if cls is not None:
482 if cls is not None:
478 return cls
483 return cls
479
484
480 class filteredrepo(repoview, base):
485 class filteredrepo(repoview, base):
481 pass
486 pass
482
487
483 _filteredrepotypes[base] = weakref.ref(filteredrepo)
488 _filteredrepotypes[base] = weakref.ref(filteredrepo)
484 # do not reread from weakref to be 100% sure not to return None
489 # do not reread from weakref to be 100% sure not to return None
485 return filteredrepo
490 return filteredrepo
@@ -1,4112 +1,4118
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 # coding: utf8
2 # coding: utf8
3 #
3 #
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 """Storage back-end for Mercurial.
9 """Storage back-end for Mercurial.
10
10
11 This provides efficient delta storage with O(1) retrieve and append
11 This provides efficient delta storage with O(1) retrieve and append
12 and O(changes) merge between branches.
12 and O(changes) merge between branches.
13 """
13 """
14
14
15 from __future__ import annotations
15 from __future__ import annotations
16
16
17 import binascii
17 import binascii
18 import collections
18 import collections
19 import contextlib
19 import contextlib
20 import functools
20 import functools
21 import io
21 import io
22 import os
22 import os
23 import struct
23 import struct
24 import typing
24 import typing
25 import weakref
25 import weakref
26 import zlib
26 import zlib
27
27
28 from typing import (
28 from typing import (
29 Iterable,
29 Iterable,
30 Iterator,
30 Iterator,
31 Optional,
31 Optional,
32 Tuple,
32 Tuple,
33 )
33 )
34
34
35 # import stuff from node for others to import from revlog
35 # import stuff from node for others to import from revlog
36 from .node import (
36 from .node import (
37 bin,
37 bin,
38 hex,
38 hex,
39 nullrev,
39 nullrev,
40 sha1nodeconstants,
40 sha1nodeconstants,
41 short,
41 short,
42 wdirrev,
42 wdirrev,
43 )
43 )
44 from .i18n import _
44 from .i18n import _
45 from .revlogutils.constants import (
45 from .revlogutils.constants import (
46 ALL_KINDS,
46 ALL_KINDS,
47 CHANGELOGV2,
47 CHANGELOGV2,
48 COMP_MODE_DEFAULT,
48 COMP_MODE_DEFAULT,
49 COMP_MODE_INLINE,
49 COMP_MODE_INLINE,
50 COMP_MODE_PLAIN,
50 COMP_MODE_PLAIN,
51 DELTA_BASE_REUSE_NO,
51 DELTA_BASE_REUSE_NO,
52 DELTA_BASE_REUSE_TRY,
52 DELTA_BASE_REUSE_TRY,
53 ENTRY_RANK,
53 ENTRY_RANK,
54 FEATURES_BY_VERSION,
54 FEATURES_BY_VERSION,
55 FLAG_GENERALDELTA,
55 FLAG_GENERALDELTA,
56 FLAG_INLINE_DATA,
56 FLAG_INLINE_DATA,
57 INDEX_HEADER,
57 INDEX_HEADER,
58 KIND_CHANGELOG,
58 KIND_CHANGELOG,
59 KIND_FILELOG,
59 KIND_FILELOG,
60 RANK_UNKNOWN,
60 RANK_UNKNOWN,
61 REVLOGV0,
61 REVLOGV0,
62 REVLOGV1,
62 REVLOGV1,
63 REVLOGV1_FLAGS,
63 REVLOGV1_FLAGS,
64 REVLOGV2,
64 REVLOGV2,
65 REVLOGV2_FLAGS,
65 REVLOGV2_FLAGS,
66 REVLOG_DEFAULT_FLAGS,
66 REVLOG_DEFAULT_FLAGS,
67 REVLOG_DEFAULT_FORMAT,
67 REVLOG_DEFAULT_FORMAT,
68 REVLOG_DEFAULT_VERSION,
68 REVLOG_DEFAULT_VERSION,
69 SUPPORTED_FLAGS,
69 SUPPORTED_FLAGS,
70 )
70 )
71 from .revlogutils.flagutil import (
71 from .revlogutils.flagutil import (
72 REVIDX_DEFAULT_FLAGS,
72 REVIDX_DEFAULT_FLAGS,
73 REVIDX_ELLIPSIS,
73 REVIDX_ELLIPSIS,
74 REVIDX_EXTSTORED,
74 REVIDX_EXTSTORED,
75 REVIDX_FLAGS_ORDER,
75 REVIDX_FLAGS_ORDER,
76 REVIDX_HASCOPIESINFO,
76 REVIDX_HASCOPIESINFO,
77 REVIDX_ISCENSORED,
77 REVIDX_ISCENSORED,
78 REVIDX_RAWTEXT_CHANGING_FLAGS,
78 REVIDX_RAWTEXT_CHANGING_FLAGS,
79 )
79 )
80 from .thirdparty import attr
80 from .thirdparty import attr
81
81
82 # Force pytype to use the non-vendored package
82 # Force pytype to use the non-vendored package
83 if typing.TYPE_CHECKING:
83 if typing.TYPE_CHECKING:
84 # noinspection PyPackageRequirements
84 # noinspection PyPackageRequirements
85 import attr
85 import attr
86
86
87 from . import (
87 from . import (
88 ancestor,
88 ancestor,
89 dagop,
89 dagop,
90 error,
90 error,
91 mdiff,
91 mdiff,
92 policy,
92 policy,
93 pycompat,
93 pycompat,
94 revlogutils,
94 revlogutils,
95 templatefilters,
95 templatefilters,
96 util,
96 util,
97 vfs as vfsmod,
97 vfs as vfsmod,
98 )
98 )
99 from .interfaces import (
99 from .interfaces import (
100 repository,
100 repository,
101 util as interfaceutil,
101 util as interfaceutil,
102 )
102 )
103 from .revlogutils import (
103 from .revlogutils import (
104 deltas as deltautil,
104 deltas as deltautil,
105 docket as docketutil,
105 docket as docketutil,
106 flagutil,
106 flagutil,
107 nodemap as nodemaputil,
107 nodemap as nodemaputil,
108 randomaccessfile,
108 randomaccessfile,
109 revlogv0,
109 revlogv0,
110 rewrite,
110 rewrite,
111 sidedata as sidedatautil,
111 sidedata as sidedatautil,
112 )
112 )
113 from .utils import (
113 from .utils import (
114 storageutil,
114 storageutil,
115 stringutil,
115 stringutil,
116 )
116 )
117
117
118 # blanked usage of all the names to prevent pyflakes constraints
118 # blanked usage of all the names to prevent pyflakes constraints
119 # We need these names available in the module for extensions.
119 # We need these names available in the module for extensions.
120
120
121 REVLOGV0
121 REVLOGV0
122 REVLOGV1
122 REVLOGV1
123 REVLOGV2
123 REVLOGV2
124 CHANGELOGV2
124 CHANGELOGV2
125 FLAG_INLINE_DATA
125 FLAG_INLINE_DATA
126 FLAG_GENERALDELTA
126 FLAG_GENERALDELTA
127 REVLOG_DEFAULT_FLAGS
127 REVLOG_DEFAULT_FLAGS
128 REVLOG_DEFAULT_FORMAT
128 REVLOG_DEFAULT_FORMAT
129 REVLOG_DEFAULT_VERSION
129 REVLOG_DEFAULT_VERSION
130 REVLOGV1_FLAGS
130 REVLOGV1_FLAGS
131 REVLOGV2_FLAGS
131 REVLOGV2_FLAGS
132 REVIDX_ISCENSORED
132 REVIDX_ISCENSORED
133 REVIDX_ELLIPSIS
133 REVIDX_ELLIPSIS
134 REVIDX_HASCOPIESINFO
134 REVIDX_HASCOPIESINFO
135 REVIDX_EXTSTORED
135 REVIDX_EXTSTORED
136 REVIDX_DEFAULT_FLAGS
136 REVIDX_DEFAULT_FLAGS
137 REVIDX_FLAGS_ORDER
137 REVIDX_FLAGS_ORDER
138 REVIDX_RAWTEXT_CHANGING_FLAGS
138 REVIDX_RAWTEXT_CHANGING_FLAGS
139
139
140 parsers = policy.importmod('parsers')
140 parsers = policy.importmod('parsers')
141 rustancestor = policy.importrust('ancestor')
141 rustancestor = policy.importrust('ancestor')
142 rustdagop = policy.importrust('dagop')
142 rustdagop = policy.importrust('dagop')
143 rustrevlog = policy.importrust('revlog')
143 rustrevlog = policy.importrust('revlog')
144
144
145 # Aliased for performance.
145 # Aliased for performance.
146 _zlibdecompress = zlib.decompress
146 _zlibdecompress = zlib.decompress
147
147
148 # max size of inline data embedded into a revlog
148 # max size of inline data embedded into a revlog
149 _maxinline = 131072
149 _maxinline = 131072
150
150
151
151
152 # Flag processors for REVIDX_ELLIPSIS.
152 # Flag processors for REVIDX_ELLIPSIS.
153 def ellipsisreadprocessor(rl, text):
153 def ellipsisreadprocessor(rl, text):
154 return text, False
154 return text, False
155
155
156
156
157 def ellipsiswriteprocessor(rl, text):
157 def ellipsiswriteprocessor(rl, text):
158 return text, False
158 return text, False
159
159
160
160
161 def ellipsisrawprocessor(rl, text):
161 def ellipsisrawprocessor(rl, text):
162 return False
162 return False
163
163
164
164
165 ellipsisprocessor = (
165 ellipsisprocessor = (
166 ellipsisreadprocessor,
166 ellipsisreadprocessor,
167 ellipsiswriteprocessor,
167 ellipsiswriteprocessor,
168 ellipsisrawprocessor,
168 ellipsisrawprocessor,
169 )
169 )
170
170
171
171
172 def _verify_revision(rl, skipflags, state, node):
172 def _verify_revision(rl, skipflags, state, node):
173 """Verify the integrity of the given revlog ``node`` while providing a hook
173 """Verify the integrity of the given revlog ``node`` while providing a hook
174 point for extensions to influence the operation."""
174 point for extensions to influence the operation."""
175 if skipflags:
175 if skipflags:
176 state[b'skipread'].add(node)
176 state[b'skipread'].add(node)
177 else:
177 else:
178 # Side-effect: read content and verify hash.
178 # Side-effect: read content and verify hash.
179 rl.revision(node)
179 rl.revision(node)
180
180
181
181
182 # True if a fast implementation for persistent-nodemap is available
182 # True if a fast implementation for persistent-nodemap is available
183 #
183 #
184 # We also consider that we have a "fast" implementation in "pure" python because
184 # We also consider that we have a "fast" implementation in "pure" python because
185 # people using pure don't really have performance considerations (and a
185 # people using pure don't really have performance considerations (and a
186 # wheelbarrow of other slowness sources)
186 # wheelbarrow of other slowness sources)
187 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
187 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
188 parsers, 'BaseIndexObject'
188 parsers, 'BaseIndexObject'
189 )
189 )
190
190
191
191
192 @attr.s(slots=True)
192 @attr.s(slots=True)
193 class RevLogRevisionDelta:
193 class RevLogRevisionDelta:
194 node = attr.ib()
194 node = attr.ib()
195 p1node = attr.ib()
195 p1node = attr.ib()
196 p2node = attr.ib()
196 p2node = attr.ib()
197 basenode = attr.ib()
197 basenode = attr.ib()
198 flags = attr.ib()
198 flags = attr.ib()
199 baserevisionsize = attr.ib()
199 baserevisionsize = attr.ib()
200 revision = attr.ib()
200 revision = attr.ib()
201 delta = attr.ib()
201 delta = attr.ib()
202 sidedata = attr.ib()
202 sidedata = attr.ib()
203 protocol_flags = attr.ib()
203 protocol_flags = attr.ib()
204 linknode = attr.ib(default=None)
204 linknode = attr.ib(default=None)
205
205
206
206
207 revlogrevisiondelta = interfaceutil.implementer(repository.irevisiondelta)(
207 revlogrevisiondelta = interfaceutil.implementer(repository.irevisiondelta)(
208 RevLogRevisionDelta
208 RevLogRevisionDelta
209 )
209 )
210
210
211 if typing.TYPE_CHECKING:
211 if typing.TYPE_CHECKING:
212 revlogrevisiondelta = RevLogRevisionDelta
212 revlogrevisiondelta = RevLogRevisionDelta
213
213
214
214
215 @attr.s(frozen=True)
215 @attr.s(frozen=True)
216 class RevLogProblem:
216 class RevLogProblem:
217 warning = attr.ib(default=None, type=Optional[bytes])
217 warning = attr.ib(default=None, type=Optional[bytes])
218 error = attr.ib(default=None, type=Optional[bytes])
218 error = attr.ib(default=None, type=Optional[bytes])
219 node = attr.ib(default=None, type=Optional[bytes])
219 node = attr.ib(default=None, type=Optional[bytes])
220
220
221
221
222 revlogproblem = interfaceutil.implementer(repository.iverifyproblem)(
222 revlogproblem = interfaceutil.implementer(repository.iverifyproblem)(
223 RevLogProblem
223 RevLogProblem
224 )
224 )
225
225
226 if typing.TYPE_CHECKING:
226 if typing.TYPE_CHECKING:
227 revlogproblem = RevLogProblem
227 revlogproblem = RevLogProblem
228
228
229
229
230 def parse_index_v1(data, inline):
230 def parse_index_v1(data, inline):
231 # call the C implementation to parse the index data
231 # call the C implementation to parse the index data
232 index, cache = parsers.parse_index2(data, inline)
232 index, cache = parsers.parse_index2(data, inline)
233 return index, cache
233 return index, cache
234
234
235
235
236 def parse_index_v2(data, inline):
236 def parse_index_v2(data, inline):
237 # call the C implementation to parse the index data
237 # call the C implementation to parse the index data
238 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
238 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
239 return index, cache
239 return index, cache
240
240
241
241
242 def parse_index_cl_v2(data, inline):
242 def parse_index_cl_v2(data, inline):
243 # call the C implementation to parse the index data
243 # call the C implementation to parse the index data
244 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
244 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
245 return index, cache
245 return index, cache
246
246
247
247
248 if hasattr(parsers, 'parse_index_devel_nodemap'):
248 if hasattr(parsers, 'parse_index_devel_nodemap'):
249
249
250 def parse_index_v1_nodemap(data, inline):
250 def parse_index_v1_nodemap(data, inline):
251 index, cache = parsers.parse_index_devel_nodemap(data, inline)
251 index, cache = parsers.parse_index_devel_nodemap(data, inline)
252 return index, cache
252 return index, cache
253
253
254 else:
254 else:
255 parse_index_v1_nodemap = None
255 parse_index_v1_nodemap = None
256
256
257
257
258 def parse_index_v1_rust(data, inline, default_header):
258 def parse_index_v1_rust(data, inline, default_header):
259 cache = (0, data) if inline else None
259 cache = (0, data) if inline else None
260 return rustrevlog.Index(data, default_header), cache
260 return rustrevlog.Index(data, default_header), cache
261
261
262
262
263 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
263 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
264 # signed integer)
264 # signed integer)
265 _maxentrysize = 0x7FFFFFFF
265 _maxentrysize = 0x7FFFFFFF
266
266
267 FILE_TOO_SHORT_MSG = _(
267 FILE_TOO_SHORT_MSG = _(
268 b'cannot read from revlog %s;'
268 b'cannot read from revlog %s;'
269 b' expected %d bytes from offset %d, data size is %d'
269 b' expected %d bytes from offset %d, data size is %d'
270 )
270 )
271
271
272 hexdigits = b'0123456789abcdefABCDEF'
272 hexdigits = b'0123456789abcdefABCDEF'
273
273
274
274
275 class _Config:
275 class _Config:
276 def copy(self):
276 def copy(self):
277 return self.__class__(**self.__dict__)
277 return self.__class__(**self.__dict__)
278
278
279
279
280 @attr.s()
280 @attr.s()
281 class FeatureConfig(_Config):
281 class FeatureConfig(_Config):
282 """Hold configuration values about the available revlog features"""
282 """Hold configuration values about the available revlog features"""
283
283
284 # the default compression engine
284 # the default compression engine
285 compression_engine = attr.ib(default=b'zlib')
285 compression_engine = attr.ib(default=b'zlib')
286 # compression engines options
286 # compression engines options
287 compression_engine_options = attr.ib(default=attr.Factory(dict))
287 compression_engine_options = attr.ib(default=attr.Factory(dict))
288
288
289 # can we use censor on this revlog
289 # can we use censor on this revlog
290 censorable = attr.ib(default=False)
290 censorable = attr.ib(default=False)
291 # does this revlog use the "side data" feature
291 # does this revlog use the "side data" feature
292 has_side_data = attr.ib(default=False)
292 has_side_data = attr.ib(default=False)
293 # might remove rank configuration once the computation has no impact
293 # might remove rank configuration once the computation has no impact
294 compute_rank = attr.ib(default=False)
294 compute_rank = attr.ib(default=False)
295 # parent order is supposed to be semantically irrelevant, so we
295 # parent order is supposed to be semantically irrelevant, so we
296 # normally re-sort parents to ensure that the first parent is non-null,
296 # normally re-sort parents to ensure that the first parent is non-null,
297 # if there is a non-null parent at all.
297 # if there is a non-null parent at all.
298 # filelog abuses the parent order as a flag to mark some instances of
298 # filelog abuses the parent order as a flag to mark some instances of
299 # meta-encoded files, so allow it to disable this behavior.
299 # meta-encoded files, so allow it to disable this behavior.
300 canonical_parent_order = attr.ib(default=False)
300 canonical_parent_order = attr.ib(default=False)
301 # can ellipsis commit be used
301 # can ellipsis commit be used
302 enable_ellipsis = attr.ib(default=False)
302 enable_ellipsis = attr.ib(default=False)
303
303
304 def copy(self):
304 def copy(self):
305 new = super().copy()
305 new = super().copy()
306 new.compression_engine_options = self.compression_engine_options.copy()
306 new.compression_engine_options = self.compression_engine_options.copy()
307 return new
307 return new
308
308
309
309
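# NOTE (illustrative sketch, not part of revlog.py): the copy() override above
# exists because a plain attribute copy would share the mutable
# `compression_engine_options` dict between the original and the copy. A toy
# reproduction of that pitfall, using only the standard library:
def _copy_pitfall_demo():
    class Cfg:
        def __init__(self, options=None):
            self.options = options if options is not None else {}

        def shallow_copy(self):
            # copies attribute *references* only
            new = Cfg.__new__(Cfg)
            new.__dict__.update(self.__dict__)
            return new

    a = Cfg()
    b = a.shallow_copy()
    b.options[b'level'] = 9
    # the mutation leaks back into `a` because the dict object is shared
    assert a.options == {b'level': 9}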
310 @attr.s()
310 @attr.s()
311 class DataConfig(_Config):
311 class DataConfig(_Config):
312 """Hold configuration value about how the revlog data are read"""
312 """Hold configuration value about how the revlog data are read"""
313
313
314 # should we try to open the "pending" version of the revlog
314 # should we try to open the "pending" version of the revlog
315 try_pending = attr.ib(default=False)
315 try_pending = attr.ib(default=False)
316 # should we try to open the "split" version of the revlog
316 # should we try to open the "split" version of the revlog
317 try_split = attr.ib(default=False)
317 try_split = attr.ib(default=False)
318 # When True, indexfile should be opened with checkambig=True at writing,
318 # When True, indexfile should be opened with checkambig=True at writing,
319 # to avoid file stat ambiguity.
319 # to avoid file stat ambiguity.
320 check_ambig = attr.ib(default=False)
320 check_ambig = attr.ib(default=False)
321
321
322 # If true, use mmap instead of reading to deal with large index
322 # If true, use mmap instead of reading to deal with large index
323 mmap_large_index = attr.ib(default=False)
323 mmap_large_index = attr.ib(default=False)
324 # index size above which the index is considered large
324 # index size above which the index is considered large
325 mmap_index_threshold = attr.ib(default=None)
325 mmap_index_threshold = attr.ib(default=None)
326 # How much data to read and cache into the raw revlog data cache.
326 # How much data to read and cache into the raw revlog data cache.
327 chunk_cache_size = attr.ib(default=65536)
327 chunk_cache_size = attr.ib(default=65536)
328
328
329 # The size of the uncompressed cache compared to the largest revision seen.
329 # The size of the uncompressed cache compared to the largest revision seen.
330 uncompressed_cache_factor = attr.ib(default=None)
330 uncompressed_cache_factor = attr.ib(default=None)
331
331
332 # The number of chunks cached
332 # The number of chunks cached
333 uncompressed_cache_count = attr.ib(default=None)
333 uncompressed_cache_count = attr.ib(default=None)
334
334
335 # Allow sparse reading of the revlog data
335 # Allow sparse reading of the revlog data
336 with_sparse_read = attr.ib(default=False)
336 with_sparse_read = attr.ib(default=False)
337 # minimal density of a sparse read chunk
337 # minimal density of a sparse read chunk
338 sr_density_threshold = attr.ib(default=0.50)
338 sr_density_threshold = attr.ib(default=0.50)
339 # minimal size of data we skip when performing sparse read
339 # minimal size of data we skip when performing sparse read
340 sr_min_gap_size = attr.ib(default=262144)
340 sr_min_gap_size = attr.ib(default=262144)
341
341
342 # whether deltas are encoded against arbitrary bases.
342 # whether deltas are encoded against arbitrary bases.
343 generaldelta = attr.ib(default=False)
343 generaldelta = attr.ib(default=False)
344
344
345
345
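# NOTE (illustrative sketch, not part of revlog.py): `with_sparse_read`,
# `sr_density_threshold` and `sr_min_gap_size` above control when one large
# read gets split into several smaller ones. A hypothetical density check in
# the same spirit (names are made up for the example):
def _dense_enough(chunk_lengths, span_length, density_threshold=0.50):
    """Return True when the bytes we actually need make up a large enough
    fraction of the file span we would read in one go."""
    wanted = sum(chunk_lengths)
    if span_length == 0:
        return True
    return (wanted / span_length) >= density_threshold

# e.g. three 10 KiB chunks spread over a 100 KiB span are only 30% dense, so a
# sparse-read strategy would rather split the read around the large gaps.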
346 @attr.s()
346 @attr.s()
347 class DeltaConfig(_Config):
347 class DeltaConfig(_Config):
348 """Hold configuration value about how new delta are computed
348 """Hold configuration value about how new delta are computed
349
349
350 Some attributes are duplicated from DataConfig to help keep each object
350 Some attributes are duplicated from DataConfig to help keep each object
351 self-contained.
351 self-contained.
352 """
352 """
353
353
354 # can deltas be encoded against arbitrary bases.
354 # can deltas be encoded against arbitrary bases.
355 general_delta = attr.ib(default=False)
355 general_delta = attr.ib(default=False)
356 # Allow sparse writing of the revlog data
356 # Allow sparse writing of the revlog data
357 sparse_revlog = attr.ib(default=False)
357 sparse_revlog = attr.ib(default=False)
358 # maximum length of a delta chain
358 # maximum length of a delta chain
359 max_chain_len = attr.ib(default=None)
359 max_chain_len = attr.ib(default=None)
360 # Maximum distance between delta chain base start and end
360 # Maximum distance between delta chain base start and end
361 max_deltachain_span = attr.ib(default=-1)
361 max_deltachain_span = attr.ib(default=-1)
362 # If `upper_bound_comp` is not None, this is the expected maximal gain from
362 # If `upper_bound_comp` is not None, this is the expected maximal gain from
363 # compression for the data content.
363 # compression for the data content.
364 upper_bound_comp = attr.ib(default=None)
364 upper_bound_comp = attr.ib(default=None)
365 # Should we try a delta against both parents
365 # Should we try a delta against both parents
366 delta_both_parents = attr.ib(default=True)
366 delta_both_parents = attr.ib(default=True)
367 # Test delta base candidate groups in chunks of this maximal size.
367 # Test delta base candidate groups in chunks of this maximal size.
368 candidate_group_chunk_size = attr.ib(default=0)
368 candidate_group_chunk_size = attr.ib(default=0)
369 # Should we display debug information about delta computation
369 # Should we display debug information about delta computation
370 debug_delta = attr.ib(default=False)
370 debug_delta = attr.ib(default=False)
371 # trust incoming delta by default
371 # trust incoming delta by default
372 lazy_delta = attr.ib(default=True)
372 lazy_delta = attr.ib(default=True)
373 # trust the base of incoming delta by default
373 # trust the base of incoming delta by default
374 lazy_delta_base = attr.ib(default=False)
374 lazy_delta_base = attr.ib(default=False)
375
375
376
376
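# NOTE (illustrative sketch, not part of revlog.py): `max_chain_len` and
# `max_deltachain_span` above bound how long, and how spread out on disk, a
# delta chain may get before a full snapshot is preferred. A hypothetical
# helper showing how such limits could be applied (not the actual delta
# selection code; a negative span limit means "no limit", as with the default):
def _chain_within_limits(chain_len, chain_span, max_chain_len, max_span):
    if max_chain_len is not None and chain_len > max_chain_len:
        return False
    if max_span is not None and max_span >= 0 and chain_span > max_span:
        return False
    return True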
377 class _InnerRevlog:
377 class _InnerRevlog:
378 """An inner layer of the revlog object
378 """An inner layer of the revlog object
379
379
380 That layer exists to be able to delegate some operations to Rust; its
380 That layer exists to be able to delegate some operations to Rust; its
381 boundaries are arbitrary and based on what we can delegate to Rust.
381 boundaries are arbitrary and based on what we can delegate to Rust.
382 """
382 """
383
383
384 opener: vfsmod.vfs
384 opener: vfsmod.vfs
385
385
386 def __init__(
386 def __init__(
387 self,
387 self,
388 opener: vfsmod.vfs,
388 opener: vfsmod.vfs,
389 index,
389 index,
390 index_file,
390 index_file,
391 data_file,
391 data_file,
392 sidedata_file,
392 sidedata_file,
393 inline,
393 inline,
394 data_config,
394 data_config,
395 delta_config,
395 delta_config,
396 feature_config,
396 feature_config,
397 chunk_cache,
397 chunk_cache,
398 default_compression_header,
398 default_compression_header,
399 ):
399 ):
400 self.opener = opener
400 self.opener = opener
401 self.index = index
401 self.index = index
402
402
403 self.index_file = index_file
403 self.index_file = index_file
404 self.data_file = data_file
404 self.data_file = data_file
405 self.sidedata_file = sidedata_file
405 self.sidedata_file = sidedata_file
406 self.inline = inline
406 self.inline = inline
407 self.data_config = data_config
407 self.data_config = data_config
408 self.delta_config = delta_config
408 self.delta_config = delta_config
409 self.feature_config = feature_config
409 self.feature_config = feature_config
410
410
411 # used during diverted write.
411 # used during diverted write.
412 self._orig_index_file = None
412 self._orig_index_file = None
413
413
414 self._default_compression_header = default_compression_header
414 self._default_compression_header = default_compression_header
415
415
416 # index
416 # index
417
417
418 # 3-tuple of file handles being used for active writing.
418 # 3-tuple of file handles being used for active writing.
419 self._writinghandles = None
419 self._writinghandles = None
420
420
421 self._segmentfile = randomaccessfile.randomaccessfile(
421 self._segmentfile = randomaccessfile.randomaccessfile(
422 self.opener,
422 self.opener,
423 (self.index_file if self.inline else self.data_file),
423 (self.index_file if self.inline else self.data_file),
424 self.data_config.chunk_cache_size,
424 self.data_config.chunk_cache_size,
425 chunk_cache,
425 chunk_cache,
426 )
426 )
427 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
427 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
428 self.opener,
428 self.opener,
429 self.sidedata_file,
429 self.sidedata_file,
430 self.data_config.chunk_cache_size,
430 self.data_config.chunk_cache_size,
431 )
431 )
432
432
433 # revlog header -> revlog compressor
433 # revlog header -> revlog compressor
434 self._decompressors = {}
434 self._decompressors = {}
435 # 3-tuple of (node, rev, text) for a raw revision.
435 # 3-tuple of (node, rev, text) for a raw revision.
436 self._revisioncache = None
436 self._revisioncache = None
437
437
438 # cache some uncompressed chunks
438 # cache some uncompressed chunks
439 # rev β†’ uncompressed_chunk
439 # rev β†’ uncompressed_chunk
440 #
440 #
441 # the max cost is dynamically updated to be proportional to the
441 # the max cost is dynamically updated to be proportional to the
442 # size of the revisions we actually encounter.
442 # size of the revisions we actually encounter.
443 self._uncompressed_chunk_cache = None
443 self._uncompressed_chunk_cache = None
444 if self.data_config.uncompressed_cache_factor is not None:
444 if self.data_config.uncompressed_cache_factor is not None:
445 self._uncompressed_chunk_cache = util.lrucachedict(
445 self._uncompressed_chunk_cache = util.lrucachedict(
446 self.data_config.uncompressed_cache_count,
446 self.data_config.uncompressed_cache_count,
447 maxcost=65536, # some arbitrary initial value
447 maxcost=65536, # some arbitrary initial value
448 )
448 )
449
449
450 self._delay_buffer = None
450 self._delay_buffer = None
451
451
452 def __len__(self):
452 def __len__(self):
453 return len(self.index)
453 return len(self.index)
454
454
455 def clear_cache(self):
455 def clear_cache(self):
456 assert not self.is_delaying
456 assert not self.is_delaying
457 self._revisioncache = None
457 self._revisioncache = None
458 if self._uncompressed_chunk_cache is not None:
458 if self._uncompressed_chunk_cache is not None:
459 self._uncompressed_chunk_cache.clear()
459 self._uncompressed_chunk_cache.clear()
460 self._segmentfile.clear_cache()
460 self._segmentfile.clear_cache()
461 self._segmentfile_sidedata.clear_cache()
461 self._segmentfile_sidedata.clear_cache()
462
462
463 @property
463 @property
464 def canonical_index_file(self):
464 def canonical_index_file(self):
465 if self._orig_index_file is not None:
465 if self._orig_index_file is not None:
466 return self._orig_index_file
466 return self._orig_index_file
467 return self.index_file
467 return self.index_file
468
468
469 @property
469 @property
470 def is_delaying(self):
470 def is_delaying(self):
471 """is the revlog is currently delaying the visibility of written data?
471 """is the revlog is currently delaying the visibility of written data?
472
472
473 The delaying mechanism can be either in-memory or written on disk in a
473 The delaying mechanism can be either in-memory or written on disk in a
474 side-file."""
474 side-file."""
475 return (self._delay_buffer is not None) or (
475 return (self._delay_buffer is not None) or (
476 self._orig_index_file is not None
476 self._orig_index_file is not None
477 )
477 )
478
478
479 # Derived from index values.
479 # Derived from index values.
480
480
481 def start(self, rev):
481 def start(self, rev):
482 """the offset of the data chunk for this revision"""
482 """the offset of the data chunk for this revision"""
483 return int(self.index[rev][0] >> 16)
483 return int(self.index[rev][0] >> 16)
484
484
485 def length(self, rev):
485 def length(self, rev):
486 """the length of the data chunk for this revision"""
486 """the length of the data chunk for this revision"""
487 return self.index[rev][1]
487 return self.index[rev][1]
488
488
489 def end(self, rev):
489 def end(self, rev):
490 """the end of the data chunk for this revision"""
490 """the end of the data chunk for this revision"""
491 return self.start(rev) + self.length(rev)
491 return self.start(rev) + self.length(rev)
492
492
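# NOTE (illustrative sketch, not part of revlog.py): start() above relies on
# the first index field packing the data offset and the revision flags into a
# single integer, with the offset in the high bits. A toy round trip of that
# packing:
def _pack_offset_flags(offset, flags):
    return (offset << 16) | flags

def _unpack_offset_flags(offset_flags):
    return offset_flags >> 16, offset_flags & 0xFFFF

# _unpack_offset_flags(_pack_offset_flags(1234, 0b10)) == (1234, 0b10)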
493 def deltaparent(self, rev):
493 def deltaparent(self, rev):
494 """return deltaparent of the given revision"""
494 """return deltaparent of the given revision"""
495 base = self.index[rev][3]
495 base = self.index[rev][3]
496 if base == rev:
496 if base == rev:
497 return nullrev
497 return nullrev
498 elif self.delta_config.general_delta:
498 elif self.delta_config.general_delta:
499 return base
499 return base
500 else:
500 else:
501 return rev - 1
501 return rev - 1
502
502
503 def issnapshot(self, rev):
503 def issnapshot(self, rev):
504 """tells whether rev is a snapshot"""
504 """tells whether rev is a snapshot"""
505 if not self.delta_config.sparse_revlog:
505 if not self.delta_config.sparse_revlog:
506 return self.deltaparent(rev) == nullrev
506 return self.deltaparent(rev) == nullrev
507 elif hasattr(self.index, 'issnapshot'):
507 elif hasattr(self.index, 'issnapshot'):
508 # directly assign the method to cache the feature test and attribute access
508 # directly assign the method to cache the feature test and attribute access
509 self.issnapshot = self.index.issnapshot
509 self.issnapshot = self.index.issnapshot
510 return self.issnapshot(rev)
510 return self.issnapshot(rev)
511 if rev == nullrev:
511 if rev == nullrev:
512 return True
512 return True
513 entry = self.index[rev]
513 entry = self.index[rev]
514 base = entry[3]
514 base = entry[3]
515 if base == rev:
515 if base == rev:
516 return True
516 return True
517 if base == nullrev:
517 if base == nullrev:
518 return True
518 return True
519 p1 = entry[5]
519 p1 = entry[5]
520 while self.length(p1) == 0:
520 while self.length(p1) == 0:
521 b = self.deltaparent(p1)
521 b = self.deltaparent(p1)
522 if b == p1:
522 if b == p1:
523 break
523 break
524 p1 = b
524 p1 = b
525 p2 = entry[6]
525 p2 = entry[6]
526 while self.length(p2) == 0:
526 while self.length(p2) == 0:
527 b = self.deltaparent(p2)
527 b = self.deltaparent(p2)
528 if b == p2:
528 if b == p2:
529 break
529 break
530 p2 = b
530 p2 = b
531 if base == p1 or base == p2:
531 if base == p1 or base == p2:
532 return False
532 return False
533 return self.issnapshot(base)
533 return self.issnapshot(base)
534
534
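# NOTE (illustrative sketch, not part of revlog.py): a much simplified version
# of the snapshot test above, over a toy mapping rev -> (delta_base, p1, p2),
# ignoring the empty-delta parent adjustment performed by the real code:
def _toy_issnapshot(entries, rev, nullrev=-1):
    base, p1, p2 = entries[rev]
    if rev == nullrev or base == rev or base == nullrev:
        # stored as a full text (or the null revision): a snapshot
        return True
    if base in (p1, p2):
        # a delta against one of its parents: a regular delta
        return False
    # a delta against something else: a snapshot only if its base is one
    return _toy_issnapshot(entries, base, nullrev)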
535 def _deltachain(self, rev, stoprev=None):
535 def _deltachain(self, rev, stoprev=None):
536 """Obtain the delta chain for a revision.
536 """Obtain the delta chain for a revision.
537
537
538 ``stoprev`` specifies a revision to stop at. If not specified, we
538 ``stoprev`` specifies a revision to stop at. If not specified, we
539 stop at the base of the chain.
539 stop at the base of the chain.
540
540
541 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
541 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
542 revs in ascending order and ``stopped`` is a bool indicating whether
542 revs in ascending order and ``stopped`` is a bool indicating whether
543 ``stoprev`` was hit.
543 ``stoprev`` was hit.
544 """
544 """
545 generaldelta = self.delta_config.general_delta
545 generaldelta = self.delta_config.general_delta
546 # Try C implementation.
546 # Try C implementation.
547 try:
547 try:
548 return self.index.deltachain(rev, stoprev, generaldelta)
548 return self.index.deltachain(rev, stoprev, generaldelta)
549 except AttributeError:
549 except AttributeError:
550 pass
550 pass
551
551
552 chain = []
552 chain = []
553
553
554 # Alias to prevent attribute lookup in tight loop.
554 # Alias to prevent attribute lookup in tight loop.
555 index = self.index
555 index = self.index
556
556
557 iterrev = rev
557 iterrev = rev
558 e = index[iterrev]
558 e = index[iterrev]
559 while iterrev != e[3] and iterrev != stoprev:
559 while iterrev != e[3] and iterrev != stoprev:
560 chain.append(iterrev)
560 chain.append(iterrev)
561 if generaldelta:
561 if generaldelta:
562 iterrev = e[3]
562 iterrev = e[3]
563 else:
563 else:
564 iterrev -= 1
564 iterrev -= 1
565 e = index[iterrev]
565 e = index[iterrev]
566
566
567 if iterrev == stoprev:
567 if iterrev == stoprev:
568 stopped = True
568 stopped = True
569 else:
569 else:
570 chain.append(iterrev)
570 chain.append(iterrev)
571 stopped = False
571 stopped = False
572
572
573 chain.reverse()
573 chain.reverse()
574 return chain, stopped
574 return chain, stopped
575
575
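# NOTE (illustrative sketch, not part of revlog.py): the pure-Python fallback
# above walks delta parents until it reaches a full snapshot (a revision whose
# delta base is itself) or `stoprev`. The same walk over a toy mapping of
# rev -> delta base:
def _toy_deltachain(delta_base, rev, stoprev=None):
    chain = []
    iterrev = rev
    while delta_base[iterrev] != iterrev and iterrev != stoprev:
        chain.append(iterrev)
        iterrev = delta_base[iterrev]
    if iterrev == stoprev:
        return list(reversed(chain)), True
    chain.append(iterrev)
    return list(reversed(chain)), False

# _toy_deltachain({0: 0, 1: 0, 2: 1}, 2) == ([0, 1, 2], False)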
576 @util.propertycache
576 @util.propertycache
577 def _compressor(self):
577 def _compressor(self):
578 engine = util.compengines[self.feature_config.compression_engine]
578 engine = util.compengines[self.feature_config.compression_engine]
579 return engine.revlogcompressor(
579 return engine.revlogcompressor(
580 self.feature_config.compression_engine_options
580 self.feature_config.compression_engine_options
581 )
581 )
582
582
583 @util.propertycache
583 @util.propertycache
584 def _decompressor(self):
584 def _decompressor(self):
585 """the default decompressor"""
585 """the default decompressor"""
586 if self._default_compression_header is None:
586 if self._default_compression_header is None:
587 return None
587 return None
588 t = self._default_compression_header
588 t = self._default_compression_header
589 c = self._get_decompressor(t)
589 c = self._get_decompressor(t)
590 return c.decompress
590 return c.decompress
591
591
592 def _get_decompressor(self, t: bytes):
592 def _get_decompressor(self, t: bytes):
593 try:
593 try:
594 compressor = self._decompressors[t]
594 compressor = self._decompressors[t]
595 except KeyError:
595 except KeyError:
596 try:
596 try:
597 engine = util.compengines.forrevlogheader(t)
597 engine = util.compengines.forrevlogheader(t)
598 compressor = engine.revlogcompressor(
598 compressor = engine.revlogcompressor(
599 self.feature_config.compression_engine_options
599 self.feature_config.compression_engine_options
600 )
600 )
601 self._decompressors[t] = compressor
601 self._decompressors[t] = compressor
602 except KeyError:
602 except KeyError:
603 raise error.RevlogError(
603 raise error.RevlogError(
604 _(b'unknown compression type %s') % binascii.hexlify(t)
604 _(b'unknown compression type %s') % binascii.hexlify(t)
605 )
605 )
606 return compressor
606 return compressor
607
607
608 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
608 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
609 """Generate a possibly-compressed representation of data."""
609 """Generate a possibly-compressed representation of data."""
610 if not data:
610 if not data:
611 return b'', data
611 return b'', data
612
612
613 compressed = self._compressor.compress(data)
613 compressed = self._compressor.compress(data)
614
614
615 if compressed:
615 if compressed:
616 # The revlog compressor added the header in the returned data.
616 # The revlog compressor added the header in the returned data.
617 return b'', compressed
617 return b'', compressed
618
618
619 if data[0:1] == b'\0':
619 if data[0:1] == b'\0':
620 return b'', data
620 return b'', data
621 return b'u', data
621 return b'u', data
622
622
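# NOTE (illustrative sketch, not part of revlog.py): compress() above returns a
# (header, data) pair; the header is empty when the compressed stream carries
# its own marker, and b'u' flags data stored uncompressed. With the default
# zlib engine the compressed stream itself starts with b'x', which is how
# decompress() later recognises it:
def _zlib_marker_demo():
    import zlib

    sample = b'some revision text ' * 64
    compressed = zlib.compress(sample)
    assert compressed[:1] == b'x'  # zlib streams start with 0x78 ('x')
    assert zlib.decompress(compressed) == sample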
623 def decompress(self, data: bytes):
623 def decompress(self, data: bytes):
624 """Decompress a revlog chunk.
624 """Decompress a revlog chunk.
625
625
626 The chunk is expected to begin with a header identifying the
626 The chunk is expected to begin with a header identifying the
627 format type so it can be routed to an appropriate decompressor.
627 format type so it can be routed to an appropriate decompressor.
628 """
628 """
629 if not data:
629 if not data:
630 return data
630 return data
631
631
632 # Revlogs are read much more frequently than they are written and many
632 # Revlogs are read much more frequently than they are written and many
633 # chunks only take microseconds to decompress, so performance is
633 # chunks only take microseconds to decompress, so performance is
634 # important here.
634 # important here.
635 #
635 #
636 # We can make a few assumptions about revlogs:
636 # We can make a few assumptions about revlogs:
637 #
637 #
638 # 1) the majority of chunks will be compressed (as opposed to inline
638 # 1) the majority of chunks will be compressed (as opposed to inline
639 # raw data).
639 # raw data).
640 # 2) decompressing *any* data will likely be at least 10x slower than
640 # 2) decompressing *any* data will likely be at least 10x slower than
641 # returning raw inline data.
641 # returning raw inline data.
642 # 3) we want to prioritize common and officially supported compression
642 # 3) we want to prioritize common and officially supported compression
643 # engines
643 # engines
644 #
644 #
645 # It follows that we want to optimize for "decompress compressed data
645 # It follows that we want to optimize for "decompress compressed data
646 # when encoded with common and officially supported compression engines"
646 # when encoded with common and officially supported compression engines"
647 # case over "raw data" and "data encoded by less common or non-official
647 # case over "raw data" and "data encoded by less common or non-official
648 # compression engines." That is why we have the inline lookup first
648 # compression engines." That is why we have the inline lookup first
649 # followed by the compengines lookup.
649 # followed by the compengines lookup.
650 #
650 #
651 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
651 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
652 # compressed chunks. And this matters for changelog and manifest reads.
652 # compressed chunks. And this matters for changelog and manifest reads.
653 t = data[0:1]
653 t = data[0:1]
654
654
655 if t == b'x':
655 if t == b'x':
656 try:
656 try:
657 return _zlibdecompress(data)
657 return _zlibdecompress(data)
658 except zlib.error as e:
658 except zlib.error as e:
659 raise error.RevlogError(
659 raise error.RevlogError(
660 _(b'revlog decompress error: %s')
660 _(b'revlog decompress error: %s')
661 % stringutil.forcebytestr(e)
661 % stringutil.forcebytestr(e)
662 )
662 )
663 # '\0' is more common than 'u' so it goes first.
663 # '\0' is more common than 'u' so it goes first.
664 elif t == b'\0':
664 elif t == b'\0':
665 return data
665 return data
666 elif t == b'u':
666 elif t == b'u':
667 return util.buffer(data, 1)
667 return util.buffer(data, 1)
668
668
669 compressor = self._get_decompressor(t)
669 compressor = self._get_decompressor(t)
670
670
671 return compressor.decompress(data)
671 return compressor.decompress(data)
672
672
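# NOTE (illustrative sketch, not part of revlog.py): the dispatch above keys
# off the first byte of the stored chunk. A stripped-down router making the
# same decision (other engines, e.g. zstd, are looked up dynamically in the
# real code):
def _toy_route_chunk(data):
    import zlib

    marker = data[0:1]
    if marker == b'x':
        return zlib.decompress(data)  # zlib-compressed chunk
    if marker == b'\0':
        return data  # raw data that happens to start with NUL
    if marker == b'u':
        return data[1:]  # explicitly-flagged uncompressed data
    raise ValueError('unknown chunk marker %r' % marker)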
673 @contextlib.contextmanager
673 @contextlib.contextmanager
674 def reading(self):
674 def reading(self):
675 """Context manager that keeps data and sidedata files open for reading"""
675 """Context manager that keeps data and sidedata files open for reading"""
676 if len(self.index) == 0:
676 if len(self.index) == 0:
677 yield # nothing to be read
677 yield # nothing to be read
678 elif self._delay_buffer is not None and self.inline:
678 elif self._delay_buffer is not None and self.inline:
679 msg = "revlog with delayed write should not be inline"
679 msg = "revlog with delayed write should not be inline"
680 raise error.ProgrammingError(msg)
680 raise error.ProgrammingError(msg)
681 else:
681 else:
682 with self._segmentfile.reading():
682 with self._segmentfile.reading():
683 with self._segmentfile_sidedata.reading():
683 with self._segmentfile_sidedata.reading():
684 yield
684 yield
685
685
686 @property
686 @property
687 def is_writing(self):
687 def is_writing(self):
688 """True is a writing context is open"""
688 """True is a writing context is open"""
689 return self._writinghandles is not None
689 return self._writinghandles is not None
690
690
691 @property
691 @property
692 def is_open(self):
692 def is_open(self):
693 """True if any file handle is being held
693 """True if any file handle is being held
694
694
695 Used for assert and debug in the python code"""
695 Used for assert and debug in the python code"""
696 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
696 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
697
697
698 @contextlib.contextmanager
698 @contextlib.contextmanager
699 def writing(self, transaction, data_end=None, sidedata_end=None):
699 def writing(self, transaction, data_end=None, sidedata_end=None):
700 """Open the revlog files for writing
700 """Open the revlog files for writing
701
701
702 Adding content to a revlog should be done within such a context.
702 Adding content to a revlog should be done within such a context.
703 """
703 """
704 if self.is_writing:
704 if self.is_writing:
705 yield
705 yield
706 else:
706 else:
707 ifh = dfh = sdfh = None
707 ifh = dfh = sdfh = None
708 try:
708 try:
709 r = len(self.index)
709 r = len(self.index)
710 # opening the data file.
710 # opening the data file.
711 dsize = 0
711 dsize = 0
712 if r:
712 if r:
713 dsize = self.end(r - 1)
713 dsize = self.end(r - 1)
714 dfh = None
714 dfh = None
715 if not self.inline:
715 if not self.inline:
716 try:
716 try:
717 dfh = self.opener(self.data_file, mode=b"r+")
717 dfh = self.opener(self.data_file, mode=b"r+")
718 if data_end is None:
718 if data_end is None:
719 dfh.seek(0, os.SEEK_END)
719 dfh.seek(0, os.SEEK_END)
720 else:
720 else:
721 dfh.seek(data_end, os.SEEK_SET)
721 dfh.seek(data_end, os.SEEK_SET)
722 except FileNotFoundError:
722 except FileNotFoundError:
723 dfh = self.opener(self.data_file, mode=b"w+")
723 dfh = self.opener(self.data_file, mode=b"w+")
724 transaction.add(self.data_file, dsize)
724 transaction.add(self.data_file, dsize)
725 if self.sidedata_file is not None:
725 if self.sidedata_file is not None:
726 assert sidedata_end is not None
726 assert sidedata_end is not None
727 # revlog-v2 does not inline, help Pytype
727 # revlog-v2 does not inline, help Pytype
728 assert dfh is not None
728 assert dfh is not None
729 try:
729 try:
730 sdfh = self.opener(self.sidedata_file, mode=b"r+")
730 sdfh = self.opener(self.sidedata_file, mode=b"r+")
731 dfh.seek(sidedata_end, os.SEEK_SET)
731 dfh.seek(sidedata_end, os.SEEK_SET)
732 except FileNotFoundError:
732 except FileNotFoundError:
733 sdfh = self.opener(self.sidedata_file, mode=b"w+")
733 sdfh = self.opener(self.sidedata_file, mode=b"w+")
734 transaction.add(self.sidedata_file, sidedata_end)
734 transaction.add(self.sidedata_file, sidedata_end)
735
735
736 # opening the index file.
736 # opening the index file.
737 isize = r * self.index.entry_size
737 isize = r * self.index.entry_size
738 ifh = self.__index_write_fp()
738 ifh = self.__index_write_fp()
739 if self.inline:
739 if self.inline:
740 transaction.add(self.index_file, dsize + isize)
740 transaction.add(self.index_file, dsize + isize)
741 else:
741 else:
742 transaction.add(self.index_file, isize)
742 transaction.add(self.index_file, isize)
743 # exposing all file handles for writing.
743 # exposing all file handles for writing.
744 self._writinghandles = (ifh, dfh, sdfh)
744 self._writinghandles = (ifh, dfh, sdfh)
745 self._segmentfile.writing_handle = ifh if self.inline else dfh
745 self._segmentfile.writing_handle = ifh if self.inline else dfh
746 self._segmentfile_sidedata.writing_handle = sdfh
746 self._segmentfile_sidedata.writing_handle = sdfh
747 yield
747 yield
748 finally:
748 finally:
749 self._writinghandles = None
749 self._writinghandles = None
750 self._segmentfile.writing_handle = None
750 self._segmentfile.writing_handle = None
751 self._segmentfile_sidedata.writing_handle = None
751 self._segmentfile_sidedata.writing_handle = None
752 if dfh is not None:
752 if dfh is not None:
753 dfh.close()
753 dfh.close()
754 if sdfh is not None:
754 if sdfh is not None:
755 sdfh.close()
755 sdfh.close()
756 # closing the index file last to avoid exposing references to
756 # closing the index file last to avoid exposing references to
757 # potentially unflushed data content.
757 # potentially unflushed data content.
758 if ifh is not None:
758 if ifh is not None:
759 ifh.close()
759 ifh.close()
760
760
761 def __index_write_fp(self, index_end=None):
761 def __index_write_fp(self, index_end=None):
762 """internal method to open the index file for writing
762 """internal method to open the index file for writing
763
763
764 You should not use this directly; use `_writing` instead
764 You should not use this directly; use `_writing` instead
765 """
765 """
766 try:
766 try:
767 if self._delay_buffer is None:
767 if self._delay_buffer is None:
768 f = self.opener(
768 f = self.opener(
769 self.index_file,
769 self.index_file,
770 mode=b"r+",
770 mode=b"r+",
771 checkambig=self.data_config.check_ambig,
771 checkambig=self.data_config.check_ambig,
772 )
772 )
773 else:
773 else:
774 # check_ambig affects the way we open the file for writing; however,
774 # check_ambig affects the way we open the file for writing; however,
775 # here we do not actually open a file for writing, as writes
775 # here we do not actually open a file for writing, as writes
776 # will be appended to a delay_buffer. So check_ambig is not
776 # will be appended to a delay_buffer. So check_ambig is not
777 # meaningful and is unneeded here.
777 # meaningful and is unneeded here.
778 f = randomaccessfile.appender(
778 f = randomaccessfile.appender(
779 self.opener, self.index_file, b"r+", self._delay_buffer
779 self.opener, self.index_file, b"r+", self._delay_buffer
780 )
780 )
781 if index_end is None:
781 if index_end is None:
782 f.seek(0, os.SEEK_END)
782 f.seek(0, os.SEEK_END)
783 else:
783 else:
784 f.seek(index_end, os.SEEK_SET)
784 f.seek(index_end, os.SEEK_SET)
785 return f
785 return f
786 except FileNotFoundError:
786 except FileNotFoundError:
787 if self._delay_buffer is None:
787 if self._delay_buffer is None:
788 return self.opener(
788 return self.opener(
789 self.index_file,
789 self.index_file,
790 mode=b"w+",
790 mode=b"w+",
791 checkambig=self.data_config.check_ambig,
791 checkambig=self.data_config.check_ambig,
792 )
792 )
793 else:
793 else:
794 return randomaccessfile.appender(
794 return randomaccessfile.appender(
795 self.opener, self.index_file, b"w+", self._delay_buffer
795 self.opener, self.index_file, b"w+", self._delay_buffer
796 )
796 )
797
797
798 def __index_new_fp(self):
798 def __index_new_fp(self):
799 """internal method to create a new index file for writing
799 """internal method to create a new index file for writing
800
800
801 You should not use this unless you are upgrading from an inline revlog
801 You should not use this unless you are upgrading from an inline revlog
802 """
802 """
803 return self.opener(
803 return self.opener(
804 self.index_file,
804 self.index_file,
805 mode=b"w",
805 mode=b"w",
806 checkambig=self.data_config.check_ambig,
806 checkambig=self.data_config.check_ambig,
807 )
807 )
808
808
809 def split_inline(self, tr, header, new_index_file_path=None):
809 def split_inline(self, tr, header, new_index_file_path=None):
810 """split the data of an inline revlog into an index and a data file"""
810 """split the data of an inline revlog into an index and a data file"""
811 assert self._delay_buffer is None
811 assert self._delay_buffer is None
812 existing_handles = False
812 existing_handles = False
813 if self._writinghandles is not None:
813 if self._writinghandles is not None:
814 existing_handles = True
814 existing_handles = True
815 fp = self._writinghandles[0]
815 fp = self._writinghandles[0]
816 fp.flush()
816 fp.flush()
817 fp.close()
817 fp.close()
818 # We can't use the cached file handle after close(). So prevent
818 # We can't use the cached file handle after close(). So prevent
819 # its usage.
819 # its usage.
820 self._writinghandles = None
820 self._writinghandles = None
821 self._segmentfile.writing_handle = None
821 self._segmentfile.writing_handle = None
822 # No need to deal with the sidedata writing handle as it is only
822 # No need to deal with the sidedata writing handle as it is only
823 # relevant with revlog-v2, which is never inline, so it never reaches
823 # relevant with revlog-v2, which is never inline, so it never reaches
824 # this code
824 # this code
825
825
826 new_dfh = self.opener(self.data_file, mode=b"w+")
826 new_dfh = self.opener(self.data_file, mode=b"w+")
827 new_dfh.truncate(0) # drop any potentially existing data
827 new_dfh.truncate(0) # drop any potentially existing data
828 try:
828 try:
829 with self.reading():
829 with self.reading():
830 for r in range(len(self.index)):
830 for r in range(len(self.index)):
831 new_dfh.write(self.get_segment_for_revs(r, r)[1])
831 new_dfh.write(self.get_segment_for_revs(r, r)[1])
832 new_dfh.flush()
832 new_dfh.flush()
833
833
834 if new_index_file_path is not None:
834 if new_index_file_path is not None:
835 self.index_file = new_index_file_path
835 self.index_file = new_index_file_path
836 with self.__index_new_fp() as fp:
836 with self.__index_new_fp() as fp:
837 self.inline = False
837 self.inline = False
838 for i in range(len(self.index)):
838 for i in range(len(self.index)):
839 e = self.index.entry_binary(i)
839 e = self.index.entry_binary(i)
840 if i == 0:
840 if i == 0:
841 packed_header = self.index.pack_header(header)
841 packed_header = self.index.pack_header(header)
842 e = packed_header + e
842 e = packed_header + e
843 fp.write(e)
843 fp.write(e)
844
844
845 # If we don't use side-write, the temp file replaces the real
845 # If we don't use side-write, the temp file replaces the real
846 # index when we exit the context manager
846 # index when we exit the context manager
847
847
848 self._segmentfile = randomaccessfile.randomaccessfile(
848 self._segmentfile = randomaccessfile.randomaccessfile(
849 self.opener,
849 self.opener,
850 self.data_file,
850 self.data_file,
851 self.data_config.chunk_cache_size,
851 self.data_config.chunk_cache_size,
852 )
852 )
853
853
854 if existing_handles:
854 if existing_handles:
855 # switched from inline to conventional; reopen the index
855 # switched from inline to conventional; reopen the index
856 ifh = self.__index_write_fp()
856 ifh = self.__index_write_fp()
857 self._writinghandles = (ifh, new_dfh, None)
857 self._writinghandles = (ifh, new_dfh, None)
858 self._segmentfile.writing_handle = new_dfh
858 self._segmentfile.writing_handle = new_dfh
859 new_dfh = None
859 new_dfh = None
860 # No need to deal with the sidedata writing handle as it is only
860 # No need to deal with the sidedata writing handle as it is only
861 # relevant with revlog-v2, which is never inline, so it never reaches
861 # relevant with revlog-v2, which is never inline, so it never reaches
862 # this code
862 # this code
863 finally:
863 finally:
864 if new_dfh is not None:
864 if new_dfh is not None:
865 new_dfh.close()
865 new_dfh.close()
866 return self.index_file
866 return self.index_file
867
867
868 def get_segment_for_revs(self, startrev, endrev):
868 def get_segment_for_revs(self, startrev, endrev):
869 """Obtain a segment of raw data corresponding to a range of revisions.
869 """Obtain a segment of raw data corresponding to a range of revisions.
870
870
871 Accepts the start and end revisions and an optional already-open
871 Accepts the start and end revisions and an optional already-open
872 file handle to be used for reading. If the file handle is read, its
872 file handle to be used for reading. If the file handle is read, its
873 seek position will not be preserved.
873 seek position will not be preserved.
874
874
875 Requests for data may be satisfied by a cache.
875 Requests for data may be satisfied by a cache.
876
876
877 Returns a 2-tuple of (offset, data) for the requested range of
877 Returns a 2-tuple of (offset, data) for the requested range of
878 revisions. Offset is the integer offset from the beginning of the
878 revisions. Offset is the integer offset from the beginning of the
879 revlog and data is a str or buffer of the raw byte data.
879 revlog and data is a str or buffer of the raw byte data.
880
880
881 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
881 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
882 to determine where each revision's data begins and ends.
882 to determine where each revision's data begins and ends.
883
883
884 API: we should consider making this a private part of the InnerRevlog
884 API: we should consider making this a private part of the InnerRevlog
885 at some point.
885 at some point.
886 """
886 """
887 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
887 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
888 # (functions are expensive).
888 # (functions are expensive).
889 index = self.index
889 index = self.index
890 istart = index[startrev]
890 istart = index[startrev]
891 start = int(istart[0] >> 16)
891 start = int(istart[0] >> 16)
892 if startrev == endrev:
892 if startrev == endrev:
893 end = start + istart[1]
893 end = start + istart[1]
894 else:
894 else:
895 iend = index[endrev]
895 iend = index[endrev]
896 end = int(iend[0] >> 16) + iend[1]
896 end = int(iend[0] >> 16) + iend[1]
897
897
898 if self.inline:
898 if self.inline:
899 start += (startrev + 1) * self.index.entry_size
899 start += (startrev + 1) * self.index.entry_size
900 end += (endrev + 1) * self.index.entry_size
900 end += (endrev + 1) * self.index.entry_size
901 length = end - start
901 length = end - start
902
902
903 return start, self._segmentfile.read_chunk(start, length)
903 return start, self._segmentfile.read_chunk(start, length)
904
904
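# NOTE (illustrative sketch, not part of revlog.py): for inline revlogs the
# data chunks are interleaved with the index entries in the same file, so the
# physical offset of revision `rev`'s chunk is its logical data offset plus
# (rev + 1) index entries, exactly the adjustment performed above. A toy
# version of that computation:
def _toy_physical_offset(data_offset, rev, entry_size, inline):
    if inline:
        return data_offset + (rev + 1) * entry_size
    return data_offset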
905 def _chunk(self, rev):
905 def _chunk(self, rev):
906 """Obtain a single decompressed chunk for a revision.
906 """Obtain a single decompressed chunk for a revision.
907
907
908 Accepts an integer revision and an optional already-open file handle
908 Accepts an integer revision and an optional already-open file handle
909 to be used for reading. If used, the seek position of the file will not
909 to be used for reading. If used, the seek position of the file will not
910 be preserved.
910 be preserved.
911
911
912 Returns a str holding uncompressed data for the requested revision.
912 Returns a str holding uncompressed data for the requested revision.
913 """
913 """
914 if self._uncompressed_chunk_cache is not None:
914 if self._uncompressed_chunk_cache is not None:
915 uncomp = self._uncompressed_chunk_cache.get(rev)
915 uncomp = self._uncompressed_chunk_cache.get(rev)
916 if uncomp is not None:
916 if uncomp is not None:
917 return uncomp
917 return uncomp
918
918
919 compression_mode = self.index[rev][10]
919 compression_mode = self.index[rev][10]
920 data = self.get_segment_for_revs(rev, rev)[1]
920 data = self.get_segment_for_revs(rev, rev)[1]
921 if compression_mode == COMP_MODE_PLAIN:
921 if compression_mode == COMP_MODE_PLAIN:
922 uncomp = data
922 uncomp = data
923 elif compression_mode == COMP_MODE_DEFAULT:
923 elif compression_mode == COMP_MODE_DEFAULT:
924 uncomp = self._decompressor(data)
924 uncomp = self._decompressor(data)
925 elif compression_mode == COMP_MODE_INLINE:
925 elif compression_mode == COMP_MODE_INLINE:
926 uncomp = self.decompress(data)
926 uncomp = self.decompress(data)
927 else:
927 else:
928 msg = b'unknown compression mode %d'
928 msg = b'unknown compression mode %d'
929 msg %= compression_mode
929 msg %= compression_mode
930 raise error.RevlogError(msg)
930 raise error.RevlogError(msg)
931 if self._uncompressed_chunk_cache is not None:
931 if self._uncompressed_chunk_cache is not None:
932 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
932 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
933 return uncomp
933 return uncomp
934
934
935 def _chunks(self, revs, targetsize=None):
935 def _chunks(self, revs, targetsize=None):
936 """Obtain decompressed chunks for the specified revisions.
936 """Obtain decompressed chunks for the specified revisions.
937
937
938 Accepts an iterable of numeric revisions that are assumed to be in
938 Accepts an iterable of numeric revisions that are assumed to be in
939 ascending order.
939 ascending order.
940
940
941 This function is similar to calling ``self._chunk()`` multiple times,
941 This function is similar to calling ``self._chunk()`` multiple times,
942 but is faster.
942 but is faster.
943
943
944 Returns a list with decompressed data for each requested revision.
944 Returns a list with decompressed data for each requested revision.
945 """
945 """
946 if not revs:
946 if not revs:
947 return []
947 return []
948 start = self.start
948 start = self.start
949 length = self.length
949 length = self.length
950 inline = self.inline
950 inline = self.inline
951 iosize = self.index.entry_size
951 iosize = self.index.entry_size
952 buffer = util.buffer
952 buffer = util.buffer
953
953
954 fetched_revs = []
954 fetched_revs = []
955 fadd = fetched_revs.append
955 fadd = fetched_revs.append
956
956
957 chunks = []
957 chunks = []
958 ladd = chunks.append
958 ladd = chunks.append
959
959
960 if self._uncompressed_chunk_cache is None:
960 if self._uncompressed_chunk_cache is None:
961 fetched_revs = revs
961 fetched_revs = revs
962 else:
962 else:
963 for rev in revs:
963 for rev in revs:
964 cached_value = self._uncompressed_chunk_cache.get(rev)
964 cached_value = self._uncompressed_chunk_cache.get(rev)
965 if cached_value is None:
965 if cached_value is None:
966 fadd(rev)
966 fadd(rev)
967 else:
967 else:
968 ladd((rev, cached_value))
968 ladd((rev, cached_value))
969
969
970 if not fetched_revs:
970 if not fetched_revs:
971 slicedchunks = ()
971 slicedchunks = ()
972 elif not self.data_config.with_sparse_read:
972 elif not self.data_config.with_sparse_read:
973 slicedchunks = (fetched_revs,)
973 slicedchunks = (fetched_revs,)
974 else:
974 else:
975 slicedchunks = deltautil.slicechunk(
975 slicedchunks = deltautil.slicechunk(
976 self,
976 self,
977 fetched_revs,
977 fetched_revs,
978 targetsize=targetsize,
978 targetsize=targetsize,
979 )
979 )
980
980
981 for revschunk in slicedchunks:
981 for revschunk in slicedchunks:
982 firstrev = revschunk[0]
982 firstrev = revschunk[0]
983 # Skip trailing revisions with empty diff
983 # Skip trailing revisions with empty diff
984 for lastrev in revschunk[::-1]:
984 for lastrev in revschunk[::-1]:
985 if length(lastrev) != 0:
985 if length(lastrev) != 0:
986 break
986 break
987
987
988 try:
988 try:
989 offset, data = self.get_segment_for_revs(firstrev, lastrev)
989 offset, data = self.get_segment_for_revs(firstrev, lastrev)
990 except OverflowError:
990 except OverflowError:
991 # issue4215 - we can't cache a run of chunks greater than
991 # issue4215 - we can't cache a run of chunks greater than
992 # 2G on Windows
992 # 2G on Windows
993 for rev in revschunk:
993 for rev in revschunk:
994 ladd((rev, self._chunk(rev)))
994 ladd((rev, self._chunk(rev)))
995
995
996 decomp = self.decompress
996 decomp = self.decompress
997 # self._decompressor might be None, but will not be used in that case
997 # self._decompressor might be None, but will not be used in that case
998 def_decomp = self._decompressor
998 def_decomp = self._decompressor
999 for rev in revschunk:
999 for rev in revschunk:
1000 chunkstart = start(rev)
1000 chunkstart = start(rev)
1001 if inline:
1001 if inline:
1002 chunkstart += (rev + 1) * iosize
1002 chunkstart += (rev + 1) * iosize
1003 chunklength = length(rev)
1003 chunklength = length(rev)
1004 comp_mode = self.index[rev][10]
1004 comp_mode = self.index[rev][10]
1005 c = buffer(data, chunkstart - offset, chunklength)
1005 c = buffer(data, chunkstart - offset, chunklength)
1006 if comp_mode == COMP_MODE_PLAIN:
1006 if comp_mode == COMP_MODE_PLAIN:
1007 c = c
1007 c = c
1008 elif comp_mode == COMP_MODE_INLINE:
1008 elif comp_mode == COMP_MODE_INLINE:
1009 c = decomp(c)
1009 c = decomp(c)
1010 elif comp_mode == COMP_MODE_DEFAULT:
1010 elif comp_mode == COMP_MODE_DEFAULT:
1011 c = def_decomp(c)
1011 c = def_decomp(c)
1012 else:
1012 else:
1013 msg = b'unknown compression mode %d'
1013 msg = b'unknown compression mode %d'
1014 msg %= comp_mode
1014 msg %= comp_mode
1015 raise error.RevlogError(msg)
1015 raise error.RevlogError(msg)
1016 ladd((rev, c))
1016 ladd((rev, c))
1017 if self._uncompressed_chunk_cache is not None:
1017 if self._uncompressed_chunk_cache is not None:
1018 self._uncompressed_chunk_cache.insert(rev, c, len(c))
1018 self._uncompressed_chunk_cache.insert(rev, c, len(c))
1019
1019
1020 chunks.sort()
1020 chunks.sort()
1021 return [x[1] for x in chunks]
1021 return [x[1] for x in chunks]
1022
1022
1023 def raw_text(self, node, rev) -> bytes:
1023 def raw_text(self, node, rev) -> bytes:
1024 """return the possibly unvalidated rawtext for a revision
1024 """return the possibly unvalidated rawtext for a revision
1025
1025
1026 returns rawtext
1026 returns rawtext
1027 """
1027 """
1028
1028
1029 # revision in the cache (could be useful to apply delta)
1029 # revision in the cache (could be useful to apply delta)
1030 cachedrev = None
1030 cachedrev = None
1031 # An intermediate text to apply deltas to
1031 # An intermediate text to apply deltas to
1032 basetext = None
1032 basetext = None
1033
1033
1034 # Check if we have the entry in cache
1034 # Check if we have the entry in cache
1035 # The cache entry looks like (node, rev, rawtext)
1035 # The cache entry looks like (node, rev, rawtext)
1036 if self._revisioncache:
1036 if self._revisioncache:
1037 cachedrev = self._revisioncache[1]
1037 cachedrev = self._revisioncache[1]
1038
1038
1039 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1039 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1040 if stopped:
1040 if stopped:
1041 basetext = self._revisioncache[2]
1041 basetext = self._revisioncache[2]
1042
1042
1043 # drop the cache to save memory; the caller is expected to
1043 # drop the cache to save memory; the caller is expected to
1044 # update self._inner._revisioncache after validating the text
1044 # update self._inner._revisioncache after validating the text
1045 self._revisioncache = None
1045 self._revisioncache = None
1046
1046
1047 targetsize = None
1047 targetsize = None
1048 rawsize = self.index[rev][2]
1048 rawsize = self.index[rev][2]
1049 if 0 <= rawsize:
1049 if 0 <= rawsize:
1050 targetsize = 4 * rawsize
1050 targetsize = 4 * rawsize
1051
1051
1052 if self._uncompressed_chunk_cache is not None:
1052 if self._uncompressed_chunk_cache is not None:
1053 # dynamically update the uncompressed_chunk_cache size to the
1053 # dynamically update the uncompressed_chunk_cache size to the
1054 # largest revision we saw in this revlog.
1054 # largest revision we saw in this revlog.
1055 factor = self.data_config.uncompressed_cache_factor
1055 factor = self.data_config.uncompressed_cache_factor
1056 candidate_size = rawsize * factor
1056 candidate_size = rawsize * factor
1057 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1057 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1058 self._uncompressed_chunk_cache.maxcost = candidate_size
1058 self._uncompressed_chunk_cache.maxcost = candidate_size
1059
1059
1060 bins = self._chunks(chain, targetsize=targetsize)
1060 bins = self._chunks(chain, targetsize=targetsize)
1061 if basetext is None:
1061 if basetext is None:
1062 basetext = bytes(bins[0])
1062 basetext = bytes(bins[0])
1063 bins = bins[1:]
1063 bins = bins[1:]
1064
1064
1065 rawtext = mdiff.patches(basetext, bins)
1065 rawtext = mdiff.patches(basetext, bins)
1066 del basetext # let us have a chance to free memory early
1066 del basetext # let us have a chance to free memory early
1067 return rawtext
1067 return rawtext
1068
1068
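# NOTE (illustrative sketch, not part of revlog.py): raw_text() above rebuilds
# a revision by taking the chain's base text and applying each delta in order.
# mdiff.patches works on a binary patch format; the toy below uses
# (start, end, replacement) tuples purely to show the idea:
def _toy_apply_deltas(basetext, deltas):
    text = basetext
    for patches in deltas:
        # apply one delta; hunks are assumed sorted and non-overlapping
        out = []
        last = 0
        for start, end, replacement in patches:
            out.append(text[last:start])
            out.append(replacement)
            last = end
        out.append(text[last:])
        text = b''.join(out)
    return text

# _toy_apply_deltas(b'abcdef', [[(1, 3, b'XY')]]) == b'aXYdef'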
1069 def sidedata(self, rev, sidedata_end):
1069 def sidedata(self, rev, sidedata_end):
1070 """Return the sidedata for a given revision number."""
1070 """Return the sidedata for a given revision number."""
1071 index_entry = self.index[rev]
1071 index_entry = self.index[rev]
1072 sidedata_offset = index_entry[8]
1072 sidedata_offset = index_entry[8]
1073 sidedata_size = index_entry[9]
1073 sidedata_size = index_entry[9]
1074
1074
1075 if self.inline:
1075 if self.inline:
1076 sidedata_offset += self.index.entry_size * (1 + rev)
1076 sidedata_offset += self.index.entry_size * (1 + rev)
1077 if sidedata_size == 0:
1077 if sidedata_size == 0:
1078 return {}
1078 return {}
1079
1079
1080 if sidedata_end < sidedata_offset + sidedata_size:
1080 if sidedata_end < sidedata_offset + sidedata_size:
1081 filename = self.sidedata_file
1081 filename = self.sidedata_file
1082 end = sidedata_end
1082 end = sidedata_end
1083 offset = sidedata_offset
1083 offset = sidedata_offset
1084 length = sidedata_size
1084 length = sidedata_size
1085 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1085 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1086 raise error.RevlogError(m)
1086 raise error.RevlogError(m)
1087
1087
1088 comp_segment = self._segmentfile_sidedata.read_chunk(
1088 comp_segment = self._segmentfile_sidedata.read_chunk(
1089 sidedata_offset, sidedata_size
1089 sidedata_offset, sidedata_size
1090 )
1090 )
1091
1091
1092 comp = self.index[rev][11]
1092 comp = self.index[rev][11]
1093 if comp == COMP_MODE_PLAIN:
1093 if comp == COMP_MODE_PLAIN:
1094 segment = comp_segment
1094 segment = comp_segment
1095 elif comp == COMP_MODE_DEFAULT:
1095 elif comp == COMP_MODE_DEFAULT:
1096 segment = self._decompressor(comp_segment)
1096 segment = self._decompressor(comp_segment)
1097 elif comp == COMP_MODE_INLINE:
1097 elif comp == COMP_MODE_INLINE:
1098 segment = self.decompress(comp_segment)
1098 segment = self.decompress(comp_segment)
1099 else:
1099 else:
1100 msg = b'unknown compression mode %d'
1100 msg = b'unknown compression mode %d'
1101 msg %= comp
1101 msg %= comp
1102 raise error.RevlogError(msg)
1102 raise error.RevlogError(msg)
1103
1103
1104 sidedata = sidedatautil.deserialize_sidedata(segment)
1104 sidedata = sidedatautil.deserialize_sidedata(segment)
1105 return sidedata
1105 return sidedata
1106
1106
1107 def write_entry(
1107 def write_entry(
1108 self,
1108 self,
1109 transaction,
1109 transaction,
1110 entry,
1110 entry,
1111 data,
1111 data,
1112 link,
1112 link,
1113 offset,
1113 offset,
1114 sidedata,
1114 sidedata,
1115 sidedata_offset,
1115 sidedata_offset,
1116 index_end,
1116 index_end,
1117 data_end,
1117 data_end,
1118 sidedata_end,
1118 sidedata_end,
1119 ):
1119 ):
1120 # Files opened in a+ mode have inconsistent behavior on various
1120 # Files opened in a+ mode have inconsistent behavior on various
1121 # platforms. Windows requires that a file positioning call be made
1121 # platforms. Windows requires that a file positioning call be made
1122 # when the file handle transitions between reads and writes. See
1122 # when the file handle transitions between reads and writes. See
1123 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1123 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1124 # platforms, Python or the platform itself can be buggy. Some versions
1124 # platforms, Python or the platform itself can be buggy. Some versions
1125 # of Solaris have been observed to not append at the end of the file
1125 # of Solaris have been observed to not append at the end of the file
1126 # if the file was seeked to before the end. See issue4943 for more.
1126 # if the file was seeked to before the end. See issue4943 for more.
1127 #
1127 #
1128 # We work around this issue by inserting a seek() before writing.
1128 # We work around this issue by inserting a seek() before writing.
1129 # Note: This is likely not necessary on Python 3. However, because
1129 # Note: This is likely not necessary on Python 3. However, because
1130 # the file handle is reused for reads and may be seeked there, we need
1130 # the file handle is reused for reads and may be seeked there, we need
1131 # to be careful before changing this.
1131 # to be careful before changing this.
1132 if self._writinghandles is None:
1132 if self._writinghandles is None:
1133 msg = b'adding revision outside `revlog._writing` context'
1133 msg = b'adding revision outside `revlog._writing` context'
1134 raise error.ProgrammingError(msg)
1134 raise error.ProgrammingError(msg)
1135 ifh, dfh, sdfh = self._writinghandles
1135 ifh, dfh, sdfh = self._writinghandles
1136 if index_end is None:
1136 if index_end is None:
1137 ifh.seek(0, os.SEEK_END)
1137 ifh.seek(0, os.SEEK_END)
1138 else:
1138 else:
1139 ifh.seek(index_end, os.SEEK_SET)
1139 ifh.seek(index_end, os.SEEK_SET)
1140 if dfh:
1140 if dfh:
1141 if data_end is None:
1141 if data_end is None:
1142 dfh.seek(0, os.SEEK_END)
1142 dfh.seek(0, os.SEEK_END)
1143 else:
1143 else:
1144 dfh.seek(data_end, os.SEEK_SET)
1144 dfh.seek(data_end, os.SEEK_SET)
1145 if sdfh:
1145 if sdfh:
1146 sdfh.seek(sidedata_end, os.SEEK_SET)
1146 sdfh.seek(sidedata_end, os.SEEK_SET)
1147
1147
1148 curr = len(self.index) - 1
1148 curr = len(self.index) - 1
1149 if not self.inline:
1149 if not self.inline:
1150 transaction.add(self.data_file, offset)
1150 transaction.add(self.data_file, offset)
1151 if self.sidedata_file:
1151 if self.sidedata_file:
1152 transaction.add(self.sidedata_file, sidedata_offset)
1152 transaction.add(self.sidedata_file, sidedata_offset)
1153 transaction.add(self.canonical_index_file, curr * len(entry))
1153 transaction.add(self.canonical_index_file, curr * len(entry))
1154 if data[0]:
1154 if data[0]:
1155 dfh.write(data[0])
1155 dfh.write(data[0])
1156 dfh.write(data[1])
1156 dfh.write(data[1])
1157 if sidedata:
1157 if sidedata:
1158 sdfh.write(sidedata)
1158 sdfh.write(sidedata)
1159 if self._delay_buffer is None:
1159 if self._delay_buffer is None:
1160 ifh.write(entry)
1160 ifh.write(entry)
1161 else:
1161 else:
1162 self._delay_buffer.append(entry)
1162 self._delay_buffer.append(entry)
1163 elif self._delay_buffer is not None:
1163 elif self._delay_buffer is not None:
1164 msg = b'invalid delayed write on inline revlog'
1164 msg = b'invalid delayed write on inline revlog'
1165 raise error.ProgrammingError(msg)
1165 raise error.ProgrammingError(msg)
1166 else:
1166 else:
1167 offset += curr * self.index.entry_size
1167 offset += curr * self.index.entry_size
1168 transaction.add(self.canonical_index_file, offset)
1168 transaction.add(self.canonical_index_file, offset)
1169 assert not sidedata
1169 assert not sidedata
1170 ifh.write(entry)
1170 ifh.write(entry)
1171 ifh.write(data[0])
1171 ifh.write(data[0])
1172 ifh.write(data[1])
1172 ifh.write(data[1])
1173 return (
1173 return (
1174 ifh.tell(),
1174 ifh.tell(),
1175 dfh.tell() if dfh else None,
1175 dfh.tell() if dfh else None,
1176 sdfh.tell() if sdfh else None,
1176 sdfh.tell() if sdfh else None,
1177 )
1177 )
1178
1178
1179 def _divert_index(self):
1179 def _divert_index(self):
1180 index_file = self.index_file
1180 index_file = self.index_file
1181 # when we encounter a legacy inline-changelog, split it. However, it is
1181 # when we encounter a legacy inline-changelog, split it. However, it is
1182 # important to use the expected filename for pending content
1182 # important to use the expected filename for pending content
1183 # (<radix>.a), otherwise hooks won't see the content of the
1183 # (<radix>.a), otherwise hooks won't see the content of the
1184 # pending transaction.
1184 # pending transaction.
1185 if index_file.endswith(b'.s'):
1185 if index_file.endswith(b'.s'):
1186 index_file = self.index_file[:-2]
1186 index_file = self.index_file[:-2]
1187 return index_file + b'.a'
1187 return index_file + b'.a'
1188
1188
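# --- illustrative sketch (not part of the original file) -----------------
# A minimal, standalone model of the filename derivation done by
# `_divert_index` above: pending content goes to "<index file>.a", and a
# legacy split-inline name ending in ".s" is first mapped back to the plain
# index name.  The sample filenames below are hypothetical.
def _divert_index_name(index_file: bytes) -> bytes:
    if index_file.endswith(b'.s'):
        index_file = index_file[:-2]
    return index_file + b'.a'

assert _divert_index_name(b'00changelog.i') == b'00changelog.i.a'
assert _divert_index_name(b'00changelog.i.s') == b'00changelog.i.a'
# --------------------------------------------------------------------------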
1189 def delay(self):
1189 def delay(self):
1190 assert not self.is_open
1190 assert not self.is_open
1191 if self.inline:
1191 if self.inline:
1192 msg = "revlog with delayed write should not be inline"
1192 msg = "revlog with delayed write should not be inline"
1193 raise error.ProgrammingError(msg)
1193 raise error.ProgrammingError(msg)
1194 if self._delay_buffer is not None or self._orig_index_file is not None:
1194 if self._delay_buffer is not None or self._orig_index_file is not None:
1195 # delay or divert already in place
1195 # delay or divert already in place
1196 return None
1196 return None
1197 elif len(self.index) == 0:
1197 elif len(self.index) == 0:
1198 self._orig_index_file = self.index_file
1198 self._orig_index_file = self.index_file
1199 self.index_file = self._divert_index()
1199 self.index_file = self._divert_index()
1200 assert self._orig_index_file is not None
1200 assert self._orig_index_file is not None
1201 assert self.index_file is not None
1201 assert self.index_file is not None
1202 if self.opener.exists(self.index_file):
1202 if self.opener.exists(self.index_file):
1203 self.opener.unlink(self.index_file)
1203 self.opener.unlink(self.index_file)
1204 return self.index_file
1204 return self.index_file
1205 else:
1205 else:
1206 self._delay_buffer = []
1206 self._delay_buffer = []
1207 return None
1207 return None
1208
1208
1209 def write_pending(self):
1209 def write_pending(self):
1210 assert not self.is_open
1210 assert not self.is_open
1211 if self.inline:
1211 if self.inline:
1212 msg = "revlog with delayed write should not be inline"
1212 msg = "revlog with delayed write should not be inline"
1213 raise error.ProgrammingError(msg)
1213 raise error.ProgrammingError(msg)
1214 if self._orig_index_file is not None:
1214 if self._orig_index_file is not None:
1215 return None, True
1215 return None, True
1216 any_pending = False
1216 any_pending = False
1217 pending_index_file = self._divert_index()
1217 pending_index_file = self._divert_index()
1218 if self.opener.exists(pending_index_file):
1218 if self.opener.exists(pending_index_file):
1219 self.opener.unlink(pending_index_file)
1219 self.opener.unlink(pending_index_file)
1220 util.copyfile(
1220 util.copyfile(
1221 self.opener.join(self.index_file),
1221 self.opener.join(self.index_file),
1222 self.opener.join(pending_index_file),
1222 self.opener.join(pending_index_file),
1223 )
1223 )
1224 if self._delay_buffer:
1224 if self._delay_buffer:
1225 with self.opener(pending_index_file, b'r+') as ifh:
1225 with self.opener(pending_index_file, b'r+') as ifh:
1226 ifh.seek(0, os.SEEK_END)
1226 ifh.seek(0, os.SEEK_END)
1227 ifh.write(b"".join(self._delay_buffer))
1227 ifh.write(b"".join(self._delay_buffer))
1228 any_pending = True
1228 any_pending = True
1229 self._delay_buffer = None
1229 self._delay_buffer = None
1230 self._orig_index_file = self.index_file
1230 self._orig_index_file = self.index_file
1231 self.index_file = pending_index_file
1231 self.index_file = pending_index_file
1232 return self.index_file, any_pending
1232 return self.index_file, any_pending
1233
1233
1234 def finalize_pending(self):
1234 def finalize_pending(self):
1235 assert not self.is_open
1235 assert not self.is_open
1236 if self.inline:
1236 if self.inline:
1237 msg = "revlog with delayed write should not be inline"
1237 msg = "revlog with delayed write should not be inline"
1238 raise error.ProgrammingError(msg)
1238 raise error.ProgrammingError(msg)
1239
1239
1240 delay = self._delay_buffer is not None
1240 delay = self._delay_buffer is not None
1241 divert = self._orig_index_file is not None
1241 divert = self._orig_index_file is not None
1242
1242
1243 if delay and divert:
1243 if delay and divert:
1244 assert False, "unreachable"
1244 assert False, "unreachable"
1245 elif delay:
1245 elif delay:
1246 if self._delay_buffer:
1246 if self._delay_buffer:
1247 with self.opener(self.index_file, b'r+') as ifh:
1247 with self.opener(self.index_file, b'r+') as ifh:
1248 ifh.seek(0, os.SEEK_END)
1248 ifh.seek(0, os.SEEK_END)
1249 ifh.write(b"".join(self._delay_buffer))
1249 ifh.write(b"".join(self._delay_buffer))
1250 self._delay_buffer = None
1250 self._delay_buffer = None
1251 elif divert:
1251 elif divert:
1252 if self.opener.exists(self.index_file):
1252 if self.opener.exists(self.index_file):
1253 self.opener.rename(
1253 self.opener.rename(
1254 self.index_file,
1254 self.index_file,
1255 self._orig_index_file,
1255 self._orig_index_file,
1256 checkambig=True,
1256 checkambig=True,
1257 )
1257 )
1258 self.index_file = self._orig_index_file
1258 self.index_file = self._orig_index_file
1259 self._orig_index_file = None
1259 self._orig_index_file = None
1260 else:
1260 else:
1261 msg = b"neither delay nor divert found on this revlog"
1261 msg = b"neither delay nor divert found on this revlog"
1262 raise error.ProgrammingError(msg)
1262 raise error.ProgrammingError(msg)
1263 return self.canonical_index_file
1263 return self.canonical_index_file
1264
1264
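# --- illustrative sketch (not part of the original file) -----------------
# A simplified, in-memory model of the two strategies used by `delay`,
# `write_pending` and `finalize_pending` above: if the index starts empty we
# can divert all writes to a pending ".a" file; otherwise we buffer new
# entries in memory and only append them when the state is materialized.
# The class name and the dict-based "filesystem" are hypothetical stand-ins
# for the vfs-backed original.
class DelayedIndexWriter:
    def __init__(self, name: bytes, files: dict):
        self.files = files  # filename -> bytearray, stands in for a vfs
        self.name = name
        self.orig_name = None
        self.buffer = None

    def delay(self):
        if not self.files.get(self.name):
            # empty revlog: divert every write to the pending file
            self.orig_name = self.name
            self.name = self.name + b'.a'
            self.files[self.name] = bytearray()
        else:
            # existing revlog: buffer the new entries in memory
            self.buffer = []

    def write(self, entry: bytes):
        if self.buffer is not None:
            self.buffer.append(entry)
        else:
            self.files[self.name].extend(entry)

    def finalize(self):
        if self.buffer is not None:
            # flush the buffered entries onto the real index
            self.files[self.name].extend(b''.join(self.buffer))
            self.buffer = None
        else:
            # move the diverted file back in place of the real index
            self.files[self.orig_name] = self.files.pop(self.name)
            self.name, self.orig_name = self.orig_name, None

files = {b'00changelog.i': bytearray(b'<existing>')}
w = DelayedIndexWriter(b'00changelog.i', files)
w.delay()
w.write(b'<new entry>')
w.finalize()
assert files[b'00changelog.i'] == bytearray(b'<existing><new entry>')
# --------------------------------------------------------------------------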
1265
1265
1266 class revlog:
1266 class revlog:
1267 """
1267 """
1268 the underlying revision storage object
1268 the underlying revision storage object
1269
1269
1270 A revlog consists of two parts, an index and the revision data.
1270 A revlog consists of two parts, an index and the revision data.
1271
1271
1272 The index is a file with a fixed record size containing
1272 The index is a file with a fixed record size containing
1273 information on each revision, including its nodeid (hash), the
1273 information on each revision, including its nodeid (hash), the
1274 nodeids of its parents, the position and offset of its data within
1274 nodeids of its parents, the position and offset of its data within
1275 the data file, and the revision it's based on. Finally, each entry
1275 the data file, and the revision it's based on. Finally, each entry
1276 contains a linkrev entry that can serve as a pointer to external
1276 contains a linkrev entry that can serve as a pointer to external
1277 data.
1277 data.
1278
1278
1279 The revision data itself is a linear collection of data chunks.
1279 The revision data itself is a linear collection of data chunks.
1280 Each chunk represents a revision and is usually represented as a
1280 Each chunk represents a revision and is usually represented as a
1281 delta against the previous chunk. To bound lookup time, runs of
1281 delta against the previous chunk. To bound lookup time, runs of
1282 deltas are limited to about 2 times the length of the original
1282 deltas are limited to about 2 times the length of the original
1283 version data. This makes retrieval of a version proportional to
1283 version data. This makes retrieval of a version proportional to
1284 its size, or O(1) relative to the number of revisions.
1284 its size, or O(1) relative to the number of revisions.
1285
1285
1286 Both pieces of the revlog are written to in an append-only
1286 Both pieces of the revlog are written to in an append-only
1287 fashion, which means we never need to rewrite a file to insert or
1287 fashion, which means we never need to rewrite a file to insert or
1288 remove data, and can use some simple techniques to avoid the need
1288 remove data, and can use some simple techniques to avoid the need
1289 for locking while reading.
1289 for locking while reading.
1290
1290
1291 If checkambig, indexfile is opened with checkambig=True at
1291 If checkambig, indexfile is opened with checkambig=True at
1292 writing, to avoid file stat ambiguity.
1292 writing, to avoid file stat ambiguity.
1293
1293
1294 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1294 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1295 index will be mmapped rather than read if it is larger than the
1295 index will be mmapped rather than read if it is larger than the
1296 configured threshold.
1296 configured threshold.
1297
1297
1298 If censorable is True, the revlog can have censored revisions.
1298 If censorable is True, the revlog can have censored revisions.
1299
1299
1300 If `upperboundcomp` is not None, this is the expected maximal gain from
1300 If `upperboundcomp` is not None, this is the expected maximal gain from
1301 compression for the data content.
1301 compression for the data content.
1302
1302
1303 `concurrencychecker` is an optional function that receives 3 arguments: a
1303 `concurrencychecker` is an optional function that receives 3 arguments: a
1304 file handle, a filename, and an expected position. It should check whether
1304 file handle, a filename, and an expected position. It should check whether
1305 the current position in the file handle is valid, and log/warn/fail (by
1305 the current position in the file handle is valid, and log/warn/fail (by
1306 raising).
1306 raising).
1307
1307
1308 See mercurial/revlogutils/constants.py for details about the content of an
1308 See mercurial/revlogutils/constants.py for details about the content of an
1309 index entry.
1309 index entry.
1310 """
1310 """
1311
1311
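# --- illustrative sketch (not part of the original file) -----------------
# The docstring above describes revision data as chains of deltas applied
# on top of a full base text.  This is a conceptual model only: it uses a
# toy (start, end, replacement) delta format rather than Mercurial's binary
# bdiff/mpatch encoding, and a plain list instead of a revlog index.
def apply_delta(base: bytes, delta):
    """Apply a sorted list of (start, end, replacement) edits to ``base``."""
    out, pos = [], 0
    for start, end, replacement in delta:
        out.append(base[pos:start])
        out.append(replacement)
        pos = end
    out.append(base[pos:])
    return b''.join(out)

def reconstruct(chain):
    """Rebuild a revision from a full text followed by a chain of deltas."""
    text = chain[0]
    for delta in chain[1:]:
        text = apply_delta(text, delta)
    return text

chain = [b'hello world\n', [(6, 11, b'revlog')], [(0, 5, b'HELLO')]]
assert reconstruct(chain) == b'HELLO revlog\n'
# --------------------------------------------------------------------------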
1312 _flagserrorclass = error.RevlogError
1312 _flagserrorclass = error.RevlogError
1313 _inner: "_InnerRevlog"
1313 _inner: "_InnerRevlog"
1314
1314
1315 opener: vfsmod.vfs
1315 opener: vfsmod.vfs
1316
1316
1317 @staticmethod
1317 @staticmethod
1318 def is_inline_index(header_bytes):
1318 def is_inline_index(header_bytes):
1319 """Determine if a revlog is inline from the initial bytes of the index"""
1319 """Determine if a revlog is inline from the initial bytes of the index"""
1320 if len(header_bytes) == 0:
1320 if len(header_bytes) == 0:
1321 return True
1321 return True
1322
1322
1323 header = INDEX_HEADER.unpack(header_bytes)[0]
1323 header = INDEX_HEADER.unpack(header_bytes)[0]
1324
1324
1325 _format_flags = header & ~0xFFFF
1325 _format_flags = header & ~0xFFFF
1326 _format_version = header & 0xFFFF
1326 _format_version = header & 0xFFFF
1327
1327
1328 features = FEATURES_BY_VERSION[_format_version]
1328 features = FEATURES_BY_VERSION[_format_version]
1329 return features[b'inline'](_format_flags)
1329 return features[b'inline'](_format_flags)
1330
1330
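# --- illustrative sketch (not part of the original file) -----------------
# `is_inline_index` above decodes the first 4 bytes of an index into format
# flags (high 16 bits) and a format version (low 16 bits).  A standalone
# decoder is sketched below; the flag value used (1 << 16 for inline data in
# a version-1 revlog) is assumed here rather than imported from
# mercurial.revlogutils.constants.
import struct

ASSUMED_FLAG_INLINE_DATA = 1 << 16

def decode_index_header(header_bytes: bytes):
    (header,) = struct.unpack(">I", header_bytes[:4])
    format_flags = header & ~0xFFFF
    format_version = header & 0xFFFF
    inline = bool(format_flags & ASSUMED_FLAG_INLINE_DATA)
    return format_version, format_flags, inline

# a version-1 header with the inline flag set
sample = struct.pack(">I", (1 << 16) | 1)
assert decode_index_header(sample) == (1, 1 << 16, True)
# --------------------------------------------------------------------------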
1331 _docket_file: Optional[bytes]
1331 _docket_file: Optional[bytes]
1332
1332
1333 def __init__(
1333 def __init__(
1334 self,
1334 self,
1335 opener: vfsmod.vfs,
1335 opener: vfsmod.vfs,
1336 target,
1336 target,
1337 radix,
1337 radix,
1338 postfix=None, # only exist for `tmpcensored` now
1338 postfix=None, # only exist for `tmpcensored` now
1339 checkambig=False,
1339 checkambig=False,
1340 mmaplargeindex=False,
1340 mmaplargeindex=False,
1341 censorable=False,
1341 censorable=False,
1342 upperboundcomp=None,
1342 upperboundcomp=None,
1343 persistentnodemap=False,
1343 persistentnodemap=False,
1344 concurrencychecker=None,
1344 concurrencychecker=None,
1345 trypending=False,
1345 trypending=False,
1346 try_split=False,
1346 try_split=False,
1347 canonical_parent_order=True,
1347 canonical_parent_order=True,
1348 data_config=None,
1348 data_config=None,
1349 delta_config=None,
1349 delta_config=None,
1350 feature_config=None,
1350 feature_config=None,
1351 may_inline=True, # may inline new revlog
1351 may_inline=True, # may inline new revlog
1352 ):
1352 ):
1353 """
1353 """
1354 create a revlog object
1354 create a revlog object
1355
1355
1356 opener is a function that abstracts the file opening operation
1356 opener is a function that abstracts the file opening operation
1357 and can be used to implement COW semantics or the like.
1357 and can be used to implement COW semantics or the like.
1358
1358
1359 `target`: a (KIND, ID) tuple that identifies the content stored in
1359 `target`: a (KIND, ID) tuple that identifies the content stored in
1360 this revlog. It helps the rest of the code to understand what the revlog
1360 this revlog. It helps the rest of the code to understand what the revlog
1361 is about without having to resort to heuristics and index filename
1361 is about without having to resort to heuristics and index filename
1362 analysis. Note that this must be reliably set by normal code, but
1362 analysis. Note that this must be reliably set by normal code, but
1363 test, debug, or performance measurement code might not set this to an
1363 test, debug, or performance measurement code might not set this to an
1364 accurate value.
1364 accurate value.
1365 """
1365 """
1366
1366
1367 self.radix = radix
1367 self.radix = radix
1368
1368
1369 self._docket_file = None
1369 self._docket_file = None
1370 self._indexfile = None
1370 self._indexfile = None
1371 self._datafile = None
1371 self._datafile = None
1372 self._sidedatafile = None
1372 self._sidedatafile = None
1373 self._nodemap_file = None
1373 self._nodemap_file = None
1374 self.postfix = postfix
1374 self.postfix = postfix
1375 self._trypending = trypending
1375 self._trypending = trypending
1376 self._try_split = try_split
1376 self._try_split = try_split
1377 self._may_inline = may_inline
1377 self._may_inline = may_inline
1378 self.opener = opener
1378 self.opener = opener
1379 if persistentnodemap:
1379 if persistentnodemap:
1380 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1380 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1381
1381
1382 assert target[0] in ALL_KINDS
1382 assert target[0] in ALL_KINDS
1383 assert len(target) == 2
1383 assert len(target) == 2
1384 self.target = target
1384 self.target = target
1385 if feature_config is not None:
1385 if feature_config is not None:
1386 self.feature_config = feature_config.copy()
1386 self.feature_config = feature_config.copy()
1387 elif b'feature-config' in self.opener.options:
1387 elif b'feature-config' in self.opener.options:
1388 self.feature_config = self.opener.options[b'feature-config'].copy()
1388 self.feature_config = self.opener.options[b'feature-config'].copy()
1389 else:
1389 else:
1390 self.feature_config = FeatureConfig()
1390 self.feature_config = FeatureConfig()
1391 self.feature_config.censorable = censorable
1391 self.feature_config.censorable = censorable
1392 self.feature_config.canonical_parent_order = canonical_parent_order
1392 self.feature_config.canonical_parent_order = canonical_parent_order
1393 if data_config is not None:
1393 if data_config is not None:
1394 self.data_config = data_config.copy()
1394 self.data_config = data_config.copy()
1395 elif b'data-config' in self.opener.options:
1395 elif b'data-config' in self.opener.options:
1396 self.data_config = self.opener.options[b'data-config'].copy()
1396 self.data_config = self.opener.options[b'data-config'].copy()
1397 else:
1397 else:
1398 self.data_config = DataConfig()
1398 self.data_config = DataConfig()
1399 self.data_config.check_ambig = checkambig
1399 self.data_config.check_ambig = checkambig
1400 self.data_config.mmap_large_index = mmaplargeindex
1400 self.data_config.mmap_large_index = mmaplargeindex
1401 if delta_config is not None:
1401 if delta_config is not None:
1402 self.delta_config = delta_config.copy()
1402 self.delta_config = delta_config.copy()
1403 elif b'delta-config' in self.opener.options:
1403 elif b'delta-config' in self.opener.options:
1404 self.delta_config = self.opener.options[b'delta-config'].copy()
1404 self.delta_config = self.opener.options[b'delta-config'].copy()
1405 else:
1405 else:
1406 self.delta_config = DeltaConfig()
1406 self.delta_config = DeltaConfig()
1407 self.delta_config.upper_bound_comp = upperboundcomp
1407 self.delta_config.upper_bound_comp = upperboundcomp
1408
1408
1409 # Maps rev to chain base rev.
1409 # Maps rev to chain base rev.
1410 self._chainbasecache = util.lrucachedict(100)
1410 self._chainbasecache = util.lrucachedict(100)
1411
1411
1412 self.index = None
1412 self.index = None
1413 self._docket = None
1413 self._docket = None
1414 self._nodemap_docket = None
1414 self._nodemap_docket = None
1415 # Mapping of partial identifiers to full nodes.
1415 # Mapping of partial identifiers to full nodes.
1416 self._pcache = {}
1416 self._pcache = {}
1417
1417
1418 # other optional features
1418 # other optional features
1419
1419
1420 # Make copy of flag processors so each revlog instance can support
1420 # Make copy of flag processors so each revlog instance can support
1421 # custom flags.
1421 # custom flags.
1422 self._flagprocessors = dict(flagutil.flagprocessors)
1422 self._flagprocessors = dict(flagutil.flagprocessors)
1423 # prevent nesting of addgroup
1423 # prevent nesting of addgroup
1424 self._adding_group = None
1424 self._adding_group = None
1425
1425
1426 chunk_cache = self._loadindex()
1426 chunk_cache = self._loadindex()
1427 self._load_inner(chunk_cache)
1427 self._load_inner(chunk_cache)
1428 self._concurrencychecker = concurrencychecker
1428 self._concurrencychecker = concurrencychecker
1429
1429
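# --- illustrative sketch (not part of the original file) -----------------
# `__init__` above resolves each of the feature/data/delta configs the same
# way: an explicit argument wins, then a copy found in `opener.options`,
# then a freshly built default.  The helper below models that precedence
# with plain dictionaries; the option key and default factory are
# hypothetical.
def pick_config(explicit, options, key, default_factory):
    if explicit is not None:
        return dict(explicit)
    if key in options:
        return dict(options[key])
    return default_factory()

options = {b'data-config': {'mmap_large_index': True}}
assert pick_config(None, options, b'data-config', dict) == {
    'mmap_large_index': True
}
assert pick_config({'x': 1}, options, b'data-config', dict) == {'x': 1}
assert pick_config(None, {}, b'data-config', dict) == {}
# --------------------------------------------------------------------------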
1430 def _init_opts(self):
1430 def _init_opts(self):
1431 """process options (from above/config) to setup associated default revlog mode
1431 """process options (from above/config) to setup associated default revlog mode
1432
1432
1433 These values might be affected when actually reading on-disk information.
1433 These values might be affected when actually reading on-disk information.
1434
1434
1435 The relevant values are returned for use in _loadindex().
1435 The relevant values are returned for use in _loadindex().
1436
1436
1437 * newversionflags:
1437 * newversionflags:
1438 version header to use if we need to create a new revlog
1438 version header to use if we need to create a new revlog
1439
1439
1440 * mmapindexthreshold:
1440 * mmapindexthreshold:
1441 minimal index size at which to start using mmap
1441 minimal index size at which to start using mmap
1442
1442
1443 * force_nodemap:
1443 * force_nodemap:
1444 force the usage of a "development" version of the nodemap code
1444 force the usage of a "development" version of the nodemap code
1445 """
1445 """
1446 opts = self.opener.options
1446 opts = self.opener.options
1447
1447
1448 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1448 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1449 new_header = CHANGELOGV2
1449 new_header = CHANGELOGV2
1450 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1450 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1451 self.feature_config.compute_rank = compute_rank
1451 self.feature_config.compute_rank = compute_rank
1452 elif b'revlogv2' in opts:
1452 elif b'revlogv2' in opts:
1453 new_header = REVLOGV2
1453 new_header = REVLOGV2
1454 elif b'revlogv1' in opts:
1454 elif b'revlogv1' in opts:
1455 new_header = REVLOGV1
1455 new_header = REVLOGV1
1456 if self._may_inline:
1456 if self._may_inline:
1457 new_header |= FLAG_INLINE_DATA
1457 new_header |= FLAG_INLINE_DATA
1458 if b'generaldelta' in opts:
1458 if b'generaldelta' in opts:
1459 new_header |= FLAG_GENERALDELTA
1459 new_header |= FLAG_GENERALDELTA
1460 elif b'revlogv0' in self.opener.options:
1460 elif b'revlogv0' in self.opener.options:
1461 new_header = REVLOGV0
1461 new_header = REVLOGV0
1462 else:
1462 else:
1463 new_header = REVLOG_DEFAULT_VERSION
1463 new_header = REVLOG_DEFAULT_VERSION
1464
1464
1465 mmapindexthreshold = None
1465 mmapindexthreshold = None
1466 if self.data_config.mmap_large_index:
1466 if self.data_config.mmap_large_index:
1467 mmapindexthreshold = self.data_config.mmap_index_threshold
1467 mmapindexthreshold = self.data_config.mmap_index_threshold
1468 if self.feature_config.enable_ellipsis:
1468 if self.feature_config.enable_ellipsis:
1469 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1469 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1470
1470
1471 # revlog v0 doesn't have flag processors
1471 # revlog v0 doesn't have flag processors
1472 for flag, processor in opts.get(b'flagprocessors', {}).items():
1472 for flag, processor in opts.get(b'flagprocessors', {}).items():
1473 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1473 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1474
1474
1475 chunk_cache_size = self.data_config.chunk_cache_size
1475 chunk_cache_size = self.data_config.chunk_cache_size
1476 if chunk_cache_size <= 0:
1476 if chunk_cache_size <= 0:
1477 raise error.RevlogError(
1477 raise error.RevlogError(
1478 _(b'revlog chunk cache size %r is not greater than 0')
1478 _(b'revlog chunk cache size %r is not greater than 0')
1479 % chunk_cache_size
1479 % chunk_cache_size
1480 )
1480 )
1481 elif chunk_cache_size & (chunk_cache_size - 1):
1481 elif chunk_cache_size & (chunk_cache_size - 1):
1482 raise error.RevlogError(
1482 raise error.RevlogError(
1483 _(b'revlog chunk cache size %r is not a power of 2')
1483 _(b'revlog chunk cache size %r is not a power of 2')
1484 % chunk_cache_size
1484 % chunk_cache_size
1485 )
1485 )
1486 force_nodemap = opts.get(b'devel-force-nodemap', False)
1486 force_nodemap = opts.get(b'devel-force-nodemap', False)
1487 return new_header, mmapindexthreshold, force_nodemap
1487 return new_header, mmapindexthreshold, force_nodemap
1488
1488
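# --- illustrative sketch (not part of the original file) -----------------
# `_init_opts` above rejects a chunk cache size that is not a positive power
# of two, using the classic `n & (n - 1)` bit trick: a power of two has a
# single bit set, so clearing its lowest set bit yields zero.
def is_valid_chunk_cache_size(n: int) -> bool:
    return n > 0 and (n & (n - 1)) == 0

assert is_valid_chunk_cache_size(65536)
assert not is_valid_chunk_cache_size(0)
assert not is_valid_chunk_cache_size(48 * 1024)  # 49152 is not a power of 2
# --------------------------------------------------------------------------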
1489 def _get_data(self, filepath, mmap_threshold, size=None):
1489 def _get_data(self, filepath, mmap_threshold, size=None):
1490 """return a file content with or without mmap
1490 """return a file content with or without mmap
1491
1491
1492 If the file is missing return the empty string"""
1492 If the file is missing return the empty string"""
1493 try:
1493 try:
1494 with self.opener(filepath) as fp:
1494 with self.opener(filepath) as fp:
1495 if mmap_threshold is not None:
1495 if mmap_threshold is not None:
1496 file_size = self.opener.fstat(fp).st_size
1496 file_size = self.opener.fstat(fp).st_size
1497 if (
1497 if (
1498 file_size >= mmap_threshold
1498 file_size >= mmap_threshold
1499 and self.opener.is_mmap_safe(filepath)
1499 and self.opener.is_mmap_safe(filepath)
1500 ):
1500 ):
1501 if size is not None:
1501 if size is not None:
1502 # avoid potential mmap crash
1502 # avoid potential mmap crash
1503 size = min(file_size, size)
1503 size = min(file_size, size)
1504 # TODO: we should .close() this to release resources without
1504 # TODO: we should .close() this to release resources without
1505 # relying on Python GC
1505 # relying on Python GC
1506 if size is None:
1506 if size is None:
1507 return util.buffer(util.mmapread(fp))
1507 return util.buffer(util.mmapread(fp))
1508 else:
1508 else:
1509 return util.buffer(util.mmapread(fp, size))
1509 return util.buffer(util.mmapread(fp, size))
1510 if size is None:
1510 if size is None:
1511 return fp.read()
1511 return fp.read()
1512 else:
1512 else:
1513 return fp.read(size)
1513 return fp.read(size)
1514 except FileNotFoundError:
1514 except FileNotFoundError:
1515 return b''
1515 return b''
1516
1516
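# --- illustrative sketch (not part of the original file) -----------------
# `_get_data` above returns a file's content either via mmap (for large
# files, when safe) or a plain read, and falls back to an empty string when
# the file is missing.  The standalone version below uses the standard
# `mmap` module instead of the vfs helpers used by the revlog.
import mmap
import os

def read_possibly_mmapped(path, mmap_threshold=None, size=None):
    try:
        with open(path, 'rb') as fp:
            file_size = os.fstat(fp.fileno()).st_size
            use_mmap = (
                mmap_threshold is not None
                and file_size >= mmap_threshold
                and file_size > 0
            )
            if use_mmap:
                # never map beyond the end of the file
                length = file_size if size is None else min(file_size, size)
                with mmap.mmap(fp.fileno(), length, access=mmap.ACCESS_READ) as m:
                    return bytes(m)
            return fp.read() if size is None else fp.read(size)
    except FileNotFoundError:
        return b''

assert read_possibly_mmapped('/nonexistent/path') == b''
# --------------------------------------------------------------------------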
1517 def get_streams(self, max_linkrev, force_inline=False):
1517 def get_streams(self, max_linkrev, force_inline=False):
1518 """return a list of streams that represent this revlog
1518 """return a list of streams that represent this revlog
1519
1519
1520 This is used by stream-clone to do byte-for-byte copies of a repository.
1520 This is used by stream-clone to do byte-for-byte copies of a repository.
1521
1521
1522 This streams data for all revisions that refer to a changelog revision up
1522 This streams data for all revisions that refer to a changelog revision up
1523 to `max_linkrev`.
1523 to `max_linkrev`.
1524
1524
1525 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1525 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1526
1526
1527 It returns a list of three-tuples:
1527 It returns a list of three-tuples:
1528
1528
1529 [
1529 [
1530 (filename, bytes_stream, stream_size),
1530 (filename, bytes_stream, stream_size),
1531 …
1531 …
1532 ]
1532 ]
1533 """
1533 """
1534 n = len(self)
1534 n = len(self)
1535 index = self.index
1535 index = self.index
1536 while n > 0:
1536 while n > 0:
1537 linkrev = index[n - 1][4]
1537 linkrev = index[n - 1][4]
1538 if linkrev < max_linkrev:
1538 if linkrev < max_linkrev:
1539 break
1539 break
1540 # note: this loop will rarely go through multiple iterations, since
1540 # note: this loop will rarely go through multiple iterations, since
1541 # it only traverses commits created during the current streaming
1541 # it only traverses commits created during the current streaming
1542 # pull operation.
1542 # pull operation.
1543 #
1543 #
1544 # If this becomes a problem, using a binary search should cap the
1544 # If this becomes a problem, using a binary search should cap the
1545 # runtime of this.
1545 # runtime of this.
1546 n = n - 1
1546 n = n - 1
1547 if n == 0:
1547 if n == 0:
1548 # no data to send
1548 # no data to send
1549 return []
1549 return []
1550 index_size = n * index.entry_size
1550 index_size = n * index.entry_size
1551 data_size = self.end(n - 1)
1551 data_size = self.end(n - 1)
1552
1552
1553 # XXX we might have been split (or stripped) since the object
1553 # XXX we might have been split (or stripped) since the object
1554 # initialization. We need to close this race too, for example by having
1554 # initialization. We need to close this race too, for example by having
1555 # a way to pre-open the files we feed to the revlog and never closing
1555 # a way to pre-open the files we feed to the revlog and never closing
1556 # them before we are done streaming.
1556 # them before we are done streaming.
1557
1557
1558 if self._inline:
1558 if self._inline:
1559
1559
1560 def get_stream():
1560 def get_stream():
1561 with self.opener(self._indexfile, mode=b"r") as fp:
1561 with self.opener(self._indexfile, mode=b"r") as fp:
1562 yield None
1562 yield None
1563 size = index_size + data_size
1563 size = index_size + data_size
1564 if size <= 65536:
1564 if size <= 65536:
1565 yield fp.read(size)
1565 yield fp.read(size)
1566 else:
1566 else:
1567 yield from util.filechunkiter(fp, limit=size)
1567 yield from util.filechunkiter(fp, limit=size)
1568
1568
1569 inline_stream = get_stream()
1569 inline_stream = get_stream()
1570 next(inline_stream)
1570 next(inline_stream)
1571 return [
1571 return [
1572 (self._indexfile, inline_stream, index_size + data_size),
1572 (self._indexfile, inline_stream, index_size + data_size),
1573 ]
1573 ]
1574 elif force_inline:
1574 elif force_inline:
1575
1575
1576 def get_stream():
1576 def get_stream():
1577 with self.reading():
1577 with self.reading():
1578 yield None
1578 yield None
1579
1579
1580 for rev in range(n):
1580 for rev in range(n):
1581 idx = self.index.entry_binary(rev)
1581 idx = self.index.entry_binary(rev)
1582 if rev == 0 and self._docket is None:
1582 if rev == 0 and self._docket is None:
1583 # re-inject the inline flag
1583 # re-inject the inline flag
1584 header = self._format_flags
1584 header = self._format_flags
1585 header |= self._format_version
1585 header |= self._format_version
1586 header |= FLAG_INLINE_DATA
1586 header |= FLAG_INLINE_DATA
1587 header = self.index.pack_header(header)
1587 header = self.index.pack_header(header)
1588 idx = header + idx
1588 idx = header + idx
1589 yield idx
1589 yield idx
1590 yield self._inner.get_segment_for_revs(rev, rev)[1]
1590 yield self._inner.get_segment_for_revs(rev, rev)[1]
1591
1591
1592 inline_stream = get_stream()
1592 inline_stream = get_stream()
1593 next(inline_stream)
1593 next(inline_stream)
1594 return [
1594 return [
1595 (self._indexfile, inline_stream, index_size + data_size),
1595 (self._indexfile, inline_stream, index_size + data_size),
1596 ]
1596 ]
1597 else:
1597 else:
1598
1598
1599 def get_index_stream():
1599 def get_index_stream():
1600 with self.opener(self._indexfile, mode=b"r") as fp:
1600 with self.opener(self._indexfile, mode=b"r") as fp:
1601 yield None
1601 yield None
1602 if index_size <= 65536:
1602 if index_size <= 65536:
1603 yield fp.read(index_size)
1603 yield fp.read(index_size)
1604 else:
1604 else:
1605 yield from util.filechunkiter(fp, limit=index_size)
1605 yield from util.filechunkiter(fp, limit=index_size)
1606
1606
1607 def get_data_stream():
1607 def get_data_stream():
1608 with self._datafp() as fp:
1608 with self._datafp() as fp:
1609 yield None
1609 yield None
1610 if data_size <= 65536:
1610 if data_size <= 65536:
1611 yield fp.read(data_size)
1611 yield fp.read(data_size)
1612 else:
1612 else:
1613 yield from util.filechunkiter(fp, limit=data_size)
1613 yield from util.filechunkiter(fp, limit=data_size)
1614
1614
1615 index_stream = get_index_stream()
1615 index_stream = get_index_stream()
1616 next(index_stream)
1616 next(index_stream)
1617 data_stream = get_data_stream()
1617 data_stream = get_data_stream()
1618 next(data_stream)
1618 next(data_stream)
1619 return [
1619 return [
1620 (self._datafile, data_stream, data_size),
1620 (self._datafile, data_stream, data_size),
1621 (self._indexfile, index_stream, index_size),
1621 (self._indexfile, index_stream, index_size),
1622 ]
1622 ]
1623
1623
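# --- illustrative sketch (not part of the original file) -----------------
# `get_streams` above returns a list of (filename, stream, size) tuples in
# which each stream is an already-primed generator of byte chunks.  A
# stream-clone style consumer could copy them as sketched below; the
# destination opener used here is a hypothetical stand-in.
import contextlib
import io

def copy_revlog_streams(streams, dest_open):
    """Write every advertised stream and check its byte count.

    ``dest_open`` is any callable mimicking ``open(name, 'wb')`` as a
    context manager.
    """
    for name, stream, expected_size in streams:
        written = 0
        with dest_open(name) as out:
            for chunk in stream:
                out.write(chunk)
                written += len(chunk)
        if written != expected_size:
            raise ValueError(
                'short stream for %r: %d != %d' % (name, written, expected_size)
            )

# toy usage with in-memory "files"
buffers = {}
def open_buffer(name):
    return contextlib.nullcontext(buffers.setdefault(name, io.BytesIO()))

copy_revlog_streams([(b'00changelog.i', iter([b'abc', b'def']), 6)], open_buffer)
assert buffers[b'00changelog.i'].getvalue() == b'abcdef'
# --------------------------------------------------------------------------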
1624 def _loadindex(self, docket=None):
1624 def _loadindex(self, docket=None):
1625 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1625 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1626
1626
1627 if self.postfix is not None:
1627 if self.postfix is not None:
1628 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1628 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1629 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1629 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1630 entry_point = b'%s.i.a' % self.radix
1630 entry_point = b'%s.i.a' % self.radix
1631 elif self._try_split and self.opener.exists(self._split_index_file):
1631 elif self._try_split and self.opener.exists(self._split_index_file):
1632 entry_point = self._split_index_file
1632 entry_point = self._split_index_file
1633 else:
1633 else:
1634 entry_point = b'%s.i' % self.radix
1634 entry_point = b'%s.i' % self.radix
1635
1635
1636 if docket is not None:
1636 if docket is not None:
1637 self._docket = docket
1637 self._docket = docket
1638 self._docket_file = entry_point
1638 self._docket_file = entry_point
1639 else:
1639 else:
1640 self._initempty = True
1640 self._initempty = True
1641 entry_data = self._get_data(entry_point, mmapindexthreshold)
1641 entry_data = self._get_data(entry_point, mmapindexthreshold)
1642 if len(entry_data) > 0:
1642 if len(entry_data) > 0:
1643 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1643 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1644 self._initempty = False
1644 self._initempty = False
1645 else:
1645 else:
1646 header = new_header
1646 header = new_header
1647
1647
1648 self._format_flags = header & ~0xFFFF
1648 self._format_flags = header & ~0xFFFF
1649 self._format_version = header & 0xFFFF
1649 self._format_version = header & 0xFFFF
1650
1650
1651 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1651 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1652 if supported_flags is None:
1652 if supported_flags is None:
1653 msg = _(b'unknown version (%d) in revlog %s')
1653 msg = _(b'unknown version (%d) in revlog %s')
1654 msg %= (self._format_version, self.display_id)
1654 msg %= (self._format_version, self.display_id)
1655 raise error.RevlogError(msg)
1655 raise error.RevlogError(msg)
1656 elif self._format_flags & ~supported_flags:
1656 elif self._format_flags & ~supported_flags:
1657 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1657 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1658 display_flag = self._format_flags >> 16
1658 display_flag = self._format_flags >> 16
1659 msg %= (display_flag, self._format_version, self.display_id)
1659 msg %= (display_flag, self._format_version, self.display_id)
1660 raise error.RevlogError(msg)
1660 raise error.RevlogError(msg)
1661
1661
1662 features = FEATURES_BY_VERSION[self._format_version]
1662 features = FEATURES_BY_VERSION[self._format_version]
1663 self._inline = features[b'inline'](self._format_flags)
1663 self._inline = features[b'inline'](self._format_flags)
1664 self.delta_config.general_delta = features[b'generaldelta'](
1664 self.delta_config.general_delta = features[b'generaldelta'](
1665 self._format_flags
1665 self._format_flags
1666 )
1666 )
1667 self.feature_config.has_side_data = features[b'sidedata']
1667 self.feature_config.has_side_data = features[b'sidedata']
1668
1668
1669 if not features[b'docket']:
1669 if not features[b'docket']:
1670 self._indexfile = entry_point
1670 self._indexfile = entry_point
1671 index_data = entry_data
1671 index_data = entry_data
1672 else:
1672 else:
1673 self._docket_file = entry_point
1673 self._docket_file = entry_point
1674 if self._initempty:
1674 if self._initempty:
1675 self._docket = docketutil.default_docket(self, header)
1675 self._docket = docketutil.default_docket(self, header)
1676 else:
1676 else:
1677 self._docket = docketutil.parse_docket(
1677 self._docket = docketutil.parse_docket(
1678 self, entry_data, use_pending=self._trypending
1678 self, entry_data, use_pending=self._trypending
1679 )
1679 )
1680
1680
1681 if self._docket is not None:
1681 if self._docket is not None:
1682 self._indexfile = self._docket.index_filepath()
1682 self._indexfile = self._docket.index_filepath()
1683 index_data = b''
1683 index_data = b''
1684 index_size = self._docket.index_end
1684 index_size = self._docket.index_end
1685 if index_size > 0:
1685 if index_size > 0:
1686 index_data = self._get_data(
1686 index_data = self._get_data(
1687 self._indexfile, mmapindexthreshold, size=index_size
1687 self._indexfile, mmapindexthreshold, size=index_size
1688 )
1688 )
1689 if len(index_data) < index_size:
1689 if len(index_data) < index_size:
1690 msg = _(b'too few index data for %s: got %d, expected %d')
1690 msg = _(b'too few index data for %s: got %d, expected %d')
1691 msg %= (self.display_id, len(index_data), index_size)
1691 msg %= (self.display_id, len(index_data), index_size)
1692 raise error.RevlogError(msg)
1692 raise error.RevlogError(msg)
1693
1693
1694 self._inline = False
1694 self._inline = False
1695 # generaldelta implied by version 2 revlogs.
1695 # generaldelta implied by version 2 revlogs.
1696 self.delta_config.general_delta = True
1696 self.delta_config.general_delta = True
1697 # the logic for persistent nodemap will be dealt with within the
1697 # the logic for persistent nodemap will be dealt with within the
1698 # main docket, so disable it for now.
1698 # main docket, so disable it for now.
1699 self._nodemap_file = None
1699 self._nodemap_file = None
1700
1700
1701 if self._docket is not None:
1701 if self._docket is not None:
1702 self._datafile = self._docket.data_filepath()
1702 self._datafile = self._docket.data_filepath()
1703 self._sidedatafile = self._docket.sidedata_filepath()
1703 self._sidedatafile = self._docket.sidedata_filepath()
1704 elif self.postfix is None:
1704 elif self.postfix is None:
1705 self._datafile = b'%s.d' % self.radix
1705 self._datafile = b'%s.d' % self.radix
1706 else:
1706 else:
1707 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1707 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1708
1708
1709 self.nodeconstants = sha1nodeconstants
1709 self.nodeconstants = sha1nodeconstants
1710 self.nullid = self.nodeconstants.nullid
1710 self.nullid = self.nodeconstants.nullid
1711
1711
1712 # sparse-revlog can't be on without general-delta (issue6056)
1712 # sparse-revlog can't be on without general-delta (issue6056)
1713 if not self.delta_config.general_delta:
1713 if not self.delta_config.general_delta:
1714 self.delta_config.sparse_revlog = False
1714 self.delta_config.sparse_revlog = False
1715
1715
1716 self._storedeltachains = True
1716 self._storedeltachains = True
1717
1717
1718 devel_nodemap = (
1718 devel_nodemap = (
1719 self._nodemap_file
1719 self._nodemap_file
1720 and force_nodemap
1720 and force_nodemap
1721 and parse_index_v1_nodemap is not None
1721 and parse_index_v1_nodemap is not None
1722 )
1722 )
1723
1723
1724 use_rust_index = False
1724 use_rust_index = False
1725 if rustrevlog is not None and self._nodemap_file is not None:
1725 if rustrevlog is not None and self._nodemap_file is not None:
1726 # we would like to use the rust_index in all cases, especially
1726 # we would like to use the rust_index in all cases, especially
1727 # because it is necessary for AncestorsIterator and LazyAncestors
1727 # because it is necessary for AncestorsIterator and LazyAncestors
1728 # since the 6.7 cycle.
1728 # since the 6.7 cycle.
1729 #
1729 #
1730 # However, the performance impact of unconditionally building the
1730 # However, the performance impact of unconditionally building the
1731 # nodemap is currently a problem for repositories without a
1731 # nodemap is currently a problem for repositories without a
1732 # persistent nodemap.
1732 # persistent nodemap.
1733 use_rust_index = True
1733 use_rust_index = True
1734
1734
1735 self._parse_index = parse_index_v1
1735 self._parse_index = parse_index_v1
1736 if self._format_version == REVLOGV0:
1736 if self._format_version == REVLOGV0:
1737 self._parse_index = revlogv0.parse_index_v0
1737 self._parse_index = revlogv0.parse_index_v0
1738 elif self._format_version == REVLOGV2:
1738 elif self._format_version == REVLOGV2:
1739 self._parse_index = parse_index_v2
1739 self._parse_index = parse_index_v2
1740 elif self._format_version == CHANGELOGV2:
1740 elif self._format_version == CHANGELOGV2:
1741 self._parse_index = parse_index_cl_v2
1741 self._parse_index = parse_index_cl_v2
1742 elif devel_nodemap:
1742 elif devel_nodemap:
1743 self._parse_index = parse_index_v1_nodemap
1743 self._parse_index = parse_index_v1_nodemap
1744 elif use_rust_index:
1744 elif use_rust_index:
1745 self._parse_index = functools.partial(
1745 self._parse_index = functools.partial(
1746 parse_index_v1_rust, default_header=new_header
1746 parse_index_v1_rust, default_header=new_header
1747 )
1747 )
1748 try:
1748 try:
1749 d = self._parse_index(index_data, self._inline)
1749 d = self._parse_index(index_data, self._inline)
1750 index, chunkcache = d
1750 index, chunkcache = d
1751 use_nodemap = (
1751 use_nodemap = (
1752 not self._inline
1752 not self._inline
1753 and self._nodemap_file is not None
1753 and self._nodemap_file is not None
1754 and hasattr(index, 'update_nodemap_data')
1754 and hasattr(index, 'update_nodemap_data')
1755 )
1755 )
1756 if use_nodemap:
1756 if use_nodemap:
1757 nodemap_data = nodemaputil.persisted_data(self)
1757 nodemap_data = nodemaputil.persisted_data(self)
1758 if nodemap_data is not None:
1758 if nodemap_data is not None:
1759 docket = nodemap_data[0]
1759 docket = nodemap_data[0]
1760 if (
1760 if (
1761 len(d[0]) > docket.tip_rev
1761 len(d[0]) > docket.tip_rev
1762 and d[0][docket.tip_rev][7] == docket.tip_node
1762 and d[0][docket.tip_rev][7] == docket.tip_node
1763 ):
1763 ):
1764 # no changelog tampering
1764 # no changelog tampering
1765 self._nodemap_docket = docket
1765 self._nodemap_docket = docket
1766 index.update_nodemap_data(*nodemap_data)
1766 index.update_nodemap_data(*nodemap_data)
1767 except (ValueError, IndexError):
1767 except (ValueError, IndexError):
1768 raise error.RevlogError(
1768 raise error.RevlogError(
1769 _(b"index %s is corrupted") % self.display_id
1769 _(b"index %s is corrupted") % self.display_id
1770 )
1770 )
1771 self.index = index
1771 self.index = index
1772 # revnum -> (chain-length, sum-delta-length)
1772 # revnum -> (chain-length, sum-delta-length)
1773 self._chaininfocache = util.lrucachedict(500)
1773 self._chaininfocache = util.lrucachedict(500)
1774
1774
1775 return chunkcache
1775 return chunkcache
1776
1776
1777 def _load_inner(self, chunk_cache):
1777 def _load_inner(self, chunk_cache):
1778 if self._docket is None:
1778 if self._docket is None:
1779 default_compression_header = None
1779 default_compression_header = None
1780 else:
1780 else:
1781 default_compression_header = self._docket.default_compression_header
1781 default_compression_header = self._docket.default_compression_header
1782
1782
1783 self._inner = _InnerRevlog(
1783 self._inner = _InnerRevlog(
1784 opener=self.opener,
1784 opener=self.opener,
1785 index=self.index,
1785 index=self.index,
1786 index_file=self._indexfile,
1786 index_file=self._indexfile,
1787 data_file=self._datafile,
1787 data_file=self._datafile,
1788 sidedata_file=self._sidedatafile,
1788 sidedata_file=self._sidedatafile,
1789 inline=self._inline,
1789 inline=self._inline,
1790 data_config=self.data_config,
1790 data_config=self.data_config,
1791 delta_config=self.delta_config,
1791 delta_config=self.delta_config,
1792 feature_config=self.feature_config,
1792 feature_config=self.feature_config,
1793 chunk_cache=chunk_cache,
1793 chunk_cache=chunk_cache,
1794 default_compression_header=default_compression_header,
1794 default_compression_header=default_compression_header,
1795 )
1795 )
1796
1796
1797 def get_revlog(self):
1797 def get_revlog(self):
1798 """simple function to mirror API of other not-really-revlog API"""
1798 """simple function to mirror API of other not-really-revlog API"""
1799 return self
1799 return self
1800
1800
1801 @util.propertycache
1801 @util.propertycache
1802 def revlog_kind(self):
1802 def revlog_kind(self):
1803 return self.target[0]
1803 return self.target[0]
1804
1804
1805 @util.propertycache
1805 @util.propertycache
1806 def display_id(self):
1806 def display_id(self):
1807 """The public facing "ID" of the revlog that we use in message"""
1807 """The public facing "ID" of the revlog that we use in message"""
1808 if self.revlog_kind == KIND_FILELOG:
1808 if self.revlog_kind == KIND_FILELOG:
1809 # Reference the file without the "data/" prefix, so it is familiar
1809 # Reference the file without the "data/" prefix, so it is familiar
1810 # to the user.
1810 # to the user.
1811 return self.target[1]
1811 return self.target[1]
1812 else:
1812 else:
1813 return self.radix
1813 return self.radix
1814
1814
1815 def _datafp(self, mode=b'r'):
1815 def _datafp(self, mode=b'r'):
1816 """file object for the revlog's data file"""
1816 """file object for the revlog's data file"""
1817 return self.opener(self._datafile, mode=mode)
1817 return self.opener(self._datafile, mode=mode)
1818
1818
1819 def tiprev(self):
1819 def tiprev(self):
1820 return len(self.index) - 1
1820 return len(self.index) - 1
1821
1821
1822 def tip(self):
1822 def tip(self):
1823 return self.node(self.tiprev())
1823 return self.node(self.tiprev())
1824
1824
1825 def __contains__(self, rev):
1825 def __contains__(self, rev):
1826 return 0 <= rev < len(self)
1826 return 0 <= rev < len(self)
1827
1827
1828 def __len__(self):
1828 def __len__(self):
1829 return len(self.index)
1829 return len(self.index)
1830
1830
1831 def __iter__(self) -> Iterator[int]:
1831 def __iter__(self) -> Iterator[int]:
1832 return iter(range(len(self)))
1832 return iter(range(len(self)))
1833
1833
1834 def revs(self, start=0, stop=None):
1834 def revs(self, start=0, stop=None):
1835 """iterate over all rev in this revlog (from start to stop)"""
1835 """iterate over all rev in this revlog (from start to stop)"""
1836 return storageutil.iterrevs(len(self), start=start, stop=stop)
1836 return storageutil.iterrevs(len(self), start=start, stop=stop)
1837
1837
1838 def hasnode(self, node):
1838 def hasnode(self, node):
1839 try:
1839 try:
1840 self.rev(node)
1840 self.rev(node)
1841 return True
1841 return True
1842 except KeyError:
1842 except KeyError:
1843 return False
1843 return False
1844
1844
1845 def _candelta(self, baserev, rev):
1845 def _candelta(self, baserev, rev):
1846 """whether two revisions (baserev, rev) can be delta-ed or not"""
1846 """whether two revisions (baserev, rev) can be delta-ed or not"""
1847 # Disable delta if either rev requires a content-changing flag
1847 # Disable delta if either rev requires a content-changing flag
1848 # processor (ex. LFS). This is because such flag processor can alter
1848 # processor (ex. LFS). This is because such flag processor can alter
1849 # the rawtext content that the delta will be based on, and two clients
1849 # the rawtext content that the delta will be based on, and two clients
1850 # could have a same revlog node with different flags (i.e. different
1850 # could have a same revlog node with different flags (i.e. different
1851 # rawtext contents) and the delta could be incompatible.
1851 # rawtext contents) and the delta could be incompatible.
1852 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1852 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1853 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1853 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1854 ):
1854 ):
1855 return False
1855 return False
1856 return True
1856 return True
1857
1857
1858 def update_caches(self, transaction):
1858 def update_caches(self, transaction):
1859 """update on disk cache
1859 """update on disk cache
1860
1860
1861 If a transaction is passed, the update may be delayed until transaction
1861 If a transaction is passed, the update may be delayed until transaction
1862 commit."""
1862 commit."""
1863 if self._nodemap_file is not None:
1863 if self._nodemap_file is not None:
1864 if transaction is None:
1864 if transaction is None:
1865 nodemaputil.update_persistent_nodemap(self)
1865 nodemaputil.update_persistent_nodemap(self)
1866 else:
1866 else:
1867 nodemaputil.setup_persistent_nodemap(transaction, self)
1867 nodemaputil.setup_persistent_nodemap(transaction, self)
1868
1868
1869 def clearcaches(self, clear_persisted_data: bool = False) -> None:
1869 def clearcaches(self, clear_persisted_data: bool = False) -> None:
1870 """Clear in-memory caches"""
1870 """Clear in-memory caches"""
1871 self._chainbasecache.clear()
1871 self._chainbasecache.clear()
1872 self._inner.clear_cache()
1872 self._inner.clear_cache()
1873 self._pcache = {}
1873 self._pcache = {}
1874 self._nodemap_docket = None
1874 self._nodemap_docket = None
1875 self.index.clearcaches()
1875 self.index.clearcaches()
1876 # The Python code is the one responsible for validating the docket, so we
1876 # The Python code is the one responsible for validating the docket, so we
1877 # end up having to refresh it here.
1877 # end up having to refresh it here.
1878 use_nodemap = (
1878 use_nodemap = (
1879 not self._inline
1879 not self._inline
1880 and self._nodemap_file is not None
1880 and self._nodemap_file is not None
1881 and hasattr(self.index, 'update_nodemap_data')
1881 and hasattr(self.index, 'update_nodemap_data')
1882 )
1882 )
1883 if use_nodemap:
1883 if use_nodemap:
1884 nodemap_data = nodemaputil.persisted_data(self)
1884 nodemap_data = nodemaputil.persisted_data(self)
1885 if nodemap_data is not None:
1885 if nodemap_data is not None:
1886 self._nodemap_docket = nodemap_data[0]
1886 self._nodemap_docket = nodemap_data[0]
1887 self.index.update_nodemap_data(*nodemap_data)
1887 self.index.update_nodemap_data(*nodemap_data)
1888
1888
1889 def rev(self, node):
1889 def rev(self, node):
1890 """return the revision number associated with a <nodeid>"""
1890 """return the revision number associated with a <nodeid>"""
1891 try:
1891 try:
1892 return self.index.rev(node)
1892 return self.index.rev(node)
1893 except TypeError:
1893 except TypeError:
1894 raise
1894 raise
1895 except error.RevlogError:
1895 except error.RevlogError:
1896 # parsers.c radix tree lookup failed
1896 # parsers.c radix tree lookup failed
1897 if (
1897 if (
1898 node == self.nodeconstants.wdirid
1898 node == self.nodeconstants.wdirid
1899 or node in self.nodeconstants.wdirfilenodeids
1899 or node in self.nodeconstants.wdirfilenodeids
1900 ):
1900 ):
1901 raise error.WdirUnsupported
1901 raise error.WdirUnsupported
1902 raise error.LookupError(node, self.display_id, _(b'no node'))
1902 raise error.LookupError(node, self.display_id, _(b'no node'))
1903
1903
1904 # Accessors for index entries.
1904 # Accessors for index entries.
1905
1905
1906 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1906 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1907 # are flags.
1907 # are flags.
1908 def start(self, rev):
1908 def start(self, rev):
1909 return int(self.index[rev][0] >> 16)
1909 return int(self.index[rev][0] >> 16)
1910
1910
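# --- illustrative sketch (not part of the original file) -----------------
# As the comment above says, the first field of an index entry packs the
# data offset into the upper 48 bits and per-revision flags into the low 16
# bits; `start()` and `flags()` simply shift and mask.  A round-trip of that
# packing:
def pack_offset_flags(offset: int, flags: int) -> int:
    assert 0 <= flags <= 0xFFFF
    return (offset << 16) | flags

def unpack_offset_flags(value: int):
    return value >> 16, value & 0xFFFF

packed = pack_offset_flags(1234, 0x0001)
assert unpack_offset_flags(packed) == (1234, 0x0001)
# --------------------------------------------------------------------------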
1911 def sidedata_cut_off(self, rev):
1911 def sidedata_cut_off(self, rev):
1912 sd_cut_off = self.index[rev][8]
1912 sd_cut_off = self.index[rev][8]
1913 if sd_cut_off != 0:
1913 if sd_cut_off != 0:
1914 return sd_cut_off
1914 return sd_cut_off
1915 # This is some annoying dance, because entries without sidedata
1915 # This is some annoying dance, because entries without sidedata
1916 # currently use 0 as their offset (instead of previous-offset +
1916 # currently use 0 as their offset (instead of previous-offset +
1917 # previous-size).
1917 # previous-size).
1918 #
1918 #
1919 # We should reconsider this "sidedata → 0 sidedata_offset" policy.
1919 # We should reconsider this "sidedata → 0 sidedata_offset" policy.
1920 # In the meantime, we need this.
1920 # In the meantime, we need this.
1921 while 0 <= rev:
1921 while 0 <= rev:
1922 e = self.index[rev]
1922 e = self.index[rev]
1923 if e[9] != 0:
1923 if e[9] != 0:
1924 return e[8] + e[9]
1924 return e[8] + e[9]
1925 rev -= 1
1925 rev -= 1
1926 return 0
1926 return 0
1927
1927
1928 def flags(self, rev):
1928 def flags(self, rev):
1929 return self.index[rev][0] & 0xFFFF
1929 return self.index[rev][0] & 0xFFFF
1930
1930
1931 def length(self, rev):
1931 def length(self, rev):
1932 return self.index[rev][1]
1932 return self.index[rev][1]
1933
1933
1934 def sidedata_length(self, rev):
1934 def sidedata_length(self, rev):
1935 if not self.feature_config.has_side_data:
1935 if not self.feature_config.has_side_data:
1936 return 0
1936 return 0
1937 return self.index[rev][9]
1937 return self.index[rev][9]
1938
1938
1939 def rawsize(self, rev):
1939 def rawsize(self, rev):
1940 """return the length of the uncompressed text for a given revision"""
1940 """return the length of the uncompressed text for a given revision"""
1941 l = self.index[rev][2]
1941 l = self.index[rev][2]
1942 if l >= 0:
1942 if l >= 0:
1943 return l
1943 return l
1944
1944
1945 t = self.rawdata(rev)
1945 t = self.rawdata(rev)
1946 return len(t)
1946 return len(t)
1947
1947
1948 def size(self, rev):
1948 def size(self, rev):
1949 """length of non-raw text (processed by a "read" flag processor)"""
1949 """length of non-raw text (processed by a "read" flag processor)"""
1950 # fast path: if no "read" flag processor could change the content,
1950 # fast path: if no "read" flag processor could change the content,
1951 # size is rawsize. note: ELLIPSIS is known to not change the content.
1951 # size is rawsize. note: ELLIPSIS is known to not change the content.
1952 flags = self.flags(rev)
1952 flags = self.flags(rev)
1953 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1953 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1954 return self.rawsize(rev)
1954 return self.rawsize(rev)
1955
1955
1956 return len(self.revision(rev))
1956 return len(self.revision(rev))
1957
1957
1958 def fast_rank(self, rev):
1958 def fast_rank(self, rev):
1959 """Return the rank of a revision if already known, or None otherwise.
1959 """Return the rank of a revision if already known, or None otherwise.
1960
1960
1961 The rank of a revision is the size of the sub-graph it defines as a
1961 The rank of a revision is the size of the sub-graph it defines as a
1962 head. Equivalently, the rank of a revision `r` is the size of the set
1962 head. Equivalently, the rank of a revision `r` is the size of the set
1963 `ancestors(r)`, `r` included.
1963 `ancestors(r)`, `r` included.
1964
1964
1965 This method returns the rank retrieved from the revlog in constant
1965 This method returns the rank retrieved from the revlog in constant
1966 time. It makes no attempt at computing unknown values for versions of
1966 time. It makes no attempt at computing unknown values for versions of
1967 the revlog which do not persist the rank.
1967 the revlog which do not persist the rank.
1968 """
1968 """
1969 rank = self.index[rev][ENTRY_RANK]
1969 rank = self.index[rev][ENTRY_RANK]
1970 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1970 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1971 return None
1971 return None
1972 if rev == nullrev:
1972 if rev == nullrev:
1973 return 0 # convention
1973 return 0 # convention
1974 return rank
1974 return rank
1975
1975
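# --- illustrative sketch (not part of the original file) -----------------
# `fast_rank` above returns a stored value equal to len(ancestors(r)), with
# r included.  The brute-force computation below makes that definition
# concrete on a small hand-built DAG (parents given as a dict, -1 standing
# in for nullrev); it is not how the revlog obtains the value.
def rank(rev, parents):
    seen = set()
    stack = [rev]
    while stack:
        r = stack.pop()
        if r == -1 or r in seen:
            continue
        seen.add(r)
        stack.extend(parents[r])
    return len(seen)

parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}
assert rank(3, parents) == 4  # 3, 1, 2 and 0
# --------------------------------------------------------------------------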
1976 def chainbase(self, rev):
1976 def chainbase(self, rev):
1977 base = self._chainbasecache.get(rev)
1977 base = self._chainbasecache.get(rev)
1978 if base is not None:
1978 if base is not None:
1979 return base
1979 return base
1980
1980
1981 index = self.index
1981 index = self.index
1982 iterrev = rev
1982 iterrev = rev
1983 base = index[iterrev][3]
1983 base = index[iterrev][3]
1984 while base != iterrev:
1984 while base != iterrev:
1985 iterrev = base
1985 iterrev = base
1986 base = index[iterrev][3]
1986 base = index[iterrev][3]
1987
1987
1988 self._chainbasecache[rev] = base
1988 self._chainbasecache[rev] = base
1989 return base
1989 return base
1990
1990
1991 def linkrev(self, rev):
1991 def linkrev(self, rev):
1992 return self.index[rev][4]
1992 return self.index[rev][4]
1993
1993
1994 def parentrevs(self, rev):
1994 def parentrevs(self, rev):
1995 try:
1995 try:
1996 entry = self.index[rev]
1996 entry = self.index[rev]
1997 except IndexError:
1997 except IndexError:
1998 if rev == wdirrev:
1998 if rev == wdirrev:
1999 raise error.WdirUnsupported
1999 raise error.WdirUnsupported
2000 raise
2000 raise
2001
2001
2002 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
2002 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
2003 return entry[6], entry[5]
2003 return entry[6], entry[5]
2004 else:
2004 else:
2005 return entry[5], entry[6]
2005 return entry[5], entry[6]
2006
2006
2007 # fast parentrevs(rev) where rev isn't filtered
2007 # fast parentrevs(rev) where rev isn't filtered
2008 _uncheckedparentrevs = parentrevs
2008 _uncheckedparentrevs = parentrevs
2009
2009
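# --- illustrative sketch (not part of the original file) -----------------
# `parentrevs` above applies "canonical parent order": when the feature is
# enabled and the first stored parent is null, the pair is swapped so that a
# real parent is always reported first.  The same rule in isolation, with
# -1 standing in for nullrev:
def canonical_parents(p1: int, p2: int, canonical_order: bool = True):
    if canonical_order and p1 == -1:
        return p2, p1
    return p1, p2

assert canonical_parents(-1, 5) == (5, -1)
assert canonical_parents(4, 5) == (4, 5)
assert canonical_parents(-1, 5, canonical_order=False) == (-1, 5)
# --------------------------------------------------------------------------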
2010 def node(self, rev):
2010 def node(self, rev):
2011 try:
2011 try:
2012 return self.index[rev][7]
2012 return self.index[rev][7]
2013 except IndexError:
2013 except IndexError:
2014 if rev == wdirrev:
2014 if rev == wdirrev:
2015 raise error.WdirUnsupported
2015 raise error.WdirUnsupported
2016 raise
2016 raise
2017
2017
2018 # Derived from index values.
2018 # Derived from index values.
2019
2019
2020 def end(self, rev):
2020 def end(self, rev):
2021 return self.start(rev) + self.length(rev)
2021 return self.start(rev) + self.length(rev)
2022
2022
2023 def parents(self, node):
2023 def parents(self, node):
2024 i = self.index
2024 i = self.index
2025 d = i[self.rev(node)]
2025 d = i[self.rev(node)]
2026 # inline node() to avoid function call overhead
2026 # inline node() to avoid function call overhead
2027 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
2027 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
2028 return i[d[6]][7], i[d[5]][7]
2028 return i[d[6]][7], i[d[5]][7]
2029 else:
2029 else:
2030 return i[d[5]][7], i[d[6]][7]
2030 return i[d[5]][7], i[d[6]][7]
2031
2031
2032 def chainlen(self, rev):
2032 def chainlen(self, rev):
2033 return self._chaininfo(rev)[0]
2033 return self._chaininfo(rev)[0]
2034
2034
2035 def _chaininfo(self, rev):
2035 def _chaininfo(self, rev):
2036 chaininfocache = self._chaininfocache
2036 chaininfocache = self._chaininfocache
2037 if rev in chaininfocache:
2037 if rev in chaininfocache:
2038 return chaininfocache[rev]
2038 return chaininfocache[rev]
2039 index = self.index
2039 index = self.index
2040 generaldelta = self.delta_config.general_delta
2040 generaldelta = self.delta_config.general_delta
2041 iterrev = rev
2041 iterrev = rev
2042 e = index[iterrev]
2042 e = index[iterrev]
2043 clen = 0
2043 clen = 0
2044 compresseddeltalen = 0
2044 compresseddeltalen = 0
2045 while iterrev != e[3]:
2045 while iterrev != e[3]:
2046 clen += 1
2046 clen += 1
2047 compresseddeltalen += e[1]
2047 compresseddeltalen += e[1]
2048 if generaldelta:
2048 if generaldelta:
2049 iterrev = e[3]
2049 iterrev = e[3]
2050 else:
2050 else:
2051 iterrev -= 1
2051 iterrev -= 1
2052 if iterrev in chaininfocache:
2052 if iterrev in chaininfocache:
2053 t = chaininfocache[iterrev]
2053 t = chaininfocache[iterrev]
2054 clen += t[0]
2054 clen += t[0]
2055 compresseddeltalen += t[1]
2055 compresseddeltalen += t[1]
2056 break
2056 break
2057 e = index[iterrev]
2057 e = index[iterrev]
2058 else:
2058 else:
2059 # Add text length of base since decompressing that also takes
2059 # Add text length of base since decompressing that also takes
2060 # work. For cache hits the length is already included.
2060 # work. For cache hits the length is already included.
2061 compresseddeltalen += e[1]
2061 compresseddeltalen += e[1]
2062 r = (clen, compresseddeltalen)
2062 r = (clen, compresseddeltalen)
2063 chaininfocache[rev] = r
2063 chaininfocache[rev] = r
2064 return r
2064 return r
2065
2065
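
The walk above measures two things: how many deltas sit between a revision and its base snapshot, and how many compressed bytes must be read to rebuild it. A self-contained sketch over a hypothetical index of (compressed_len, delta_base) pairs, assuming general delta so the chain follows each entry's recorded base:

def chaininfo(index, rev):
    """(chain length, compressed bytes to read) for a toy index; an entry
    whose delta base equals its own revision is a full snapshot."""
    clen = 0
    size = 0
    r = rev
    length, base = index[r]
    while base != r:
        clen += 1
        size += length
        r = base
        length, base = index[r]
    size += length  # the base snapshot has to be read as well
    return clen, size

# rev 0 is a snapshot; revs 1 and 2 are deltas against their predecessor
index = [(100, 0), (10, 0), (12, 1)]
assert chaininfo(index, 2) == (2, 122)
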
2066 def _deltachain(self, rev, stoprev=None):
2066 def _deltachain(self, rev, stoprev=None):
2067 return self._inner._deltachain(rev, stoprev=stoprev)
2067 return self._inner._deltachain(rev, stoprev=stoprev)
2068
2068
2069 def ancestors(self, revs, stoprev=0, inclusive=False):
2069 def ancestors(self, revs, stoprev=0, inclusive=False):
2070 """Generate the ancestors of 'revs' in reverse revision order.
2070 """Generate the ancestors of 'revs' in reverse revision order.
2071 Does not generate revs lower than stoprev.
2071 Does not generate revs lower than stoprev.
2072
2072
2073 See the documentation for ancestor.lazyancestors for more details."""
2073 See the documentation for ancestor.lazyancestors for more details."""
2074
2074
2075 # first, make sure start revisions aren't filtered
2075 # first, make sure start revisions aren't filtered
2076 revs = list(revs)
2076 revs = list(revs)
2077 checkrev = self.node
2077 checkrev = self.node
2078 for r in revs:
2078 for r in revs:
2079 checkrev(r)
2079 checkrev(r)
2080 # and we're sure ancestors aren't filtered as well
2080 # and we're sure ancestors aren't filtered as well
2081
2081
2082 if rustancestor is not None and self.index.rust_ext_compat:
2082 if rustancestor is not None and self.index.rust_ext_compat:
2083 lazyancestors = rustancestor.LazyAncestors
2083 lazyancestors = rustancestor.LazyAncestors
2084 arg = self.index
2084 arg = self.index
2085 else:
2085 else:
2086 lazyancestors = ancestor.lazyancestors
2086 lazyancestors = ancestor.lazyancestors
2087 arg = self._uncheckedparentrevs
2087 arg = self._uncheckedparentrevs
2088 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2088 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2089
2089
2090 def descendants(self, revs):
2090 def descendants(self, revs):
2091 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2091 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2092
2092
2093 def findcommonmissing(self, common=None, heads=None):
2093 def findcommonmissing(self, common=None, heads=None):
2094 """Return a tuple of the ancestors of common and the ancestors of heads
2094 """Return a tuple of the ancestors of common and the ancestors of heads
2095 that are not ancestors of common. In revset terminology, we return the
2095 that are not ancestors of common. In revset terminology, we return the
2096 tuple:
2096 tuple:
2097
2097
2098 ::common, (::heads) - (::common)
2098 ::common, (::heads) - (::common)
2099
2099
2100 The list is sorted by revision number, meaning it is
2100 The list is sorted by revision number, meaning it is
2101 topologically sorted.
2101 topologically sorted.
2102
2102
2103 'heads' and 'common' are both lists of node IDs. If heads is
2103 'heads' and 'common' are both lists of node IDs. If heads is
2104 not supplied, uses all of the revlog's heads. If common is not
2104 not supplied, uses all of the revlog's heads. If common is not
2105 supplied, uses nullid."""
2105 supplied, uses nullid."""
2106 if common is None:
2106 if common is None:
2107 common = [self.nullid]
2107 common = [self.nullid]
2108 if heads is None:
2108 if heads is None:
2109 heads = self.heads()
2109 heads = self.heads()
2110
2110
2111 common = [self.rev(n) for n in common]
2111 common = [self.rev(n) for n in common]
2112 heads = [self.rev(n) for n in heads]
2112 heads = [self.rev(n) for n in heads]
2113
2113
2114 # we want the ancestors, but inclusive
2114 # we want the ancestors, but inclusive
2115 class lazyset:
2115 class lazyset:
2116 def __init__(self, lazyvalues):
2116 def __init__(self, lazyvalues):
2117 self.addedvalues = set()
2117 self.addedvalues = set()
2118 self.lazyvalues = lazyvalues
2118 self.lazyvalues = lazyvalues
2119
2119
2120 def __contains__(self, value):
2120 def __contains__(self, value):
2121 return value in self.addedvalues or value in self.lazyvalues
2121 return value in self.addedvalues or value in self.lazyvalues
2122
2122
2123 def __iter__(self):
2123 def __iter__(self):
2124 added = self.addedvalues
2124 added = self.addedvalues
2125 for r in added:
2125 for r in added:
2126 yield r
2126 yield r
2127 for r in self.lazyvalues:
2127 for r in self.lazyvalues:
2128 if r not in added:
2128 if r not in added:
2129 yield r
2129 yield r
2130
2130
2131 def add(self, value):
2131 def add(self, value):
2132 self.addedvalues.add(value)
2132 self.addedvalues.add(value)
2133
2133
2134 def update(self, values):
2134 def update(self, values):
2135 self.addedvalues.update(values)
2135 self.addedvalues.update(values)
2136
2136
2137 has = lazyset(self.ancestors(common))
2137 has = lazyset(self.ancestors(common))
2138 has.add(nullrev)
2138 has.add(nullrev)
2139 has.update(common)
2139 has.update(common)
2140
2140
2141 # take all ancestors from heads that aren't in has
2141 # take all ancestors from heads that aren't in has
2142 missing = set()
2142 missing = set()
2143 visit = collections.deque(r for r in heads if r not in has)
2143 visit = collections.deque(r for r in heads if r not in has)
2144 while visit:
2144 while visit:
2145 r = visit.popleft()
2145 r = visit.popleft()
2146 if r in missing:
2146 if r in missing:
2147 continue
2147 continue
2148 else:
2148 else:
2149 missing.add(r)
2149 missing.add(r)
2150 for p in self.parentrevs(r):
2150 for p in self.parentrevs(r):
2151 if p not in has:
2151 if p not in has:
2152 visit.append(p)
2152 visit.append(p)
2153 missing = list(missing)
2153 missing = list(missing)
2154 missing.sort()
2154 missing.sort()
2155 return has, [self.node(miss) for miss in missing]
2155 return has, [self.node(miss) for miss in missing]
2156
2156
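
The `missing` computation above is a breadth-first walk from the heads that stops as soon as it hits something already known to be an ancestor of `common`. The same idea in a compact, self-contained form on a toy DAG (hypothetical names, plain revision numbers):

import collections

def find_missing(parentrevs, has, heads):
    """Revisions reachable from `heads` that are not in `has`, sorted."""
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        for p in parentrevs(r):
            if p not in has and p not in missing:
                visit.append(p)
    return sorted(missing)

parents = {0: [], 1: [0], 2: [1], 3: [1], 4: [3]}
has = {0, 1, 2}  # ::common for, say, common = [2]
assert find_missing(parents.__getitem__, has, [4]) == [3, 4]
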
2157 def incrementalmissingrevs(self, common=None):
2157 def incrementalmissingrevs(self, common=None):
2158 """Return an object that can be used to incrementally compute the
2158 """Return an object that can be used to incrementally compute the
2159 revision numbers of the ancestors of arbitrary sets that are not
2159 revision numbers of the ancestors of arbitrary sets that are not
2160 ancestors of common. This is an ancestor.incrementalmissingancestors
2160 ancestors of common. This is an ancestor.incrementalmissingancestors
2161 object.
2161 object.
2162
2162
2163 'common' is a list of revision numbers. If common is not supplied, uses
2163 'common' is a list of revision numbers. If common is not supplied, uses
2164 nullrev.
2164 nullrev.
2165 """
2165 """
2166 if common is None:
2166 if common is None:
2167 common = [nullrev]
2167 common = [nullrev]
2168
2168
2169 if rustancestor is not None and self.index.rust_ext_compat:
2169 if rustancestor is not None and self.index.rust_ext_compat:
2170 return rustancestor.MissingAncestors(self.index, common)
2170 return rustancestor.MissingAncestors(self.index, common)
2171 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2171 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2172
2172
2173 def findmissingrevs(self, common=None, heads=None):
2173 def findmissingrevs(self, common=None, heads=None):
2174 """Return the revision numbers of the ancestors of heads that
2174 """Return the revision numbers of the ancestors of heads that
2175 are not ancestors of common.
2175 are not ancestors of common.
2176
2176
2177 More specifically, return a list of revision numbers corresponding to
2177 More specifically, return a list of revision numbers corresponding to
2178 nodes N such that every N satisfies the following constraints:
2178 nodes N such that every N satisfies the following constraints:
2179
2179
2180 1. N is an ancestor of some node in 'heads'
2180 1. N is an ancestor of some node in 'heads'
2181 2. N is not an ancestor of any node in 'common'
2181 2. N is not an ancestor of any node in 'common'
2182
2182
2183 The list is sorted by revision number, meaning it is
2183 The list is sorted by revision number, meaning it is
2184 topologically sorted.
2184 topologically sorted.
2185
2185
2186 'heads' and 'common' are both lists of revision numbers. If heads is
2186 'heads' and 'common' are both lists of revision numbers. If heads is
2187 not supplied, uses all of the revlog's heads. If common is not
2187 not supplied, uses all of the revlog's heads. If common is not
2188 supplied, uses nullid."""
2188 supplied, uses nullid."""
2189 if common is None:
2189 if common is None:
2190 common = [nullrev]
2190 common = [nullrev]
2191 if heads is None:
2191 if heads is None:
2192 heads = self.headrevs()
2192 heads = self.headrevs()
2193
2193
2194 inc = self.incrementalmissingrevs(common=common)
2194 inc = self.incrementalmissingrevs(common=common)
2195 return inc.missingancestors(heads)
2195 return inc.missingancestors(heads)
2196
2196
2197 def findmissing(self, common=None, heads=None):
2197 def findmissing(self, common=None, heads=None):
2198 """Return the ancestors of heads that are not ancestors of common.
2198 """Return the ancestors of heads that are not ancestors of common.
2199
2199
2200 More specifically, return a list of nodes N such that every N
2200 More specifically, return a list of nodes N such that every N
2201 satisfies the following constraints:
2201 satisfies the following constraints:
2202
2202
2203 1. N is an ancestor of some node in 'heads'
2203 1. N is an ancestor of some node in 'heads'
2204 2. N is not an ancestor of any node in 'common'
2204 2. N is not an ancestor of any node in 'common'
2205
2205
2206 The list is sorted by revision number, meaning it is
2206 The list is sorted by revision number, meaning it is
2207 topologically sorted.
2207 topologically sorted.
2208
2208
2209 'heads' and 'common' are both lists of node IDs. If heads is
2209 'heads' and 'common' are both lists of node IDs. If heads is
2210 not supplied, uses all of the revlog's heads. If common is not
2210 not supplied, uses all of the revlog's heads. If common is not
2211 supplied, uses nullid."""
2211 supplied, uses nullid."""
2212 if common is None:
2212 if common is None:
2213 common = [self.nullid]
2213 common = [self.nullid]
2214 if heads is None:
2214 if heads is None:
2215 heads = self.heads()
2215 heads = self.heads()
2216
2216
2217 common = [self.rev(n) for n in common]
2217 common = [self.rev(n) for n in common]
2218 heads = [self.rev(n) for n in heads]
2218 heads = [self.rev(n) for n in heads]
2219
2219
2220 inc = self.incrementalmissingrevs(common=common)
2220 inc = self.incrementalmissingrevs(common=common)
2221 return [self.node(r) for r in inc.missingancestors(heads)]
2221 return [self.node(r) for r in inc.missingancestors(heads)]
2222
2222
2223 def nodesbetween(self, roots=None, heads=None):
2223 def nodesbetween(self, roots=None, heads=None):
2224 """Return a topological path from 'roots' to 'heads'.
2224 """Return a topological path from 'roots' to 'heads'.
2225
2225
2226 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2226 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2227 topologically sorted list of all nodes N that satisfy both of
2227 topologically sorted list of all nodes N that satisfy both of
2228 these constraints:
2228 these constraints:
2229
2229
2230 1. N is a descendant of some node in 'roots'
2230 1. N is a descendant of some node in 'roots'
2231 2. N is an ancestor of some node in 'heads'
2231 2. N is an ancestor of some node in 'heads'
2232
2232
2233 Every node is considered to be both a descendant and an ancestor
2233 Every node is considered to be both a descendant and an ancestor
2234 of itself, so every reachable node in 'roots' and 'heads' will be
2234 of itself, so every reachable node in 'roots' and 'heads' will be
2235 included in 'nodes'.
2235 included in 'nodes'.
2236
2236
2237 'outroots' is the list of reachable nodes in 'roots', i.e., the
2237 'outroots' is the list of reachable nodes in 'roots', i.e., the
2238 subset of 'roots' that is returned in 'nodes'. Likewise,
2238 subset of 'roots' that is returned in 'nodes'. Likewise,
2239 'outheads' is the subset of 'heads' that is also in 'nodes'.
2239 'outheads' is the subset of 'heads' that is also in 'nodes'.
2240
2240
2241 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2241 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2242 unspecified, uses nullid as the only root. If 'heads' is
2242 unspecified, uses nullid as the only root. If 'heads' is
2243 unspecified, uses list of all of the revlog's heads."""
2243 unspecified, uses list of all of the revlog's heads."""
2244 nonodes = ([], [], [])
2244 nonodes = ([], [], [])
2245 if roots is not None:
2245 if roots is not None:
2246 roots = list(roots)
2246 roots = list(roots)
2247 if not roots:
2247 if not roots:
2248 return nonodes
2248 return nonodes
2249 lowestrev = min([self.rev(n) for n in roots])
2249 lowestrev = min([self.rev(n) for n in roots])
2250 else:
2250 else:
2251 roots = [self.nullid] # Everybody's a descendant of nullid
2251 roots = [self.nullid] # Everybody's a descendant of nullid
2252 lowestrev = nullrev
2252 lowestrev = nullrev
2253 if (lowestrev == nullrev) and (heads is None):
2253 if (lowestrev == nullrev) and (heads is None):
2254 # We want _all_ the nodes!
2254 # We want _all_ the nodes!
2255 return (
2255 return (
2256 [self.node(r) for r in self],
2256 [self.node(r) for r in self],
2257 [self.nullid],
2257 [self.nullid],
2258 list(self.heads()),
2258 list(self.heads()),
2259 )
2259 )
2260 if heads is None:
2260 if heads is None:
2261 # All nodes are ancestors, so the latest ancestor is the last
2261 # All nodes are ancestors, so the latest ancestor is the last
2262 # node.
2262 # node.
2263 highestrev = len(self) - 1
2263 highestrev = len(self) - 1
2264 # Set ancestors to None to signal that every node is an ancestor.
2264 # Set ancestors to None to signal that every node is an ancestor.
2265 ancestors = None
2265 ancestors = None
2266 # Set heads to an empty dictionary for later discovery of heads
2266 # Set heads to an empty dictionary for later discovery of heads
2267 heads = {}
2267 heads = {}
2268 else:
2268 else:
2269 heads = list(heads)
2269 heads = list(heads)
2270 if not heads:
2270 if not heads:
2271 return nonodes
2271 return nonodes
2272 ancestors = set()
2272 ancestors = set()
2273 # Turn heads into a dictionary so we can remove 'fake' heads.
2273 # Turn heads into a dictionary so we can remove 'fake' heads.
2274 # Also, later we will be using it to filter out the heads we can't
2274 # Also, later we will be using it to filter out the heads we can't
2275 # find from roots.
2275 # find from roots.
2276 heads = dict.fromkeys(heads, False)
2276 heads = dict.fromkeys(heads, False)
2277 # Start at the top and keep marking parents until we're done.
2277 # Start at the top and keep marking parents until we're done.
2278 nodestotag = set(heads)
2278 nodestotag = set(heads)
2279 # Remember where the top was so we can use it as a limit later.
2279 # Remember where the top was so we can use it as a limit later.
2280 highestrev = max([self.rev(n) for n in nodestotag])
2280 highestrev = max([self.rev(n) for n in nodestotag])
2281 while nodestotag:
2281 while nodestotag:
2282 # grab a node to tag
2282 # grab a node to tag
2283 n = nodestotag.pop()
2283 n = nodestotag.pop()
2284 # Never tag nullid
2284 # Never tag nullid
2285 if n == self.nullid:
2285 if n == self.nullid:
2286 continue
2286 continue
2287 # A node's revision number represents its place in a
2287 # A node's revision number represents its place in a
2288 # topologically sorted list of nodes.
2288 # topologically sorted list of nodes.
2289 r = self.rev(n)
2289 r = self.rev(n)
2290 if r >= lowestrev:
2290 if r >= lowestrev:
2291 if n not in ancestors:
2291 if n not in ancestors:
2292 # If we are possibly a descendant of one of the roots
2292 # If we are possibly a descendant of one of the roots
2293 # and we haven't already been marked as an ancestor
2293 # and we haven't already been marked as an ancestor
2294 ancestors.add(n) # Mark as ancestor
2294 ancestors.add(n) # Mark as ancestor
2295 # Add non-nullid parents to list of nodes to tag.
2295 # Add non-nullid parents to list of nodes to tag.
2296 nodestotag.update(
2296 nodestotag.update(
2297 [p for p in self.parents(n) if p != self.nullid]
2297 [p for p in self.parents(n) if p != self.nullid]
2298 )
2298 )
2299 elif n in heads: # We've seen it before, is it a fake head?
2299 elif n in heads: # We've seen it before, is it a fake head?
2300 # So it is, real heads should not be the ancestors of
2300 # So it is, real heads should not be the ancestors of
2301 # any other heads.
2301 # any other heads.
2302 heads.pop(n)
2302 heads.pop(n)
2303 if not ancestors:
2303 if not ancestors:
2304 return nonodes
2304 return nonodes
2305 # Now that we have our set of ancestors, we want to remove any
2305 # Now that we have our set of ancestors, we want to remove any
2306 # roots that are not ancestors.
2306 # roots that are not ancestors.
2307
2307
2308 # If one of the roots was nullid, everything is included anyway.
2308 # If one of the roots was nullid, everything is included anyway.
2309 if lowestrev > nullrev:
2309 if lowestrev > nullrev:
2310 # But, since we weren't, let's recompute the lowest rev to not
2310 # But, since we weren't, let's recompute the lowest rev to not
2311 # include roots that aren't ancestors.
2311 # include roots that aren't ancestors.
2312
2312
2313 # Filter out roots that aren't ancestors of heads
2313 # Filter out roots that aren't ancestors of heads
2314 roots = [root for root in roots if root in ancestors]
2314 roots = [root for root in roots if root in ancestors]
2315 # Recompute the lowest revision
2315 # Recompute the lowest revision
2316 if roots:
2316 if roots:
2317 lowestrev = min([self.rev(root) for root in roots])
2317 lowestrev = min([self.rev(root) for root in roots])
2318 else:
2318 else:
2319 # No more roots? Return empty list
2319 # No more roots? Return empty list
2320 return nonodes
2320 return nonodes
2321 else:
2321 else:
2322 # We are descending from nullid, and don't need to care about
2322 # We are descending from nullid, and don't need to care about
2323 # any other roots.
2323 # any other roots.
2324 lowestrev = nullrev
2324 lowestrev = nullrev
2325 roots = [self.nullid]
2325 roots = [self.nullid]
2326 # Transform our roots list into a set.
2326 # Transform our roots list into a set.
2327 descendants = set(roots)
2327 descendants = set(roots)
2328 # Also, keep the original roots so we can filter out roots that aren't
2328 # Also, keep the original roots so we can filter out roots that aren't
2329 # 'real' roots (i.e. are descended from other roots).
2329 # 'real' roots (i.e. are descended from other roots).
2330 roots = descendants.copy()
2330 roots = descendants.copy()
2331 # Our topologically sorted list of output nodes.
2331 # Our topologically sorted list of output nodes.
2332 orderedout = []
2332 orderedout = []
2333 # Don't start at nullid since we don't want nullid in our output list,
2333 # Don't start at nullid since we don't want nullid in our output list,
2334 # and if nullid shows up in descendants, empty parents will look like
2334 # and if nullid shows up in descendants, empty parents will look like
2335 # they're descendants.
2335 # they're descendants.
2336 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2336 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2337 n = self.node(r)
2337 n = self.node(r)
2338 isdescendant = False
2338 isdescendant = False
2339 if lowestrev == nullrev: # Everybody is a descendant of nullid
2339 if lowestrev == nullrev: # Everybody is a descendant of nullid
2340 isdescendant = True
2340 isdescendant = True
2341 elif n in descendants:
2341 elif n in descendants:
2342 # n is already a descendant
2342 # n is already a descendant
2343 isdescendant = True
2343 isdescendant = True
2344 # This check only needs to be done here because all the roots
2344 # This check only needs to be done here because all the roots
2345 # will start being marked as descendants before the loop.
2345 # will start being marked as descendants before the loop.
2346 if n in roots:
2346 if n in roots:
2347 # If n was a root, check if it's a 'real' root.
2347 # If n was a root, check if it's a 'real' root.
2348 p = tuple(self.parents(n))
2348 p = tuple(self.parents(n))
2349 # If any of its parents are descendants, it's not a root.
2349 # If any of its parents are descendants, it's not a root.
2350 if (p[0] in descendants) or (p[1] in descendants):
2350 if (p[0] in descendants) or (p[1] in descendants):
2351 roots.remove(n)
2351 roots.remove(n)
2352 else:
2352 else:
2353 p = tuple(self.parents(n))
2353 p = tuple(self.parents(n))
2354 # A node is a descendant if either of its parents is a
2354 # A node is a descendant if either of its parents is a
2355 # descendant. (We seeded the descendants set with the roots
2355 # descendant. (We seeded the descendants set with the roots
2356 # up there, remember?)
2356 # up there, remember?)
2357 if (p[0] in descendants) or (p[1] in descendants):
2357 if (p[0] in descendants) or (p[1] in descendants):
2358 descendants.add(n)
2358 descendants.add(n)
2359 isdescendant = True
2359 isdescendant = True
2360 if isdescendant and ((ancestors is None) or (n in ancestors)):
2360 if isdescendant and ((ancestors is None) or (n in ancestors)):
2361 # Only include nodes that are both descendants and ancestors.
2361 # Only include nodes that are both descendants and ancestors.
2362 orderedout.append(n)
2362 orderedout.append(n)
2363 if (ancestors is not None) and (n in heads):
2363 if (ancestors is not None) and (n in heads):
2364 # We're trying to figure out which heads are reachable
2364 # We're trying to figure out which heads are reachable
2365 # from roots.
2365 # from roots.
2366 # Mark this head as having been reached
2366 # Mark this head as having been reached
2367 heads[n] = True
2367 heads[n] = True
2368 elif ancestors is None:
2368 elif ancestors is None:
2369 # Otherwise, we're trying to discover the heads.
2369 # Otherwise, we're trying to discover the heads.
2370 # Assume this is a head because if it isn't, the next step
2370 # Assume this is a head because if it isn't, the next step
2371 # will eventually remove it.
2371 # will eventually remove it.
2372 heads[n] = True
2372 heads[n] = True
2373 # But, obviously its parents aren't.
2373 # But, obviously its parents aren't.
2374 for p in self.parents(n):
2374 for p in self.parents(n):
2375 heads.pop(p, None)
2375 heads.pop(p, None)
2376 heads = [head for head, flag in heads.items() if flag]
2376 heads = [head for head, flag in heads.items() if flag]
2377 roots = list(roots)
2377 roots = list(roots)
2378 assert orderedout
2378 assert orderedout
2379 assert roots
2379 assert roots
2380 assert heads
2380 assert heads
2381 return (orderedout, roots, heads)
2381 return (orderedout, roots, heads)
2382
2382
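
Underneath the node-based bookkeeping, `nodesbetween` computes the intersection of the descendants of `roots` with the ancestors of `heads`, in revision-number (hence topological) order. A compact rendering of that set relation on a toy DAG, hypothetical names only:

def nodes_between(parents, nrevs, roots, heads):
    """Revisions that are descendants of `roots` and ancestors of `heads`."""
    ancestors = set()
    stack = list(heads)
    while stack:
        r = stack.pop()
        if r not in ancestors:
            ancestors.add(r)
            stack.extend(parents[r])
    descendants = set(roots)
    for r in range(min(roots), nrevs):  # revision order is topological
        if r in descendants or any(p in descendants for p in parents[r]):
            descendants.add(r)
    return sorted(descendants & ancestors)

parents = {0: [], 1: [0], 2: [1], 3: [1], 4: [2, 3]}
assert nodes_between(parents, 5, roots=[1], heads=[4]) == [1, 2, 3, 4]
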
2383 def headrevs(self, revs=None):
2383 def headrevs(self, revs=None, stop_rev=None):
2384 if revs is None:
2384 if revs is None:
2385 return self.index.headrevs()
2385 excluded = None
2386 if stop_rev is not None and stop_rev < len(self.index):
2387 # We should let the native code handle it, but that is a
2388 # simple enough first step.
2389 excluded = range(stop_rev, len(self.index))
2390 return self.index.headrevs(excluded)
2391 assert stop_rev is None
2386 if rustdagop is not None and self.index.rust_ext_compat:
2392 if rustdagop is not None and self.index.rust_ext_compat:
2387 return rustdagop.headrevs(self.index, revs)
2393 return rustdagop.headrevs(self.index, revs)
2388 return dagop.headrevs(revs, self._uncheckedparentrevs)
2394 return dagop.headrevs(revs, self._uncheckedparentrevs)
2389
2395
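
The new `stop_rev` argument asks for the heads of the repository as it existed before `stop_rev`, i.e. every revision greater than or equal to `stop_rev` is treated as absent. The pure-Python toy below only illustrates those semantics; it is not the native index implementation the method defers to above:

def toy_headrevs(parentrevs, nrevs, stop_rev=None):
    """Revisions without children, ignoring revs >= stop_rev (toy model)."""
    if stop_rev is None:
        stop_rev = nrevs
    heads = set(range(stop_rev))
    for r in range(stop_rev):
        for p in parentrevs(r):
            heads.discard(p)
    return sorted(heads)

parents = {0: [], 1: [0], 2: [1], 3: [1]}
assert toy_headrevs(parents.__getitem__, 4) == [2, 3]
assert toy_headrevs(parents.__getitem__, 4, stop_rev=3) == [2]  # rev 3 not born yet
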
2390 def headrevsdiff(self, start, stop):
2396 def headrevsdiff(self, start, stop):
2391 try:
2397 try:
2392 return self.index.headrevsdiff(start, stop)
2398 return self.index.headrevsdiff(start, stop)
2393 except AttributeError:
2399 except AttributeError:
2394 return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
2400 return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
2395
2401
2396 def computephases(self, roots):
2402 def computephases(self, roots):
2397 return self.index.computephasesmapsets(roots)
2403 return self.index.computephasesmapsets(roots)
2398
2404
2399 def _head_node_ids(self):
2405 def _head_node_ids(self):
2400 try:
2406 try:
2401 return self.index.head_node_ids()
2407 return self.index.head_node_ids()
2402 except AttributeError:
2408 except AttributeError:
2403 return [self.node(r) for r in self.headrevs()]
2409 return [self.node(r) for r in self.headrevs()]
2404
2410
2405 def heads(self, start=None, stop=None):
2411 def heads(self, start=None, stop=None):
2406 """return the list of all nodes that have no children
2412 """return the list of all nodes that have no children
2407
2413
2408 if start is specified, only heads that are descendants of
2414 if start is specified, only heads that are descendants of
2409 start will be returned
2415 start will be returned
2410 if stop is specified, it will consider all the revs from stop
2416 if stop is specified, it will consider all the revs from stop
2411 as if they had no children
2417 as if they had no children
2412 """
2418 """
2413 if start is None and stop is None:
2419 if start is None and stop is None:
2414 if not len(self):
2420 if not len(self):
2415 return [self.nullid]
2421 return [self.nullid]
2416 return self._head_node_ids()
2422 return self._head_node_ids()
2417 if start is None:
2423 if start is None:
2418 start = nullrev
2424 start = nullrev
2419 else:
2425 else:
2420 start = self.rev(start)
2426 start = self.rev(start)
2421
2427
2422 stoprevs = {self.rev(n) for n in stop or []}
2428 stoprevs = {self.rev(n) for n in stop or []}
2423
2429
2424 revs = dagop.headrevssubset(
2430 revs = dagop.headrevssubset(
2425 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2431 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2426 )
2432 )
2427
2433
2428 return [self.node(rev) for rev in revs]
2434 return [self.node(rev) for rev in revs]
2429
2435
2430 def diffheads(self, start, stop):
2436 def diffheads(self, start, stop):
2431 """return the nodes that make up the difference between
2437 """return the nodes that make up the difference between
2432 heads of revs before `start` and heads of revs before `stop`"""
2438 heads of revs before `start` and heads of revs before `stop`"""
2433 removed, added = self.headrevsdiff(start, stop)
2439 removed, added = self.headrevsdiff(start, stop)
2434 return [self.node(r) for r in removed], [self.node(r) for r in added]
2440 return [self.node(r) for r in removed], [self.node(r) for r in added]
2435
2441
2436 def children(self, node):
2442 def children(self, node):
2437 """find the children of a given node"""
2443 """find the children of a given node"""
2438 c = []
2444 c = []
2439 p = self.rev(node)
2445 p = self.rev(node)
2440 for r in self.revs(start=p + 1):
2446 for r in self.revs(start=p + 1):
2441 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2447 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2442 if prevs:
2448 if prevs:
2443 for pr in prevs:
2449 for pr in prevs:
2444 if pr == p:
2450 if pr == p:
2445 c.append(self.node(r))
2451 c.append(self.node(r))
2446 elif p == nullrev:
2452 elif p == nullrev:
2447 c.append(self.node(r))
2453 c.append(self.node(r))
2448 return c
2454 return c
2449
2455
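
`children` is the inverse of `parentrevs`, and since the revlog keeps no child index it is answered by scanning every later revision. The same scan in a self-contained toy form (hypothetical names; the nullrev special case above is omitted):

def toy_children(parentrevs, nrevs, rev):
    """All revisions that list `rev` as a parent (toy model)."""
    return [r for r in range(rev + 1, nrevs) if rev in parentrevs(r)]

parents = {0: [], 1: [0], 2: [0], 3: [1, 2]}
assert toy_children(parents.__getitem__, 4, 0) == [1, 2]
assert toy_children(parents.__getitem__, 4, 3) == []
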
2450 def commonancestorsheads(self, a, b):
2456 def commonancestorsheads(self, a, b):
2451 """calculate all the heads of the common ancestors of nodes a and b"""
2457 """calculate all the heads of the common ancestors of nodes a and b"""
2452 a, b = self.rev(a), self.rev(b)
2458 a, b = self.rev(a), self.rev(b)
2453 ancs = self._commonancestorsheads(a, b)
2459 ancs = self._commonancestorsheads(a, b)
2454 return pycompat.maplist(self.node, ancs)
2460 return pycompat.maplist(self.node, ancs)
2455
2461
2456 def _commonancestorsheads(self, *revs):
2462 def _commonancestorsheads(self, *revs):
2457 """calculate all the heads of the common ancestors of revs"""
2463 """calculate all the heads of the common ancestors of revs"""
2458 try:
2464 try:
2459 ancs = self.index.commonancestorsheads(*revs)
2465 ancs = self.index.commonancestorsheads(*revs)
2460 except (AttributeError, OverflowError): # C implementation failed
2466 except (AttributeError, OverflowError): # C implementation failed
2461 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2467 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2462 return ancs
2468 return ancs
2463
2469
2464 def isancestor(self, a, b):
2470 def isancestor(self, a, b):
2465 """return True if node a is an ancestor of node b
2471 """return True if node a is an ancestor of node b
2466
2472
2467 A revision is considered an ancestor of itself."""
2473 A revision is considered an ancestor of itself."""
2468 a, b = self.rev(a), self.rev(b)
2474 a, b = self.rev(a), self.rev(b)
2469 return self.isancestorrev(a, b)
2475 return self.isancestorrev(a, b)
2470
2476
2471 def isancestorrev(self, a, b):
2477 def isancestorrev(self, a, b):
2472 """return True if revision a is an ancestor of revision b
2478 """return True if revision a is an ancestor of revision b
2473
2479
2474 A revision is considered an ancestor of itself.
2480 A revision is considered an ancestor of itself.
2475
2481
2476 The implementation of this is trivial but the use of
2482 The implementation of this is trivial but the use of
2477 reachableroots is not."""
2483 reachableroots is not."""
2478 if a == nullrev:
2484 if a == nullrev:
2479 return True
2485 return True
2480 elif a == b:
2486 elif a == b:
2481 return True
2487 return True
2482 elif a > b:
2488 elif a > b:
2483 return False
2489 return False
2484 return bool(self.reachableroots(a, [b], [a], includepath=False))
2490 return bool(self.reachableroots(a, [b], [a], includepath=False))
2485
2491
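
After the cheap shortcuts (`a == nullrev`, `a == b`, and `a > b`, which can never hold for an ancestor because parents always have lower revision numbers), the question reduces to reachability from `b` by following parents. A self-contained sketch of that reachability test, toy data only:

def toy_isancestorrev(parentrevs, a, b):
    """True if revision a is an ancestor of revision b (toy model)."""
    if a == b:
        return True
    if a > b:  # parents always have lower revision numbers
        return False
    seen = set()
    stack = [b]
    while stack:
        r = stack.pop()
        if r == a:
            return True
        if r in seen or r < a:  # no point walking below a
            continue
        seen.add(r)
        stack.extend(parentrevs(r))
    return False

parents = {0: [], 1: [0], 2: [0], 3: [2]}
assert toy_isancestorrev(parents.__getitem__, 0, 3)
assert not toy_isancestorrev(parents.__getitem__, 1, 3)
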
2486 def reachableroots(self, minroot, heads, roots, includepath=False):
2492 def reachableroots(self, minroot, heads, roots, includepath=False):
2487 """return (heads(::(<roots> and <roots>::<heads>)))
2493 """return (heads(::(<roots> and <roots>::<heads>)))
2488
2494
2489 If includepath is True, return (<roots>::<heads>)."""
2495 If includepath is True, return (<roots>::<heads>)."""
2490 try:
2496 try:
2491 return self.index.reachableroots2(
2497 return self.index.reachableroots2(
2492 minroot, heads, roots, includepath
2498 minroot, heads, roots, includepath
2493 )
2499 )
2494 except AttributeError:
2500 except AttributeError:
2495 return dagop._reachablerootspure(
2501 return dagop._reachablerootspure(
2496 self.parentrevs, minroot, roots, heads, includepath
2502 self.parentrevs, minroot, roots, heads, includepath
2497 )
2503 )
2498
2504
2499 def ancestor(self, a, b):
2505 def ancestor(self, a, b):
2500 """calculate the "best" common ancestor of nodes a and b"""
2506 """calculate the "best" common ancestor of nodes a and b"""
2501
2507
2502 a, b = self.rev(a), self.rev(b)
2508 a, b = self.rev(a), self.rev(b)
2503 try:
2509 try:
2504 ancs = self.index.ancestors(a, b)
2510 ancs = self.index.ancestors(a, b)
2505 except (AttributeError, OverflowError):
2511 except (AttributeError, OverflowError):
2506 ancs = ancestor.ancestors(self.parentrevs, a, b)
2512 ancs = ancestor.ancestors(self.parentrevs, a, b)
2507 if ancs:
2513 if ancs:
2508 # choose a consistent winner when there's a tie
2514 # choose a consistent winner when there's a tie
2509 return min(map(self.node, ancs))
2515 return min(map(self.node, ancs))
2510 return self.nullid
2516 return self.nullid
2511
2517
2512 def _match(self, id):
2518 def _match(self, id):
2513 if isinstance(id, int):
2519 if isinstance(id, int):
2514 # rev
2520 # rev
2515 return self.node(id)
2521 return self.node(id)
2516 if len(id) == self.nodeconstants.nodelen:
2522 if len(id) == self.nodeconstants.nodelen:
2517 # possibly a binary node
2523 # possibly a binary node
2518 # odds of a binary node being all hex in ASCII are 1 in 10**25
2524 # odds of a binary node being all hex in ASCII are 1 in 10**25
2519 try:
2525 try:
2520 node = id
2526 node = id
2521 self.rev(node) # quick search the index
2527 self.rev(node) # quick search the index
2522 return node
2528 return node
2523 except error.LookupError:
2529 except error.LookupError:
2524 pass # may be partial hex id
2530 pass # may be partial hex id
2525 try:
2531 try:
2526 # str(rev)
2532 # str(rev)
2527 rev = int(id)
2533 rev = int(id)
2528 if b"%d" % rev != id:
2534 if b"%d" % rev != id:
2529 raise ValueError
2535 raise ValueError
2530 if rev < 0:
2536 if rev < 0:
2531 rev = len(self) + rev
2537 rev = len(self) + rev
2532 if rev < 0 or rev >= len(self):
2538 if rev < 0 or rev >= len(self):
2533 raise ValueError
2539 raise ValueError
2534 return self.node(rev)
2540 return self.node(rev)
2535 except (ValueError, OverflowError):
2541 except (ValueError, OverflowError):
2536 pass
2542 pass
2537 if len(id) == 2 * self.nodeconstants.nodelen:
2543 if len(id) == 2 * self.nodeconstants.nodelen:
2538 try:
2544 try:
2539 # a full hex nodeid?
2545 # a full hex nodeid?
2540 node = bin(id)
2546 node = bin(id)
2541 self.rev(node)
2547 self.rev(node)
2542 return node
2548 return node
2543 except (binascii.Error, error.LookupError):
2549 except (binascii.Error, error.LookupError):
2544 pass
2550 pass
2545
2551
2546 def _partialmatch(self, id):
2552 def _partialmatch(self, id):
2547 # we don't care about wdirfilenodeids as they should always be full hashes
2553 # we don't care about wdirfilenodeids as they should always be full hashes
2548 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2554 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2549 ambiguous = False
2555 ambiguous = False
2550 try:
2556 try:
2551 partial = self.index.partialmatch(id)
2557 partial = self.index.partialmatch(id)
2552 if partial and self.hasnode(partial):
2558 if partial and self.hasnode(partial):
2553 if maybewdir:
2559 if maybewdir:
2554 # single 'ff...' match in radix tree, ambiguous with wdir
2560 # single 'ff...' match in radix tree, ambiguous with wdir
2555 ambiguous = True
2561 ambiguous = True
2556 else:
2562 else:
2557 return partial
2563 return partial
2558 elif maybewdir:
2564 elif maybewdir:
2559 # no 'ff...' match in radix tree, wdir identified
2565 # no 'ff...' match in radix tree, wdir identified
2560 raise error.WdirUnsupported
2566 raise error.WdirUnsupported
2561 else:
2567 else:
2562 return None
2568 return None
2563 except error.RevlogError:
2569 except error.RevlogError:
2564 # parsers.c radix tree lookup gave multiple matches
2570 # parsers.c radix tree lookup gave multiple matches
2565 # fast path: for unfiltered changelog, radix tree is accurate
2571 # fast path: for unfiltered changelog, radix tree is accurate
2566 if not getattr(self, 'filteredrevs', None):
2572 if not getattr(self, 'filteredrevs', None):
2567 ambiguous = True
2573 ambiguous = True
2568 # fall through to slow path that filters hidden revisions
2574 # fall through to slow path that filters hidden revisions
2569 except (AttributeError, ValueError):
2575 except (AttributeError, ValueError):
2570 # we are pure python, or key is not hex
2576 # we are pure python, or key is not hex
2571 pass
2577 pass
2572 if ambiguous:
2578 if ambiguous:
2573 raise error.AmbiguousPrefixLookupError(
2579 raise error.AmbiguousPrefixLookupError(
2574 id, self.display_id, _(b'ambiguous identifier')
2580 id, self.display_id, _(b'ambiguous identifier')
2575 )
2581 )
2576
2582
2577 if id in self._pcache:
2583 if id in self._pcache:
2578 return self._pcache[id]
2584 return self._pcache[id]
2579
2585
2580 if len(id) <= 40:
2586 if len(id) <= 40:
2581 # hex(node)[:...]
2587 # hex(node)[:...]
2582 l = len(id) // 2 * 2 # grab an even number of digits
2588 l = len(id) // 2 * 2 # grab an even number of digits
2583 try:
2589 try:
2584 # we're dropping the last digit, so let's check that it's hex,
2590 # we're dropping the last digit, so let's check that it's hex,
2585 # to avoid the expensive computation below if it's not
2591 # to avoid the expensive computation below if it's not
2586 if len(id) % 2 > 0:
2592 if len(id) % 2 > 0:
2587 if id[-1] not in hexdigits:
2593 if id[-1] not in hexdigits:
2588 return None
2594 return None
2589 prefix = bin(id[:l])
2595 prefix = bin(id[:l])
2590 except binascii.Error:
2596 except binascii.Error:
2591 pass
2597 pass
2592 else:
2598 else:
2593 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2599 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2594 nl = [
2600 nl = [
2595 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2601 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2596 ]
2602 ]
2597 if self.nodeconstants.nullhex.startswith(id):
2603 if self.nodeconstants.nullhex.startswith(id):
2598 nl.append(self.nullid)
2604 nl.append(self.nullid)
2599 if len(nl) > 0:
2605 if len(nl) > 0:
2600 if len(nl) == 1 and not maybewdir:
2606 if len(nl) == 1 and not maybewdir:
2601 self._pcache[id] = nl[0]
2607 self._pcache[id] = nl[0]
2602 return nl[0]
2608 return nl[0]
2603 raise error.AmbiguousPrefixLookupError(
2609 raise error.AmbiguousPrefixLookupError(
2604 id, self.display_id, _(b'ambiguous identifier')
2610 id, self.display_id, _(b'ambiguous identifier')
2605 )
2611 )
2606 if maybewdir:
2612 if maybewdir:
2607 raise error.WdirUnsupported
2613 raise error.WdirUnsupported
2608 return None
2614 return None
2609
2615
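
Stripped of the radix-tree fast path, hidden-revision filtering and the wdir special case, the slow path above boils down to: collect every known node whose hex form starts with the prefix and refuse to pick if more than one survives. A toy sketch over plain hex strings:

def toy_partialmatch(hexnodes, prefix):
    """Resolve a hex prefix to a unique node, raising on ambiguity."""
    candidates = [n for n in hexnodes if n.startswith(prefix)]
    if len(candidates) > 1:
        raise LookupError('ambiguous identifier: %s' % prefix)
    return candidates[0] if candidates else None

nodes = ['a1b2c3', 'a1f409', '9e77aa']
assert toy_partialmatch(nodes, '9e') == '9e77aa'
assert toy_partialmatch(nodes, 'ffff') is None
try:
    toy_partialmatch(nodes, 'a1')
except LookupError:
    pass  # 'a1' matches two nodes
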
2610 def lookup(self, id):
2616 def lookup(self, id):
2611 """locate a node based on:
2617 """locate a node based on:
2612 - revision number or str(revision number)
2618 - revision number or str(revision number)
2613 - nodeid or subset of hex nodeid
2619 - nodeid or subset of hex nodeid
2614 """
2620 """
2615 n = self._match(id)
2621 n = self._match(id)
2616 if n is not None:
2622 if n is not None:
2617 return n
2623 return n
2618 n = self._partialmatch(id)
2624 n = self._partialmatch(id)
2619 if n:
2625 if n:
2620 return n
2626 return n
2621
2627
2622 raise error.LookupError(id, self.display_id, _(b'no match found'))
2628 raise error.LookupError(id, self.display_id, _(b'no match found'))
2623
2629
2624 def shortest(self, node, minlength=1):
2630 def shortest(self, node, minlength=1):
2625 """Find the shortest unambiguous prefix that matches node."""
2631 """Find the shortest unambiguous prefix that matches node."""
2626
2632
2627 def isvalid(prefix):
2633 def isvalid(prefix):
2628 try:
2634 try:
2629 matchednode = self._partialmatch(prefix)
2635 matchednode = self._partialmatch(prefix)
2630 except error.AmbiguousPrefixLookupError:
2636 except error.AmbiguousPrefixLookupError:
2631 return False
2637 return False
2632 except error.WdirUnsupported:
2638 except error.WdirUnsupported:
2633 # single 'ff...' match
2639 # single 'ff...' match
2634 return True
2640 return True
2635 if matchednode is None:
2641 if matchednode is None:
2636 raise error.LookupError(node, self.display_id, _(b'no node'))
2642 raise error.LookupError(node, self.display_id, _(b'no node'))
2637 return True
2643 return True
2638
2644
2639 def maybewdir(prefix):
2645 def maybewdir(prefix):
2640 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2646 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2641
2647
2642 hexnode = hex(node)
2648 hexnode = hex(node)
2643
2649
2644 def disambiguate(hexnode, minlength):
2650 def disambiguate(hexnode, minlength):
2645 """Disambiguate against wdirid."""
2651 """Disambiguate against wdirid."""
2646 for length in range(minlength, len(hexnode) + 1):
2652 for length in range(minlength, len(hexnode) + 1):
2647 prefix = hexnode[:length]
2653 prefix = hexnode[:length]
2648 if not maybewdir(prefix):
2654 if not maybewdir(prefix):
2649 return prefix
2655 return prefix
2650
2656
2651 if not getattr(self, 'filteredrevs', None):
2657 if not getattr(self, 'filteredrevs', None):
2652 try:
2658 try:
2653 length = max(self.index.shortest(node), minlength)
2659 length = max(self.index.shortest(node), minlength)
2654 return disambiguate(hexnode, length)
2660 return disambiguate(hexnode, length)
2655 except error.RevlogError:
2661 except error.RevlogError:
2656 if node != self.nodeconstants.wdirid:
2662 if node != self.nodeconstants.wdirid:
2657 raise error.LookupError(
2663 raise error.LookupError(
2658 node, self.display_id, _(b'no node')
2664 node, self.display_id, _(b'no node')
2659 )
2665 )
2660 except AttributeError:
2666 except AttributeError:
2661 # Fall through to pure code
2667 # Fall through to pure code
2662 pass
2668 pass
2663
2669
2664 if node == self.nodeconstants.wdirid:
2670 if node == self.nodeconstants.wdirid:
2665 for length in range(minlength, len(hexnode) + 1):
2671 for length in range(minlength, len(hexnode) + 1):
2666 prefix = hexnode[:length]
2672 prefix = hexnode[:length]
2667 if isvalid(prefix):
2673 if isvalid(prefix):
2668 return prefix
2674 return prefix
2669
2675
2670 for length in range(minlength, len(hexnode) + 1):
2676 for length in range(minlength, len(hexnode) + 1):
2671 prefix = hexnode[:length]
2677 prefix = hexnode[:length]
2672 if isvalid(prefix):
2678 if isvalid(prefix):
2673 return disambiguate(hexnode, length)
2679 return disambiguate(hexnode, length)
2674
2680
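
`shortest` grows the prefix one hex digit at a time until it no longer matches any other node (the real code additionally avoids prefixes that could be confused with the all-'f' working-directory id). The core loop, as a toy over hex strings:

def toy_shortest(hexnodes, hexnode, minlength=1):
    """Shortest prefix of `hexnode` matching no other node (toy model)."""
    others = [n for n in hexnodes if n != hexnode]
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if not any(n.startswith(prefix) for n in others):
            return prefix
    return hexnode

nodes = ['a1b2c3', 'a1f409', '9e77aa']
assert toy_shortest(nodes, 'a1b2c3') == 'a1b'
assert toy_shortest(nodes, '9e77aa') == '9'
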
2675 def cmp(self, node, text):
2681 def cmp(self, node, text):
2676 """compare text with a given file revision
2682 """compare text with a given file revision
2677
2683
2678 returns True if text is different than what is stored.
2684 returns True if text is different than what is stored.
2679 """
2685 """
2680 p1, p2 = self.parents(node)
2686 p1, p2 = self.parents(node)
2681 return storageutil.hashrevisionsha1(text, p1, p2) != node
2687 return storageutil.hashrevisionsha1(text, p1, p2) != node
2682
2688
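
`cmp` works because a revlog node is itself a content hash: assuming the usual convention that a node is the SHA-1 of the two parent nodes in sorted order followed by the revision text, comparing new text against stored content never requires reading the stored text. A hedged toy illustration of that convention (not `storageutil.hashrevisionsha1` itself):

import hashlib

NULLID = b'\0' * 20

def toy_hashrevision(text, p1=NULLID, p2=NULLID):
    """SHA-1 over the sorted parents plus the text (assumed scheme)."""
    a, b = sorted([p1, p2])
    return hashlib.sha1(a + b + text).digest()

node = toy_hashrevision(b'file content v1')
assert toy_hashrevision(b'file content v1') == node  # same text, same node
assert toy_hashrevision(b'file content v2') != node  # change is detected
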
2683 def deltaparent(self, rev):
2689 def deltaparent(self, rev):
2684 """return deltaparent of the given revision"""
2690 """return deltaparent of the given revision"""
2685 base = self.index[rev][3]
2691 base = self.index[rev][3]
2686 if base == rev:
2692 if base == rev:
2687 return nullrev
2693 return nullrev
2688 elif self.delta_config.general_delta:
2694 elif self.delta_config.general_delta:
2689 return base
2695 return base
2690 else:
2696 else:
2691 return rev - 1
2697 return rev - 1
2692
2698
2693 def issnapshot(self, rev):
2699 def issnapshot(self, rev):
2694 """tells whether rev is a snapshot"""
2700 """tells whether rev is a snapshot"""
2695 ret = self._inner.issnapshot(rev)
2701 ret = self._inner.issnapshot(rev)
2696 self.issnapshot = self._inner.issnapshot
2702 self.issnapshot = self._inner.issnapshot
2697 return ret
2703 return ret
2698
2704
2699 def snapshotdepth(self, rev):
2705 def snapshotdepth(self, rev):
2700 """number of snapshot in the chain before this one"""
2706 """number of snapshot in the chain before this one"""
2701 if not self.issnapshot(rev):
2707 if not self.issnapshot(rev):
2702 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2708 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2703 return len(self._inner._deltachain(rev)[0]) - 1
2709 return len(self._inner._deltachain(rev)[0]) - 1
2704
2710
2705 def revdiff(self, rev1, rev2):
2711 def revdiff(self, rev1, rev2):
2706 """return or calculate a delta between two revisions
2712 """return or calculate a delta between two revisions
2707
2713
2708 The delta calculated is in binary form and is intended to be written to
2714 The delta calculated is in binary form and is intended to be written to
2709 revlog data directly. So this function needs raw revision data.
2715 revlog data directly. So this function needs raw revision data.
2710 """
2716 """
2711 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2717 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2712 return bytes(self._inner._chunk(rev2))
2718 return bytes(self._inner._chunk(rev2))
2713
2719
2714 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2720 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2715
2721
2716 def revision(self, nodeorrev):
2722 def revision(self, nodeorrev):
2717 """return an uncompressed revision of a given node or revision
2723 """return an uncompressed revision of a given node or revision
2718 number.
2724 number.
2719 """
2725 """
2720 return self._revisiondata(nodeorrev)
2726 return self._revisiondata(nodeorrev)
2721
2727
2722 def sidedata(self, nodeorrev):
2728 def sidedata(self, nodeorrev):
2723 """a map of extra data related to the changeset but not part of the hash
2729 """a map of extra data related to the changeset but not part of the hash
2724
2730
2725 This function currently return a dictionary. However, more advanced
2731 This function currently return a dictionary. However, more advanced
2726 mapping object will likely be used in the future for a more
2732 mapping object will likely be used in the future for a more
2727 efficient/lazy code.
2733 efficient/lazy code.
2728 """
2734 """
2729 # deal with <nodeorrev> argument type
2735 # deal with <nodeorrev> argument type
2730 if isinstance(nodeorrev, int):
2736 if isinstance(nodeorrev, int):
2731 rev = nodeorrev
2737 rev = nodeorrev
2732 else:
2738 else:
2733 rev = self.rev(nodeorrev)
2739 rev = self.rev(nodeorrev)
2734 return self._sidedata(rev)
2740 return self._sidedata(rev)
2735
2741
2736 def _rawtext(self, node, rev):
2742 def _rawtext(self, node, rev):
2737 """return the possibly unvalidated rawtext for a revision
2743 """return the possibly unvalidated rawtext for a revision
2738
2744
2739 returns (rev, rawtext, validated)
2745 returns (rev, rawtext, validated)
2740 """
2746 """
2741 # Check if we have the entry in cache
2747 # Check if we have the entry in cache
2742 # The cache entry looks like (node, rev, rawtext)
2748 # The cache entry looks like (node, rev, rawtext)
2743 if self._inner._revisioncache:
2749 if self._inner._revisioncache:
2744 if self._inner._revisioncache[0] == node:
2750 if self._inner._revisioncache[0] == node:
2745 return (rev, self._inner._revisioncache[2], True)
2751 return (rev, self._inner._revisioncache[2], True)
2746
2752
2747 if rev is None:
2753 if rev is None:
2748 rev = self.rev(node)
2754 rev = self.rev(node)
2749
2755
2750 text = self._inner.raw_text(node, rev)
2756 text = self._inner.raw_text(node, rev)
2751 return (rev, text, False)
2757 return (rev, text, False)
2752
2758
2753 def _revisiondata(self, nodeorrev, raw=False):
2759 def _revisiondata(self, nodeorrev, raw=False):
2754 # deal with <nodeorrev> argument type
2760 # deal with <nodeorrev> argument type
2755 if isinstance(nodeorrev, int):
2761 if isinstance(nodeorrev, int):
2756 rev = nodeorrev
2762 rev = nodeorrev
2757 node = self.node(rev)
2763 node = self.node(rev)
2758 else:
2764 else:
2759 node = nodeorrev
2765 node = nodeorrev
2760 rev = None
2766 rev = None
2761
2767
2762 # fast path the special `nullid` rev
2768 # fast path the special `nullid` rev
2763 if node == self.nullid:
2769 if node == self.nullid:
2764 return b""
2770 return b""
2765
2771
2766 # ``rawtext`` is the text as stored inside the revlog. Might be the
2772 # ``rawtext`` is the text as stored inside the revlog. Might be the
2767 # revision or might need to be processed to retrieve the revision.
2773 # revision or might need to be processed to retrieve the revision.
2768 rev, rawtext, validated = self._rawtext(node, rev)
2774 rev, rawtext, validated = self._rawtext(node, rev)
2769
2775
2770 if raw and validated:
2776 if raw and validated:
2771 # if we don't want to process the raw text and that raw
2777 # if we don't want to process the raw text and that raw
2772 # text is cached, we can exit early.
2778 # text is cached, we can exit early.
2773 return rawtext
2779 return rawtext
2774 if rev is None:
2780 if rev is None:
2775 rev = self.rev(node)
2781 rev = self.rev(node)
2776 # the revlog's flag for this revision
2782 # the revlog's flag for this revision
2777 # (usually alter its state or content)
2783 # (usually alter its state or content)
2778 flags = self.flags(rev)
2784 flags = self.flags(rev)
2779
2785
2780 if validated and flags == REVIDX_DEFAULT_FLAGS:
2786 if validated and flags == REVIDX_DEFAULT_FLAGS:
2781 # no extra flags set, no flag processor runs, text = rawtext
2787 # no extra flags set, no flag processor runs, text = rawtext
2782 return rawtext
2788 return rawtext
2783
2789
2784 if raw:
2790 if raw:
2785 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2791 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2786 text = rawtext
2792 text = rawtext
2787 else:
2793 else:
2788 r = flagutil.processflagsread(self, rawtext, flags)
2794 r = flagutil.processflagsread(self, rawtext, flags)
2789 text, validatehash = r
2795 text, validatehash = r
2790 if validatehash:
2796 if validatehash:
2791 self.checkhash(text, node, rev=rev)
2797 self.checkhash(text, node, rev=rev)
2792 if not validated:
2798 if not validated:
2793 self._inner._revisioncache = (node, rev, rawtext)
2799 self._inner._revisioncache = (node, rev, rawtext)
2794
2800
2795 return text
2801 return text
2796
2802
2797 def _sidedata(self, rev):
2803 def _sidedata(self, rev):
2798 """Return the sidedata for a given revision number."""
2804 """Return the sidedata for a given revision number."""
2799 if self._sidedatafile is None:
2805 if self._sidedatafile is None:
2800 return {}
2806 return {}
2801 sidedata_end = None
2807 sidedata_end = None
2802 if self._docket is not None:
2808 if self._docket is not None:
2803 sidedata_end = self._docket.sidedata_end
2809 sidedata_end = self._docket.sidedata_end
2804 return self._inner.sidedata(rev, sidedata_end)
2810 return self._inner.sidedata(rev, sidedata_end)
2805
2811
2806 def rawdata(self, nodeorrev):
2812 def rawdata(self, nodeorrev):
2807 """return an uncompressed raw data of a given node or revision number."""
2813 """return an uncompressed raw data of a given node or revision number."""
2808 return self._revisiondata(nodeorrev, raw=True)
2814 return self._revisiondata(nodeorrev, raw=True)
2809
2815
2810 def hash(self, text, p1, p2):
2816 def hash(self, text, p1, p2):
2811 """Compute a node hash.
2817 """Compute a node hash.
2812
2818
2813 Available as a function so that subclasses can replace the hash
2819 Available as a function so that subclasses can replace the hash
2814 as needed.
2820 as needed.
2815 """
2821 """
2816 return storageutil.hashrevisionsha1(text, p1, p2)
2822 return storageutil.hashrevisionsha1(text, p1, p2)
2817
2823
2818 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2824 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2819 """Check node hash integrity.
2825 """Check node hash integrity.
2820
2826
2821 Available as a function so that subclasses can extend hash mismatch
2827 Available as a function so that subclasses can extend hash mismatch
2822 behaviors as needed.
2828 behaviors as needed.
2823 """
2829 """
2824 try:
2830 try:
2825 if p1 is None and p2 is None:
2831 if p1 is None and p2 is None:
2826 p1, p2 = self.parents(node)
2832 p1, p2 = self.parents(node)
2827 if node != self.hash(text, p1, p2):
2833 if node != self.hash(text, p1, p2):
2828 # Clear the revision cache on hash failure. The revision cache
2834 # Clear the revision cache on hash failure. The revision cache
2829 # only stores the raw revision and clearing the cache does have
2835 # only stores the raw revision and clearing the cache does have
2830 # the side-effect that we won't have a cache hit when the raw
2836 # the side-effect that we won't have a cache hit when the raw
2831 # revision data is accessed. But this case should be rare and
2837 # revision data is accessed. But this case should be rare and
2832 # it is extra work to teach the cache about the hash
2838 # it is extra work to teach the cache about the hash
2833 # verification state.
2839 # verification state.
2834 if (
2840 if (
2835 self._inner._revisioncache
2841 self._inner._revisioncache
2836 and self._inner._revisioncache[0] == node
2842 and self._inner._revisioncache[0] == node
2837 ):
2843 ):
2838 self._inner._revisioncache = None
2844 self._inner._revisioncache = None
2839
2845
2840 revornode = rev
2846 revornode = rev
2841 if revornode is None:
2847 if revornode is None:
2842 revornode = templatefilters.short(hex(node))
2848 revornode = templatefilters.short(hex(node))
2843 raise error.RevlogError(
2849 raise error.RevlogError(
2844 _(b"integrity check failed on %s:%s")
2850 _(b"integrity check failed on %s:%s")
2845 % (self.display_id, pycompat.bytestr(revornode))
2851 % (self.display_id, pycompat.bytestr(revornode))
2846 )
2852 )
2847 except error.RevlogError:
2853 except error.RevlogError:
2848 if self.feature_config.censorable and storageutil.iscensoredtext(
2854 if self.feature_config.censorable and storageutil.iscensoredtext(
2849 text
2855 text
2850 ):
2856 ):
2851 raise error.CensoredNodeError(self.display_id, node, text)
2857 raise error.CensoredNodeError(self.display_id, node, text)
2852 raise
2858 raise
2853
2859
2854 @property
2860 @property
2855 def _split_index_file(self):
2861 def _split_index_file(self):
2856 """the path where to expect the index of an ongoing splitting operation
2862 """the path where to expect the index of an ongoing splitting operation
2857
2863
2858 The file will only exist if a splitting operation is in progress, but
2864 The file will only exist if a splitting operation is in progress, but
2859 it is always expected at the same location."""
2865 it is always expected at the same location."""
2860 parts = self.radix.split(b'/')
2866 parts = self.radix.split(b'/')
2861 if len(parts) > 1:
2867 if len(parts) > 1:
2862 # adds a '-s' suffix to the `data` or `meta` base directory
2868 # adds a '-s' suffix to the `data` or `meta` base directory
2863 head = parts[0] + b'-s'
2869 head = parts[0] + b'-s'
2864 mids = parts[1:-1]
2870 mids = parts[1:-1]
2865 tail = parts[-1] + b'.i'
2871 tail = parts[-1] + b'.i'
2866 pieces = [head] + mids + [tail]
2872 pieces = [head] + mids + [tail]
2867 return b'/'.join(pieces)
2873 return b'/'.join(pieces)
2868 else:
2874 else:
2869 # the revlog is stored at the root of the store (changelog or
2875 # the revlog is stored at the root of the store (changelog or
2870 # manifest), no risk of collision.
2876 # manifest), no risk of collision.
2871 return self.radix + b'.i.s'
2877 return self.radix + b'.i.s'
2872
2878
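
The path juggling above keeps the temporary split index in a sibling base directory with a '-s' suffix (so it cannot collide with a tracked file under `data/` or `meta/`), while revlogs at the store root simply get an extra `.i.s` suffix. The same string manipulation as a toy function:

def toy_split_index_file(radix):
    """Path of the in-progress split index for a revlog radix (toy model)."""
    parts = radix.split(b'/')
    if len(parts) > 1:
        head = parts[0] + b'-s'
        return b'/'.join([head] + parts[1:-1] + [parts[-1] + b'.i'])
    return radix + b'.i.s'

assert toy_split_index_file(b'data/foo/bar') == b'data-s/foo/bar.i'
assert toy_split_index_file(b'00changelog') == b'00changelog.i.s'
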
2873 def _enforceinlinesize(self, tr):
2879 def _enforceinlinesize(self, tr):
2874 """Check if the revlog is too big for inline and convert if so.
2880 """Check if the revlog is too big for inline and convert if so.
2875
2881
2876 This should be called after revisions are added to the revlog. If the
2882 This should be called after revisions are added to the revlog. If the
2877 revlog has grown too large to be an inline revlog, it will convert it
2883 revlog has grown too large to be an inline revlog, it will convert it
2878 to use multiple index and data files.
2884 to use multiple index and data files.
2879 """
2885 """
2880 tiprev = len(self) - 1
2886 tiprev = len(self) - 1
2881 total_size = self.start(tiprev) + self.length(tiprev)
2887 total_size = self.start(tiprev) + self.length(tiprev)
2882 if not self._inline or (self._may_inline and total_size < _maxinline):
2888 if not self._inline or (self._may_inline and total_size < _maxinline):
2883 return
2889 return
2884
2890
2885 if self._docket is not None:
2891 if self._docket is not None:
2886 msg = b"inline revlog should not have a docket"
2892 msg = b"inline revlog should not have a docket"
2887 raise error.ProgrammingError(msg)
2893 raise error.ProgrammingError(msg)
2888
2894
2889 # In the common case, we enforce inline size because the revlog has
2895 # In the common case, we enforce inline size because the revlog has
2890 # been appended to. In such a case, it must have an initial offset
2896 # been appended to. In such a case, it must have an initial offset
2891 # recorded in the transaction.
2897 # recorded in the transaction.
2892 troffset = tr.findoffset(self._inner.canonical_index_file)
2898 troffset = tr.findoffset(self._inner.canonical_index_file)
2893 pre_touched = troffset is not None
2899 pre_touched = troffset is not None
2894 if not pre_touched and self.target[0] != KIND_CHANGELOG:
2900 if not pre_touched and self.target[0] != KIND_CHANGELOG:
2895 raise error.RevlogError(
2901 raise error.RevlogError(
2896 _(b"%s not found in the transaction") % self._indexfile
2902 _(b"%s not found in the transaction") % self._indexfile
2897 )
2903 )
2898
2904
2899 tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
2905 tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
2900 tr.add(self._datafile, 0)
2906 tr.add(self._datafile, 0)
2901
2907
2902 new_index_file_path = None
2908 new_index_file_path = None
2903 old_index_file_path = self._indexfile
2909 old_index_file_path = self._indexfile
2904 new_index_file_path = self._split_index_file
2910 new_index_file_path = self._split_index_file
2905 opener = self.opener
2911 opener = self.opener
2906 weak_self = weakref.ref(self)
2912 weak_self = weakref.ref(self)
2907
2913
2908 # the "split" index replace the real index when the transaction is
2914 # the "split" index replace the real index when the transaction is
2909 # finalized
2915 # finalized
2910 def finalize_callback(tr):
2916 def finalize_callback(tr):
2911 opener.rename(
2917 opener.rename(
2912 new_index_file_path,
2918 new_index_file_path,
2913 old_index_file_path,
2919 old_index_file_path,
2914 checkambig=True,
2920 checkambig=True,
2915 )
2921 )
2916 maybe_self = weak_self()
2922 maybe_self = weak_self()
2917 if maybe_self is not None:
2923 if maybe_self is not None:
2918 maybe_self._indexfile = old_index_file_path
2924 maybe_self._indexfile = old_index_file_path
2919 maybe_self._inner.index_file = maybe_self._indexfile
2925 maybe_self._inner.index_file = maybe_self._indexfile
2920
2926
2921 def abort_callback(tr):
2927 def abort_callback(tr):
2922 maybe_self = weak_self()
2928 maybe_self = weak_self()
2923 if maybe_self is not None:
2929 if maybe_self is not None:
2924 maybe_self._indexfile = old_index_file_path
2930 maybe_self._indexfile = old_index_file_path
2925 maybe_self._inner.inline = True
2931 maybe_self._inner.inline = True
2926 maybe_self._inner.index_file = old_index_file_path
2932 maybe_self._inner.index_file = old_index_file_path
2927
2933
2928 tr.registertmp(new_index_file_path)
2934 tr.registertmp(new_index_file_path)
2929 # we use 001 here to make sure this happens after the finalisation of
2935 # we use 001 here to make sure this happens after the finalisation of
2930 # the pending changelog write (using 000). Otherwise the two finalizers
2936 # the pending changelog write (using 000). Otherwise the two finalizers
2931 # would step over each other and delete the changelog.i file.
2937 # would step over each other and delete the changelog.i file.
2932 if self.target[1] is not None:
2938 if self.target[1] is not None:
2933 callback_id = b'001-revlog-split-%d-%s' % self.target
2939 callback_id = b'001-revlog-split-%d-%s' % self.target
2934 else:
2940 else:
2935 callback_id = b'001-revlog-split-%d' % self.target[0]
2941 callback_id = b'001-revlog-split-%d' % self.target[0]
2936 tr.addfinalize(callback_id, finalize_callback)
2942 tr.addfinalize(callback_id, finalize_callback)
2937 tr.addabort(callback_id, abort_callback)
2943 tr.addabort(callback_id, abort_callback)
2938
2944
2939 self._format_flags &= ~FLAG_INLINE_DATA
2945 self._format_flags &= ~FLAG_INLINE_DATA
2940 self._inner.split_inline(
2946 self._inner.split_inline(
2941 tr,
2947 tr,
2942 self._format_flags | self._format_version,
2948 self._format_flags | self._format_version,
2943 new_index_file_path=new_index_file_path,
2949 new_index_file_path=new_index_file_path,
2944 )
2950 )
2945
2951
2946 self._inline = False
2952 self._inline = False
2947 if new_index_file_path is not None:
2953 if new_index_file_path is not None:
2948 self._indexfile = new_index_file_path
2954 self._indexfile = new_index_file_path
2949
2955
2950 nodemaputil.setup_persistent_nodemap(tr, self)
2956 nodemaputil.setup_persistent_nodemap(tr, self)
2951
2957
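# A small sketch of the id ordering that the 000/001 prefixes above rely on,
# assuming finalizers fire in sorted id order (the b'000-...' id below is
# hypothetical, only the b'001-revlog-split-...' form comes from this method):
#
#   tr.addfinalize(b'000-changelog-pending-write', changelog_cb)       # runs first
#   tr.addfinalize(b'001-revlog-split-1-data/foo', finalize_callback)  # then this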
2952 def _nodeduplicatecallback(self, transaction, node):
2958 def _nodeduplicatecallback(self, transaction, node):
2953 """called when trying to add a node already stored."""
2959 """called when trying to add a node already stored."""
2954
2960
2955 @contextlib.contextmanager
2961 @contextlib.contextmanager
2956 def reading(self):
2962 def reading(self):
2957 with self._inner.reading():
2963 with self._inner.reading():
2958 yield
2964 yield
2959
2965
2960 @contextlib.contextmanager
2966 @contextlib.contextmanager
2961 def _writing(self, transaction):
2967 def _writing(self, transaction):
2962 if self._trypending:
2968 if self._trypending:
2963 msg = b'try to write in a `trypending` revlog: %s'
2969 msg = b'try to write in a `trypending` revlog: %s'
2964 msg %= self.display_id
2970 msg %= self.display_id
2965 raise error.ProgrammingError(msg)
2971 raise error.ProgrammingError(msg)
2966 if self._inner.is_writing:
2972 if self._inner.is_writing:
2967 yield
2973 yield
2968 else:
2974 else:
2969 data_end = None
2975 data_end = None
2970 sidedata_end = None
2976 sidedata_end = None
2971 if self._docket is not None:
2977 if self._docket is not None:
2972 data_end = self._docket.data_end
2978 data_end = self._docket.data_end
2973 sidedata_end = self._docket.sidedata_end
2979 sidedata_end = self._docket.sidedata_end
2974 with self._inner.writing(
2980 with self._inner.writing(
2975 transaction,
2981 transaction,
2976 data_end=data_end,
2982 data_end=data_end,
2977 sidedata_end=sidedata_end,
2983 sidedata_end=sidedata_end,
2978 ):
2984 ):
2979 yield
2985 yield
2980 if self._docket is not None:
2986 if self._docket is not None:
2981 self._write_docket(transaction)
2987 self._write_docket(transaction)
2982
2988
2983 @property
2989 @property
2984 def is_delaying(self):
2990 def is_delaying(self):
2985 return self._inner.is_delaying
2991 return self._inner.is_delaying
2986
2992
2987 def _write_docket(self, transaction):
2993 def _write_docket(self, transaction):
2988 """write the current docket on disk
2994 """write the current docket on disk
2989
2995
2990 Exists as a method to help the changelog implement transaction logic.
2996 Exists as a method to help the changelog implement transaction logic.
2991
2997
2992 We could also imagine using the same transaction logic for all revlogs
2998 We could also imagine using the same transaction logic for all revlogs
2993 since dockets are cheap."""
2999 since dockets are cheap."""
2994 self._docket.write(transaction)
3000 self._docket.write(transaction)
2995
3001
2996 def addrevision(
3002 def addrevision(
2997 self,
3003 self,
2998 text,
3004 text,
2999 transaction,
3005 transaction,
3000 link,
3006 link,
3001 p1,
3007 p1,
3002 p2,
3008 p2,
3003 cachedelta=None,
3009 cachedelta=None,
3004 node=None,
3010 node=None,
3005 flags=REVIDX_DEFAULT_FLAGS,
3011 flags=REVIDX_DEFAULT_FLAGS,
3006 deltacomputer=None,
3012 deltacomputer=None,
3007 sidedata=None,
3013 sidedata=None,
3008 ):
3014 ):
3009 """add a revision to the log
3015 """add a revision to the log
3010
3016
3011 text - the revision data to add
3017 text - the revision data to add
3012 transaction - the transaction object used for rollback
3018 transaction - the transaction object used for rollback
3013 link - the linkrev data to add
3019 link - the linkrev data to add
3014 p1, p2 - the parent nodeids of the revision
3020 p1, p2 - the parent nodeids of the revision
3015 cachedelta - an optional precomputed delta
3021 cachedelta - an optional precomputed delta
3016 node - nodeid of revision; typically node is not specified, and it is
3022 node - nodeid of revision; typically node is not specified, and it is
3017 computed by default as hash(text, p1, p2); however, subclasses might
3023 computed by default as hash(text, p1, p2); however, subclasses might
3018 use a different hashing method (and override checkhash() in that case)
3024 use a different hashing method (and override checkhash() in that case)
3019 flags - the known flags to set on the revision
3025 flags - the known flags to set on the revision
3020 deltacomputer - an optional deltacomputer instance shared between
3026 deltacomputer - an optional deltacomputer instance shared between
3021 multiple calls
3027 multiple calls
3022 """
3028 """
3023 if link == nullrev:
3029 if link == nullrev:
3024 raise error.RevlogError(
3030 raise error.RevlogError(
3025 _(b"attempted to add linkrev -1 to %s") % self.display_id
3031 _(b"attempted to add linkrev -1 to %s") % self.display_id
3026 )
3032 )
3027
3033
3028 if sidedata is None:
3034 if sidedata is None:
3029 sidedata = {}
3035 sidedata = {}
3030 elif sidedata and not self.feature_config.has_side_data:
3036 elif sidedata and not self.feature_config.has_side_data:
3031 raise error.ProgrammingError(
3037 raise error.ProgrammingError(
3032 _(b"trying to add sidedata to a revlog who don't support them")
3038 _(b"trying to add sidedata to a revlog who don't support them")
3033 )
3039 )
3034
3040
3035 if flags:
3041 if flags:
3036 node = node or self.hash(text, p1, p2)
3042 node = node or self.hash(text, p1, p2)
3037
3043
3038 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
3044 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
3039
3045
3040 # If the flag processor modifies the revision data, ignore any provided
3046 # If the flag processor modifies the revision data, ignore any provided
3041 # cachedelta.
3047 # cachedelta.
3042 if rawtext != text:
3048 if rawtext != text:
3043 cachedelta = None
3049 cachedelta = None
3044
3050
3045 if len(rawtext) > _maxentrysize:
3051 if len(rawtext) > _maxentrysize:
3046 raise error.RevlogError(
3052 raise error.RevlogError(
3047 _(
3053 _(
3048 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
3054 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
3049 )
3055 )
3050 % (self.display_id, len(rawtext))
3056 % (self.display_id, len(rawtext))
3051 )
3057 )
3052
3058
3053 node = node or self.hash(rawtext, p1, p2)
3059 node = node or self.hash(rawtext, p1, p2)
3054 rev = self.index.get_rev(node)
3060 rev = self.index.get_rev(node)
3055 if rev is not None:
3061 if rev is not None:
3056 return rev
3062 return rev
3057
3063
3058 if validatehash:
3064 if validatehash:
3059 self.checkhash(rawtext, node, p1=p1, p2=p2)
3065 self.checkhash(rawtext, node, p1=p1, p2=p2)
3060
3066
3061 return self.addrawrevision(
3067 return self.addrawrevision(
3062 rawtext,
3068 rawtext,
3063 transaction,
3069 transaction,
3064 link,
3070 link,
3065 p1,
3071 p1,
3066 p2,
3072 p2,
3067 node,
3073 node,
3068 flags,
3074 flags,
3069 cachedelta=cachedelta,
3075 cachedelta=cachedelta,
3070 deltacomputer=deltacomputer,
3076 deltacomputer=deltacomputer,
3071 sidedata=sidedata,
3077 sidedata=sidedata,
3072 )
3078 )
3073
3079
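# A minimal usage sketch, assuming `rl` is a writable revlog, `tr` an open
# transaction, and `linkrev`/`p1`/`p2` valid link and parent values (the names
# are illustrative):
#
#   rev = rl.addrevision(b'new file content', tr, linkrev, p1, p2)
#   node = rl.node(rev)
#
# The node is computed as hash(text, p1, p2) unless passed explicitly, and a
# node that is already stored simply returns the existing revision number.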
3074 def addrawrevision(
3080 def addrawrevision(
3075 self,
3081 self,
3076 rawtext,
3082 rawtext,
3077 transaction,
3083 transaction,
3078 link,
3084 link,
3079 p1,
3085 p1,
3080 p2,
3086 p2,
3081 node,
3087 node,
3082 flags,
3088 flags,
3083 cachedelta=None,
3089 cachedelta=None,
3084 deltacomputer=None,
3090 deltacomputer=None,
3085 sidedata=None,
3091 sidedata=None,
3086 ):
3092 ):
3087 """add a raw revision with known flags, node and parents
3093 """add a raw revision with known flags, node and parents
3088 useful when reusing a revision not stored in this revlog (e.g. received
3094 useful when reusing a revision not stored in this revlog (e.g. received
3089 over the wire, or read from an external bundle).
3095 over the wire, or read from an external bundle).
3090 """
3096 """
3091 with self._writing(transaction):
3097 with self._writing(transaction):
3092 return self._addrevision(
3098 return self._addrevision(
3093 node,
3099 node,
3094 rawtext,
3100 rawtext,
3095 transaction,
3101 transaction,
3096 link,
3102 link,
3097 p1,
3103 p1,
3098 p2,
3104 p2,
3099 flags,
3105 flags,
3100 cachedelta,
3106 cachedelta,
3101 deltacomputer=deltacomputer,
3107 deltacomputer=deltacomputer,
3102 sidedata=sidedata,
3108 sidedata=sidedata,
3103 )
3109 )
3104
3110
3105 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
3111 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
3106 return self._inner.compress(data)
3112 return self._inner.compress(data)
3107
3113
3108 def decompress(self, data):
3114 def decompress(self, data):
3109 return self._inner.decompress(data)
3115 return self._inner.decompress(data)
3110
3116
3111 def _addrevision(
3117 def _addrevision(
3112 self,
3118 self,
3113 node,
3119 node,
3114 rawtext,
3120 rawtext,
3115 transaction,
3121 transaction,
3116 link,
3122 link,
3117 p1,
3123 p1,
3118 p2,
3124 p2,
3119 flags,
3125 flags,
3120 cachedelta,
3126 cachedelta,
3121 alwayscache=False,
3127 alwayscache=False,
3122 deltacomputer=None,
3128 deltacomputer=None,
3123 sidedata=None,
3129 sidedata=None,
3124 ):
3130 ):
3125 """internal function to add revisions to the log
3131 """internal function to add revisions to the log
3126
3132
3127 see addrevision for argument descriptions.
3133 see addrevision for argument descriptions.
3128
3134
3129 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3135 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3130
3136
3131 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3137 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3132 be used.
3138 be used.
3133
3139
3134 invariants:
3140 invariants:
3135 - rawtext is optional (can be None); if not set, cachedelta must be set.
3141 - rawtext is optional (can be None); if not set, cachedelta must be set.
3136 if both are set, they must correspond to each other.
3142 if both are set, they must correspond to each other.
3137 """
3143 """
3138 if node == self.nullid:
3144 if node == self.nullid:
3139 raise error.RevlogError(
3145 raise error.RevlogError(
3140 _(b"%s: attempt to add null revision") % self.display_id
3146 _(b"%s: attempt to add null revision") % self.display_id
3141 )
3147 )
3142 if (
3148 if (
3143 node == self.nodeconstants.wdirid
3149 node == self.nodeconstants.wdirid
3144 or node in self.nodeconstants.wdirfilenodeids
3150 or node in self.nodeconstants.wdirfilenodeids
3145 ):
3151 ):
3146 raise error.RevlogError(
3152 raise error.RevlogError(
3147 _(b"%s: attempt to add wdir revision") % self.display_id
3153 _(b"%s: attempt to add wdir revision") % self.display_id
3148 )
3154 )
3149 if not self._inner.is_writing:
3155 if not self._inner.is_writing:
3150 msg = b'adding revision outside `revlog._writing` context'
3156 msg = b'adding revision outside `revlog._writing` context'
3151 raise error.ProgrammingError(msg)
3157 raise error.ProgrammingError(msg)
3152
3158
3153 btext = [rawtext]
3159 btext = [rawtext]
3154
3160
3155 curr = len(self)
3161 curr = len(self)
3156 prev = curr - 1
3162 prev = curr - 1
3157
3163
3158 offset = self._get_data_offset(prev)
3164 offset = self._get_data_offset(prev)
3159
3165
3160 if self._concurrencychecker:
3166 if self._concurrencychecker:
3161 ifh, dfh, sdfh = self._inner._writinghandles
3167 ifh, dfh, sdfh = self._inner._writinghandles
3162 # XXX no checking for the sidedata file
3168 # XXX no checking for the sidedata file
3163 if self._inline:
3169 if self._inline:
3164 # offset is "as if" it were in the .d file, so we need to add on
3170 # offset is "as if" it were in the .d file, so we need to add on
3165 # the size of the entry metadata.
3171 # the size of the entry metadata.
3166 self._concurrencychecker(
3172 self._concurrencychecker(
3167 ifh, self._indexfile, offset + curr * self.index.entry_size
3173 ifh, self._indexfile, offset + curr * self.index.entry_size
3168 )
3174 )
3169 else:
3175 else:
3170 # Entries in the .i are a consistent size.
3176 # Entries in the .i are a consistent size.
3171 self._concurrencychecker(
3177 self._concurrencychecker(
3172 ifh, self._indexfile, curr * self.index.entry_size
3178 ifh, self._indexfile, curr * self.index.entry_size
3173 )
3179 )
3174 self._concurrencychecker(dfh, self._datafile, offset)
3180 self._concurrencychecker(dfh, self._datafile, offset)
3175
3181
3176 p1r, p2r = self.rev(p1), self.rev(p2)
3182 p1r, p2r = self.rev(p1), self.rev(p2)
3177
3183
3178 # full versions are inserted when the needed deltas
3184 # full versions are inserted when the needed deltas
3179 # become comparable to the uncompressed text
3185 # become comparable to the uncompressed text
3180 if rawtext is None:
3186 if rawtext is None:
3181 # need rawtext size, before changed by flag processors, which is
3187 # need rawtext size, before changed by flag processors, which is
3182 # the non-raw size. use revlog explicitly to avoid filelog's extra
3188 # the non-raw size. use revlog explicitly to avoid filelog's extra
3183 # logic that might remove metadata size.
3189 # logic that might remove metadata size.
3184 textlen = mdiff.patchedsize(
3190 textlen = mdiff.patchedsize(
3185 revlog.size(self, cachedelta[0]), cachedelta[1]
3191 revlog.size(self, cachedelta[0]), cachedelta[1]
3186 )
3192 )
3187 else:
3193 else:
3188 textlen = len(rawtext)
3194 textlen = len(rawtext)
3189
3195
3190 if deltacomputer is None:
3196 if deltacomputer is None:
3191 write_debug = None
3197 write_debug = None
3192 if self.delta_config.debug_delta:
3198 if self.delta_config.debug_delta:
3193 write_debug = transaction._report
3199 write_debug = transaction._report
3194 deltacomputer = deltautil.deltacomputer(
3200 deltacomputer = deltautil.deltacomputer(
3195 self, write_debug=write_debug
3201 self, write_debug=write_debug
3196 )
3202 )
3197
3203
3198 if cachedelta is not None and len(cachedelta) == 2:
3204 if cachedelta is not None and len(cachedelta) == 2:
3199 # If the cached delta has no information about how it should be
3205 # If the cached delta has no information about how it should be
3200 # reused, add the default reuse instruction according to the
3206 # reused, add the default reuse instruction according to the
3201 # revlog's configuration.
3207 # revlog's configuration.
3202 if (
3208 if (
3203 self.delta_config.general_delta
3209 self.delta_config.general_delta
3204 and self.delta_config.lazy_delta_base
3210 and self.delta_config.lazy_delta_base
3205 ):
3211 ):
3206 delta_base_reuse = DELTA_BASE_REUSE_TRY
3212 delta_base_reuse = DELTA_BASE_REUSE_TRY
3207 else:
3213 else:
3208 delta_base_reuse = DELTA_BASE_REUSE_NO
3214 delta_base_reuse = DELTA_BASE_REUSE_NO
3209 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3215 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3210
3216
3211 revinfo = revlogutils.revisioninfo(
3217 revinfo = revlogutils.revisioninfo(
3212 node,
3218 node,
3213 p1,
3219 p1,
3214 p2,
3220 p2,
3215 btext,
3221 btext,
3216 textlen,
3222 textlen,
3217 cachedelta,
3223 cachedelta,
3218 flags,
3224 flags,
3219 )
3225 )
3220
3226
3221 deltainfo = deltacomputer.finddeltainfo(revinfo)
3227 deltainfo = deltacomputer.finddeltainfo(revinfo)
3222
3228
3223 compression_mode = COMP_MODE_INLINE
3229 compression_mode = COMP_MODE_INLINE
3224 if self._docket is not None:
3230 if self._docket is not None:
3225 default_comp = self._docket.default_compression_header
3231 default_comp = self._docket.default_compression_header
3226 r = deltautil.delta_compression(default_comp, deltainfo)
3232 r = deltautil.delta_compression(default_comp, deltainfo)
3227 compression_mode, deltainfo = r
3233 compression_mode, deltainfo = r
3228
3234
3229 sidedata_compression_mode = COMP_MODE_INLINE
3235 sidedata_compression_mode = COMP_MODE_INLINE
3230 if sidedata and self.feature_config.has_side_data:
3236 if sidedata and self.feature_config.has_side_data:
3231 sidedata_compression_mode = COMP_MODE_PLAIN
3237 sidedata_compression_mode = COMP_MODE_PLAIN
3232 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3238 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3233 sidedata_offset = self._docket.sidedata_end
3239 sidedata_offset = self._docket.sidedata_end
3234 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3240 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3235 if (
3241 if (
3236 h != b'u'
3242 h != b'u'
3237 and comp_sidedata[0:1] != b'\0'
3243 and comp_sidedata[0:1] != b'\0'
3238 and len(comp_sidedata) < len(serialized_sidedata)
3244 and len(comp_sidedata) < len(serialized_sidedata)
3239 ):
3245 ):
3240 assert not h
3246 assert not h
3241 if (
3247 if (
3242 comp_sidedata[0:1]
3248 comp_sidedata[0:1]
3243 == self._docket.default_compression_header
3249 == self._docket.default_compression_header
3244 ):
3250 ):
3245 sidedata_compression_mode = COMP_MODE_DEFAULT
3251 sidedata_compression_mode = COMP_MODE_DEFAULT
3246 serialized_sidedata = comp_sidedata
3252 serialized_sidedata = comp_sidedata
3247 else:
3253 else:
3248 sidedata_compression_mode = COMP_MODE_INLINE
3254 sidedata_compression_mode = COMP_MODE_INLINE
3249 serialized_sidedata = comp_sidedata
3255 serialized_sidedata = comp_sidedata
3250 else:
3256 else:
3251 serialized_sidedata = b""
3257 serialized_sidedata = b""
3252 # Don't store the offset if the sidedata is empty; that way
3258 # Don't store the offset if the sidedata is empty; that way
3253 # we can easily detect empty sidedata and they will be no different
3259 # we can easily detect empty sidedata and they will be no different
3254 # than ones we manually add.
3260 # than ones we manually add.
3255 sidedata_offset = 0
3261 sidedata_offset = 0
3256
3262
3257 rank = RANK_UNKNOWN
3263 rank = RANK_UNKNOWN
3258 if self.feature_config.compute_rank:
3264 if self.feature_config.compute_rank:
3259 if (p1r, p2r) == (nullrev, nullrev):
3265 if (p1r, p2r) == (nullrev, nullrev):
3260 rank = 1
3266 rank = 1
3261 elif p1r != nullrev and p2r == nullrev:
3267 elif p1r != nullrev and p2r == nullrev:
3262 rank = 1 + self.fast_rank(p1r)
3268 rank = 1 + self.fast_rank(p1r)
3263 elif p1r == nullrev and p2r != nullrev:
3269 elif p1r == nullrev and p2r != nullrev:
3264 rank = 1 + self.fast_rank(p2r)
3270 rank = 1 + self.fast_rank(p2r)
3265 else: # merge node
3271 else: # merge node
3266 if rustdagop is not None and self.index.rust_ext_compat:
3272 if rustdagop is not None and self.index.rust_ext_compat:
3267 rank = rustdagop.rank(self.index, p1r, p2r)
3273 rank = rustdagop.rank(self.index, p1r, p2r)
3268 else:
3274 else:
3269 pmin, pmax = sorted((p1r, p2r))
3275 pmin, pmax = sorted((p1r, p2r))
3270 rank = 1 + self.fast_rank(pmax)
3276 rank = 1 + self.fast_rank(pmax)
3271 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3277 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3272
3278
3273 e = revlogutils.entry(
3279 e = revlogutils.entry(
3274 flags=flags,
3280 flags=flags,
3275 data_offset=offset,
3281 data_offset=offset,
3276 data_compressed_length=deltainfo.deltalen,
3282 data_compressed_length=deltainfo.deltalen,
3277 data_uncompressed_length=textlen,
3283 data_uncompressed_length=textlen,
3278 data_compression_mode=compression_mode,
3284 data_compression_mode=compression_mode,
3279 data_delta_base=deltainfo.base,
3285 data_delta_base=deltainfo.base,
3280 link_rev=link,
3286 link_rev=link,
3281 parent_rev_1=p1r,
3287 parent_rev_1=p1r,
3282 parent_rev_2=p2r,
3288 parent_rev_2=p2r,
3283 node_id=node,
3289 node_id=node,
3284 sidedata_offset=sidedata_offset,
3290 sidedata_offset=sidedata_offset,
3285 sidedata_compressed_length=len(serialized_sidedata),
3291 sidedata_compressed_length=len(serialized_sidedata),
3286 sidedata_compression_mode=sidedata_compression_mode,
3292 sidedata_compression_mode=sidedata_compression_mode,
3287 rank=rank,
3293 rank=rank,
3288 )
3294 )
3289
3295
3290 self.index.append(e)
3296 self.index.append(e)
3291 entry = self.index.entry_binary(curr)
3297 entry = self.index.entry_binary(curr)
3292 if curr == 0 and self._docket is None:
3298 if curr == 0 and self._docket is None:
3293 header = self._format_flags | self._format_version
3299 header = self._format_flags | self._format_version
3294 header = self.index.pack_header(header)
3300 header = self.index.pack_header(header)
3295 entry = header + entry
3301 entry = header + entry
3296 self._writeentry(
3302 self._writeentry(
3297 transaction,
3303 transaction,
3298 entry,
3304 entry,
3299 deltainfo.data,
3305 deltainfo.data,
3300 link,
3306 link,
3301 offset,
3307 offset,
3302 serialized_sidedata,
3308 serialized_sidedata,
3303 sidedata_offset,
3309 sidedata_offset,
3304 )
3310 )
3305
3311
3306 rawtext = btext[0]
3312 rawtext = btext[0]
3307
3313
3308 if alwayscache and rawtext is None:
3314 if alwayscache and rawtext is None:
3309 rawtext = deltacomputer.buildtext(revinfo)
3315 rawtext = deltacomputer.buildtext(revinfo)
3310
3316
3311 if type(rawtext) == bytes: # only accept immutable objects
3317 if type(rawtext) == bytes: # only accept immutable objects
3312 self._inner._revisioncache = (node, curr, rawtext)
3318 self._inner._revisioncache = (node, curr, rawtext)
3313 self._chainbasecache[curr] = deltainfo.chainbase
3319 self._chainbasecache[curr] = deltainfo.chainbase
3314 return curr
3320 return curr
3315
3321
3316 def _get_data_offset(self, prev):
3322 def _get_data_offset(self, prev):
3317 """Returns the current offset in the (in-transaction) data file.
3323 """Returns the current offset in the (in-transaction) data file.
3318 Versions < 2 of the revlog can get this in O(1), while revlog v2 needs a docket
3324 Versions < 2 of the revlog can get this in O(1), while revlog v2 needs a docket
3319 file to store that information: since sidedata can be rewritten to the
3325 file to store that information: since sidedata can be rewritten to the
3320 end of the data file within a transaction, you can have cases where, for
3326 end of the data file within a transaction, you can have cases where, for
3321 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3327 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3322 to `n - 1`'s sidedata being written after `n`'s data.
3328 to `n - 1`'s sidedata being written after `n`'s data.
3323
3329
3324 TODO cache this in a docket file before getting out of experimental."""
3330 TODO cache this in a docket file before getting out of experimental."""
3325 if self._docket is None:
3331 if self._docket is None:
3326 return self.end(prev)
3332 return self.end(prev)
3327 else:
3333 else:
3328 return self._docket.data_end
3334 return self._docket.data_end
3329
3335
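# A minimal sketch of the two cases handled above (illustrative):
#
#   offset = self.end(prev)          # revlog v0/v1: derived from the index
#   offset = self._docket.data_end   # revlog v2: tracked in the docket file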
3330 def _writeentry(
3336 def _writeentry(
3331 self,
3337 self,
3332 transaction,
3338 transaction,
3333 entry,
3339 entry,
3334 data,
3340 data,
3335 link,
3341 link,
3336 offset,
3342 offset,
3337 sidedata,
3343 sidedata,
3338 sidedata_offset,
3344 sidedata_offset,
3339 ):
3345 ):
3340 # Files opened in a+ mode have inconsistent behavior on various
3346 # Files opened in a+ mode have inconsistent behavior on various
3341 # platforms. Windows requires that a file positioning call be made
3347 # platforms. Windows requires that a file positioning call be made
3342 # when the file handle transitions between reads and writes. See
3348 # when the file handle transitions between reads and writes. See
3343 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3349 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3344 # platforms, Python or the platform itself can be buggy. Some versions
3350 # platforms, Python or the platform itself can be buggy. Some versions
3345 # of Solaris have been observed to not append at the end of the file
3351 # of Solaris have been observed to not append at the end of the file
3346 # if the file was seeked to before the end. See issue4943 for more.
3352 # if the file was seeked to before the end. See issue4943 for more.
3347 #
3353 #
3348 # We work around this issue by inserting a seek() before writing.
3354 # We work around this issue by inserting a seek() before writing.
3349 # Note: This is likely not necessary on Python 3. However, because
3355 # Note: This is likely not necessary on Python 3. However, because
3350 # the file handle is reused for reads and may be seeked there, we need
3356 # the file handle is reused for reads and may be seeked there, we need
3351 # to be careful before changing this.
3357 # to be careful before changing this.
3352 index_end = data_end = sidedata_end = None
3358 index_end = data_end = sidedata_end = None
3353 if self._docket is not None:
3359 if self._docket is not None:
3354 index_end = self._docket.index_end
3360 index_end = self._docket.index_end
3355 data_end = self._docket.data_end
3361 data_end = self._docket.data_end
3356 sidedata_end = self._docket.sidedata_end
3362 sidedata_end = self._docket.sidedata_end
3357
3363
3358 files_end = self._inner.write_entry(
3364 files_end = self._inner.write_entry(
3359 transaction,
3365 transaction,
3360 entry,
3366 entry,
3361 data,
3367 data,
3362 link,
3368 link,
3363 offset,
3369 offset,
3364 sidedata,
3370 sidedata,
3365 sidedata_offset,
3371 sidedata_offset,
3366 index_end,
3372 index_end,
3367 data_end,
3373 data_end,
3368 sidedata_end,
3374 sidedata_end,
3369 )
3375 )
3370 self._enforceinlinesize(transaction)
3376 self._enforceinlinesize(transaction)
3371 if self._docket is not None:
3377 if self._docket is not None:
3372 self._docket.index_end = files_end[0]
3378 self._docket.index_end = files_end[0]
3373 self._docket.data_end = files_end[1]
3379 self._docket.data_end = files_end[1]
3374 self._docket.sidedata_end = files_end[2]
3380 self._docket.sidedata_end = files_end[2]
3375
3381
3376 nodemaputil.setup_persistent_nodemap(transaction, self)
3382 nodemaputil.setup_persistent_nodemap(transaction, self)
3377
3383
3378 def addgroup(
3384 def addgroup(
3379 self,
3385 self,
3380 deltas,
3386 deltas,
3381 linkmapper,
3387 linkmapper,
3382 transaction,
3388 transaction,
3383 alwayscache=False,
3389 alwayscache=False,
3384 addrevisioncb=None,
3390 addrevisioncb=None,
3385 duplicaterevisioncb=None,
3391 duplicaterevisioncb=None,
3386 debug_info=None,
3392 debug_info=None,
3387 delta_base_reuse_policy=None,
3393 delta_base_reuse_policy=None,
3388 ):
3394 ):
3389 """
3395 """
3390 add a delta group
3396 add a delta group
3391
3397
3392 given a set of deltas, add them to the revision log. The
3398 given a set of deltas, add them to the revision log. The
3393 first delta is against its parent, which should be in our
3399 first delta is against its parent, which should be in our
3394 log; the rest are against the previous delta.
3400 log; the rest are against the previous delta.
3395
3401
3396 If ``addrevisioncb`` is defined, it will be called with arguments of
3402 If ``addrevisioncb`` is defined, it will be called with arguments of
3397 this revlog and the node that was added.
3403 this revlog and the node that was added.
3398 """
3404 """
3399
3405
3400 if self._adding_group:
3406 if self._adding_group:
3401 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3407 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3402
3408
3403 # read the default delta-base reuse policy from revlog config if the
3409 # read the default delta-base reuse policy from revlog config if the
3404 # group did not specify one.
3410 # group did not specify one.
3405 if delta_base_reuse_policy is None:
3411 if delta_base_reuse_policy is None:
3406 if (
3412 if (
3407 self.delta_config.general_delta
3413 self.delta_config.general_delta
3408 and self.delta_config.lazy_delta_base
3414 and self.delta_config.lazy_delta_base
3409 ):
3415 ):
3410 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3416 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3411 else:
3417 else:
3412 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3418 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3413
3419
3414 self._adding_group = True
3420 self._adding_group = True
3415 empty = True
3421 empty = True
3416 try:
3422 try:
3417 with self._writing(transaction):
3423 with self._writing(transaction):
3418 write_debug = None
3424 write_debug = None
3419 if self.delta_config.debug_delta:
3425 if self.delta_config.debug_delta:
3420 write_debug = transaction._report
3426 write_debug = transaction._report
3421 deltacomputer = deltautil.deltacomputer(
3427 deltacomputer = deltautil.deltacomputer(
3422 self,
3428 self,
3423 write_debug=write_debug,
3429 write_debug=write_debug,
3424 debug_info=debug_info,
3430 debug_info=debug_info,
3425 )
3431 )
3426 # loop through our set of deltas
3432 # loop through our set of deltas
3427 for data in deltas:
3433 for data in deltas:
3428 (
3434 (
3429 node,
3435 node,
3430 p1,
3436 p1,
3431 p2,
3437 p2,
3432 linknode,
3438 linknode,
3433 deltabase,
3439 deltabase,
3434 delta,
3440 delta,
3435 flags,
3441 flags,
3436 sidedata,
3442 sidedata,
3437 ) = data
3443 ) = data
3438 link = linkmapper(linknode)
3444 link = linkmapper(linknode)
3439 flags = flags or REVIDX_DEFAULT_FLAGS
3445 flags = flags or REVIDX_DEFAULT_FLAGS
3440
3446
3441 rev = self.index.get_rev(node)
3447 rev = self.index.get_rev(node)
3442 if rev is not None:
3448 if rev is not None:
3443 # this can happen if two branches make the same change
3449 # this can happen if two branches make the same change
3444 self._nodeduplicatecallback(transaction, rev)
3450 self._nodeduplicatecallback(transaction, rev)
3445 if duplicaterevisioncb:
3451 if duplicaterevisioncb:
3446 duplicaterevisioncb(self, rev)
3452 duplicaterevisioncb(self, rev)
3447 empty = False
3453 empty = False
3448 continue
3454 continue
3449
3455
3450 for p in (p1, p2):
3456 for p in (p1, p2):
3451 if not self.index.has_node(p):
3457 if not self.index.has_node(p):
3452 raise error.LookupError(
3458 raise error.LookupError(
3453 p, self.radix, _(b'unknown parent')
3459 p, self.radix, _(b'unknown parent')
3454 )
3460 )
3455
3461
3456 if not self.index.has_node(deltabase):
3462 if not self.index.has_node(deltabase):
3457 raise error.LookupError(
3463 raise error.LookupError(
3458 deltabase, self.display_id, _(b'unknown delta base')
3464 deltabase, self.display_id, _(b'unknown delta base')
3459 )
3465 )
3460
3466
3461 baserev = self.rev(deltabase)
3467 baserev = self.rev(deltabase)
3462
3468
3463 if baserev != nullrev and self.iscensored(baserev):
3469 if baserev != nullrev and self.iscensored(baserev):
3464 # if base is censored, delta must be full replacement in a
3470 # if base is censored, delta must be full replacement in a
3465 # single patch operation
3471 # single patch operation
3466 hlen = struct.calcsize(b">lll")
3472 hlen = struct.calcsize(b">lll")
3467 oldlen = self.rawsize(baserev)
3473 oldlen = self.rawsize(baserev)
3468 newlen = len(delta) - hlen
3474 newlen = len(delta) - hlen
3469 if delta[:hlen] != mdiff.replacediffheader(
3475 if delta[:hlen] != mdiff.replacediffheader(
3470 oldlen, newlen
3476 oldlen, newlen
3471 ):
3477 ):
3472 raise error.CensoredBaseError(
3478 raise error.CensoredBaseError(
3473 self.display_id, self.node(baserev)
3479 self.display_id, self.node(baserev)
3474 )
3480 )
3475
3481
3476 if not flags and self._peek_iscensored(baserev, delta):
3482 if not flags and self._peek_iscensored(baserev, delta):
3477 flags |= REVIDX_ISCENSORED
3483 flags |= REVIDX_ISCENSORED
3478
3484
3479 # We assume consumers of addrevisioncb will want to retrieve
3485 # We assume consumers of addrevisioncb will want to retrieve
3480 # the added revision, which will require a call to
3486 # the added revision, which will require a call to
3481 # revision(). revision() will fast path if there is a cache
3487 # revision(). revision() will fast path if there is a cache
3482 # hit. So, we tell _addrevision() to always cache in this case.
3488 # hit. So, we tell _addrevision() to always cache in this case.
3483 # We're only using addgroup() in the context of changegroup
3489 # We're only using addgroup() in the context of changegroup
3484 # generation so the revision data can always be handled as raw
3490 # generation so the revision data can always be handled as raw
3485 # by the flagprocessor.
3491 # by the flagprocessor.
3486 rev = self._addrevision(
3492 rev = self._addrevision(
3487 node,
3493 node,
3488 None,
3494 None,
3489 transaction,
3495 transaction,
3490 link,
3496 link,
3491 p1,
3497 p1,
3492 p2,
3498 p2,
3493 flags,
3499 flags,
3494 (baserev, delta, delta_base_reuse_policy),
3500 (baserev, delta, delta_base_reuse_policy),
3495 alwayscache=alwayscache,
3501 alwayscache=alwayscache,
3496 deltacomputer=deltacomputer,
3502 deltacomputer=deltacomputer,
3497 sidedata=sidedata,
3503 sidedata=sidedata,
3498 )
3504 )
3499
3505
3500 if addrevisioncb:
3506 if addrevisioncb:
3501 addrevisioncb(self, rev)
3507 addrevisioncb(self, rev)
3502 empty = False
3508 empty = False
3503 finally:
3509 finally:
3504 self._adding_group = False
3510 self._adding_group = False
3505 return not empty
3511 return not empty
3506
3512
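# A minimal sketch of the shape each item in `deltas` is expected to have,
# matching the unpacking in the loop above (the values are illustrative):
#
#   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
#
# with `linkmapper(linknode)` supplying the link revision and `deltabase`
# naming an already-stored node that the delta applies against.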
3507 def iscensored(self, rev):
3513 def iscensored(self, rev):
3508 """Check if a file revision is censored."""
3514 """Check if a file revision is censored."""
3509 if not self.feature_config.censorable:
3515 if not self.feature_config.censorable:
3510 return False
3516 return False
3511
3517
3512 return self.flags(rev) & REVIDX_ISCENSORED
3518 return self.flags(rev) & REVIDX_ISCENSORED
3513
3519
3514 def _peek_iscensored(self, baserev, delta):
3520 def _peek_iscensored(self, baserev, delta):
3515 """Quickly check if a delta produces a censored revision."""
3521 """Quickly check if a delta produces a censored revision."""
3516 if not self.feature_config.censorable:
3522 if not self.feature_config.censorable:
3517 return False
3523 return False
3518
3524
3519 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3525 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3520
3526
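# A minimal sketch of the censorship check (illustrative), assuming the revlog
# was configured as censorable via its feature config:
#
#   censored = bool(rl.flags(rev) & REVIDX_ISCENSORED)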
3521 def getstrippoint(self, minlink):
3527 def getstrippoint(self, minlink):
3522 """find the minimum rev that must be stripped to strip the linkrev
3528 """find the minimum rev that must be stripped to strip the linkrev
3523
3529
3524 Returns a tuple containing the minimum rev and a set of all revs that
3530 Returns a tuple containing the minimum rev and a set of all revs that
3525 have linkrevs that will be broken by this strip.
3531 have linkrevs that will be broken by this strip.
3526 """
3532 """
3527 return storageutil.resolvestripinfo(
3533 return storageutil.resolvestripinfo(
3528 minlink,
3534 minlink,
3529 len(self) - 1,
3535 len(self) - 1,
3530 self.headrevs(),
3536 self.headrevs(),
3531 self.linkrev,
3537 self.linkrev,
3532 self.parentrevs,
3538 self.parentrevs,
3533 )
3539 )
3534
3540
3535 def strip(self, minlink, transaction):
3541 def strip(self, minlink, transaction):
3536 """truncate the revlog on the first revision with a linkrev >= minlink
3542 """truncate the revlog on the first revision with a linkrev >= minlink
3537
3543
3538 This function is called when we're stripping revision minlink and
3544 This function is called when we're stripping revision minlink and
3539 its descendants from the repository.
3545 its descendants from the repository.
3540
3546
3541 We have to remove all revisions with linkrev >= minlink, because
3547 We have to remove all revisions with linkrev >= minlink, because
3542 the equivalent changelog revisions will be renumbered after the
3548 the equivalent changelog revisions will be renumbered after the
3543 strip.
3549 strip.
3544
3550
3545 So we truncate the revlog on the first of these revisions, and
3551 So we truncate the revlog on the first of these revisions, and
3546 trust that the caller has saved the revisions that shouldn't be
3552 trust that the caller has saved the revisions that shouldn't be
3547 removed and that it'll re-add them after this truncation.
3553 removed and that it'll re-add them after this truncation.
3548 """
3554 """
3549 if len(self) == 0:
3555 if len(self) == 0:
3550 return
3556 return
3551
3557
3552 rev, _ = self.getstrippoint(minlink)
3558 rev, _ = self.getstrippoint(minlink)
3553 if rev == len(self):
3559 if rev == len(self):
3554 return
3560 return
3555
3561
3556 # first truncate the files on disk
3562 # first truncate the files on disk
3557 data_end = self.start(rev)
3563 data_end = self.start(rev)
3558 if not self._inline:
3564 if not self._inline:
3559 transaction.add(self._datafile, data_end)
3565 transaction.add(self._datafile, data_end)
3560 end = rev * self.index.entry_size
3566 end = rev * self.index.entry_size
3561 else:
3567 else:
3562 end = data_end + (rev * self.index.entry_size)
3568 end = data_end + (rev * self.index.entry_size)
3563
3569
3564 if self._sidedatafile:
3570 if self._sidedatafile:
3565 sidedata_end = self.sidedata_cut_off(rev)
3571 sidedata_end = self.sidedata_cut_off(rev)
3566 transaction.add(self._sidedatafile, sidedata_end)
3572 transaction.add(self._sidedatafile, sidedata_end)
3567
3573
3568 transaction.add(self._indexfile, end)
3574 transaction.add(self._indexfile, end)
3569 if self._docket is not None:
3575 if self._docket is not None:
3570 # XXX we could leverage the docket while stripping. However it is
3576 # XXX we could leverage the docket while stripping. However it is
3571 # not powerful enough at the time of this comment
3577 # not powerful enough at the time of this comment
3572 self._docket.index_end = end
3578 self._docket.index_end = end
3573 self._docket.data_end = data_end
3579 self._docket.data_end = data_end
3574 self._docket.sidedata_end = sidedata_end
3580 self._docket.sidedata_end = sidedata_end
3575 self._docket.write(transaction, stripping=True)
3581 self._docket.write(transaction, stripping=True)
3576
3582
3577 # then reset internal state in memory to forget those revisions
3583 # then reset internal state in memory to forget those revisions
3578 self._chaininfocache = util.lrucachedict(500)
3584 self._chaininfocache = util.lrucachedict(500)
3579 self._inner.clear_cache()
3585 self._inner.clear_cache()
3580
3586
3581 del self.index[rev:-1]
3587 del self.index[rev:-1]
3582
3588
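# A minimal usage sketch, assuming `rl` is a revlog, `tr` an open transaction
# and `minlink` the first linkrev being stripped (names are illustrative):
#
#   striprev, brokenrevs = rl.getstrippoint(minlink)
#   rl.strip(minlink, tr)   # truncates the files on disk, then resets caches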
3583 def checksize(self):
3589 def checksize(self):
3584 """Check size of index and data files
3590 """Check size of index and data files
3585
3591
3586 return a (dd, di) tuple.
3592 return a (dd, di) tuple.
3587 - dd: extra bytes for the "data" file
3593 - dd: extra bytes for the "data" file
3588 - di: extra bytes for the "index" file
3594 - di: extra bytes for the "index" file
3589
3595
3590 A healthy revlog will return (0, 0).
3596 A healthy revlog will return (0, 0).
3591 """
3597 """
3592 expected = 0
3598 expected = 0
3593 if len(self):
3599 if len(self):
3594 expected = max(0, self.end(len(self) - 1))
3600 expected = max(0, self.end(len(self) - 1))
3595
3601
3596 try:
3602 try:
3597 with self._datafp() as f:
3603 with self._datafp() as f:
3598 f.seek(0, io.SEEK_END)
3604 f.seek(0, io.SEEK_END)
3599 actual = f.tell()
3605 actual = f.tell()
3600 dd = actual - expected
3606 dd = actual - expected
3601 except FileNotFoundError:
3607 except FileNotFoundError:
3602 dd = 0
3608 dd = 0
3603
3609
3604 try:
3610 try:
3605 f = self.opener(self._indexfile)
3611 f = self.opener(self._indexfile)
3606 f.seek(0, io.SEEK_END)
3612 f.seek(0, io.SEEK_END)
3607 actual = f.tell()
3613 actual = f.tell()
3608 f.close()
3614 f.close()
3609 s = self.index.entry_size
3615 s = self.index.entry_size
3610 i = max(0, actual // s)
3616 i = max(0, actual // s)
3611 di = actual - (i * s)
3617 di = actual - (i * s)
3612 if self._inline:
3618 if self._inline:
3613 databytes = 0
3619 databytes = 0
3614 for r in self:
3620 for r in self:
3615 databytes += max(0, self.length(r))
3621 databytes += max(0, self.length(r))
3616 dd = 0
3622 dd = 0
3617 di = actual - len(self) * s - databytes
3623 di = actual - len(self) * s - databytes
3618 except FileNotFoundError:
3624 except FileNotFoundError:
3619 di = 0
3625 di = 0
3620
3626
3621 return (dd, di)
3627 return (dd, di)
3622
3628
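# A minimal sketch of how the result is typically interpreted (illustrative):
#
#   dd, di = rl.checksize()
#   healthy = (dd, di) == (0, 0)   # extra data/index bytes hint at truncation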
3623 def files(self):
3629 def files(self):
3624 """return list of files that compose this revlog"""
3630 """return list of files that compose this revlog"""
3625 res = [self._indexfile]
3631 res = [self._indexfile]
3626 if self._docket_file is None:
3632 if self._docket_file is None:
3627 if not self._inline:
3633 if not self._inline:
3628 res.append(self._datafile)
3634 res.append(self._datafile)
3629 else:
3635 else:
3630 res.append(self._docket_file)
3636 res.append(self._docket_file)
3631 res.extend(self._docket.old_index_filepaths(include_empty=False))
3637 res.extend(self._docket.old_index_filepaths(include_empty=False))
3632 if self._docket.data_end:
3638 if self._docket.data_end:
3633 res.append(self._datafile)
3639 res.append(self._datafile)
3634 res.extend(self._docket.old_data_filepaths(include_empty=False))
3640 res.extend(self._docket.old_data_filepaths(include_empty=False))
3635 if self._docket.sidedata_end:
3641 if self._docket.sidedata_end:
3636 res.append(self._sidedatafile)
3642 res.append(self._sidedatafile)
3637 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3643 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3638 return res
3644 return res
3639
3645
3640 def emitrevisions(
3646 def emitrevisions(
3641 self,
3647 self,
3642 nodes,
3648 nodes,
3643 nodesorder=None,
3649 nodesorder=None,
3644 revisiondata=False,
3650 revisiondata=False,
3645 assumehaveparentrevisions=False,
3651 assumehaveparentrevisions=False,
3646 deltamode=repository.CG_DELTAMODE_STD,
3652 deltamode=repository.CG_DELTAMODE_STD,
3647 sidedata_helpers=None,
3653 sidedata_helpers=None,
3648 debug_info=None,
3654 debug_info=None,
3649 ):
3655 ):
3650 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3656 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3651 raise error.ProgrammingError(
3657 raise error.ProgrammingError(
3652 b'unhandled value for nodesorder: %s' % nodesorder
3658 b'unhandled value for nodesorder: %s' % nodesorder
3653 )
3659 )
3654
3660
3655 if nodesorder is None and not self.delta_config.general_delta:
3661 if nodesorder is None and not self.delta_config.general_delta:
3656 nodesorder = b'storage'
3662 nodesorder = b'storage'
3657
3663
3658 if (
3664 if (
3659 not self._storedeltachains
3665 not self._storedeltachains
3660 and deltamode != repository.CG_DELTAMODE_PREV
3666 and deltamode != repository.CG_DELTAMODE_PREV
3661 ):
3667 ):
3662 deltamode = repository.CG_DELTAMODE_FULL
3668 deltamode = repository.CG_DELTAMODE_FULL
3663
3669
3664 return storageutil.emitrevisions(
3670 return storageutil.emitrevisions(
3665 self,
3671 self,
3666 nodes,
3672 nodes,
3667 nodesorder,
3673 nodesorder,
3668 revlogrevisiondelta,
3674 revlogrevisiondelta,
3669 deltaparentfn=self.deltaparent,
3675 deltaparentfn=self.deltaparent,
3670 candeltafn=self._candelta,
3676 candeltafn=self._candelta,
3671 rawsizefn=self.rawsize,
3677 rawsizefn=self.rawsize,
3672 revdifffn=self.revdiff,
3678 revdifffn=self.revdiff,
3673 flagsfn=self.flags,
3679 flagsfn=self.flags,
3674 deltamode=deltamode,
3680 deltamode=deltamode,
3675 revisiondata=revisiondata,
3681 revisiondata=revisiondata,
3676 assumehaveparentrevisions=assumehaveparentrevisions,
3682 assumehaveparentrevisions=assumehaveparentrevisions,
3677 sidedata_helpers=sidedata_helpers,
3683 sidedata_helpers=sidedata_helpers,
3678 debug_info=debug_info,
3684 debug_info=debug_info,
3679 )
3685 )
3680
3686
3681 DELTAREUSEALWAYS = b'always'
3687 DELTAREUSEALWAYS = b'always'
3682 DELTAREUSESAMEREVS = b'samerevs'
3688 DELTAREUSESAMEREVS = b'samerevs'
3683 DELTAREUSENEVER = b'never'
3689 DELTAREUSENEVER = b'never'
3684
3690
3685 DELTAREUSEFULLADD = b'fulladd'
3691 DELTAREUSEFULLADD = b'fulladd'
3686
3692
3687 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3693 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3688
3694
3689 def clone(
3695 def clone(
3690 self,
3696 self,
3691 tr,
3697 tr,
3692 destrevlog,
3698 destrevlog,
3693 addrevisioncb=None,
3699 addrevisioncb=None,
3694 deltareuse=DELTAREUSESAMEREVS,
3700 deltareuse=DELTAREUSESAMEREVS,
3695 forcedeltabothparents=None,
3701 forcedeltabothparents=None,
3696 sidedata_helpers=None,
3702 sidedata_helpers=None,
3697 ):
3703 ):
3698 """Copy this revlog to another, possibly with format changes.
3704 """Copy this revlog to another, possibly with format changes.
3699
3705
3700 The destination revlog will contain the same revisions and nodes.
3706 The destination revlog will contain the same revisions and nodes.
3701 However, it may not be bit-for-bit identical due to e.g. delta encoding
3707 However, it may not be bit-for-bit identical due to e.g. delta encoding
3702 differences.
3708 differences.
3703
3709
3704 The ``deltareuse`` argument controls how deltas from the existing revlog
3710 The ``deltareuse`` argument controls how deltas from the existing revlog
3705 are preserved in the destination revlog. The argument can have the
3711 are preserved in the destination revlog. The argument can have the
3706 following values:
3712 following values:
3707
3713
3708 DELTAREUSEALWAYS
3714 DELTAREUSEALWAYS
3709 Deltas will always be reused (if possible), even if the destination
3715 Deltas will always be reused (if possible), even if the destination
3710 revlog would not select the same revisions for the delta. This is the
3716 revlog would not select the same revisions for the delta. This is the
3711 fastest mode of operation.
3717 fastest mode of operation.
3712 DELTAREUSESAMEREVS
3718 DELTAREUSESAMEREVS
3713 Deltas will be reused if the destination revlog would pick the same
3719 Deltas will be reused if the destination revlog would pick the same
3714 revisions for the delta. This mode strikes a balance between speed
3720 revisions for the delta. This mode strikes a balance between speed
3715 and optimization.
3721 and optimization.
3716 DELTAREUSENEVER
3722 DELTAREUSENEVER
3717 Deltas will never be reused. This is the slowest mode of execution.
3723 Deltas will never be reused. This is the slowest mode of execution.
3718 This mode can be used to recompute deltas (e.g. if the diff/delta
3724 This mode can be used to recompute deltas (e.g. if the diff/delta
3719 algorithm changes).
3725 algorithm changes).
3720 DELTAREUSEFULLADD
3726 DELTAREUSEFULLADD
3721 Revisions will be re-added as if they were new content. This is
3727 Revisions will be re-added as if they were new content. This is
3722 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3728 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3723 e.g. large file detection and handling.
3729 e.g. large file detection and handling.
3724
3730
3725 Delta computation can be slow, so the choice of delta reuse policy can
3731 Delta computation can be slow, so the choice of delta reuse policy can
3726 significantly affect run time.
3732 significantly affect run time.
3727
3733
3728 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3734 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3729 two extremes. Deltas will be reused if they are appropriate. But if the
3735 two extremes. Deltas will be reused if they are appropriate. But if the
3730 delta could choose a better revision, it will do so. This means if you
3736 delta could choose a better revision, it will do so. This means if you
3731 are converting a non-generaldelta revlog to a generaldelta revlog,
3737 are converting a non-generaldelta revlog to a generaldelta revlog,
3732 deltas will be recomputed if the delta's parent isn't a parent of the
3738 deltas will be recomputed if the delta's parent isn't a parent of the
3733 revision.
3739 revision.
3734
3740
3735 In addition to the delta policy, the ``forcedeltabothparents``
3741 In addition to the delta policy, the ``forcedeltabothparents``
3736 argument controls whether to force compute deltas against both parents
3742 argument controls whether to force compute deltas against both parents
3737 for merges. If unset, the destination revlog's current configuration is used.
3743 for merges. If unset, the destination revlog's current configuration is used.
3738
3744
3739 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3745 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3740 `sidedata_helpers`.
3746 `sidedata_helpers`.
3741 """
3747 """
3742 if deltareuse not in self.DELTAREUSEALL:
3748 if deltareuse not in self.DELTAREUSEALL:
3743 raise ValueError(
3749 raise ValueError(
3744 _(b'value for deltareuse invalid: %s') % deltareuse
3750 _(b'value for deltareuse invalid: %s') % deltareuse
3745 )
3751 )
3746
3752
3747 if len(destrevlog):
3753 if len(destrevlog):
3748 raise ValueError(_(b'destination revlog is not empty'))
3754 raise ValueError(_(b'destination revlog is not empty'))
3749
3755
3750 if getattr(self, 'filteredrevs', None):
3756 if getattr(self, 'filteredrevs', None):
3751 raise ValueError(_(b'source revlog has filtered revisions'))
3757 raise ValueError(_(b'source revlog has filtered revisions'))
3752 if getattr(destrevlog, 'filteredrevs', None):
3758 if getattr(destrevlog, 'filteredrevs', None):
3753 raise ValueError(_(b'destination revlog has filtered revisions'))
3759 raise ValueError(_(b'destination revlog has filtered revisions'))
3754
3760
3755 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3761 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3756 # if possible.
3762 # if possible.
3757 old_delta_config = destrevlog.delta_config
3763 old_delta_config = destrevlog.delta_config
3758 destrevlog.delta_config = destrevlog.delta_config.copy()
3764 destrevlog.delta_config = destrevlog.delta_config.copy()
3759
3765
3760 try:
3766 try:
3761 if deltareuse == self.DELTAREUSEALWAYS:
3767 if deltareuse == self.DELTAREUSEALWAYS:
3762 destrevlog.delta_config.lazy_delta_base = True
3768 destrevlog.delta_config.lazy_delta_base = True
3763 destrevlog.delta_config.lazy_delta = True
3769 destrevlog.delta_config.lazy_delta = True
3764 elif deltareuse == self.DELTAREUSESAMEREVS:
3770 elif deltareuse == self.DELTAREUSESAMEREVS:
3765 destrevlog.delta_config.lazy_delta_base = False
3771 destrevlog.delta_config.lazy_delta_base = False
3766 destrevlog.delta_config.lazy_delta = True
3772 destrevlog.delta_config.lazy_delta = True
3767 elif deltareuse == self.DELTAREUSENEVER:
3773 elif deltareuse == self.DELTAREUSENEVER:
3768 destrevlog.delta_config.lazy_delta_base = False
3774 destrevlog.delta_config.lazy_delta_base = False
3769 destrevlog.delta_config.lazy_delta = False
3775 destrevlog.delta_config.lazy_delta = False
3770
3776
3771 delta_both_parents = (
3777 delta_both_parents = (
3772 forcedeltabothparents or old_delta_config.delta_both_parents
3778 forcedeltabothparents or old_delta_config.delta_both_parents
3773 )
3779 )
3774 destrevlog.delta_config.delta_both_parents = delta_both_parents
3780 destrevlog.delta_config.delta_both_parents = delta_both_parents
3775
3781
3776 with self.reading(), destrevlog._writing(tr):
3782 with self.reading(), destrevlog._writing(tr):
3777 self._clone(
3783 self._clone(
3778 tr,
3784 tr,
3779 destrevlog,
3785 destrevlog,
3780 addrevisioncb,
3786 addrevisioncb,
3781 deltareuse,
3787 deltareuse,
3782 forcedeltabothparents,
3788 forcedeltabothparents,
3783 sidedata_helpers,
3789 sidedata_helpers,
3784 )
3790 )
3785
3791
3786 finally:
3792 finally:
3787 destrevlog.delta_config = old_delta_config
3793 destrevlog.delta_config = old_delta_config
3788
3794
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]
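            # For reference: the raw index tuple packs the byte offset and the
            # 16 flag bits together in entry[0] (hence the 0xFFFF mask above),
            # stores the link revision at entry[4], the parent *revisions* at
            # entry[5]/entry[6], and the binary node at entry[7].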

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
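                # DELTAREUSEFULLADD takes the slow path: the revision is fully
                # resolved and re-added through the public addrevision() API,
                # letting the destination compute fresh deltas.  The `else`
                # branch below instead feeds _addrevision() the raw text or a
                # reused cached delta.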
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

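    # The `addrevisioncb` hook is invoked as addrevisioncb(self, rev, node)
    # after each revision is copied (see the end of the loop above).  A
    # hypothetical progress callback passed to clone() could therefore be as
    # simple as:
    #
    #     copied = []
    #     def progress_cb(srclog, rev, node):
    #         copied.append(rev)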
    def censorrevision(self, tr, censor_nodes, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censor_nodes, tombstone)
        else:
            rewrite.v2_censor(self, tr, censor_nodes, tombstone)

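    # censorrevision() dispatches on the on-disk format: revlogv0 cannot be
    # censored at all, while v1 and v2 delegate to the rewrite module.  A
    # hypothetical call site (names illustrative) would look like
    #
    #     somelog.censorrevision(tr, {badnode}, tombstone=b'censored')
    #
    # where `badnode` is the binary node of the revision to erase.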
    def verifyintegrity(self, state) -> Iterable[RevLogProblem]:
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

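        # As the two messages above indicate, `checksize()` reports structural
        # damage: `dd` is how many bytes the data file is off by and `di` how
        # many stray bytes trail the index.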
        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).
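            # Concrete illustration (assuming the filelog metadata layout
            # referenced above): for a renamed file, rawtext looks like
            #     b'\1\ncopy: old/name\ncopyrev: <hex>\n\1\n' + text
            # so len(rawtext) == len(read()) + LM, matching the L2 / L2-LM row
            # in the table above.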

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

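    # A hypothetical driver of verifyintegrity() (sketch only, names assumed):
    # the `state` dict must carry at least b'expectedversion' and
    # b'erroroncensored' (both consulted above), and the caller inspects the
    # yielded problems, e.g.
    #
    #     for problem in rl.verifyintegrity(state):
    #         if problem.error:
    #             ui.warn(problem.error + b'\n')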
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

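    # storageinfo() only computes what is asked for; a hypothetical call such
    # as
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #
    # returns a dict with just b'revisionscount' and b'storedsize' populated,
    # the latter summing the on-disk size of every file returned by files().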
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return
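        # `helpers` is assumed here to be the usual sidedata_helpers triple:
        # the check above treats helpers[1] as the sidedata computers (data to
        # generate) and helpers[2] as the categories to remove.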

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
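                # Compression-mode summary (as used here): COMP_MODE_PLAIN
                # stores the sidedata uncompressed, COMP_MODE_DEFAULT relies
                # on the docket's default compression header, and
                # COMP_MODE_INLINE keeps the compression header inside the
                # chunk itself.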
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)