##// END OF EJS Templates
branchcache: gather newly closed head in a dedicated set...
branchcache: gather newly closed head in a dedicated set. This is part of a series to more clearly split the update into two steps. This will allow us to introduce a fast path during update in a future changeset.

File last commit:

r52427:767b62cb default
r52427:767b62cb default
Show More
branchmap.py
1277 lines | 45.2 KiB | text/x-python | PythonLexer
Pierre-Yves David
branchmap: create a mercurial.branchmap module...
r18116 # branchmap.py - logic to compute, maintain and store the branchmap for a local repo
#
Raphaël Gomès
contributor: change mentions of mpm to olivia...
r47575 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
Pierre-Yves David
branchmap: create a mercurial.branchmap module...
r18116 #
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Pierre-Yves David
branchmap: extract write logic from localrepo
r18117
Gregory Szorc
branchmap: use absolute_import
r25918
import struct
from .node import (
bin,
hex,
nullrev,
)
pytype: import typing directly...
r52178
from typing import (
branchcache: move the header loading in a `_load_header` class method...
r52356 Any,
pytype: import typing directly...
r52178 Callable,
Dict,
Iterable,
List,
Optional,
Set,
TYPE_CHECKING,
Tuple,
Union,
branchcache: rework the `filteredhash` logic to be more generic...
r52420 cast,
pytype: import typing directly...
r52178 )
Gregory Szorc
branchmap: use absolute_import
r25918 from . import (
encoding,
Pierre-Yves David
error: get Abort from 'error' instead of 'util'...
r26587 error,
av6
branchmap: skip obsolete revisions while computing heads...
r49536 obsolete,
Gregory Szorc
branchmap: use absolute_import
r25918 scmutil,
Simon Farnsworth
mercurial: switch to util.timer for all interval timings...
r30975 util,
Gregory Szorc
branchmap: use absolute_import
r25918 )
pytype: import typing directly...
r52178
Yuya Nishihara
stringutil: bulk-replace call sites to point to new module...
r37102 from .utils import (
repoview: move subsettable in a dedicated module...
r42314 repoviewutil,
Yuya Nishihara
stringutil: bulk-replace call sites to point to new module...
r37102 stringutil,
)
Gregory Szorc
branchmap: use absolute_import
r25918
pytype: import typing directly...
r52178 if TYPE_CHECKING:
Matt Harbison
merge with stable
r47552 from . import localrepo
Augie Fackler
branchmap: annotate constructor type for branchcache...
r44035
pytype: import typing directly...
r52178 assert [localrepo]
Augie Fackler
branchmap: annotate constructor type for branchcache...
r44035
Augie Fackler
formatting: blacken the codebase...
# Table of repoview filter names; `BranchMapCache.updatecache` looks up a
# filter name here to find the name of the subset view whose branch cache can
# be inherited.
subsettable = repoviewutil.subsettable

# Module-level shortcuts to the `struct` helpers.
calcsize = struct.calcsize
pack_into = struct.pack_into
unpack_from = struct.unpack_from
Pierre-Yves David
branchmap: extract write logic from localrepo
r18117
Pierre-Yves David
branchmap: extract read logic from repo
r18118
Gregory Szorc
py3: use class X: instead of class X(object):...
class BranchMapCache:
    """Registry of branch caches, one per filtered view of a repository."""

    def __init__(self):
        # filter name (None for the unfiltered repo) -> branch cache object
        self._per_filter = {}

    def _fresh_cache(self, repo):
        """Refresh the cache of ``repo``'s view and return it.

        The returned cache is checked to belong to the same filter level as
        ``repo``.
        """
        self.updatecache(repo)
        current = self._per_filter[repo.filtername]
        assert current._filtername == repo.filtername, (
            current._filtername,
            repo.filtername,
        )
        return current

    def __getitem__(self, repo):
        """Return an up-to-date branch cache for the view ``repo``."""
        return self._fresh_cache(repo)

    def update_disk(self, repo):
        """Ensure an up-to-date cache is (or will be) written on disk.

        The cache for this repository view is refreshed if needed and then
        synchronised with the on-disk file.

        If a transaction is in progress, nothing is written here; the write
        is left for transaction close (see `BranchMapCache.write_dirty`).

        This exists separately from `__getitem__` because it is sometimes
        useful to signal that the in-memory data is not going to be used yet.
        """
        current = self._fresh_cache(repo)
        transaction = repo.currenttransaction()
        # An object without a `finalized` attribute (e.g. None when there is
        # no transaction) falls back to True, so we write right away.
        if getattr(transaction, 'finalized', True):
            current.sync_disk(repo)

    def updatecache(self, repo):
        """Update the cache for the given filtered view on a repository."""
        # This can trigger updates of the caches of subsets of the filtered
        # view, e.g. when this view has no cache yet or its cache is stale.
        changelog = repo.changelog
        view_name = repo.filtername
        cache = self._per_filter.get(view_name)
        if cache is None or not cache.validfor(repo):
            # nothing in memory, or the in-memory object is stale: try disk
            cache = branch_cache_from_file(repo)

        pending = []
        if cache is None:
            # No usable cache for this view. Try to start from the cache of
            # its subset view and extend it with the revisions it lacks.
            parent_name = subsettable.get(view_name)
            if parent_name is None:
                # nothing to fall back on, start empty.
                cache = new_branch_cache(repo)
            else:
                parent_view = repo.filtered(parent_name)
                self.updatecache(parent_view)
                parent_cache = self._per_filter[parent_view.filtername]
                cache = parent_cache.inherit_for(repo)
                hidden_in_parent = (
                    parent_view.changelog.filteredrevs
                    - changelog.filteredrevs
                )
                pending.extend(
                    r for r in hidden_in_parent if r <= cache.tiprev
                )

        pending.extend(changelog.revs(start=cache.tiprev + 1))
        if pending:
            cache.update(repo, pending)

        assert cache.validfor(repo), view_name
        self._per_filter[repo.filtername] = cache

    def replace(self, repo, remotebranchmap):
        """Replace the branchmap cache for a repo with a branch mapping.

        This is likely only called during clone with a branch map from a
        remote.
        """
        changelog = repo.changelog
        rev_of = changelog.rev
        branchinfo = changelog.branchinfo
        remote_heads = []
        closed = set()
        for heads in remotebranchmap.values():
            remote_heads.extend(heads)
            for head in heads:
                _branch, closes = branchinfo(rev_of(head))
                if closes:
                    closed.add(head)

        if not remote_heads:
            return

        tip_rev = max(int(rev_of(node)) for node in remote_heads)
        cache = new_branch_cache(
            repo,
            remotebranchmap,
            repo[tip_rev].node(),
            tip_rev,
            closednodes=closed,
        )

        # Try to stick it as low as possible: filters above "served" are
        # unlikely to be fetched from a clone.
        for candidate in (b'base', b'immutable', b'served'):
            view = repo.filtered(candidate)
            if not cache.validfor(view):
                continue
            cache._filtername = candidate
            self._per_filter[candidate] = cache
            cache._state = STATE_DIRTY
            cache.write(view)
            return

    def clear(self):
        """Forget every cached branch map."""
        self._per_filter.clear()

    def write_dirty(self, repo):
        """Synchronise every known cache with disk, in subset order."""
        unfiltered = repo.unfiltered()
        for name in repoviewutil.get_ordered_subset():
            cache = self._per_filter.get(name)
            if cache is None:
                continue
            view = unfiltered if name is None else unfiltered.filtered(name)
            cache.sync_disk(view)
branchmap: stop writing cache for uncommitted data...
r49526
Augie Fackler
formatting: blacken the codebase...
r43346
Pulkit Goyal
branchcache: add functions to validate changelog nodes...
r42289 def _unknownnode(node):
Augie Fackler
formating: upgrade to black 20.8b1...
r46554 """raises ValueError when branchcache found a node which does not exists"""
Manuel Jacob
py3: use `x.hex()` instead of `pycompat.sysstr(node.hex(x))`
r50195 raise ValueError('node %s does not exist' % node.hex())
Gregory Szorc
branchmap: move branch cache code out of streamclone.py...
r26460
Augie Fackler
formatting: blacken the codebase...
r43346
Martin von Zweigbergk
py3: fix formatting of branchmap log messages with repo.filtername=None...
r42805 def _branchcachedesc(repo):
if repo.filtername is not None:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 return b'branch cache (%s)' % repo.filtername
Martin von Zweigbergk
py3: fix formatting of branchmap log messages with repo.filtername=None...
r42805 else:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 return b'branch cache'
Martin von Zweigbergk
py3: fix formatting of branchmap log messages with repo.filtername=None...
r42805
Augie Fackler
formatting: blacken the codebase...
r43346
branchcache: introduce a base class for branchmap...
class _BaseBranchCache:
    """A dict-like object that holds the branch heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    Branch names map to lists of head nodes (``self._entries``); nodes that
    close their branch are tracked separately in ``self._closednodes``.
    """

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closed_nodes: Optional[Set[bytes]] = None,
    ) -> None:
        """Initialise the cache from existing entries.

        ``entries`` is copied into a new dict and ``closed_nodes`` into a new
        set, so later mutations of the arguments do not affect the cache.
        ``repo`` is accepted but not used by this base constructor.
        """
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closed_nodes is None:
            closed_nodes = set()
        self._closednodes = set(closed_nodes)
        self._entries = dict(entries)

    def __iter__(self):
        """Iterate over the branch names."""
        return iter(self._entries)

    def __setitem__(self, key, value):
        """Set the list of heads of branch ``key``."""
        self._entries[key] = value

    def __getitem__(self, key):
        """Return the heads of branch ``key``; KeyError if unknown."""
        return self._entries[key]

    def __contains__(self, key):
        """Tell whether branch ``key`` is known."""
        return key in self._entries

    def iteritems(self):
        """Return the (branch name, heads) pairs."""
        return self._entries.items()

    # alias kept so both spellings are available to callers
    items = iteritems

    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        return label in self._entries

    def _branchtip(self, heads):
        """Return tuple with last open head in heads and false,
        otherwise return last closed head and true.

        ``heads`` must not be empty (``heads[-1]`` is read unconditionally).
        """
        tip = heads[-1]
        closed = True
        # walk from the last head backwards, stop at the first open one
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        """Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch."""
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        """Return a generator over the nodes of ``nodes`` that do not close
        their branch."""
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        """Return the heads of ``branch``.

        Closed heads are filtered out unless ``closed`` is true.
        Raise KeyError for unknown branch.
        """
        heads = self._entries[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        """Yield a ``(name, heads, tip, closed)`` tuple for every branch."""
        for bn, heads in self.items():
            yield (bn, heads) + self._branchtip(heads)

    def iterheads(self):
        """returns all the heads"""
        return self._entries.values()

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and an iterable of revision numbers that is strictly a
        superset of the missing heads, this function updates self to be
        correct.

        Returns the highest revision number seen in ``revgen`` (obsolete
        revisions included).

        Raises error.ProgrammingError when ``revgen`` yields no revision.
        """
        starttime = util.timer()
        cl = repo.changelog
        # Faster than using ctx.obsolete()
        obsrevs = obsolete.getrevs(repo, b'obsolete')
        # collect new branch entries
        newbranches = {}
        new_closed = set()
        # obsolete revisions skipped below; gathered here but not otherwise
        # used within this method
        obs_ignored = set()
        getbranchinfo = repo.revbranchcache().branchinfo
        max_rev = -1
        for r in revgen:
            max_rev = max(max_rev, r)
            if r in obsrevs:
                # We ignore obsolete changesets as they shouldn't be
                # considered heads.
                obs_ignored.add(r)
                continue
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                new_closed.add(r)
        if max_rev < 0:
            msg = "running branchcache.update without revision to update"
            raise error.ProgrammingError(msg)

        # Delay fetching the topological heads until they are needed.
        # A repository without non-continuous branches can skip this part.
        topoheads = None

        # If a changeset is visible, its parents must be visible too, so
        # use the faster unfiltered parent accessor.
        parentrevs = repo.unfiltered().changelog.parentrevs

        for branch, newheadrevs in newbranches.items():
            # For every branch, compute the new branchheads.
            # A branchhead is a revision such that no descendant is on
            # the same branch.
            #
            # The branchheads are computed iteratively in revision order.
            # This ensures topological order, i.e. parents are processed
            # before their children. Ancestors are inclusive here, i.e.
            # any revision is an ancestor of itself.
            #
            # Core observations:
            # - The current revision is always a branchhead for the
            #   repository up to that point.
            # - It is the first revision of the branch if and only if
            #   there was no branchhead before. In that case, it is the
            #   only branchhead as there are no possible ancestors on
            #   the same branch.
            # - If a parent is on the same branch, a branchhead can
            #   only be an ancestor of that parent, if it is parent
            #   itself. Otherwise it would have been removed as ancestor
            #   of that parent before.
            # - Therefore, if all parents are on the same branch, they
            #   can just be removed from the branchhead set.
            # - If one parent is on the same branch and the other is not
            #   and there was exactly one branchhead known, the existing
            #   branchhead can only be an ancestor if it is the parent.
            #   Otherwise it would have been removed as ancestor of
            #   the parent before. The other parent therefore can't have
            #   a branchhead as ancestor.
            # - In all other cases, the parents on different branches
            #   could have a branchhead as ancestor. Those parents are
            #   kept in the "uncertain" set. If all branchheads are also
            #   topological heads, they can't have descendants and further
            #   checks can be skipped. Otherwise, the ancestors of the
            #   "uncertain" set are removed from branchheads.
            #   This computation is heavy and avoided if at all possible.
            bheads = self._entries.get(branch, [])
            bheadset = {cl.rev(node) for node in bheads}
            uncertain = set()
            for newrev in sorted(newheadrevs):
                if not bheadset:
                    bheadset.add(newrev)
                    continue

                parents = [p for p in parentrevs(newrev) if p != nullrev]
                samebranch = set()
                otherbranch = set()
                obsparents = set()
                for p in parents:
                    if p in obsrevs:
                        # We ignored this obsolete changeset earlier, but now
                        # that it has non-ignored children, we need to make
                        # sure their ancestors are not considered heads. To
                        # achieve that, we will simply treat this obsolete
                        # changeset as a parent from other branch.
                        obsparents.add(p)
                    elif p in bheadset or getbranchinfo(p)[0] == branch:
                        samebranch.add(p)
                    else:
                        otherbranch.add(p)
                if not (len(bheadset) == len(samebranch) == 1):
                    uncertain.update(otherbranch)
                    uncertain.update(obsparents)
                bheadset.difference_update(samebranch)
                bheadset.add(newrev)

            if uncertain:
                if topoheads is None:
                    topoheads = set(cl.headrevs())
                if bheadset - topoheads:
                    floorrev = min(bheadset)
                    if floorrev <= max(uncertain):
                        ancestors = set(cl.ancestors(uncertain, floorrev))
                        bheadset -= ancestors
            # a branch left without any head gets no entry at all
            if bheadset:
                self[branch] = [cl.node(rev) for rev in sorted(bheadset)]

        self._closednodes.update(cl.node(rev) for rev in new_closed)

        duration = util.timer() - starttime
        repo.ui.log(
            b'branchcache',
            b'updated %s in %.4f seconds\n',
            _branchcachedesc(repo),
            duration,
        )
        return max_rev
branchcache: explicitly track inheritence "state"...
# Values stored in the `_state` attribute of a local branch cache:
# - STATE_CLEAN: what a freshly constructed cache starts with
# - STATE_INHERITED: the cache was constructed with `inherited=True`
#   (as `inherit_for` does)
# - STATE_DIRTY: set by `BranchMapCache.replace` right before it writes the
#   cache out
STATE_CLEAN = 1
STATE_INHERITED = 2
STATE_DIRTY = 3
branchcache: use an explicit class for the v2 version...
r52412 class _LocalBranchCache(_BaseBranchCache):
"""base class of branch-map info for a local repo or repoview"""
branchcache: dispatch the code into the dedicated subclass...
r52348
branchcache: use an explicit class for the v2 version...
r52412 _base_filename = None
branchcache: rework the `filteredhash` logic to be more generic...
r52420 _default_key_hashes: Tuple[bytes] = cast(Tuple[bytes], ())
branchcache: move the filename to a class attribute...
r52353
branchcache: dispatch the code into the dedicated subclass...
def __init__(
    self,
    repo: "localrepo.localrepository",
    entries: Union[
        Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
    ] = (),
    tipnode: Optional[bytes] = None,
    tiprev: Optional[int] = nullrev,
    key_hashes: Optional[Tuple[bytes]] = None,
    closednodes: Optional[Set[bytes]] = None,
    hasnode: Optional[Callable[[bytes], bool]] = None,
    verify_node: bool = False,
    inherited: bool = False,
) -> None:
    """Set up a branch cache bound to the filter level of ``repo``.

    ``tipnode`` defaults to ``repo.nullid`` and ``key_hashes`` to the
    class-level ``_default_key_hashes``. When ``verify_node`` is true, the
    changelog's ``hasnode`` is kept around so nodes can be checked against
    it. The ``hasnode`` argument itself is accepted but not read here.
    ``inherited`` selects the initial state (inherited rather than clean).
    """
    self._filtername = repo.filtername
    self.tipnode = repo.nullid if tipnode is None else tipnode
    self.tiprev = tiprev
    self.key_hashes = (
        self._default_key_hashes if key_hashes is None else key_hashes
    )
    self._state = STATE_INHERITED if inherited else STATE_CLEAN

    super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
    # closednodes is a set of nodes that close their branch. If the branch
    # cache has been updated, it may contain nodes that are no longer
    # heads.

    # Do we need to verify branch at all ?
    self._verify_node = verify_node
    # branches for which nodes are verified
    self._verifiedbranches = set()
    self._hasnode = repo.changelog.hasnode if self._verify_node else None
branchcache: rework the `filteredhash` logic to be more generic...
def _compute_key_hashes(self, repo) -> Tuple[bytes]:
    """Return the key hashes of ``repo``, compared by `validfor` against
    ``self.key_hashes``.

    This base implementation is a placeholder that always raises
    NotImplementedError.
    """
    raise NotImplementedError
branchcache: dispatch the code into the dedicated subclass...
def validfor(self, repo):
    """Tell whether the cache content is valid for (a subset of) ``repo``.

    - False when the order of changesets changed or a strip is detected.
    - True when the cache is up-to-date for the current repo or its subset.
    """
    try:
        node_at_tip = repo.changelog.node(self.tiprev)
    except IndexError:
        # changesets were stripped and there are not even enough left to
        # reach tiprev
        return False
    else:
        if self.tipnode != node_at_tip:
            # tiprev no longer points at tipnode: the repo was stripped, or
            # it orders its changesets differently
            return False
    expected_hashes = self._compute_key_hashes(repo)
    # The hashes differ when this view has a different set of filtered
    # revisions (e.g. due to phase changes) or of obsolete revisions (e.g.
    # history was rewritten).
    return self.key_hashes == expected_hashes
branchcache: dispatch the code into the dedicated subclass...
r52348
@classmethod
def fromfile(cls, repo):
    """Load a branch cache for ``repo`` from its cache file.

    Returns None when the file cannot be read (IOError/OSError) and also
    when its content is rejected for any other reason; in the latter case
    the reason is reported through ``repo.ui.debug`` if the debug flag is
    set. The file, once opened, is always closed before returning.
    """
    fp = None
    try:
        fp = repo.cachevfs(cls._filename(repo))
        lines = iter(fp)
        header_kwargs = cls._load_header(repo, lines)
        loaded = cls(repo, verify_node=True, **header_kwargs)
        if not loaded.validfor(repo):
            # invalidate the cache
            raise ValueError('tip differs')
        loaded._load_heads(repo, lines)
    except (IOError, OSError):
        return None
    except Exception as inst:
        # best effort: any invalid content simply means "no cache"
        loaded = None
        if repo.ui.debugflag:
            details = (
                _branchcachedesc(repo),
                stringutil.forcebytestr(inst),
            )
            repo.ui.debug(b'invalid %s: %s\n' % details)
    finally:
        if fp:
            fp.close()
    return loaded
branchcache: move the header loading in a `_load_header` class method...
@classmethod
def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
    """Read the header from ``lineiter`` and return constructor arguments.

    `fromfile` passes the returned mapping as keyword arguments when it
    instantiates the class. This base implementation is a placeholder that
    always raises NotImplementedError.
    """
    raise NotImplementedError
branchcache: move the header loading in a `_load_header` class method...
r52356
branchcache: rename `load` to `_load_heads`...
r52354 def _load_heads(self, repo, lineiter):
branchcache: dispatch the code into the dedicated subclass...
r52348 """fully loads the branchcache by reading from the file using the line
iterator passed"""
for line in lineiter:
line = line.rstrip(b'\n')
if not line:
continue
node, state, label = line.split(b" ", 2)
if state not in b'oc':
raise ValueError('invalid branch state')
label = encoding.tolocal(label.strip())
node = bin(node)
self._entries.setdefault(label, []).append(node)
if state == b'c':
self._closednodes.add(node)
Pulkit Goyal
branchcache: store the maximum tip in a variable inside for loop...
r42400
branchcache: move the filename to a class attribute...
@classmethod
def _filename(cls, repo):
    """Return the branch cache file name for a repo or repoview.

    The name is ``cls._base_filename``; when ``repo.filtername`` is set it
    is suffixed with ``-<filtername>``.
    """
    base = cls._base_filename
    assert base is not None
    if not repo.filtername:
        return base
    return b'%s-%s' % (base, repo.filtername)
branchcache: explicitly track inheritence "state"...
def inherit_for(self, repo):
    """Return a new cache of the same class for ``repo``, seeded from self.

    The new object is flagged as inherited.  ``repo`` must use a different
    filter level than the one this cache was built for.
    """
    assert repo.filtername != self._filtername
    init_args = dict(
        repo=repo,
        # self._entries is passed as is: the constructor only takes a
        # shallow copy of it and the values are always replaced rather than
        # mutated, so no deep copy is needed as long as that stays true.
        entries=self._entries,
        tipnode=self.tipnode,
        tiprev=self.tiprev,
        key_hashes=self.key_hashes,
        closednodes=set(self._closednodes),
        verify_node=self._verify_node,
        inherited=True,
    )
    clone = type(self)(**init_args)
    # carry over what has already been verified
    clone._verifiedbranches = set(self._verifiedbranches)
    return clone
branchcache: explicitly track inheritence "state"...
def sync_disk(self, repo):
    """Bring the on-disk file in line with the in-memory state.

    A dirty cache is written out.  A cache whose content is inherited from
    a subset has its (possibly stale) file removed.  In any other state
    nothing is done.
    """
    state = self._state
    if state == STATE_DIRTY:
        self.write(repo)
        return
    if state == STATE_INHERITED:
        repo.cachevfs.tryunlink(self._filename(repo))
branchcache: dispatch the code into the dedicated subclass...
def write(self, repo):
    """Write the cache to disk and mark it clean.

    Must only be called on a dirty cache, for the filter level it was
    built for, and outside of any unfinalized transaction (otherwise
    ``error.ProgrammingError`` is raised).

    I/O errors and ``error.Abort`` are only reported through
    ``repo.ui.debug``; the cache then stays dirty.
    """
    assert self._filtername == repo.filtername, (
        self._filtername,
        repo.filtername,
    )
    assert self._state == STATE_DIRTY, self._state
    # Writing in the middle of an open transaction is a programming error.
    current_tr = repo.currenttransaction()
    if not getattr(current_tr, 'finalized', True):
        raise error.ProgrammingError(
            "writing branchcache in the middle of a transaction"
        )
    try:
        target = self._filename(repo)
        with repo.cachevfs(target, b"w", atomictemp=True) as fp:
            self._write_header(fp)
            nodecount = self._write_heads(repo, fp)
        repo.ui.log(
            b'branchcache',
            b'wrote %s with %d labels and %d nodes\n',
            _branchcachedesc(repo),
            len(self._entries),
            nodecount,
        )
        self._state = STATE_CLEAN
    except (IOError, OSError, error.Abort) as exc:
        # Abort may be raised by read only opener, so log and continue
        details = stringutil.forcebytestr(exc)
        repo.ui.debug(b"couldn't write branch cache: %s\n" % details)
branchcache: move head writing in a `_write_headers` method...
def _write_header(self, fp) -> None:
    """Write the cache header to ``fp``.

    This base implementation always raises ``NotImplementedError``; each
    on-disk format is expected to provide its own version.
    """
    raise NotImplementedError
branchcache: move head writing in a `_write_headers` method...
r52358
branchcache: skip entries that are topological heads in the on disk file...
def _write_heads(self, repo, fp) -> int:
    """Write one ``<hex node> <state> <branch>`` line per cached head.

    Branches are written in sorted order.  ``repo`` is not used here.

    Return the number of heads written.
    """
    written = 0
    for label, nodes in sorted(self._entries.items()):
        label = encoding.fromlocal(label)
        for node in nodes:
            written += 1
            state = b'c' if node in self._closednodes else b'o'
            fp.write(b"%s %s %s\n" % (hex(node), state, label))
    return written
branchcache: dispatch the code into the dedicated subclass...
def _verifybranch(self, branch):
    """Check the head nodes of ``branch`` with ``self._hasnode``.

    Nothing is done when verification is disabled, when the branch is not
    in the cache, or when it was verified already.  ``_unknownnode`` is
    called for any node ``self._hasnode`` does not accept.
    """
    if not self._verify_node:
        return
    if branch not in self._entries or branch in self._verifiedbranches:
        return
    assert self._hasnode is not None
    for node in self._entries[branch]:
        if self._hasnode(node):
            continue
        _unknownnode(node)
    self._verifiedbranches.add(branch)
def _verifyall(self):
    """Run ``_verifybranch`` on every branch not yet marked as verified."""
    for branch in self._entries.keys():
        if branch in self._verifiedbranches:
            continue
        self._verifybranch(branch)
def __getitem__(self, key):
    """Verify branch ``key``, then defer to the parent lookup."""
    self._verifybranch(key)
    value = super().__getitem__(key)
    return value
def __contains__(self, key):
    """Verify branch ``key``, then defer to the parent membership test."""
    self._verifybranch(key)
    found = super().__contains__(key)
    return found
def iteritems(self):
    """Verify every branch, then defer to the parent ``iteritems``."""
    self._verifyall()
    entries = super().iteritems()
    return entries

# ``items`` is an alias of ``iteritems``
items = iteritems
def iterheads(self):
    """Verify every branch, then return all the heads from the parent."""
    self._verifyall()
    heads = super().iterheads()
    return heads
def hasbranch(self, label):
    """Tell whether a branch named ``label`` exists, verifying it first."""
    self._verifybranch(label)
    exists = super().hasbranch(label)
    return exists
def branchheads(self, branch, closed=False):
    """Verify ``branch``, then return its heads from the parent class.

    ``closed`` is forwarded unchanged to the parent implementation.
    """
    self._verifybranch(branch)
    heads = super().branchheads(branch, closed=closed)
    return heads
def update(self, repo, revgen):
    """Update the cache with the revisions of ``revgen`` and flag it dirty.

    The actual processing is delegated to the parent class; this method
    then refreshes the tip information and, if needed, the cache key.  The
    cache is written to disk right away unless an unfinalized transaction
    is running.
    """
    assert self._filtername == repo.filtername, (
        self._filtername,
        repo.filtername,
    )
    changelog = repo.changelog
    newest_rev = super().update(repo, revgen)
    # The parent reports the highest revision it processed; move our tip
    # forward when that revision is newer than the one we knew about.
    if newest_rev is not None and newest_rev > self.tiprev:
        self.tiprev = newest_rev
        self.tipnode = changelog.node(newest_rev)
    else:
        # Otherwise the recorded tip must already be consistent.
        assert changelog.node(self.tiprev) == self.tipnode

    if not self.validfor(repo):
        # tiprev and tipnode are aligned at this point, so the cache can
        # only be seen as invalid because the old cache key no longer
        # matches the repository.
        #
        # We have just updated the cache and assume its content is valid,
        # so make the key valid as well by recomputing it from the cached
        # data.
        self.key_hashes = self._compute_key_hashes(repo)
        # NOTE(review): `filteredhash` looks like a leftover of the
        # pre-`key_hashes` logic; confirm whether anything still reads it.
        self.filteredhash = scmutil.combined_filtered_and_obsolete_hash(
            repo,
            self.tiprev,
        )

    self._state = STATE_DIRTY
    current_tr = repo.currenttransaction()
    if getattr(current_tr, 'finalized', True):
        # Avoid premature writing.
        #
        # (The cache warming setup by localrepo will update the file later.)
        self.write(repo)
Martijn Pieters
branchmap: updating triggers a write...
r41707
branchcache: use an explicit class for the v2 version...
def branch_cache_from_file(repo) -> Optional[_LocalBranchCache]:
    """Build a branch cache from on-disk data if possible.

    The ``experimental.branch-cache-v3`` option selects the class whose
    ``fromfile`` is used: ``BranchCacheV3`` when set, ``BranchCacheV2``
    otherwise.
    """
    use_v3 = repo.ui.configbool(b"experimental", b"branch-cache-v3")
    cache_cls = BranchCacheV3 if use_v3 else BranchCacheV2
    return cache_cls.fromfile(repo)
branchcache: use an explicit class for the v2 version...
r52412
def new_branch_cache(repo, *args, **kwargs):
    """Build a new branch cache from the given arguments.

    The ``experimental.branch-cache-v3`` option selects the class that is
    instantiated: ``BranchCacheV3`` when set, ``BranchCacheV2`` otherwise.
    All arguments are forwarded to its constructor.
    """
    use_v3 = repo.ui.configbool(b"experimental", b"branch-cache-v3")
    cache_cls = BranchCacheV3 if use_v3 else BranchCacheV2
    return cache_cls(repo, *args, **kwargs)
branchcache: use an explicit class for the v2 version...
r52412
class BranchCacheV2(_LocalBranchCache):
    """a branch cache using version 2 of the format on disk

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered and obsolete revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    _base_filename = b"branch2"

    @classmethod
    def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
        """Parse the first line of a v2 branch cache file.

        Return the keyword arguments to pass to a newly created instance:
        ``tipnode``, ``tiprev`` and ``key_hashes`` (empty when the line has
        no third field).
        """
        fields = next(lineiter).rstrip(b'\n').split(b" ", 2)
        raw_node, raw_rev = fields[:2]
        tipnode, tiprev = bin(raw_node), int(raw_rev)
        if len(fields) > 2:
            key_hashes = (bin(fields[2]),)
        else:
            key_hashes = ()
        return {
            "tipnode": tipnode,
            "tiprev": tiprev,
            "key_hashes": key_hashes,
        }

    def _write_header(self, fp) -> None:
        """Write the ``<tip node> <tip rev> [hash]`` header line to ``fp``."""
        fields = [hex(self.tipnode), b'%d' % self.tiprev]
        if self.key_hashes:
            fields.append(hex(self.key_hashes[0]))
        fp.write(b" ".join(fields) + b'\n')

    def _compute_key_hashes(self, repo) -> Tuple[bytes]:
        """Return the cache key hashes matching the state of ``repo``.

        The result holds the combined filtered/obsolete hash computed up to
        ``self.tiprev``, or is empty when that hash is ``None``.
        """
        combined = scmutil.combined_filtered_and_obsolete_hash(
            repo,
            self.tiprev,
            needobsolete=True,
        )
        if combined is None:
            return cast(Tuple[bytes], ())
        return (combined,)
branchcache: use an explicit class for the v2 version...
r52412
branchcache-v3: introduce a v3 format...
class BranchCacheV3(_LocalBranchCache):
    """a branch cache using version 3 of the format on disk

    This version is still EXPERIMENTAL and the format is subject to changes.

    The cache is serialized on disk in the following format:

    <cache-key-xxx>=<xxx-value> <cache-key-yyy>=<yyy-value> […]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. It is a
    series of key value pairs. The following keys are recognized:

    - tip-rev: the rev-num of the tip-most revision seen by this cache
    - tip-node: the node-id of the tip-most revision seen by this cache
    - filtered-hash: the hash of all filtered revisions (before tip-rev)
                     ignored by this cache.
    - obsolete-hash: the hash of all non-filtered obsolete revisions (before
                     tip-rev) ignored by this cache.

    The tip-rev is used to know how far behind the values in the file are
    compared to the current repository state.

    The tip-node, filtered-hash and obsolete-hash are used to detect if this
    cache can be used for this repository state at all.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.

    Topological heads are not included in the listing and should be
    dispatched on the right branch at read time. Obsolete topological heads
    should be ignored.
    """

    _base_filename = b"branch3"
    _default_key_hashes = (None, None)

    def _get_topo_heads(self, repo) -> List[int]:
        """Return the topological heads of the repoview up to self.tiprev."""
        changelog = repo.changelog
        if self.tiprev == nullrev:
            return []
        if self.tiprev == changelog.tiprev():
            return changelog.headrevs()
        # XXX passing tiprev as ceiling of cl.headrevs could be faster
        return changelog.headrevs(changelog.revs(stop=self.tiprev))

    def _write_header(self, fp) -> None:
        """Write the ``key=value`` header line to ``fp``, keys sorted.

        ``tip-node`` and ``tip-rev`` are always written; ``filtered-hash``
        and ``obsolete-hash`` only when the matching key hash is not None.
        """
        cache_keys = {
            b"tip-node": hex(self.tipnode),
            b"tip-rev": b'%d' % self.tiprev,
        }
        if self.key_hashes:
            optional_keys = ((b"filtered-hash", 0), (b"obsolete-hash", 1))
            for name, position in optional_keys:
                value = self.key_hashes[position]
                if value is not None:
                    cache_keys[name] = hex(value)
        pieces = (b"%s=%s" % item for item in sorted(cache_keys.items()))
        fp.write(b" ".join(pieces) + b'\n')

    def _write_heads(self, repo, fp) -> int:
        """Write the cached heads to ``fp``, leaving topological heads out.

        Return the number of heads written."""
        written = 0
        topo_heads = set(self._get_topo_heads(repo))
        node_to_rev = repo.changelog.index.rev
        for label, nodes in sorted(self._entries.items()):
            label = encoding.fromlocal(label)
            for node in nodes:
                if node_to_rev(node) in topo_heads:
                    # not stored: re-added by _load_heads at read time
                    continue
                state = b'c' if node in self._closednodes else b'o'
                written += 1
                fp.write(b"%s %s %s\n" % (hex(node), state, label))
        return written

    @classmethod
    def _load_header(cls, repo, lineiter):
        """Parse the ``key=value`` header line of a v3 branch cache file.

        Return the keyword arguments to pass to a newly created instance.
        Raises ``ValueError`` on an unknown key.
        """
        header_line = next(lineiter)
        pieces = header_line.rstrip(b'\n').split(b" ")
        cache_keys = dict(piece.split(b'=', 1) for piece in pieces)

        args = {}
        filtered_hash = None
        obsolete_hash = None
        for key, value in cache_keys.items():
            if key == b"tip-rev":
                args["tiprev"] = int(value)
            elif key == b"tip-node":
                args["tipnode"] = bin(value)
            elif key == b"filtered-hash":
                filtered_hash = bin(value)
            elif key == b"obsolete-hash":
                obsolete_hash = bin(value)
            else:
                msg = b"unknown cache key: %r" % key
                raise ValueError(msg)
        args["key_hashes"] = (filtered_hash, obsolete_hash)
        return args

    def _load_heads(self, repo, lineiter):
        """Load the heads listed in the file, then add the topological ones.

        Topological heads are not stored on disk: every non-obsolete one is
        appended to the entry of its branch here, and the entries of the
        branches touched that way are sorted again by revision number.
        """
        super()._load_heads(repo, lineiter)
        changelog = repo.changelog
        getbranchinfo = repo.revbranchcache().branchinfo
        obsrevs = obsolete.getrevs(repo, b'obsolete')
        rev_to_node = changelog.node
        touched = set()
        for head in self._get_topo_heads(repo):
            if head in obsrevs:
                continue
            node = rev_to_node(head)
            branch, closed = getbranchinfo(head)
            self._entries.setdefault(branch, []).append(node)
            if closed:
                self._closednodes.add(node)
            touched.add(branch)
        node_to_rev = changelog.index.rev
        for branch in touched:
            self._entries[branch].sort(key=node_to_rev)

    def _compute_key_hashes(self, repo) -> Tuple[bytes]:
        """Return the cache key hashes matching the state of ``repo``."""
        hashes = scmutil.filtered_and_obsolete_hash(
            repo,
            self.tiprev,
        )
        return hashes
branchcache-v3: introduce a v3 format...
r52413
branchcache: introduce a base class for branchmap...
class remotebranchcache(_BaseBranchCache):
    """Branchmap info for a remote connection, should not write locally"""

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closednodes: Optional[Set[bytes]] = None,
    ) -> None:
        """Forward everything to the base class.

        Note that the base class names the closed-node argument
        ``closed_nodes`` while this constructor keeps ``closednodes``.
        """
        super().__init__(
            repo=repo,
            entries=entries,
            closed_nodes=closednodes,
        )
Martijn Pieters
branchmap: updating triggers a write...
r41707
Mads Kiilerich
branchcache: introduce revbranchcache for caching of revision branch names...
# Revision branch info cache

# suffix shared by the names of both cache files
_rbcversion = b'-v1'
_rbcnames = b'rbc-names' + _rbcversion
_rbcrevs = b'rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = b'>4sI'
# size in bytes of one record
_rbcrecsize = calcsize(_rbcrecfmt)
_rbcmininc = 64 * _rbcrecsize
# number of node bytes kept in each record
_rbcnodelen = 4
# the low 31 bits hold the branch name index ...
_rbcbranchidxmask = 0x7FFFFFFF
# ... and the top bit flags a branch closing revision
_rbccloseflag = 0x80000000
Augie Fackler
formatting: blacken the codebase...
r43346
Arseniy Alekseyev
branchmap: use mmap for faster revbranchcache loading...
class rbcrevs:
    """a byte string consisting of an immutable prefix followed by a mutable suffix"""

    def __init__(self, revs):
        # ``revs`` is kept as is (never modified in place) as the prefix;
        # all writes go to the ``_rest`` bytearray.
        self._prefix = revs
        self._rest = bytearray()

    def __len__(self):
        return len(self._prefix) + len(self._rest)

    def unpack_record(self, rbcrevidx):
        """Unpack the record starting at byte offset ``rbcrevidx``."""
        if rbcrevidx < len(self._prefix):
            return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
        else:
            return unpack_from(
                _rbcrecfmt,
                util.buffer(self._rest),
                rbcrevidx - len(self._prefix),
            )

    def make_mutable(self):
        """Copy the prefix into the mutable suffix, leaving the prefix empty."""
        if len(self._prefix) > 0:
            entirety = bytearray()
            entirety[:] = self._prefix
            entirety.extend(self._rest)
            self._rest = entirety
            self._prefix = bytearray()

    def truncate(self, pos):
        """Drop everything from byte offset ``pos`` onward."""
        self.make_mutable()
        del self._rest[pos:]

    def pack_into(self, rbcrevidx, node, branchidx):
        """Pack ``(node, branchidx)`` as the record at byte offset ``rbcrevidx``."""
        if rbcrevidx < len(self._prefix):
            # the target lies in the immutable part: make it writable first
            self.make_mutable()
        buf = self._rest
        start_offset = rbcrevidx - len(self._prefix)
        end_offset = start_offset + _rbcrecsize

        if len(self._rest) < end_offset:
            # bytearray doesn't allocate extra space at least in Python 3.7.
            # When multiple changesets are added in a row, precise resize would
            # result in quadratic complexity. Overallocate to compensate by
            # using the classic doubling technique for dynamic arrays instead.
            # If there was a gap in the map before, less space will be reserved.
            self._rest.extend(b'\0' * end_offset)
        return pack_into(
            _rbcrecfmt,
            buf,
            start_offset,
            node,
            branchidx,
        )

    def extend(self, extension):
        """Append ``extension`` to the mutable suffix."""
        return self._rest.extend(extension)

    def slice(self, begin, end):
        """Return the bytes in ``[begin, end)`` of the whole string.

        ``begin`` and ``end`` are non-negative byte offsets.
        """
        prefix_len = len(self._prefix)
        # Offsets relative to the suffix are clamped at zero: a negative
        # value would otherwise be read by Python as counting from the end
        # of ``_rest`` and select the wrong bytes.
        rest_begin = max(begin - prefix_len, 0)
        rest_end = max(end - prefix_len, 0)
        if begin < prefix_len:
            acc = bytearray()
            acc[:] = self._prefix[begin:end]
            acc.extend(self._rest[rest_begin:rest_end])
            return acc
        return self._rest[rest_begin:rest_end]
Gregory Szorc
py3: use class X: instead of class X(object):...
r49801 class revbranchcache:
Mads Kiilerich
branchcache: introduce revbranchcache for caching of revision branch names...
r23785 """Persistent cache, mapping from revision number to branch name and close.
This is a low level cache, independent of filtering.
Branch names are stored in rbc-names in internal encoding separated by 0.
rbc-names is append-only, and each branch name is only stored once and will
thus have a unique index.
The branch info for each revision is stored in rbc-revs as constant size
records. The whole file is read into memory, but it is only 'parsed' on
demand. The file is usually append-only but will be truncated if repo
modification is detected.
The record for each revision contains the first 4 bytes of the
corresponding node hash, and the record is only used if it still matches.
Even a completely trashed rbc-revs file will thus still give the right result
while converging towards full recovery ... assuming no incorrectly matching
node hashes.
The record also contains 4 bytes where 31 bits contains the index of the
branch and the last bit indicate that it is a branch close commit.
The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
and will grow with it but be 1/8th of its size.
"""
Mads Kiilerich
revisionbranchcache: fall back to slow path if starting readonly (issue4531)...
def __init__(self, repo, readonly=True):
    """Load the branch names and revision records from the cache files.

    ``repo`` must be unfiltered.  When the names file cannot be read and
    ``readonly`` is set, the cache is bypassed by pointing
    ``self.branchinfo`` at ``self._branchinfo``.
    """
    assert repo.filtername is None
    self._repo = repo
    self._names = []  # branch names in local encoding with static index
    self._rbcrevs = rbcrevs(bytearray())
    self._rbcsnameslen = 0  # length of names read at _rbcsnameslen
    try:
        raw_names = repo.cachevfs.read(_rbcnames)
        self._rbcsnameslen = len(raw_names)  # for verification before writing
        if raw_names:
            self._names = [
                encoding.tolocal(name) for name in raw_names.split(b'\0')
            ]
    except (IOError, OSError):
        if readonly:
            # don't try to use cache - fall back to the slow path
            self.branchinfo = self._branchinfo

    if self._names:
        try:
            if repo.ui.configbool(b'storage', b'revbranchcache.mmap'):
                with repo.cachevfs(_rbcrevs) as fp:
                    raw_revs = util.buffer(util.mmapread(fp))
            else:
                raw_revs = repo.cachevfs.read(_rbcrevs)
            self._rbcrevs = rbcrevs(raw_revs)
        except (IOError, OSError) as exc:
            repo.ui.debug(
                b"couldn't read revision branch cache: %s\n"
                % stringutil.forcebytestr(exc)
            )
    # remember number of good records on disk
    self._rbcrevslen = min(
        len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
    )
    if self._rbcrevslen == 0:
        # no usable record: the names read from disk are dropped as well
        self._names = []
    self._rbcnamescount = len(self._names)  # number of names read at
    # _rbcsnameslen
Mads Kiilerich
branchcache: introduce revbranchcache for caching of revision branch names...
r23785
Mads Kiilerich
cache: rebuild branch cache from scratch when inconsistencies are detected...
def _clear(self):
    """Reset the in-memory cache to an empty state.

    The branch names are dropped and the revision records are replaced by
    a zero-filled buffer holding one record per changelog revision.
    """
    self._rbcsnameslen = 0
    del self._names[:]
    self._rbcnamescount = 0
    rev_count = len(self._repo.changelog)
    self._rbcrevslen = rev_count
    self._rbcrevs = rbcrevs(bytearray(rev_count * _rbcrecsize))
    # the reverse name mapping has to be rebuilt from the emptied list
    util.clearcachedproperty(self, b'_namesreverse')
Pulkit Goyal
branchmap: build the revbranchcache._namesreverse() only when required...
r40746
@util.propertycache
def _namesreverse(self):
    """Mapping from branch name to its index in ``self._names``."""
    return dict((name, index) for index, name in enumerate(self._names))
Mads Kiilerich
cache: rebuild branch cache from scratch when inconsistencies are detected...
r28558
Yuya Nishihara
branchmap: do not specify changelog as an argument...
def branchinfo(self, rev):
    """Return branch name and close flag for rev, using and updating
    persistent cache."""
    changelog = self._repo.changelog
    rbcrevidx = rev * _rbcrecsize

    # avoid negative index, changelog.read(nullrev) is fast without cache
    if rev == nullrev:
        return changelog.branchinfo(rev)

    # if requested rev isn't allocated, grow and cache the rev info
    if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
        return self._branchinfo(rev)

    # fast path: extract data from cache, use it if node is matching
    reponode = changelog.node(rev)[:_rbcnodelen]
    cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
    close = bool(branchidx & _rbccloseflag)
    if close:
        branchidx &= _rbcbranchidxmask

    # An all-zero node prefix is skipped here and goes straight to the
    # slow path below.
    if cachenode != b'\0\0\0\0':
        if cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug(
                    b"referenced branch names not found"
                    b" - rebuilding revision branch cache from scratch\n"
                )
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug(
                b"history modification detected - truncating "
                b"revision branch cache to revision %d\n" % rev
            )
            truncate = rbcrevidx + _rbcrecsize
            self._rbcrevs.truncate(truncate)
            # NOTE(review): `truncate` is a byte offset while
            # `_rbcrevslen` is compared to record counts elsewhere;
            # confirm this min() is intended.
            self._rbcrevslen = min(self._rbcrevslen, truncate)

    # fall back to slow path and make sure it will be written to disk
    return self._branchinfo(rev)
Mads Kiilerich
branchcache: introduce revbranchcache for caching of revision branch names...
r23785
Yuya Nishihara
branchmap: do not specify changelog as an argument...
def _branchinfo(self, rev):
    """Retrieve branch info from changelog and update _rbcrevs"""
    changelog = self._repo.changelog
    name, close = changelog.branchinfo(rev)
    known = self._namesreverse
    branchidx = known.get(name)
    if branchidx is None:
        # first time this branch name is seen: give it the next index
        branchidx = len(self._names)
        self._names.append(name)
        known[name] = branchidx
    reponode = changelog.node(rev)
    if close:
        branchidx |= _rbccloseflag
    self._setcachedata(rev, reponode, branchidx)
    return name, close
Joerg Sonnenberger
branchmap: update rev-branch-cache incrementally...
def setdata(self, rev, changelogrevision):
    """Record the branch info of ``changelogrevision`` for ``rev``."""
    name, close = changelogrevision.branchinfo
    known = self._namesreverse
    branchidx = known.get(name)
    if branchidx is None:
        # first time this branch name is seen: give it the next index
        branchidx = len(self._names)
        self._names.append(name)
        known[name] = branchidx
    if close:
        branchidx |= _rbccloseflag
    self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
    # If no cache data were readable (non exists, bad permission, etc)
    # the cache was bypassing itself by setting:
    #
    #   self.branchinfo = self._branchinfo
    #
    # Since we now have data in the cache, we need to drop this bypassing.
    if 'branchinfo' in vars(self):
        del self.branchinfo
Yuya Nishihara
branchmap: do not specify changelog as an argument...
def _setcachedata(self, rev, node, branchidx):
    """Writes the node's branch data to the in-memory cache data."""
    if rev == nullrev:
        return
    offset = rev * _rbcrecsize
    self._rbcrevs.pack_into(offset, node, branchidx)
    # everything from this revision on now differs from what was read
    self._rbcrevslen = min(self._rbcrevslen, rev)

    current_tr = self._repo.currenttransaction()
    if current_tr:
        # let the running transaction write the cache when it finalizes
        current_tr.addfinalize(b'write-revbranchcache', self.write)
Durham Goode
revbranchcache: move cache writing to the transaction finalizer...
r24377
def write(self, tr=None):
    """Save the rev branch cache to disk if it is dirty.

    Branch names are written first (when new names exist), then the
    revision records (when the in-memory records extend past what was
    last written). The repository wlock is taken without waiting and
    only when there is something to write.

    IOError, OSError, error.Abort and error.LockError are not propagated:
    they are reported through ``ui.debug`` and the write is abandoned.

    ``tr`` is not used by this method.
    NOTE(review): presumably accepted so the method can serve as a
    transaction finalizer callback - confirm against the transaction API.
    """
    repo = self._repo
    lock = None
    # suffix inserted in the debug message to tell which step failed
    failed_step = b''
    try:
        # step 1: new branch names
        if len(self._names) > self._rbcnamescount:
            lock = repo.wlock(wait=False)
            failed_step = b' names'
            self._writenames(repo)

        # step 2: new revision records; computed after the names step
        # because `_writenames` may reset `_rbcrevslen`
        start = self._rbcrevslen * _rbcrecsize
        if len(self._rbcrevs) != start:
            failed_step = b''
            if lock is None:
                lock = repo.wlock(wait=False)
            self._writerevs(repo, start)
    except (IOError, OSError, error.Abort, error.LockError) as inst:
        details = (failed_step, stringutil.forcebytestr(inst))
        repo.ui.debug(
            b"couldn't write revision branch cache%s: %s\n" % details
        )
    finally:
        if lock is not None:
            lock.release()
Pulkit Goyal
revbranchcache: factor logic to write names and revs in separate functions...
r42363
def _writenames(self, repo):
    """write the new branch names to revbranchcache

    When names were written before (``_rbcnamescount != 0``) the names
    file is opened for appending; if its size still matches
    ``_rbcsnameslen`` only the new names are appended, separated from the
    existing content by a NUL byte. Otherwise the file changed behind our
    back: both counters are reset and the file is rewritten from scratch.

    Before a full rewrite, the revs file is removed so that no record
    keeps referencing a name index from the previous names file.

    The file handle is always closed, including when writing fails, so an
    I/O error swallowed by the caller does not leak an open descriptor.
    """
    f = None
    try:
        if self._rbcnamescount != 0:
            f = repo.cachevfs.open(_rbcnames, b'ab')
            if f.tell() == self._rbcsnameslen:
                # separator between the existing names and the new ones
                f.write(b'\0')
            else:
                f.close()
                f = None
                repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
                self._rbcnamescount = 0
                self._rbcrevslen = 0
        if self._rbcnamescount == 0:
            # before rewriting names, make sure references are removed
            repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
            f = repo.cachevfs.open(_rbcnames, b'wb')
        f.write(
            b'\0'.join(
                encoding.fromlocal(b)
                for b in self._names[self._rbcnamescount :]
            )
        )
        self._rbcsnameslen = f.tell()
    finally:
        if f is not None:
            f.close()
    self._rbcnamescount = len(self._names)
def _writerevs(self, repo, start):
    """write the new revs to revbranchcache

    ``start`` is the byte offset in the revs file from which records must
    be (re)written. The number of records written is capped by both the
    changelog length and the number of records held in memory; that count
    is stored in ``_rbcrevslen`` once the file has been written.
    """
    changelog_len = len(repo.changelog)
    cached_len = len(self._rbcrevs) // _rbcrecsize
    revcount = min(changelog_len, cached_len)
    with repo.cachevfs.open(_rbcrevs, b'ab') as fp:
        offset = start
        if fp.tell() != offset:
            # the file does not end where we expected: cut it at `start`
            repo.ui.debug(
                b"truncating cache/%s to %d\n" % (_rbcrevs, start)
            )
            fp.seek(offset)
            if fp.tell() != offset:
                # seeking did not land on `start`; rewrite everything
                offset = 0
                fp.seek(offset)
            fp.truncate()
        stop = revcount * _rbcrecsize
        fp.write(self._rbcrevs.slice(offset, stop))
    self._rbcrevslen = revcount