##// END OF EJS Templates
compression: introduce an official `format.revlog-compression` option...
compression: introduce an official `format.revlog-compression` option This option supersedes the `experiment.format.compression` option. The values currently supported are zlib (default) and zstd (if Mercurial was compiled with zstd support). The option gained an explicit reference to `revlog` since this is the target usage here. Different storage methods might require different compression strategies. In our tests, using zstd gives a significant CPU usage improvement (both compressing and decompressing) while keeping a similar repository size. Zstd has other interesting modes (dictionary, pre-text, etc…) that are probably worth exploring. However, just plain switching from zlib to zstd provides a large benefit.

File last commit:

r42174:b5511845 default
r42213:4ee906aa default
Show More
branchmap.py
634 lines | 23.7 KiB | text/x-python | PythonLexer
Pierre-Yves David
branchmap: create a mercurial.branchmap module...
r18116 # branchmap.py - logic to compute, maintain and store the branchmap for a local repo
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Pierre-Yves David
branchmap: extract write logic from localrepo
r18117
Gregory Szorc
branchmap: use absolute_import
r25918 from __future__ import absolute_import
import struct
from .node import (
bin,
hex,
nullid,
nullrev,
)
from . import (
encoding,
Pierre-Yves David
error: get Abort from 'error' instead of 'util'...
r26587 error,
Augie Fackler
branchmap: make error messages consistent between Python 2 and 3...
r35849 pycompat,
Gregory Szorc
branchmap: use absolute_import
r25918 scmutil,
Simon Farnsworth
mercurial: switch to util.timer for all interval timings...
r30975 util,
Gregory Szorc
branchmap: use absolute_import
r25918 )
Yuya Nishihara
stringutil: bulk-replace call sites to point to new module...
r37102 from .utils import (
stringutil,
)
Gregory Szorc
branchmap: use absolute_import
r25918
# Module-level alias for struct.calcsize.
calcsize = struct.calcsize
Mads Kiilerich
rbc: use struct unpack_from and pack_into instead of unpack and pack...
r31370 pack_into = struct.pack_into
unpack_from = struct.unpack_from
Pierre-Yves David
branchmap: extract write logic from localrepo
r18117
Pierre-Yves David
branchmap: extract read logic from repo
r18118
Augie Fackler
subsettable: move from repoview to branchmap, the only place it's used...
r20032 ### Nearest subset relation
# Nearest subset of filter X is a filter Y so that:
# * Y is included in X,
# * X - Y is as small as possible.
# This creates an ordering used for branchmap purposes.
# the ordering may be partial
# Maps each repoview filter name to its nearest subset filter name.
# 'base' has no entry: it is the smallest view in this chain.
subsettable = {
    None: 'visible',
    'visible-hidden': 'visible',
    'visible': 'served',
    'served': 'immutable',
    'immutable': 'base',
}
Martijn Pieters
branchmap: encapsulate cache updating in the map itself...
r41764
class BranchMapCache(object):
    """mapping of filtered views of repo with their branchcache"""

    def __init__(self):
        # filter name (None for the unfiltered repo) -> branchcache object
        self._per_filter = {}

    def __getitem__(self, repo):
        """Return the branchcache for ``repo``, refreshing it first."""
        self.updatecache(repo)
        return self._per_filter[repo.filtername]

    def updatecache(self, repo):
        """Update the cache for the given filtered view on a repository"""
        # This can trigger updates for the caches for subsets of the filtered
        # view, e.g. when there is no cache for this filtered view or the cache
        # is stale.

        cl = repo.changelog
        filtername = repo.filtername
        bcache = self._per_filter.get(filtername)
        if bcache is None or not bcache.validfor(repo):
            # cache object missing or cache object stale? Read from disk
            bcache = branchcache.fromfile(repo)

        revs = []
        if bcache is None:
            # no (fresh) cache available anymore, perhaps we can re-use
            # the cache for a subset, then extend that to add info on missing
            # revisions.
            subsetname = subsettable.get(filtername)
            if subsetname is not None:
                subset = repo.filtered(subsetname)
                # indexing self recursively refreshes the subset's cache
                bcache = self[subset].copy()
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= bcache.tiprev)
            else:
                # nothing to fall back on, start empty.
                bcache = branchcache()

        revs.extend(cl.revs(start=bcache.tiprev + 1))
        if revs:
            bcache.update(repo, revs)

        assert bcache.validfor(repo), filtername
        self._per_filter[repo.filtername] = bcache

    def replace(self, repo, remotebranchmap):
        """Replace the branchmap cache for a repo with a branch mapping.

        This is likely only called during clone with a branch map from a
        remote.

        ``remotebranchmap`` maps branch names to lists of head nodes. Nothing
        is stored when it holds no heads, or when the resulting cache is not
        valid for any of the 'base', 'immutable' or 'served' views.
        """
        cl = repo.changelog
        clrev = cl.rev
        clbranchinfo = cl.branchinfo
        rbheads = []
        # branchcache keeps closed nodes in a set and calls .add() on it, so
        # collect them in a set rather than a list.
        closed = set()
        for bheads in remotebranchmap.itervalues():
            rbheads += bheads
            for h in bheads:
                # branchinfo returns (branch name, closes-branch flag)
                if clbranchinfo(clrev(h))[1]:
                    closed.add(h)

        if rbheads:
            rtiprev = max((int(clrev(node)) for node in rbheads))
            cache = branchcache(
                remotebranchmap, repo[rtiprev].node(), rtiprev,
                closednodes=closed)

            # Try to stick it as low as possible
            # filter above served are unlikely to be fetch from a clone
            for candidate in ('base', 'immutable', 'served'):
                rview = repo.filtered(candidate)
                if cache.validfor(rview):
                    self._per_filter[candidate] = cache
                    cache.write(rview)
                    return

    def clear(self):
        """Forget every cached branchcache object."""
        self._per_filter.clear()
Gregory Szorc
branchmap: move branch cache code out of streamclone.py...
r26460
Pulkit Goyal
branchmap: remove the dict interface from the branchcache class (API)...
class branchcache(object):
    """A dict like object that holds the branch heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                 filteredhash=None, closednodes=None, hasnode=None):
        """ hasnode is a function which can be used to verify whether changelog
        has a given node or not. If it's not provided, we assume that every node
        we have exists in changelog """
        self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closednodes is None:
            self._closednodes = set()
        else:
            self._closednodes = closednodes
        # branch name -> list of head nodes (the mapping itself is copied)
        self._entries = dict(entries)
        # whether closed nodes are verified or not
        self._closedverified = False
        # branches for which nodes are verified
        self._verifiedbranches = set()
        self._hasnode = hasnode
        if self._hasnode is None:
            self._hasnode = lambda x: True

    def __iter__(self):
        """Iterate over the branch names."""
        return iter(self._entries)

    def __setitem__(self, key, value):
        """Set the list of head nodes for branch ``key``."""
        self._entries[key] = value

    def __getitem__(self, key):
        """Return the head nodes of branch ``key``; KeyError if unknown."""
        return self._entries[key]

    def iteritems(self):
        """Iterate over (branch name, head nodes) pairs."""
        return self._entries.iteritems()

    def hasbranch(self, label):
        """ checks whether a branch of this name exists or not """
        return label in self._entries

    @classmethod
    def fromfile(cls, repo):
        """Read the on-disk cache for ``repo``.

        Returns None when the file cannot be opened or read, when its key
        line does not match the repo, or when its content is invalid.
        """
        f = None
        try:
            f = repo.cachevfs(cls._filename(repo))
            lineiter = iter(f)
            # key line: <tip hex node> <tip rev> [filtered hash]
            cachekey = next(lineiter).rstrip('\n').split(" ", 2)
            last, lrev = cachekey[:2]
            last, lrev = bin(last), int(lrev)
            filteredhash = None
            hasnode = repo.changelog.hasnode
            if len(cachekey) > 2:
                filteredhash = bin(cachekey[2])
            bcache = cls(tipnode=last, tiprev=lrev, filteredhash=filteredhash,
                         hasnode=hasnode)
            if not bcache.validfor(repo):
                # invalidate the cache
                raise ValueError(r'tip differs')
            bcache.load(repo, lineiter)
        except (IOError, OSError):
            return None

        except Exception as inst:
            # any other failure means the content is unusable
            if repo.ui.debugflag:
                msg = 'invalid branchheads cache'
                if repo.filtername is not None:
                    msg += ' (%s)' % repo.filtername
                msg += ': %s\n'
                repo.ui.debug(msg % pycompat.bytestr(inst))
            bcache = None

        finally:
            if f:
                f.close()

        return bcache

    def load(self, repo, lineiter):
        """ fully loads the branchcache by reading from the file using the line
        iterator passed

        Raises ValueError on an unknown open/closed state or on a node that
        the changelog does not have."""
        cl = repo.changelog
        for line in lineiter:
            line = line.rstrip('\n')
            if not line:
                continue
            node, state, label = line.split(" ", 2)
            # compare against the two valid letters; a substring test on
            # 'oc' would also accept '' and 'oc'
            if state not in ('o', 'c'):
                raise ValueError(r'invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            if not cl.hasnode(node):
                raise ValueError(
                    r'node %s does not exist' % pycompat.sysstr(hex(node)))
            self._entries.setdefault(label, []).append(node)
            self._verifiedbranches.add(label)
            if state == 'c':
                self._closednodes.add(node)
        self._closedverified = True

    @staticmethod
    def _filename(repo):
        """name of a branchcache file for a given repo or repoview"""
        filename = "branch2"
        if repo.filtername:
            filename = '%s-%s' % (filename, repo.filtername)
        return filename

    def validfor(self, repo):
        """Is the cache content valid regarding a repo

        - False when cached tipnode is unknown or if we detect a strip.
        - True when cache is up to date or a subset of current repo."""
        try:
            return ((self.tipnode == repo.changelog.node(self.tiprev))
                    and (self.filteredhash ==
                         scmutil.filteredhash(repo, self.tiprev)))
        except IndexError:
            return False

    def _branchtip(self, heads):
        '''Return tuple with last open head in heads and false,
        otherwise return last closed head and true.'''
        tip = heads[-1]
        closed = True
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        '''Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch.'''
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        """Yield the nodes of ``nodes`` that are not recorded as closed."""
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        """Return the heads of ``branch``; closed heads only if ``closed``."""
        heads = self[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        """Yield (branch name, heads, tip node, is-closed) tuples."""
        for bn, heads in self.iteritems():
            yield (bn, heads) + self._branchtip(heads)

    def iterheads(self):
        """ returns all the heads """
        return self._entries.itervalues()

    def copy(self):
        """return a copy of the branchcache object

        The entries mapping and the closed node set are both duplicated, so
        updating the copy does not alter this object. The per-branch head
        lists are shared.
        """
        return branchcache(
            self._entries, self.tipnode, self.tiprev, self.filteredhash,
            set(self._closednodes))

    def write(self, repo):
        """Write the cache to its file for ``repo``.

        IOError, OSError and error.Abort are reported through ui.debug and
        otherwise ignored.
        """
        try:
            f = repo.cachevfs(self._filename(repo), "w", atomictemp=True)
            cachekey = [hex(self.tipnode), '%d' % self.tiprev]
            if self.filteredhash is not None:
                cachekey.append(hex(self.filteredhash))
            f.write(" ".join(cachekey) + '\n')
            nodecount = 0
            for label, nodes in sorted(self.iteritems()):
                label = encoding.fromlocal(label)
                for node in nodes:
                    nodecount += 1
                    if node in self._closednodes:
                        state = 'c'
                    else:
                        state = 'o'
                    f.write("%s %s %s\n" % (hex(node), state, label))
            f.close()
            # filtername is None for the unfiltered repo; substitute a
            # printable name, as update() does
            repo.ui.log('branchcache',
                        'wrote %s branch cache with %d labels and %d nodes\n',
                        repo.filtername or b'None', len(self._entries),
                        nodecount)
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug("couldn't write branch cache: %s\n" %
                          stringutil.forcebytestr(inst))

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        heads missing, this function updates self to be correct.

        The updated cache is written to disk before returning.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))

        # fetch current topological heads to speed up filtering
        topoheads = set(cl.headrevs())

        # if older branchheads are reachable from new ones, they aren't
        # really branchheads. Note checking parents is insufficient:
        # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
        for branch, newheadrevs in newbranches.iteritems():
            bheads = self._entries.setdefault(branch, [])
            bheadset = set(cl.rev(node) for node in bheads)

            # This have been tested True on all internal usage of this function.
            # run it again in case of doubt
            # assert not (set(bheadrevs) & set(newheadrevs))
            bheadset.update(newheadrevs)

            # This prunes out two kinds of heads - heads that are superseded by
            # a head in newheadrevs, and newheadrevs that are not heads because
            # an existing head is their descendant.
            uncertain = bheadset - topoheads
            if uncertain:
                floorrev = min(uncertain)
                ancestors = set(cl.ancestors(newheadrevs, floorrev))
                bheadset -= ancestors
            bheadrevs = sorted(bheadset)
            self[branch] = [cl.node(rev) for rev in bheadrevs]
            tiprev = bheadrevs[-1]
            if tiprev > self.tiprev:
                self.tipnode = cl.node(tiprev)
                self.tiprev = tiprev

        if not self.validfor(repo):
            # cache key are not valid anymore
            self.tipnode = nullid
            self.tiprev = nullrev
            for heads in self.iterheads():
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(repo, self.tiprev)

        duration = util.timer() - starttime
        repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                    repo.filtername or b'None', duration)

        self.write(repo)
class remotebranchcache(branchcache):
    """Branchmap info for a remote connection, should not write locally"""

    def write(self, repo):
        """Do nothing: this cache is never written to disk."""
Mads Kiilerich
branchcache: introduce revbranchcache for caching of revision branch names...
r23785 # Revision branch info cache
_rbcversion = '-v1'
Boris Feld
cachevfs: migration the revbranchcache to 'cachevfs'...
r33535 _rbcnames = 'rbc-names' + _rbcversion
_rbcrevs = 'rbc-revs' + _rbcversion
Mads Kiilerich
branchcache: introduce revbranchcache for caching of revision branch names...
r23785 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
# struct format of one rbc-revs record: big-endian, a 4-byte string followed
# by a 4-byte unsigned integer
_rbcrecfmt = '>4sI'
# size in bytes of one packed record
_rbcrecsize = calcsize(_rbcrecfmt)
# number of leading node hash bytes stored in (and compared against) a record
_rbcnodelen = 4
# mask selecting the low 31 bits of the integer field (the branch name index)
_rbcbranchidxmask = 0x7fffffff
# top bit of the integer field; set when the revision closes its branch
_rbccloseflag = 0x80000000
class revbranchcache(object):
"""Persistent cache, mapping from revision number to branch name and close.
This is a low level cache, independent of filtering.
Branch names are stored in rbc-names in internal encoding separated by 0.
rbc-names is append-only, and each branch name is only stored once and will
thus have a unique index.
The branch info for each revision is stored in rbc-revs as constant size
records. The whole file is read into memory, but it is only 'parsed' on
demand. The file is usually append-only but will be truncated if repo
modification is detected.
The record for each revision contains the first 4 bytes of the
corresponding node hash, and the record is only used if it still matches.
Even a completely trashed rbc-revs file will thus still give the right result
while converging towards full recovery ... assuming no incorrectly matching
node hashes.
The record also contains 4 bytes where 31 bits contain the index of the
branch and the last bit indicates that it is a branch close commit.
The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
and will grow with it but be 1/8th of its size.
"""
Mads Kiilerich
revisionbranchcache: fall back to slow path if starting readonly (issue4531)...
def __init__(self, repo, readonly=True):
    """Load the persistent revision branch cache of an unfiltered repo.

    Reads the names file and, when it yielded names, the revs file from
    repo.cachevfs. If the names file cannot be read and ``readonly`` is
    true, ``branchinfo`` is rebound on the instance to ``_branchinfo``.
    """
    assert repo.filtername is None
    self._repo = repo
    # branch names in local encoding with static index
    self._names = []
    self._rbcrevs = bytearray()
    # length of the names data read from disk
    self._rbcsnameslen = 0
    try:
        rawnames = repo.cachevfs.read(_rbcnames)
        # for verification before writing
        self._rbcsnameslen = len(rawnames)
        if rawnames:
            self._names = [encoding.tolocal(name)
                           for name in rawnames.split('\0')]
    except (IOError, OSError):
        if readonly:
            # don't try to use cache - fall back to the slow path
            self.branchinfo = self._branchinfo

    if self._names:
        try:
            self._rbcrevs[:] = repo.cachevfs.read(_rbcrevs)
        except (IOError, OSError) as inst:
            repo.ui.debug("couldn't read revision branch cache: %s\n" %
                          stringutil.forcebytestr(inst))
    # remember number of good records on disk
    ondisk = len(self._rbcrevs) // _rbcrecsize
    self._rbcrevslen = min(ondisk, len(repo.changelog))
    if not self._rbcrevslen:
        self._names = []
    # number of names read at _rbcsnameslen
    self._rbcnamescount = len(self._names)
Mads Kiilerich
branchcache: introduce revbranchcache for caching of revision branch names...
r23785
Mads Kiilerich
cache: rebuild branch cache from scratch when inconsistencies are detected...
def _clear(self):
    """Reset the in-memory cache to an empty state sized for the changelog."""
    # empty the names list in place and reset the names bookkeeping
    self._names[:] = []
    self._rbcsnameslen = 0
    self._rbcnamescount = 0
    # allocate one zero-filled record per changelog revision
    nrevs = len(self._repo.changelog)
    self._rbcrevslen = nrevs
    self._rbcrevs = bytearray(nrevs * _rbcrecsize)
    util.clearcachedproperty(self, '_namesreverse')
@util.propertycache
def _namesreverse(self):
    """Mapping from branch name to its index in ``self._names``."""
    return {name: idx for idx, name in enumerate(self._names)}
Mads Kiilerich
cache: rebuild branch cache from scratch when inconsistencies are detected...
r28558
Yuya Nishihara
branchmap: do not specify changelog as an argument...
def branchinfo(self, rev):
    """Return branch name and close flag for rev, using and updating
    persistent cache.

    The cached record for ``rev`` is only trusted when the node prefix it
    stores matches the changelog node of ``rev``.  In every other case the
    answer is computed by ``_branchinfo()``, which also stores it in the
    in-memory cache.

    Returns a ``(branchname, closed)`` tuple.
    """
    changelog = self._repo.changelog

    # avoid negative index, changelog.read(nullrev) is fast without cache
    if rev == nullrev:
        return changelog.branchinfo(rev)

    rbcrevidx = rev * _rbcrecsize

    # if requested rev isn't allocated, grow and cache the rev info
    if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
        return self._branchinfo(rev)

    # fast path: extract data from cache, use it if node is matching
    reponode = changelog.node(rev)[:_rbcnodelen]
    cachenode, branchidx = unpack_from(
        _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
    close = bool(branchidx & _rbccloseflag)
    if close:
        branchidx &= _rbcbranchidxmask
    if cachenode == '\0\0\0\0':
        # all-zero node prefix: the record was allocated but never filled
        pass
    elif cachenode == reponode:
        try:
            return self._names[branchidx], close
        except IndexError:
            # recover from invalid reference to unknown branch
            self._repo.ui.debug("referenced branch names not found"
                " - rebuilding revision branch cache from scratch\n")
            self._clear()
    else:
        # rev/node map has changed, invalidate the cache from here up
        self._repo.ui.debug("history modification detected - truncating "
            "revision branch cache to revision %d\n" % rev)
        truncate = rbcrevidx + _rbcrecsize
        del self._rbcrevs[truncate:]
        # _rbcrevslen is a number of records while ``truncate`` is a byte
        # offset; the record of ``rev`` itself is stale, so only the records
        # before it can still count as good (same clamp as _setcachedata)
        self._rbcrevslen = min(self._rbcrevslen, rev)

    # fall back to slow path and make sure it will be written to disk
    return self._branchinfo(rev)
Mads Kiilerich
branchcache: introduce revbranchcache for caching of revision branch names...
r23785
Yuya Nishihara
branchmap: do not specify changelog as an argument...
def _branchinfo(self, rev):
    """Look ``rev`` up in the changelog and record the result in _rbcrevs.

    Returns the ``(branchname, closed)`` tuple reported by the changelog.
    """
    cl = self._repo.changelog
    branch, closed = cl.branchinfo(rev)
    nameindex = self._namesreverse
    idx = nameindex.get(branch)
    if idx is None:
        # first time this branch name is seen: give it the next free index
        idx = len(self._names)
        self._names.append(branch)
        nameindex[branch] = idx
    node = cl.node(rev)
    if closed:
        # the close flag is stored in the same field as the name index
        idx |= _rbccloseflag
    self._setcachedata(rev, node, idx)
    return branch, closed
Boris Feld
revbranchcache: add a public function to update the data...
def setdata(self, branch, rev, node, close):
    """Store externally computed branch information for ``rev`` in the cache.

    ``branch`` is the branch name, ``node`` the node of ``rev`` and ``close``
    tells whether the revision closes its branch.
    """
    nameindex = self._namesreverse
    idx = nameindex.get(branch)
    if idx is None:
        # unknown branch name: register it under the next free index
        idx = len(self._names)
        self._names.append(branch)
        nameindex[branch] = idx
    record = (idx | _rbccloseflag) if close else idx
    self._setcachedata(rev, node, record)
    # When the cache could not be read (missing file, bad permission, etc)
    # the instance bypasses itself by shadowing the method:
    #
    #   self.branchinfo = self._branchinfo
    #
    # The cache holds data now, so remove that instance attribute and let
    # the regular, cache-aware branchinfo() be used again.
    if r'branchinfo' in vars(self):
        del self.branchinfo
Yuya Nishihara
branchmap: do not specify changelog as an argument...
def _setcachedata(self, rev, node, branchidx):
    """Store the record for ``rev`` in the in-memory cache buffer.

    ``node`` and ``branchidx`` are packed with ``_rbcrecfmt`` at the offset
    of ``rev``.  Nothing is stored for ``nullrev``.  When a transaction is
    running, ``write()`` is registered as one of its finalizers.
    """
    if rev == nullrev:
        return
    offset = rev * _rbcrecsize
    records = self._rbcrevs
    if offset + _rbcrecsize > len(records):
        # pad with zero bytes up to one record per changelog revision
        fullsize = len(self._repo.changelog) * _rbcrecsize
        records.extend('\0' * (fullsize - len(records)))
    pack_into(_rbcrecfmt, records, offset, node, branchidx)
    # records from ``rev`` onwards no longer match what is on disk
    self._rbcrevslen = min(self._rbcrevslen, rev)

    txn = self._repo.currenttransaction()
    if txn:
        txn.addfinalize('write-revbranchcache', self.write)
def write(self, tr=None):
    """Save branch cache if it is dirty.

    Two files are updated through ``repo.cachevfs``: the branch names file
    (``_rbcnames``) when names were added since the last read/write, and
    the records file (``_rbcrevs``) when the in-memory buffer differs from
    the number of records known to be good on disk.

    ``tr`` is not used; it is accepted so that the method can be registered
    as a transaction finalizer (see ``_setcachedata()``).

    Writing is best effort: the wlock is requested with ``wait=False`` and
    IOError, OSError, error.Abort and error.LockError are only reported
    through ``ui.debug``.  Each file opened here is closed even when
    writing to it fails.
    """
    repo = self._repo
    wlock = None
    step = ''
    try:
        if self._rbcnamescount < len(self._names):
            step = ' names'
            wlock = repo.wlock(wait=False)
            f = None
            try:
                if self._rbcnamescount != 0:
                    f = repo.cachevfs.open(_rbcnames, 'ab')
                    if f.tell() == self._rbcsnameslen:
                        # file is as we left it: append after a separator
                        f.write('\0')
                    else:
                        # size differs from what we read: start over
                        stale, f = f, None
                        stale.close()
                        repo.ui.debug("%s changed - rewriting it\n"
                                      % _rbcnames)
                        self._rbcnamescount = 0
                        self._rbcrevslen = 0
                if self._rbcnamescount == 0:
                    # before rewriting names, make sure references are
                    # removed
                    repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                    f = repo.cachevfs.open(_rbcnames, 'wb')
                f.write('\0'.join(encoding.fromlocal(b)
                                  for b in self._names[self._rbcnamescount:]))
                self._rbcsnameslen = f.tell()
            finally:
                # do not leak the handle when tell()/write() fails
                if f is not None:
                    f.close()
            self._rbcnamescount = len(self._names)

        start = self._rbcrevslen * _rbcrecsize
        if start != len(self._rbcrevs):
            step = ''
            if wlock is None:
                wlock = repo.wlock(wait=False)
            revs = min(len(repo.changelog),
                       len(self._rbcrevs) // _rbcrecsize)
            f = repo.cachevfs.open(_rbcrevs, 'ab')
            try:
                if f.tell() != start:
                    repo.ui.debug("truncating cache/%s to %d\n"
                                  % (_rbcrevs, start))
                    f.seek(start)
                    if f.tell() != start:
                        # seeking failed: rewrite the file from the start
                        start = 0
                        f.seek(start)
                    f.truncate()
                end = revs * _rbcrecsize
                f.write(self._rbcrevs[start:end])
            finally:
                # do not leak the handle when seek/truncate/write fails
                f.close()
            self._rbcrevslen = revs
    except (IOError, OSError, error.Abort, error.LockError) as inst:
        repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                      % (step, stringutil.forcebytestr(inst)))
    finally:
        if wlock is not None:
            wlock.release()