|
|
# rev_cache.py - caching branch information per revision
|
|
|
#
|
|
|
# This software may be used and distributed according to the terms of the
|
|
|
# GNU General Public License version 2 or any later version.
|
|
|
from __future__ import annotations
|
|
|
|
|
|
import os
|
|
|
import struct
|
|
|
|
|
|
from ..node import (
|
|
|
nullrev,
|
|
|
)
|
|
|
|
|
|
from .. import (
|
|
|
encoding,
|
|
|
error,
|
|
|
pycompat,
|
|
|
util,
|
|
|
)
|
|
|
|
|
|
from ..utils import (
|
|
|
stringutil,
|
|
|
)
|
|
|
|
|
|
calcsize = struct.calcsize
|
|
|
pack_into = struct.pack_into
|
|
|
unpack_from = struct.unpack_from
|
|
|
|
|
|
|
|
|
# Revision branch info cache
|
|
|
|
|
|
# The "V2" version use the same format as the "V1" but garantee it won't be
|
|
|
# truncated, preventing SIGBUS when it is mmap-ed
|
|
|
_rbcversion = b'-v2'
|
|
|
_rbcnames = b'rbc-names' + _rbcversion
|
|
|
_rbcrevs = b'rbc-revs' + _rbcversion
|
|
|
_rbc_legacy_version = b'-v1'
|
|
|
_rbc_legacy_names = b'rbc-names' + _rbc_legacy_version
|
|
|
_rbc_legacy_revs = b'rbc-revs' + _rbc_legacy_version
|
|
|
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
|
|
|
_rbcrecfmt = b'>4sI'
|
|
|
_rbcrecsize = calcsize(_rbcrecfmt)
|
|
|
_rbcmininc = 64 * _rbcrecsize
|
|
|
_rbcnodelen = 4
|
|
|
_rbcbranchidxmask = 0x7FFFFFFF
|
|
|
_rbccloseflag = 0x80000000
|
|
|
|
|
|
|
|
|
# with atomic replacement.
|
|
|
REWRITE_RATIO = 0.2
|
|
|
|
|
|
|
|
|
class rbcrevs:
|
|
|
"""a byte string consisting of an immutable prefix followed by a mutable suffix"""
|
|
|
|
|
|
def __init__(self, revs):
|
|
|
self._prefix = revs
|
|
|
self._rest = bytearray()
|
|
|
|
|
|
@property
|
|
|
def len_prefix(self):
|
|
|
size = len(self._prefix)
|
|
|
return size - (size % _rbcrecsize)
|
|
|
|
|
|
def __len__(self):
|
|
|
return self.len_prefix + len(self._rest)
|
|
|
|
|
|
def unpack_record(self, rbcrevidx):
|
|
|
if rbcrevidx < self.len_prefix:
|
|
|
return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
|
|
|
else:
|
|
|
return unpack_from(
|
|
|
_rbcrecfmt,
|
|
|
util.buffer(self._rest),
|
|
|
rbcrevidx - self.len_prefix,
|
|
|
)
|
|
|
|
|
|
def make_mutable(self):
|
|
|
if self.len_prefix > 0:
|
|
|
entirety = bytearray()
|
|
|
entirety[:] = self._prefix[: self.len_prefix]
|
|
|
entirety.extend(self._rest)
|
|
|
self._rest = entirety
|
|
|
self._prefix = bytearray()
|
|
|
|
|
|
def truncate(self, pos):
|
|
|
self.make_mutable()
|
|
|
del self._rest[pos:]
|
|
|
|
|
|
def pack_into(self, rbcrevidx, node, branchidx):
|
|
|
if rbcrevidx < self.len_prefix:
|
|
|
self.make_mutable()
|
|
|
buf = self._rest
|
|
|
start_offset = rbcrevidx - self.len_prefix
|
|
|
end_offset = start_offset + _rbcrecsize
|
|
|
|
|
|
if len(self._rest) < end_offset:
|
|
|
# bytearray doesn't allocate extra space at least in Python 3.7.
|
|
|
# When multiple changesets are added in a row, precise resize would
|
|
|
# result in quadratic complexity. Overallocate to compensate by
|
|
|
# using the classic doubling technique for dynamic arrays instead.
|
|
|
# If there was a gap in the map before, less space will be reserved.
|
|
|
self._rest.extend(b'\0' * end_offset)
|
|
|
return pack_into(
|
|
|
_rbcrecfmt,
|
|
|
buf,
|
|
|
start_offset,
|
|
|
node,
|
|
|
branchidx,
|
|
|
)
|
|
|
|
|
|
def extend(self, extension):
|
|
|
return self._rest.extend(extension)
|
|
|
|
|
|
def slice(self, begin, end):
|
|
|
if begin < self.len_prefix:
|
|
|
acc = bytearray()
|
|
|
acc[:] = self._prefix[begin : min(end, self.len_prefix)]
|
|
|
acc.extend(
|
|
|
self._rest[begin - self.len_prefix : end - self.len_prefix]
|
|
|
)
|
|
|
return acc
|
|
|
return self._rest[begin - self.len_prefix : end - self.len_prefix]
|
|
|
|
|
|
|
|
|
class revbranchcache:
|
|
|
"""Persistent cache, mapping from revision number to branch name and close.
|
|
|
This is a low level cache, independent of filtering.
|
|
|
|
|
|
Branch names are stored in rbc-names in internal encoding separated by 0.
|
|
|
rbc-names is append-only, and each branch name is only stored once and will
|
|
|
thus have a unique index.
|
|
|
|
|
|
The branch info for each revision is stored in rbc-revs as constant size
|
|
|
records. The whole file is read into memory, but it is only 'parsed' on
|
|
|
demand. The file is usually append-only but will be truncated if repo
|
|
|
modification is detected.
|
|
|
The record for each revision contains the first 4 bytes of the
|
|
|
corresponding node hash, and the record is only used if it still matches.
|
|
|
Even a completely trashed rbc-revs fill thus still give the right result
|
|
|
while converging towards full recovery ... assuming no incorrectly matching
|
|
|
node hashes.
|
|
|
The record also contains 4 bytes where 31 bits contains the index of the
|
|
|
branch and the last bit indicate that it is a branch close commit.
|
|
|
The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
|
|
|
and will grow with it but be 1/8th of its size.
|
|
|
"""
|
|
|
|
|
|
def __init__(self, repo, readonly=True):
|
|
|
assert repo.filtername is None
|
|
|
self._repo = repo
|
|
|
self._names = [] # branch names in local encoding with static index
|
|
|
self._rbcrevs = rbcrevs(bytearray())
|
|
|
self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
|
|
|
self._force_overwrite = False
|
|
|
v1_fallback = False
|
|
|
try:
|
|
|
try:
|
|
|
bndata = repo.cachevfs.read(_rbcnames)
|
|
|
except (IOError, OSError):
|
|
|
# If we don't have "v2" data, we might have "v1" data worth
|
|
|
# using.
|
|
|
#
|
|
|
# consider stop doing this many version after hg-6.9 release
|
|
|
bndata = repo.cachevfs.read(_rbc_legacy_names)
|
|
|
v1_fallback = True
|
|
|
self._force_overwrite = True
|
|
|
self._rbcsnameslen = len(bndata) # for verification before writing
|
|
|
if bndata:
|
|
|
self._names = [
|
|
|
encoding.tolocal(bn) for bn in bndata.split(b'\0')
|
|
|
]
|
|
|
except (IOError, OSError):
|
|
|
if readonly:
|
|
|
# don't try to use cache - fall back to the slow path
|
|
|
self.branchinfo = self._branchinfo
|
|
|
|
|
|
if self._names:
|
|
|
try:
|
|
|
# In order to rename the atomictempfile in _writerevs(), the
|
|
|
# existing file needs to be removed. The Windows code
|
|
|
# (successfully) renames it to a temp file first, before moving
|
|
|
# the temp file into its place. But the removal of the original
|
|
|
# file then fails, because it's still mapped. The mmap object
|
|
|
# needs to be closed in order to remove the file, but in order
|
|
|
# to do that, the memoryview returned by util.buffer needs to be
|
|
|
# released.
|
|
|
usemmap = repo.ui.configbool(
|
|
|
b'storage',
|
|
|
b'revbranchcache.mmap',
|
|
|
default=not pycompat.iswindows,
|
|
|
)
|
|
|
if not v1_fallback:
|
|
|
with repo.cachevfs(_rbcrevs) as fp:
|
|
|
if usemmap and repo.cachevfs.is_mmap_safe(_rbcrevs):
|
|
|
data = util.buffer(util.mmapread(fp))
|
|
|
else:
|
|
|
data = fp.read()
|
|
|
else:
|
|
|
# If we don't have "v2" data, we might have "v1" data worth
|
|
|
# using.
|
|
|
#
|
|
|
# Consider stop doing this many version after hg-6.9
|
|
|
# release.
|
|
|
with repo.cachevfs(_rbc_legacy_revs) as fp:
|
|
|
data = fp.read()
|
|
|
self._rbcrevs = rbcrevs(data)
|
|
|
except (IOError, OSError) as inst:
|
|
|
repo.ui.debug(
|
|
|
b"couldn't read revision branch cache: %s\n"
|
|
|
% stringutil.forcebytestr(inst)
|
|
|
)
|
|
|
# remember number of good records on disk
|
|
|
self._rbcrevslen = min(
|
|
|
len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
|
|
|
)
|
|
|
if self._rbcrevslen == 0:
|
|
|
self._names = []
|
|
|
self._rbcnamescount = len(self._names) # number of names read at
|
|
|
# _rbcsnameslen
|
|
|
|
|
|
def _clear(self):
|
|
|
self._rbcsnameslen = 0
|
|
|
del self._names[:]
|
|
|
self._rbcnamescount = 0
|
|
|
self._rbcrevslen = len(self._repo.changelog)
|
|
|
self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
|
|
|
util.clearcachedproperty(self, b'_namesreverse')
|
|
|
self._force_overwrite = True
|
|
|
|
|
|
def invalidate(self, rev=0):
|
|
|
self._rbcrevslen = rev
|
|
|
self._rbcrevs.truncate(rev)
|
|
|
self._force_overwrite = True
|
|
|
|
|
|
@util.propertycache
|
|
|
def _namesreverse(self):
|
|
|
return {b: r for r, b in enumerate(self._names)}
|
|
|
|
|
|
def branchinfo(self, rev):
|
|
|
"""Return branch name and close flag for rev, using and updating
|
|
|
persistent cache."""
|
|
|
changelog = self._repo.changelog
|
|
|
rbcrevidx = rev * _rbcrecsize
|
|
|
|
|
|
# avoid negative index, changelog.read(nullrev) is fast without cache
|
|
|
if rev == nullrev:
|
|
|
return changelog.branchinfo(rev)
|
|
|
|
|
|
# if requested rev isn't allocated, grow and cache the rev info
|
|
|
if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
|
|
|
return self._branchinfo(rev)
|
|
|
|
|
|
# fast path: extract data from cache, use it if node is matching
|
|
|
reponode = changelog.node(rev)[:_rbcnodelen]
|
|
|
cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
|
|
|
close = bool(branchidx & _rbccloseflag)
|
|
|
if close:
|
|
|
branchidx &= _rbcbranchidxmask
|
|
|
if cachenode == b'\0\0\0\0':
|
|
|
pass
|
|
|
elif cachenode == reponode:
|
|
|
try:
|
|
|
return self._names[branchidx], close
|
|
|
except IndexError:
|
|
|
# recover from invalid reference to unknown branch
|
|
|
self._repo.ui.debug(
|
|
|
b"referenced branch names not found"
|
|
|
b" - rebuilding revision branch cache from scratch\n"
|
|
|
)
|
|
|
self._clear()
|
|
|
else:
|
|
|
# rev/node map has changed, invalidate the cache from here up
|
|
|
self._repo.ui.debug(
|
|
|
b"history modification detected - truncating "
|
|
|
b"revision branch cache to revision %d\n" % rev
|
|
|
)
|
|
|
truncate = rbcrevidx + _rbcrecsize
|
|
|
self._rbcrevs.truncate(truncate)
|
|
|
self._rbcrevslen = min(self._rbcrevslen, truncate)
|
|
|
|
|
|
# fall back to slow path and make sure it will be written to disk
|
|
|
return self._branchinfo(rev)
|
|
|
|
|
|
def _branchinfo(self, rev):
|
|
|
"""Retrieve branch info from changelog and update _rbcrevs"""
|
|
|
changelog = self._repo.changelog
|
|
|
b, close = changelog.branchinfo(rev)
|
|
|
if b in self._namesreverse:
|
|
|
branchidx = self._namesreverse[b]
|
|
|
else:
|
|
|
branchidx = len(self._names)
|
|
|
self._names.append(b)
|
|
|
self._namesreverse[b] = branchidx
|
|
|
reponode = changelog.node(rev)
|
|
|
if close:
|
|
|
branchidx |= _rbccloseflag
|
|
|
self._setcachedata(rev, reponode, branchidx)
|
|
|
return b, close
|
|
|
|
|
|
def setdata(self, rev, changelogrevision):
|
|
|
"""add new data information to the cache"""
|
|
|
branch, close = changelogrevision.branchinfo
|
|
|
|
|
|
if branch in self._namesreverse:
|
|
|
branchidx = self._namesreverse[branch]
|
|
|
else:
|
|
|
branchidx = len(self._names)
|
|
|
self._names.append(branch)
|
|
|
self._namesreverse[branch] = branchidx
|
|
|
if close:
|
|
|
branchidx |= _rbccloseflag
|
|
|
self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
|
|
|
# If no cache data were readable (non exists, bad permission, etc)
|
|
|
# the cache was bypassing itself by setting:
|
|
|
#
|
|
|
# self.branchinfo = self._branchinfo
|
|
|
#
|
|
|
# Since we now have data in the cache, we need to drop this bypassing.
|
|
|
if 'branchinfo' in vars(self):
|
|
|
del self.branchinfo
|
|
|
|
|
|
def _setcachedata(self, rev, node, branchidx):
|
|
|
"""Writes the node's branch data to the in-memory cache data."""
|
|
|
if rev == nullrev:
|
|
|
return
|
|
|
rbcrevidx = rev * _rbcrecsize
|
|
|
self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
|
|
|
self._rbcrevslen = min(self._rbcrevslen, rev)
|
|
|
|
|
|
tr = self._repo.currenttransaction()
|
|
|
if tr:
|
|
|
tr.addfinalize(b'write-revbranchcache', self.write)
|
|
|
|
|
|
def write(self, tr=None):
|
|
|
"""Save branch cache if it is dirty."""
|
|
|
repo = self._repo
|
|
|
wlock = None
|
|
|
step = b''
|
|
|
try:
|
|
|
# write the new names
|
|
|
if self._force_overwrite or self._rbcnamescount < len(self._names):
|
|
|
wlock = repo.wlock(wait=False)
|
|
|
step = b' names'
|
|
|
self._writenames(repo)
|
|
|
|
|
|
# write the new revs
|
|
|
start = self._rbcrevslen * _rbcrecsize
|
|
|
if self._force_overwrite or start != len(self._rbcrevs):
|
|
|
step = b''
|
|
|
if wlock is None:
|
|
|
wlock = repo.wlock(wait=False)
|
|
|
self._writerevs(repo, start)
|
|
|
|
|
|
except (IOError, OSError, error.Abort, error.LockError) as inst:
|
|
|
repo.ui.debug(
|
|
|
b"couldn't write revision branch cache%s: %s\n"
|
|
|
% (step, stringutil.forcebytestr(inst))
|
|
|
)
|
|
|
finally:
|
|
|
if wlock is not None:
|
|
|
wlock.release()
|
|
|
|
|
|
def _writenames(self, repo):
|
|
|
"""write the new branch names to revbranchcache"""
|
|
|
f = None
|
|
|
if self._force_overwrite:
|
|
|
self._rbcsnameslen = 0
|
|
|
self._rbcnamescount = 0
|
|
|
try:
|
|
|
if self._force_overwrite or self._rbcnamescount != 0:
|
|
|
f = repo.cachevfs.open(_rbcnames, b'ab')
|
|
|
current_size = f.tell()
|
|
|
if current_size == self._rbcsnameslen:
|
|
|
f.write(b'\0')
|
|
|
else:
|
|
|
f.close()
|
|
|
if self._force_overwrite:
|
|
|
dbg = b"resetting content of %s\n"
|
|
|
elif current_size > 0:
|
|
|
dbg = b"%s changed - rewriting it\n"
|
|
|
else:
|
|
|
dbg = b"%s is missing - rewriting it\n"
|
|
|
repo.ui.debug(dbg % _rbcnames)
|
|
|
self._rbcnamescount = 0
|
|
|
self._rbcrevslen = 0
|
|
|
if self._rbcnamescount == 0:
|
|
|
# before rewriting names, make sure references are removed
|
|
|
repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
|
|
|
f = repo.cachevfs.open(_rbcnames, b'wb')
|
|
|
names = self._names[self._rbcnamescount :]
|
|
|
from_local = encoding.fromlocal
|
|
|
data = b'\0'.join(from_local(b) for b in names)
|
|
|
f.write(data)
|
|
|
self._rbcsnameslen = f.tell()
|
|
|
finally:
|
|
|
if f is not None:
|
|
|
f.close()
|
|
|
self._rbcnamescount = len(self._names)
|
|
|
|
|
|
def _writerevs(self, repo, start):
|
|
|
"""write the new revs to revbranchcache"""
|
|
|
revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
|
|
|
|
|
|
end = revs * _rbcrecsize
|
|
|
if self._force_overwrite:
|
|
|
start = 0
|
|
|
|
|
|
# align start on entry boundary
|
|
|
start = _rbcrecsize * (start // _rbcrecsize)
|
|
|
|
|
|
with repo.cachevfs.open(_rbcrevs, b'a+b') as f:
|
|
|
pass # this make sure the file exist…
|
|
|
with repo.cachevfs.open(_rbcrevs, b'r+b') as f:
|
|
|
f.seek(0, os.SEEK_END)
|
|
|
current_size = f.tell()
|
|
|
if current_size < start:
|
|
|
start = 0
|
|
|
if current_size != start:
|
|
|
threshold = current_size * REWRITE_RATIO
|
|
|
overwritten = min(end, current_size) - start
|
|
|
if (max(end, current_size) - start) >= threshold:
|
|
|
start = 0
|
|
|
dbg = b"resetting content of cache/%s\n" % _rbcrevs
|
|
|
repo.ui.debug(dbg)
|
|
|
elif overwritten > 0:
|
|
|
# end affected, let us overwrite the bad value
|
|
|
dbg = b"overwriting %d bytes from %d in cache/%s"
|
|
|
dbg %= (current_size - start, start, _rbcrevs)
|
|
|
if end < current_size:
|
|
|
extra = b" leaving (%d trailing bytes)"
|
|
|
extra %= current_size - end
|
|
|
dbg += extra
|
|
|
dbg += b'\n'
|
|
|
repo.ui.debug(dbg)
|
|
|
else:
|
|
|
# extra untouched data at the end, lets warn about them
|
|
|
assert start == end # since don't write anything
|
|
|
dbg = b"cache/%s contains %d unknown trailing bytes\n"
|
|
|
dbg %= (_rbcrevs, current_size - start)
|
|
|
repo.ui.debug(dbg)
|
|
|
|
|
|
if start > 0:
|
|
|
f.seek(start)
|
|
|
f.write(self._rbcrevs.slice(start, end))
|
|
|
else:
|
|
|
f.close()
|
|
|
with repo.cachevfs.open(
|
|
|
_rbcrevs,
|
|
|
b'wb',
|
|
|
atomictemp=True,
|
|
|
) as rev_file:
|
|
|
rev_file.write(self._rbcrevs.slice(start, end))
|
|
|
self._rbcrevslen = revs
|
|
|
self._force_overwrite = False
|
|
|
|