# Copyright (C) 2015-2024 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/

import codecs
import hashlib
import logging
import os
import typing

import fsspec

from .base import BaseCache, BaseShard
from ..utils import ShardFileReader, NOT_GIVEN
from ...type_utils import str2bool

log = logging.getLogger(__name__)


class FileSystemShard(BaseShard):

    def __init__(self, index, directory, directory_folder, fs, **settings):
        self._index: int = index
        self._directory: str = directory
        self._directory_folder: str = directory_folder
        self.storage_type: str = 'directory'

        self.fs = fs
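
    # `_directory_folder` is the per-shard sub-directory name, e.g. 'shard_000'
    # as produced by FileSystemFanoutCache.shard_name, giving a final cache
    # path of `<store_dir>/shard_<NNN>`.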
    @property
    def directory(self) -> str:
        """Cache directory final path."""
        return os.path.join(self._directory, self._directory_folder)
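
    # The key file pairs with a stored archive; the `key_suffix` attribute is
    # inherited from BaseShard (assumption: the base class uses the key file
    # to track metadata for the archive blob).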
    def _get_keyfile(self, archive_key) -> tuple[str, str]:
        key_file: str = f'{archive_key}.{self.key_suffix}'
        return key_file, os.path.join(self.directory, key_file)

    def _get_writer(self, path, mode):
        for count in range(1, 11):
            try:
                # Another cache may have deleted the directory before
                # the file could be opened.
                return self.fs.open(path, mode)
            except OSError:
                if count == 10:
                    # Give up after 10 tries to open the file.
                    raise
                continue

    def _write_file(self, full_path, iterator, mode):

        # ensure dir exists
        destination, _ = os.path.split(full_path)
        if not self.fs.exists(destination):
            self.fs.makedirs(destination)

        writer = self._get_writer(full_path, mode)

        digest = hashlib.sha256()
        with writer:
            size = 0
            for chunk in iterator:
                size += len(chunk)
                digest.update(chunk)
                writer.write(chunk)
            writer.flush()
            # Get the file descriptor
            fd = writer.fileno()

            # Sync the file descriptor to disk, helps with NFS cases...
            os.fsync(fd)
        sha256 = digest.hexdigest()
        log.debug('written new archive cache under %s, sha256: %s', full_path, sha256)
        return size, sha256
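
    # Note: mode 'xb' means exclusive binary creation, so a concurrent writer
    # racing on the same key fails fast instead of truncating an archive that
    # is already being written.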
    def store(self, key, value_reader, metadata: dict | None = None):
        return self._store(key, value_reader, metadata, mode='xb')
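
    # retry/retry_attempts default to the NOT_GIVEN sentinel so that the base
    # implementation can presumably fall back to the cache-wide retry settings
    # configured on FileSystemFanoutCache (retry, retry_attempts, retry_backoff).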
    def fetch(self, key, retry=NOT_GIVEN,
              retry_attempts=NOT_GIVEN, retry_backoff=1, **kwargs) -> tuple[ShardFileReader, dict]:
        return self._fetch(key, retry, retry_attempts, retry_backoff)

    def remove(self, key):
        return self._remove(key)
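
    # File-name fanout: 16 random bytes hex-encode to 32 chars; the first
    # four become two directory levels, so a name starting 'ab12...' lands at
    # 'ab/12/<remaining 28 chars>.archive_cache' (illustrative value).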
    def random_filename(self):
        """Return filename and full-path tuple for file storage.

        The filename is a randomly generated 28 character hexadecimal string
        suffixed with ".archive_cache". Two levels of sub-directories are used
        to keep individual directories small; on older filesystems, lookups in
        directories with many files may be slow.
        """

        hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8')

        archive_name = hex_name[4:] + '.archive_cache'
        filename = f"{hex_name[:2]}/{hex_name[2:4]}/{archive_name}"

        full_path = os.path.join(self.directory, filename)
        return archive_name, full_path

    def __repr__(self):
        return f'{self.__class__.__name__}(index={self._index}, dir={self.directory})'


class FileSystemFanoutCache(BaseCache):
    shard_name: str = 'shard_{:03d}'
    shard_cls = FileSystemShard

    def __init__(self, locking_url, **settings):
        """
        Initialize file system cache instance.

        :param str locking_url: Redis URL used for locking
        :param settings: settings dict
        """
        self._locking_url = locking_url
        self._config = settings
        cache_dir = self.get_conf('archive_cache.filesystem.store_dir')
        directory = str(cache_dir)
        directory = os.path.expanduser(directory)
        directory = os.path.expandvars(directory)
        self._directory = directory
        self._storage_path = directory  # common path for all from BaseCache
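
        # The settings dict is expected to provide the remaining
        # 'archive_cache.filesystem.*' keys read below, e.g. (illustrative
        # values only):
        #   archive_cache.filesystem.cache_shards  = 8
        #   archive_cache.filesystem.cache_size_gb = 10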
        self._shard_count = int(self.get_conf('archive_cache.filesystem.cache_shards', pop=True))
        if self._shard_count < 1:
            raise ValueError('cache_shards must be 1 or more')

        self._eviction_policy = self.get_conf('archive_cache.filesystem.eviction_policy', pop=True)
        self._cache_size_limit = self.gb_to_bytes(int(self.get_conf('archive_cache.filesystem.cache_size_gb')))

        self.retry = str2bool(self.get_conf('archive_cache.filesystem.retry', pop=True))
        self.retry_attempts = int(self.get_conf('archive_cache.filesystem.retry_attempts', pop=True))
        self.retry_backoff = int(self.get_conf('archive_cache.filesystem.retry_backoff', pop=True))

        log.debug('Initializing %s archival cache instance', self)
        fs = fsspec.filesystem('file')
        # Check that we can write to the store and re-create the archive cache
        # main directory if it is missing.
        if not fs.exists(self._directory):
            fs.makedirs(self._directory, exist_ok=True)
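
        # Fan the cache out into N shard directories (shard_000 ... shard_NNN);
        # each shard manages its own key files and archives under store_dir.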
        self._shards = tuple(
            self.shard_cls(
                index=num,
                directory=directory,
                directory_folder=self.shard_name.format(num),
                fs=fs,
                **settings,
            )
            for num in range(self._shard_count)
        )
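        # Every shard hashes keys identically (the hash is inherited from the
        # shard base class), so shard 0's hash doubles as the cache-wide
        # key-to-shard routing function (assumption: routing lives in BaseCache).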
        self._hash = self._shards[0].hash
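
    # Local-filesystem specific: the archive size can be read straight from
    # os.stat; other backends would query their fs object instead (assumption
    # based on the per-backend override pattern).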
    def _get_size(self, shard, archive_path):
        return os.stat(archive_path).st_size