rhodecode-vcsserver Commit - r1251:f8e66197

feat(archive-cache): synced with CE codebase

super-admin -

r1251:f8e66197 default

parent child

vcsserver/lib/archive_cache/__init__.py

0 created 644 +79 0

			@@ -0,0 +1,79 b''
		1	# Copyright (C) 2015-2024 RhodeCode GmbH
		2	#
		3	# This program is free software: you can redistribute it and/or modify
		4	# it under the terms of the GNU Affero General Public License, version 3
		5	# (only), as published by the Free Software Foundation.
		6	#
		7	# This program is distributed in the hope that it will be useful,
		8	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		9	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		10	# GNU General Public License for more details.
		11	#
		12	# You should have received a copy of the GNU Affero General Public License
		13	# along with this program. If not, see <http://www.gnu.org/licenses/>.
		14	#
		15	# This program is dual-licensed. If you wish to learn more about the
		16	# RhodeCode Enterprise Edition, including its added features, Support services,
		17	# and proprietary license terms, please see https://rhodecode.com/licenses/
		18
		19	import logging
		20
		21	from .backends.fanout_cache import FileSystemFanoutCache
		22	from .backends.objectstore_cache import ObjectStoreCache
		23
		24	from .utils import archive_iterator # noqa
		25	from .lock import ArchiveCacheGenerationLock # noqa
		26
		27	log = logging.getLogger(__name__)
		28
		29
		30	cache_meta = None
		31
		32
		33	def includeme(config):
		34	return # vcsserver gets its config from rhodecode on a remote call
		35	# init our cache at start
		36	settings = config.get_settings()
		37	get_archival_cache_store(settings)
		38
		39
		40	def get_archival_config(config):
		41
		42	final_config = {
		43
		44	}
		45
		46	for k, v in config.items():
		47	if k.startswith('archive_cache'):
		48	final_config[k] = v
		49
		50	return final_config
		51
		52
		53	def get_archival_cache_store(config, always_init=False):
		54
		55	global cache_meta
		56	if cache_meta is not None and not always_init:
		57	return cache_meta
		58
		59	config = get_archival_config(config)
		60	backend = config['archive_cache.backend.type']
		61
		62	archive_cache_locking_url = config['archive_cache.locking.url']
		63
		64	match backend:
		65	case 'filesystem':
		66	d_cache = FileSystemFanoutCache(
		67	locking_url=archive_cache_locking_url,
		68	**config
		69	)
		70	case 'objectstore':
		71	d_cache = ObjectStoreCache(
		72	locking_url=archive_cache_locking_url,
		73	**config
		74	)
		75	case _:
		76	raise ValueError(f'archive_cache.backend.type only supports "filesystem" or "objectstore" got {backend} ')
		77
		78	cache_meta = d_cache
		79	return cache_meta

vcsserver/lib/archive_cache/backends/__init__.py

0 created 644 +17 0

			@@ -0,0 +1,17 b''
		1	# Copyright (C) 2015-2024 RhodeCode GmbH
		2	#
		3	# This program is free software: you can redistribute it and/or modify
		4	# it under the terms of the GNU Affero General Public License, version 3
		5	# (only), as published by the Free Software Foundation.
		6	#
		7	# This program is distributed in the hope that it will be useful,
		8	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		9	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		10	# GNU General Public License for more details.
		11	#
		12	# You should have received a copy of the GNU Affero General Public License
		13	# along with this program. If not, see <http://www.gnu.org/licenses/>.
		14	#
		15	# This program is dual-licensed. If you wish to learn more about the
		16	# RhodeCode Enterprise Edition, including its added features, Support services,
		17	# and proprietary license terms, please see https://rhodecode.com/licenses/

vcsserver/lib/archive_cache/backends/base.py

0 created 644 +348 0

			@@ -0,0 +1,348 b''
		1	# Copyright (C) 2015-2024 RhodeCode GmbH
		2	#
		3	# This program is free software: you can redistribute it and/or modify
		4	# it under the terms of the GNU Affero General Public License, version 3
		5	# (only), as published by the Free Software Foundation.
		6	#
		7	# This program is distributed in the hope that it will be useful,
		8	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		9	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		10	# GNU General Public License for more details.
		11	#
		12	# You should have received a copy of the GNU Affero General Public License
		13	# along with this program. If not, see <http://www.gnu.org/licenses/>.
		14	#
		15	# This program is dual-licensed. If you wish to learn more about the
		16	# RhodeCode Enterprise Edition, including its added features, Support services,
		17	# and proprietary license terms, please see https://rhodecode.com/licenses/
		18
		19	import os
		20	import functools
		21	import logging
		22	import typing
		23	import time
		24	import zlib
		25
		26	from ...ext_json import json
		27	from ..utils import StatsDB, NOT_GIVEN, ShardFileReader, EVICTION_POLICY, format_size
		28	from ..lock import GenerationLock
		29
		30	log = logging.getLogger(__name__)
		31
		32
		33	class BaseShard:
		34	storage_type: str = ''
		35	fs = None
		36
		37	@classmethod
		38	def hash(cls, key):
		39	"""Compute portable hash for `key`.
		40
		41	:param key: key to hash
		42	:return: hash value
		43
		44	"""
		45	mask = 0xFFFFFFFF
		46	return zlib.adler32(key.encode('utf-8')) & mask # noqa
		47
		48	def _write_file(self, full_path, read_iterator, mode):
		49	raise NotImplementedError
		50
		51	def _get_keyfile(self, key):
		52	raise NotImplementedError
		53
		54	def random_filename(self):
		55	raise NotImplementedError
		56
		57	def _store(self, key, value_reader, metadata, mode):
		58	(filename, # hash-name
		59	full_path # full-path/hash-name
		60	) = self.random_filename()
		61
		62	key_file, key_file_path = self._get_keyfile(key)
		63
		64	# STORE METADATA
		65	_metadata = {
		66	"version": "v1",
		67
		68	"key_file": key_file, # this is the .key.json file storing meta
		69	"key_file_path": key_file_path, # full path to key_file
		70	"archive_key": key, # original name we stored archive under, e.g my-archive.zip
		71	"archive_filename": filename, # the actual filename we stored that file under
		72	"archive_full_path": full_path,
		73
		74	"store_time": time.time(),
		75	"access_count": 0,
		76	"access_time": 0,
		77
		78	"size": 0
		79	}
		80	if metadata:
		81	_metadata.update(metadata)
		82
		83	read_iterator = iter(functools.partial(value_reader.read, 2**22), b'')
		84	size, sha256 = self._write_file(full_path, read_iterator, mode)
		85	_metadata['size'] = size
		86	_metadata['sha256'] = sha256
		87
		88	# after archive is finished, we create a key to save the presence of the binary file
		89	with self.fs.open(key_file_path, 'wb') as f:
		90	f.write(json.dumps(_metadata))
		91
		92	return key, filename, size, _metadata
		93
		94	def _fetch(self, key, retry, retry_attempts, retry_backoff):
		95	if retry is NOT_GIVEN:
		96	retry = False
		97	if retry_attempts is NOT_GIVEN:
		98	retry_attempts = 0
		99
		100	if retry and retry_attempts > 0:
		101	for attempt in range(1, retry_attempts + 1):
		102	if key in self:
		103	break
		104	# we didn't find the key, wait retry_backoff N seconds, and re-check
		105	time.sleep(retry_backoff)
		106
		107	if key not in self:
		108	log.exception(f'requested key={key} not found in {self} retry={retry}, attempts={retry_attempts}')
		109	raise KeyError(key)
		110
		111	key_file, key_file_path = self._get_keyfile(key)
		112	with self.fs.open(key_file_path, 'rb') as f:
		113	metadata = json.loads(f.read())
		114
		115	archive_path = metadata['archive_full_path']
		116
		117	try:
		118	return ShardFileReader(self.fs.open(archive_path, 'rb')), metadata
		119	finally:
		120	# update usage stats, count and accessed
		121	metadata["access_count"] = metadata.get("access_count", 0) + 1
		122	metadata["access_time"] = time.time()
		123	log.debug('Updated %s with access snapshot, access_count=%s access_time=%s',
		124	key_file, metadata['access_count'], metadata['access_time'])
		125	with self.fs.open(key_file_path, 'wb') as f:
		126	f.write(json.dumps(metadata))
		127
		128	def _remove(self, key):
		129	if key not in self:
		130	log.exception(f'requested key={key} not found in {self}')
		131	raise KeyError(key)
		132
		133	key_file, key_file_path = self._get_keyfile(key)
		134	with self.fs.open(key_file_path, 'rb') as f:
		135	metadata = json.loads(f.read())
		136
		137	archive_path = metadata['archive_full_path']
		138	self.fs.rm(archive_path)
		139	self.fs.rm(key_file_path)
		140	return 1
		141
		142	@property
		143	def storage_medium(self):
		144	return getattr(self, self.storage_type)
		145
		146	@property
		147	def key_suffix(self):
		148	return 'key.json'
		149
		150	def __contains__(self, key):
		151	"""Return `True` if `key` matching item is found in cache.
		152
		153	:param key: key matching item
		154	:return: True if key matching item
		155
		156	"""
		157	key_file, key_file_path = self._get_keyfile(key)
		158	return self.fs.exists(key_file_path)
		159
		160
		161	class BaseCache:
		162	_locking_url: str = ''
		163	_storage_path: str = ''
		164	_config = {}
		165	retry = False
		166	retry_attempts = 0
		167	retry_backoff = 1
		168	_shards = tuple()
		169
		170	def __contains__(self, key):
		171	"""Return `True` if `key` matching item is found in cache.
		172
		173	:param key: key matching item
		174	:return: True if key matching item
		175
		176	"""
		177	return self.has_key(key)
		178
		179	def __repr__(self):
		180	return f'<{self.__class__.__name__}(storage={self._storage_path})>'
		181
		182	@classmethod
		183	def gb_to_bytes(cls, gb):
		184	return gb * (1024 ** 3)
		185
		186	@property
		187	def storage_path(self):
		188	return self._storage_path
		189
		190	@classmethod
		191	def get_stats_db(cls):
		192	return StatsDB()
		193
		194	def get_conf(self, key, pop=False):
		195	if key not in self._config:
		196	raise ValueError(f"No configuration key '{key}', please make sure it exists in archive_cache config")
		197	val = self._config[key]
		198	if pop:
		199	del self._config[key]
		200	return val
		201
		202	def _get_shard(self, key):
		203	raise NotImplementedError
		204
		205	def _get_size(self, shard, archive_path):
		206	raise NotImplementedError
		207
		208	def store(self, key, value_reader, metadata=None):
		209	shard = self._get_shard(key)
		210	return shard.store(key, value_reader, metadata)
		211
		212	def fetch(self, key, retry=NOT_GIVEN, retry_attempts=NOT_GIVEN) -> tuple[typing.BinaryIO, dict]:
		213	"""
		214	Return file handle corresponding to `key` from specific shard cache.
		215	"""
		216	if retry is NOT_GIVEN:
		217	retry = self.retry
		218	if retry_attempts is NOT_GIVEN:
		219	retry_attempts = self.retry_attempts
		220	retry_backoff = self.retry_backoff
		221
		222	shard = self._get_shard(key)
		223	return shard.fetch(key, retry=retry, retry_attempts=retry_attempts, retry_backoff=retry_backoff)
		224
		225	def remove(self, key):
		226	shard = self._get_shard(key)
		227	return shard.remove(key)
		228
		229	def has_key(self, archive_key):
		230	"""Return `True` if `key` matching item is found in cache.
		231
		232	:param archive_key: key for item, this is a unique archive name we want to store data under. e.g my-archive-svn.zip
		233	:return: True if key is found
		234
		235	"""
		236	shard = self._get_shard(archive_key)
		237	return archive_key in shard
		238
		239	def iter_keys(self):
		240	for shard in self._shards:
		241	if shard.fs.exists(shard.storage_medium):
		242	for path, _dirs, _files in shard.fs.walk(shard.storage_medium):
		243	for key_file_path in _files:
		244	if key_file_path.endswith(shard.key_suffix):
		245	yield shard, key_file_path
		246
		247	def get_lock(self, lock_key):
		248	return GenerationLock(lock_key, self._locking_url)
		249
		250	def evict(self, policy=None, size_limit=None) -> int:
		251	"""
		252	Remove old items based on the conditions
		253
		254
		255	explanation of this algo:
		256	iterate over each shard, then for each shard iterate over the .key files
		257	read the key files metadata stored. This gives us a full list of keys, cached_archived, their size and
		258	access data, time creation, and access counts.
		259
		260	Store that into a memory DB so we can run different sorting strategies easily.
		261	Summing the size is a sum sql query.
		262
		263	Then we run a sorting strategy based on eviction policy.
		264	We iterate over sorted keys, and remove each checking if we hit the overall limit.
		265	"""
		266
		267	policy = policy or self._eviction_policy
		268	size_limit = size_limit or self._cache_size_limit
		269
		270	select_policy = EVICTION_POLICY[policy]['evict']
		271
		272	log.debug('Running eviction policy \'%s\', and checking for size limit: %s',
		273	policy, format_size(size_limit))
		274
		275	if select_policy is None:
		276	return 0
		277
		278	db = self.get_stats_db()
		279
		280	data = []
		281	cnt = 1
		282
		283	for shard, key_file in self.iter_keys():
		284	with shard.fs.open(os.path.join(shard.storage_medium, key_file), 'rb') as f:
		285	metadata = json.loads(f.read())
		286
		287	key_file_path = os.path.join(shard.storage_medium, key_file)
		288
		289	archive_key = metadata['archive_key']
		290	archive_path = metadata['archive_full_path']
		291
		292	size = metadata.get('size')
		293	if not size:
		294	# in case we don't have size re-calc it...
		295	size = self._get_size(shard, archive_path)
		296
		297	data.append([
		298	cnt,
		299	key_file,
		300	key_file_path,
		301	archive_key,
		302	archive_path,
		303	metadata.get('store_time', 0),
		304	metadata.get('access_time', 0),
		305	metadata.get('access_count', 0),
		306	size,
		307	])
		308	cnt += 1
		309
		310	# Insert bulk data using executemany
		311	db.bulk_insert(data)
		312
		313	total_size = db.get_total_size()
		314	log.debug('Analyzed %s keys, occupying: %s, running eviction to match %s',
		315	len(data), format_size(total_size), format_size(size_limit))
		316
		317	removed_items = 0
		318	removed_size = 0
		319	for key_file, archive_key, size in db.get_sorted_keys(select_policy):
		320	# simulate removal impact BEFORE removal
		321	total_size -= size
		322
		323	if total_size <= size_limit:
		324	# we obtained what we wanted...
		325	break
		326
		327	self.remove(archive_key)
		328	removed_items += 1
		329	removed_size += size
		330
		331	log.debug('Removed %s cache archives, and reduced size by: %s',
		332	removed_items, format_size(removed_size))
		333	return removed_items
		334
		335	def get_statistics(self):
		336	total_files = 0
		337	total_size = 0
		338	meta = {}
		339
		340	for shard, key_file in self.iter_keys():
		341	json_key = f"{shard.storage_medium}/{key_file}"
		342	with shard.fs.open(json_key, 'rb') as f:
		343	total_files += 1
		344	metadata = json.loads(f.read())
		345	total_size += metadata['size']
		346
		347	return total_files, total_size, meta
		348

vcsserver/lib/archive_cache/backends/fanout_cache.py

0 created 644 +166 0

			@@ -0,0 +1,166 b''
		1	# Copyright (C) 2015-2024 RhodeCode GmbH
		2	#
		3	# This program is free software: you can redistribute it and/or modify
		4	# it under the terms of the GNU Affero General Public License, version 3
		5	# (only), as published by the Free Software Foundation.
		6	#
		7	# This program is distributed in the hope that it will be useful,
		8	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		9	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		10	# GNU General Public License for more details.
		11	#
		12	# You should have received a copy of the GNU Affero General Public License
		13	# along with this program. If not, see <http://www.gnu.org/licenses/>.
		14	#
		15	# This program is dual-licensed. If you wish to learn more about the
		16	# RhodeCode Enterprise Edition, including its added features, Support services,
		17	# and proprietary license terms, please see https://rhodecode.com/licenses/
		18
		19	import codecs
		20	import hashlib
		21	import logging
		22	import os
		23
		24	import fsspec
		25
		26	from .base import BaseCache, BaseShard
		27	from ..utils import ShardFileReader, NOT_GIVEN
		28	from ...type_utils import str2bool
		29
		30	log = logging.getLogger(__name__)
		31
		32
		33	class FileSystemShard(BaseShard):
		34
		35	def __init__(self, index, directory, **settings):
		36	self._index = index
		37	self._directory = directory
		38	self.storage_type = 'directory'
		39	self.fs = fsspec.filesystem('file')
		40
		41	@property
		42	def directory(self):
		43	"""Cache directory."""
		44	return self._directory
		45
		46	def _get_keyfile(self, archive_key) -> tuple[str, str]:
		47	key_file = f'{archive_key}.{self.key_suffix}'
		48	return key_file, os.path.join(self.directory, key_file)
		49
		50	def _get_writer(self, path, mode):
		51	for count in range(1, 11):
		52	try:
		53	# Another cache may have deleted the directory before
		54	# the file could be opened.
		55	return self.fs.open(path, mode)
		56	except OSError:
		57	if count == 10:
		58	# Give up after 10 tries to open the file.
		59	raise
		60	continue
		61
		62	def _write_file(self, full_path, iterator, mode):
		63	# ensure dir exists
		64	destination, _ = os.path.split(full_path)
		65	if not self.fs.exists(destination):
		66	self.fs.makedirs(destination)
		67
		68	writer = self._get_writer(full_path, mode)
		69
		70	digest = hashlib.sha256()
		71	with writer:
		72	size = 0
		73	for chunk in iterator:
		74	size += len(chunk)
		75	digest.update(chunk)
		76	writer.write(chunk)
		77	writer.flush()
		78	# Get the file descriptor
		79	fd = writer.fileno()
		80
		81	# Sync the file descriptor to disk, helps with NFS cases...
		82	os.fsync(fd)
		83	sha256 = digest.hexdigest()
		84	log.debug('written new archive cache under %s, sha256: %s', full_path, sha256)
		85	return size, sha256
		86
		87	def store(self, key, value_reader, metadata: dict \| None = None):
		88	return self._store(key, value_reader, metadata, mode='xb')
		89
		90	def fetch(self, key, retry=NOT_GIVEN, retry_attempts=NOT_GIVEN, retry_backoff=1) -> tuple[ShardFileReader, dict]:
		91	return self._fetch(key, retry, retry_attempts, retry_backoff)
		92
		93	def remove(self, key):
		94	return self._remove(key)
		95
		96	def random_filename(self):
		97	"""Return filename and full-path tuple for file storage.
		98
		99	Filename will be a randomly generated 28 character hexadecimal string
		100	with ".archive_cache" suffixed. Two levels of sub-directories will be used to
		101	reduce the size of directories. On older filesystems, lookups in
		102	directories with many files may be slow.
		103	"""
		104
		105	hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8')
		106
		107	archive_name = hex_name[4:] + '.archive_cache'
		108	filename = f"{hex_name[:2]}/{hex_name[2:4]}/{archive_name}"
		109
		110	full_path = os.path.join(self.directory, filename)
		111	return archive_name, full_path
		112
		113	def __repr__(self):
		114	return f'{self.__class__.__name__}(index={self._index}, dir={self.directory})'
		115
		116
		117	class FileSystemFanoutCache(BaseCache):
		118
		119	def __init__(self, locking_url, **settings):
		120	"""
		121	Initialize file system cache instance.
		122
		123	:param str locking_url: redis url for a lock
		124	:param settings: settings dict
		125
		126	"""
		127	self._locking_url = locking_url
		128	self._config = settings
		129	cache_dir = self.get_conf('archive_cache.filesystem.store_dir')
		130	directory = str(cache_dir)
		131	directory = os.path.expanduser(directory)
		132	directory = os.path.expandvars(directory)
		133	self._directory = directory
		134	self._storage_path = directory
		135
		136	# check if it's ok to write, and re-create the archive cache
		137	if not os.path.isdir(self._directory):
		138	os.makedirs(self._directory, exist_ok=True)
		139
		140	self._count = int(self.get_conf('archive_cache.filesystem.cache_shards', pop=True))
		141
		142	self._eviction_policy = self.get_conf('archive_cache.filesystem.eviction_policy', pop=True)
		143	self._cache_size_limit = self.gb_to_bytes(int(self.get_conf('archive_cache.filesystem.cache_size_gb')))
		144
		145	self.retry = str2bool(self.get_conf('archive_cache.filesystem.retry', pop=True))
		146	self.retry_attempts = int(self.get_conf('archive_cache.filesystem.retry_attempts', pop=True))
		147	self.retry_backoff = int(self.get_conf('archive_cache.filesystem.retry_backoff', pop=True))
		148
		149	log.debug('Initializing archival cache instance under %s', self._directory)
		150	self._shards = tuple(
		151	FileSystemShard(
		152	index=num,
		153	directory=os.path.join(directory, 'shard_%03d' % num),
		154	**settings,
		155	)
		156	for num in range(self._count)
		157	)
		158	self._hash = self._shards[0].hash
		159
		160	def _get_shard(self, key) -> FileSystemShard:
		161	index = self._hash(key) % self._count
		162	shard = self._shards[index]
		163	return shard
		164
		165	def _get_size(self, shard, archive_path):
		166	return os.stat(archive_path).st_size

vcsserver/lib/archive_cache/backends/objectstore_cache.py

0 created 644 +150 0

			@@ -0,0 +1,150 b''
		1	# Copyright (C) 2015-2024 RhodeCode GmbH
		2	#
		3	# This program is free software: you can redistribute it and/or modify
		4	# it under the terms of the GNU Affero General Public License, version 3
		5	# (only), as published by the Free Software Foundation.
		6	#
		7	# This program is distributed in the hope that it will be useful,
		8	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		9	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		10	# GNU General Public License for more details.
		11	#
		12	# You should have received a copy of the GNU Affero General Public License
		13	# along with this program. If not, see <http://www.gnu.org/licenses/>.
		14	#
		15	# This program is dual-licensed. If you wish to learn more about the
		16	# RhodeCode Enterprise Edition, including its added features, Support services,
		17	# and proprietary license terms, please see https://rhodecode.com/licenses/
		18
		19	import codecs
		20	import hashlib
		21	import logging
		22	import os
		23
		24	import fsspec
		25
		26	from .base import BaseCache, BaseShard
		27	from ..utils import ShardFileReader, NOT_GIVEN
		28	from ...type_utils import str2bool
		29
		30	log = logging.getLogger(__name__)
		31
		32
		33	class S3Shard(BaseShard):
		34
		35	def __init__(self, index, bucket, **settings):
		36	self._index = index
		37	self._bucket = bucket
		38	self.storage_type = 'bucket'
		39
		40	endpoint_url = settings.pop('archive_cache.objectstore.url')
		41	key = settings.pop('archive_cache.objectstore.key')
		42	secret = settings.pop('archive_cache.objectstore.secret')
		43
		44	self.fs = fsspec.filesystem('s3', anon=False, endpoint_url=endpoint_url, key=key, secret=secret)
		45
		46	@property
		47	def bucket(self):
		48	"""Cache bucket."""
		49	return self._bucket
		50
		51	def _get_keyfile(self, archive_key) -> tuple[str, str]:
		52	key_file = f'{archive_key}-{self.key_suffix}'
		53	return key_file, os.path.join(self.bucket, key_file)
		54
		55	def _get_writer(self, path, mode):
		56	return self.fs.open(path, 'wb')
		57
		58	def _write_file(self, full_path, iterator, mode):
		59	# ensure bucket exists
		60	destination = self.bucket
		61	if not self.fs.exists(destination):
		62	self.fs.mkdir(destination, s3_additional_kwargs={})
		63
		64	writer = self._get_writer(full_path, mode)
		65
		66	digest = hashlib.sha256()
		67	with writer:
		68	size = 0
		69	for chunk in iterator:
		70	size += len(chunk)
		71	digest.update(chunk)
		72	writer.write(chunk)
		73
		74	sha256 = digest.hexdigest()
		75	log.debug('written new archive cache under %s, sha256: %s', full_path, sha256)
		76	return size, sha256
		77
		78	def store(self, key, value_reader, metadata: dict \| None = None):
		79	return self._store(key, value_reader, metadata, mode='wb')
		80
		81	def fetch(self, key, retry=NOT_GIVEN, retry_attempts=NOT_GIVEN, retry_backoff=1) -> tuple[ShardFileReader, dict]:
		82	return self._fetch(key, retry, retry_attempts, retry_backoff)
		83
		84	def remove(self, key):
		85	return self._remove(key)
		86
		87	def random_filename(self):
		88	"""Return filename and full-path tuple for file storage.
		89
		90	Filename will be a randomly generated 28 character hexadecimal string
		91	with ".archive_cache" suffixed. Two levels of sub-directories will be used to
		92	reduce the size of directories. On older filesystems, lookups in
		93	directories with many files may be slow.
		94	"""
		95
		96	hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8')
		97
		98	archive_name = hex_name[4:] + '.archive_cache'
		99	filename = f"{hex_name[:2]}-{hex_name[2:4]}-{archive_name}"
		100
		101	full_path = os.path.join(self.bucket, filename)
		102	return archive_name, full_path
		103
		104	def __repr__(self):
		105	return f'{self.__class__.__name__}(index={self._index}, bucket={self.bucket})'
		106
		107
		108	class ObjectStoreCache(BaseCache):
		109
		110	def __init__(self, locking_url, **settings):
		111	"""
		112	Initialize objectstore cache instance.
		113
		114	:param str locking_url: redis url for a lock
		115	:param settings: settings dict
		116
		117	"""
		118	self._locking_url = locking_url
		119	self._config = settings
		120
		121	objectstore_url = self.get_conf('archive_cache.objectstore.url')
		122	self._storage_path = objectstore_url
		123
		124	self._count = int(self.get_conf('archive_cache.objectstore.bucket_shards', pop=True))
		125
		126	self._eviction_policy = self.get_conf('archive_cache.objectstore.eviction_policy', pop=True)
		127	self._cache_size_limit = self.gb_to_bytes(int(self.get_conf('archive_cache.objectstore.cache_size_gb')))
		128
		129	self.retry = str2bool(self.get_conf('archive_cache.objectstore.retry', pop=True))
		130	self.retry_attempts = int(self.get_conf('archive_cache.objectstore.retry_attempts', pop=True))
		131	self.retry_backoff = int(self.get_conf('archive_cache.objectstore.retry_backoff', pop=True))
		132
		133	log.debug('Initializing archival cache instance under %s', objectstore_url)
		134	self._shards = tuple(
		135	S3Shard(
		136	index=num,
		137	bucket='rhodecode-archivecache-%03d' % num,
		138	**settings,
		139	)
		140	for num in range(self._count)
		141	)
		142	self._hash = self._shards[0].hash
		143
		144	def _get_shard(self, key) -> S3Shard:
		145	index = self._hash(key) % self._count
		146	shard = self._shards[index]
		147	return shard
		148
		149	def _get_size(self, shard, archive_path):
		150	return shard.fs.info(archive_path)['size']

vcsserver/lib/archive_cache/lock.py

0 created 644 +62 0

			@@ -0,0 +1,62 b''
		1	# Copyright (C) 2015-2024 RhodeCode GmbH
		2	#
		3	# This program is free software: you can redistribute it and/or modify
		4	# it under the terms of the GNU Affero General Public License, version 3
		5	# (only), as published by the Free Software Foundation.
		6	#
		7	# This program is distributed in the hope that it will be useful,
		8	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		9	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		10	# GNU General Public License for more details.
		11	#
		12	# You should have received a copy of the GNU Affero General Public License
		13	# along with this program. If not, see <http://www.gnu.org/licenses/>.
		14	#
		15	# This program is dual-licensed. If you wish to learn more about the
		16	# RhodeCode Enterprise Edition, including its added features, Support services,
		17	# and proprietary license terms, please see https://rhodecode.com/licenses/
		18
		19	import redis
		20	from .._vendor import redis_lock
		21
		22
		23	class ArchiveCacheGenerationLock(Exception):
		24	pass
		25
		26
		27	class GenerationLock:
		28	"""
		29	Locking mechanism that detects if a lock is acquired
		30
		31	with GenerationLock(lock_key):
		32	compute_archive()
		33	"""
		34	lock_timeout = 7200
		35
		36	def __init__(self, lock_key, url):
		37	self.lock_key = lock_key
		38	self._create_client(url)
		39	self.lock = self.get_lock()
		40
		41	def _create_client(self, url):
		42	connection_pool = redis.ConnectionPool.from_url(url)
		43	self.writer_client = redis.StrictRedis(
		44	connection_pool=connection_pool
		45	)
		46	self.reader_client = self.writer_client
		47
		48	def get_lock(self):
		49	return redis_lock.Lock(
		50	redis_client=self.writer_client,
		51	name=self.lock_key,
		52	expire=self.lock_timeout,
		53	strict=True
		54	)
		55
		56	def __enter__(self):
		57	acquired = self.lock.acquire(blocking=False)
		58	if not acquired:
		59	raise ArchiveCacheGenerationLock('Failed to create a lock')
		60
		61	def __exit__(self, exc_type, exc_val, exc_tb):
		62	self.lock.release()

vcsserver/lib/archive_cache/utils.py

0 created 644 +134 0

			@@ -0,0 +1,134 b''
		1	# Copyright (C) 2015-2024 RhodeCode GmbH
		2	#
		3	# This program is free software: you can redistribute it and/or modify
		4	# it under the terms of the GNU Affero General Public License, version 3
		5	# (only), as published by the Free Software Foundation.
		6	#
		7	# This program is distributed in the hope that it will be useful,
		8	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		9	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		10	# GNU General Public License for more details.
		11	#
		12	# You should have received a copy of the GNU Affero General Public License
		13	# along with this program. If not, see <http://www.gnu.org/licenses/>.
		14	#
		15	# This program is dual-licensed. If you wish to learn more about the
		16	# RhodeCode Enterprise Edition, including its added features, Support services,
		17	# and proprietary license terms, please see https://rhodecode.com/licenses/
		18
		19	import sqlite3
		20	import s3fs.core
		21
		22	NOT_GIVEN = -917
		23
		24
		25	EVICTION_POLICY = {
		26	'none': {
		27	'evict': None,
		28	},
		29	'least-recently-stored': {
		30	'evict': 'SELECT {fields} FROM archive_cache ORDER BY store_time',
		31	},
		32	'least-recently-used': {
		33	'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_time',
		34	},
		35	'least-frequently-used': {
		36	'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_count',
		37	},
		38	}
		39
		40
		41	def archive_iterator(_reader, block_size: int = 4096 * 512):
		42	# 4096 * 64 = 64KB
		43	while 1:
		44	data = _reader.read(block_size)
		45	if not data:
		46	break
		47	yield data
		48
		49
		50	def format_size(size):
		51	# Convert size in bytes to a human-readable format (e.g., KB, MB, GB)
		52	for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
		53	if size < 1024:
		54	return f"{size:.2f} {unit}"
		55	size /= 1024
		56
		57
		58	class StatsDB:
		59
		60	def __init__(self):
		61	self.connection = sqlite3.connect(':memory:')
		62	self._init_db()
		63
		64	def _init_db(self):
		65	qry = '''
		66	CREATE TABLE IF NOT EXISTS archive_cache (
		67	rowid INTEGER PRIMARY KEY,
		68	key_file TEXT,
		69	key_file_path TEXT,
		70	archive_key TEXT,
		71	archive_path TEXT,
		72	store_time REAL,
		73	access_time REAL,
		74	access_count INTEGER DEFAULT 0,
		75	size INTEGER DEFAULT 0
		76	)
		77	'''
		78
		79	self.sql(qry)
		80	self.connection.commit()
		81
		82	@property
		83	def sql(self):
		84	return self.connection.execute
		85
		86	def bulk_insert(self, rows):
		87	qry = '''
		88	INSERT INTO archive_cache (
		89	rowid,
		90	key_file,
		91	key_file_path,
		92	archive_key,
		93	archive_path,
		94	store_time,
		95	access_time,
		96	access_count,
		97	size
		98	)
		99	VALUES (
		100	?, ?, ?, ?, ?, ?, ?, ?, ?
		101	)
		102	'''
		103	cursor = self.connection.cursor()
		104	cursor.executemany(qry, rows)
		105	self.connection.commit()
		106
		107	def get_total_size(self):
		108	qry = 'SELECT COALESCE(SUM(size), 0) FROM archive_cache'
		109	((total_size,),) = self.sql(qry).fetchall()
		110	return total_size
		111
		112	def get_sorted_keys(self, select_policy):
		113	select_policy_qry = select_policy.format(fields='key_file, archive_key, size')
		114	return self.sql(select_policy_qry).fetchall()
		115
		116
		117	class ShardFileReader:
		118
		119	def __init__(self, file_like_reader):
		120	self._file_like_reader = file_like_reader
		121
		122	def __getattr__(self, item):
		123	if isinstance(self._file_like_reader, s3fs.core.S3File):
		124	match item:
		125	case 'name':
		126	# S3 FileWrapper doesn't support name attribute, and we use it
		127	return self._file_like_reader.full_name
		128	case _:
		129	return getattr(self._file_like_reader, item)
		130	else:
		131	return getattr(self._file_like_reader, item)
		132
		133	def __repr__(self):
		134	return f'<{self.__class__.__name__}={self._file_like_reader}>'

requirements.txt

0 +29 0

                  pbr==5.11.1
              dulwich==0.21.6
                urllib3==1.26.14
+             fsspec==2024.6.0
              gunicorn==21.2.0
                packaging==24.0
              hg-evolve==11.1.3
              redis==5.0.4
                async-timeout==4.0.3
              repoze.lru==0.7
+             s3fs==2024.6.0
+               aiobotocore==2.13.0
+                 aiohttp==3.9.5
+                   aiosignal==1.3.1
+                     frozenlist==1.4.1
+                   attrs==22.2.0
+                   frozenlist==1.4.1
+                   multidict==6.0.5
+                   yarl==1.9.4
+                     idna==3.4
+                     multidict==6.0.5
+                 aioitertools==0.11.0
+                 botocore==1.34.106
+                   jmespath==1.0.1
+                   python-dateutil==2.8.2
+                     six==1.16.0
+                   urllib3==1.26.14
+                 wrapt==1.16.0
+               aiohttp==3.9.5
+                 aiosignal==1.3.1
+                   frozenlist==1.4.1
+                 attrs==22.2.0
+                 frozenlist==1.4.1
+                 multidict==6.0.5
+                 yarl==1.9.4
+                   idna==3.4
+                   multidict==6.0.5
+               fsspec==2024.6.0
              scandir==1.10.0
              setproctitle==1.3.3
              subvertpy==0.11.0

vcsserver/base.py

0 +1 -1

              import logging
              import urllib.parse
-             from vcsserver.lib.rc_cache.archive_cache import get_archival_cache_store
+             from vcsserver.lib.archive_cache import get_archival_cache_store
              from vcsserver import exceptions
              from vcsserver.exceptions import NoContentException

vcsserver/http_main.py

0 +1 -1

                      self.global_config = global_config
                      self.config.include('vcsserver.lib.rc_cache')
-                     self.config.include('vcsserver.lib.rc_cache.archive_cache')
+                     self.config.include('vcsserver.lib.archive_cache')
                      settings_locale = settings.get('locale', '') or 'en_US.UTF-8'
                      vcs = VCS(locale_conf=settings_locale, cache_config=settings)

vcsserver/lib/rc_cache/archive_cache/__init__.py

0 removed 0 -31

NO CONTENT: file was removed

vcsserver/lib/rc_cache/archive_cache/fanout_cache.py

0 removed 0 -455

NO CONTENT: file was removed

vcsserver/lib/rc_cache/archive_cache/lock.py

0 removed 0 -58

NO CONTENT: file was removed

vcsserver/lib/rc_cache/archive_cache/utils.py

0 removed 0 -71

NO CONTENT: file was removed

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages