# HG changeset patch
# User RhodeCode Admin
# Date 2023-07-17 12:06:01
# Node ID 2809dfc5c364bbb0c47fc2c2bbdf09cd227e8a97
# Parent 29f87dc0fbcf337e0f4c86297784fef7b061ce98
# NOTE(review): line breaks and indentation below were restored from a
#   whitespace-collapsed copy of this patch; blank context lines were placed to
#   satisfy each hunk's line counts -- verify against the repository before applying.
# NOTE(review): these extra "# " lines are presumably skipped as unknown header
#   fields by `hg import` -- TODO confirm, or strip them before importing.
# Summary of the diffs in this changeset:
#   rc_cache/__init__.py       adds the CLEAR_DELETE / CLEAR_INVALIDATE constants.
#   rc_cache/archive_cache.py  new module: get_archival_config() overlays the
#                              'archive_cache*' keys on a default eviction policy;
#                              get_archival_cache_store() builds a diskcache.FanoutCache
#                              and keeps it in the module global `cache_meta`, so a
#                              later call with a different config returns the first store.
#   rc_cache/backends.py       moves key-pattern building into _get_keys_pattern() for
#                              both backends; the Redis list_keys() default prefix
#                              changes from '' to b''.
#   rc_cache/utils.py          renames key_generator() to custom_key_generator();
#                              get_or_create_region() sets `_default_namespace` on the
#                              region; clear_cache_namespace() replaces the
#                              `invalidate=` / `hard=` keywords with a required `method`
#                              argument and returns None unless method == CLEAR_DELETE,
#                              so callers using the old keywords need updating.
caches: new cache + archive cache implementation

diff --git a/vcsserver/lib/rc_cache/__init__.py b/vcsserver/lib/rc_cache/__init__.py
--- a/vcsserver/lib/rc_cache/__init__.py
+++ b/vcsserver/lib/rc_cache/__init__.py
@@ -52,6 +52,10 @@ register_backend(
 log = logging.getLogger(__name__)
 
 
+CLEAR_DELETE = 'delete'
+CLEAR_INVALIDATE = 'invalidate'
+
+
 def async_creation_runner(cache, somekey, creator, mutex):
 
     def runner():
diff --git a/vcsserver/lib/rc_cache/archive_cache.py b/vcsserver/lib/rc_cache/archive_cache.py
new file mode 100644
--- /dev/null
+++ b/vcsserver/lib/rc_cache/archive_cache.py
@@ -0,0 +1,72 @@
+# RhodeCode VCSServer provides access to different vcs backends via network.
+# Copyright (C) 2014-2020 RhodeCode GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+import logging
+import os
+import diskcache
+
+log = logging.getLogger(__name__)
+
+cache_meta = None
+
+
+def get_archival_config(config):
+    final_config = {
+        'archive_cache.eviction_policy': 'least-frequently-used'
+    }
+
+    for k, v in config.items():
+        if k.startswith('archive_cache'):
+            final_config[k] = v
+
+    return final_config
+
+
+def get_archival_cache_store(config):
+
+    global cache_meta
+    if cache_meta is not None:
+        return cache_meta
+
+    config = get_archival_config(config)
+
+    archive_cache_dir = config['archive_cache.store_dir']
+    archive_cache_size_gb = config['archive_cache.cache_size_gb']
+    archive_cache_shards = config['archive_cache.cache_shards']
+    archive_cache_eviction_policy = config['archive_cache.eviction_policy']
+
+    log.debug('Initializing archival cache instance under %s', archive_cache_dir)
+
+    # check if it's ok to write, and re-create the archive cache
+    if not os.path.isdir(archive_cache_dir):
+        os.makedirs(archive_cache_dir, exist_ok=True)
+
+    d_cache = diskcache.FanoutCache(
+        archive_cache_dir, shards=archive_cache_shards,
+        cull_limit=0,  # manual eviction required
+        size_limit=archive_cache_size_gb * 1024 * 1024 * 1024,
+        eviction_policy=archive_cache_eviction_policy,
+        timeout=30
+    )
+    cache_meta = d_cache
+    return cache_meta
+
+
+def includeme(config):
+    # init our cache at start, for vcsserver we don't init at runtime
+    # because our cache config is sent via wire on make archive call, this call just lazy-enables the client
+    return
diff --git a/vcsserver/lib/rc_cache/backends.py b/vcsserver/lib/rc_cache/backends.py
--- a/vcsserver/lib/rc_cache/backends.py
+++ b/vcsserver/lib/rc_cache/backends.py
@@ -125,8 +125,11 @@ class FileNamespaceBackend(PickleSeriali
     def __str__(self):
         return self.__repr__()
 
+    def _get_keys_pattern(self, prefix: bytes = b''):
+        return b'%b:%b' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
+
     def list_keys(self, prefix: bytes = b''):
-        prefix = b'%b:%b' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
+        prefix = self._get_keys_pattern(prefix)
 
         def cond(dbm_key: bytes):
             if not prefix:
@@ -185,8 +188,11 @@ class BaseRedisBackend(redis_backend.Red
             )
             self.reader_client = self.writer_client
 
-    def list_keys(self, prefix=''):
-        prefix = f'{self.key_prefix}:{prefix}*'
+    def _get_keys_pattern(self, prefix: bytes = b''):
+        return b'%b:%b*' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
+
+    def list_keys(self, prefix: bytes = b''):
+        prefix = self._get_keys_pattern(prefix)
         return self.reader_client.keys(prefix)
 
     def get_store(self):
diff --git a/vcsserver/lib/rc_cache/utils.py b/vcsserver/lib/rc_cache/utils.py
--- a/vcsserver/lib/rc_cache/utils.py
+++ b/vcsserver/lib/rc_cache/utils.py
@@ -50,11 +50,13 @@ class RhodeCodeCacheRegion(CacheRegion):
         And it's faster in cases we don't ever want to compute cached values
         """
         expiration_time_is_callable = callable(expiration_time)
+        if not namespace:
+            namespace = getattr(self, '_default_namespace', None)
 
         if function_key_generator is None:
             function_key_generator = self.function_key_generator
 
-        def get_or_create_for_user_func(key_generator, user_func, *arg, **kw):
+        def get_or_create_for_user_func(func_key_generator, user_func, *arg, **kw):
 
             if not condition:
                 log.debug('Calling un-cached method:%s', user_func.__name__)
@@ -64,7 +66,7 @@ class RhodeCodeCacheRegion(CacheRegion):
                 log.debug('un-cached method:%s took %.4fs', user_func.__name__, total)
                 return result
 
-            key = key_generator(*arg, **kw)
+            key = func_key_generator(*arg, **kw)
 
             timeout = expiration_time() if expiration_time_is_callable \
                 else expiration_time
@@ -139,36 +141,36 @@ def compute_key_from_params(*args):
     return sha1(safe_bytes("_".join(map(str, args))))
 
 
+def custom_key_generator(backend, namespace, fn):
+    func_name = fn.__name__
+
+    def generate_key(*args):
+        backend_pref = getattr(backend, 'key_prefix', None) or 'backend_prefix'
+        namespace_pref = namespace or 'default_namespace'
+        arg_key = compute_key_from_params(*args)
+        final_key = f"{backend_pref}:{namespace_pref}:{func_name}_{arg_key}"
+
+        return final_key
+
+    return generate_key
+
+
 def backend_key_generator(backend):
     """
     Special wrapper that also sends over the backend to the key generator
     """
     def wrapper(namespace, fn):
-        return key_generator(backend, namespace, fn)
+        return custom_key_generator(backend, namespace, fn)
     return wrapper
 
 
-def key_generator(backend, namespace, fn):
-    func_name = fn.__name__
-
-    def generate_key(*args):
-        backend_prefix = getattr(backend, 'key_prefix', None) or 'backend_prefix'
-        namespace_pref = namespace or 'default_namespace'
-        arg_key = compute_key_from_params(*args)
-        final_key = f"{backend_prefix}:{namespace_pref}:{func_name}_{arg_key}"
-
-        return final_key
-
-    return generate_key
-
-
 def get_or_create_region(region_name, region_namespace: str = None):
     from vcsserver.lib.rc_cache.backends import FileNamespaceBackend
 
     region_obj = region_meta.dogpile_cache_regions.get(region_name)
     if not region_obj:
         reg_keys = list(region_meta.dogpile_cache_regions.keys())
-        raise OSError(f'Region `{region_name}` not in configured: {reg_keys}.')
+        raise EnvironmentError(f'Region `{region_name}` not in configured: {reg_keys}.')
 
     region_uid_name = f'{region_name}:{region_namespace}'
 
@@ -212,21 +214,29 @@ def get_or_create_region(region_name, re
         log.debug('configuring new region: %s', region_uid_name)
         region_obj = region_meta.dogpile_cache_regions[region_namespace] = new_region
 
+    region_obj._default_namespace = region_namespace
     return region_obj
 
 
-def clear_cache_namespace(cache_region: str | RhodeCodeCacheRegion, cache_namespace_uid: str, invalidate: bool = False, hard: bool = False):
+def clear_cache_namespace(cache_region: str | RhodeCodeCacheRegion, cache_namespace_uid: str, method: str):
+    from . import CLEAR_DELETE, CLEAR_INVALIDATE
+
     if not isinstance(cache_region, RhodeCodeCacheRegion):
         cache_region = get_or_create_region(cache_region, cache_namespace_uid)
+    log.debug('clearing cache region: %s with method=%s', cache_region, method)
 
-    cache_keys = cache_region.backend.list_keys(prefix=cache_namespace_uid)
-    num_delete_keys = len(cache_keys)
-    if invalidate:
+    num_affected_keys = None
+
+    if method == CLEAR_INVALIDATE:
         # NOTE: The CacheRegion.invalidate() method’s default mode of
         # operation is to set a timestamp local to this CacheRegion in this Python process only.
         # It does not impact other Python processes or regions as the timestamp is only stored locally in memory.
-        cache_region.invalidate(hard=hard)
-    else:
-        if num_delete_keys:
+        cache_region.invalidate(hard=True)
+
+    if method == CLEAR_DELETE:
+        cache_keys = cache_region.backend.list_keys(prefix=cache_namespace_uid)
+        num_affected_keys = len(cache_keys)
+        if num_affected_keys:
             cache_region.delete_multi(cache_keys)
-    return num_delete_keys
+
+    return num_affected_keys