diff --git a/rhodecode/lib/caching_query.py b/rhodecode/lib/caching_query.py --- a/rhodecode/lib/caching_query.py +++ b/rhodecode/lib/caching_query.py @@ -24,266 +24,217 @@ Represent functions and classes which allow the usage of Dogpile caching with SQLAlchemy. Introduces a query option called FromCache. +.. versionchanged:: 1.4 the caching approach has been altered to work + based on a session event. + + The three new concepts introduced here are: - * CachingQuery - a Query subclass that caches and + * ORMCache - an extension for an ORM :class:`.Session` retrieves results in/from dogpile.cache. * FromCache - a query option that establishes caching parameters on a Query * RelationshipCache - a variant of FromCache which is specific to a query invoked during a lazy load. - * _params_from_query - extracts value parameters from - a Query. The rest of what's here are standard SQLAlchemy and dogpile.cache constructs. """ -from sqlalchemy.orm.interfaces import MapperOption -from sqlalchemy.orm.query import Query -from sqlalchemy.sql import visitors from dogpile.cache.api import NO_VALUE -from rhodecode.lib.utils2 import safe_str +from sqlalchemy import event +from sqlalchemy.orm import loading +from sqlalchemy.orm.interfaces import UserDefinedOption + + +DEFAULT_REGION = "sql_cache_short" -class CachingQuery(Query): - """A Query subclass which optionally loads full results from a dogpile - cache region. +class ORMCache: - The CachingQuery optionally stores additional state that allows it to consult - a dogpile.cache cache before accessing the database, in the form - of a FromCache or RelationshipCache object. Each of these objects - refer to the name of a :class:`dogpile.cache.Region` that's been configured - and stored in a lookup dictionary. When such an object has associated - itself with the CachingQuery, the corresponding :class:`dogpile.cache.Region` - is used to locate a cached result. If none is present, then the - Query is invoked normally, the results being cached. + """An add-on for an ORM :class:`.Session` optionally loads full results + from a dogpile cache region. - The FromCache and RelationshipCache mapper options below represent - the "public" method of configuring this state upon the CachingQuery. + cache = ORMCache(regions={}) + cache.listen_on_session(Session) """ - def _get_region(self): + + def __init__(self, regions): + self.cache_regions = regions or self._get_region() + self._statement_cache = {} + + @classmethod + def _get_region(cls): from rhodecode.lib.rc_cache import region_meta return region_meta.dogpile_cache_regions - def __init__(self, regions, *args, **kw): - self.cache_regions = regions or self._get_region() - Query.__init__(self, *args, **kw) + def listen_on_session(self, session_factory): + event.listen(session_factory, "do_orm_execute", self._do_orm_execute) + + def _do_orm_execute(self, orm_context): - def __iter__(self): - """override __iter__ to pull results from dogpile - if particular attributes have been configured. + for opt in orm_context.user_defined_options: + if isinstance(opt, RelationshipCache): + opt = opt._process_orm_context(orm_context) + if opt is None: + continue + + if isinstance(opt, FromCache): + dogpile_region = self.cache_regions[opt.region] - Note that this approach does *not* detach the loaded objects from - the current session. If the cache backend is an in-process cache - (like "memory") and lives beyond the scope of the current session's - transaction, those objects may be expired. The method here can be - modified to first expunge() each loaded item from the current - session before returning the list of items, so that the items - in the cache are not the same ones in the current Session. + if opt.cache_key: + our_cache_key = f'SQL_CACHE_{opt.cache_key}' + else: + our_cache_key = opt._generate_cache_key( + orm_context.statement, orm_context.parameters, self + ) - """ - super_ = super(CachingQuery, self) - - if hasattr(self, '_cache_region'): - return self.get_value(createfunc=lambda: list(super_.__iter__())) - else: - return super_.__iter__() - - def _execute_and_instances(self, context): - """override _execute_and_instances to pull results from dogpile - if the query is invoked directly from an external context. + if opt.ignore_expiration: + cached_value = dogpile_region.get( + our_cache_key, + expiration_time=opt.expiration_time, + ignore_expiration=opt.ignore_expiration, + ) + else: - This method is necessary in order to maintain compatibility - with the "baked query" system now used by default in some - relationship loader scenarios. Note also the - RelationshipCache._generate_cache_key method which enables - the baked query to be used within lazy loads. + def createfunc(): + return orm_context.invoke_statement().freeze() + + cached_value = dogpile_region.get_or_create( + our_cache_key, + createfunc, + expiration_time=opt.expiration_time, + ) - .. versionadded:: 1.2.7 - """ - super_ = super(CachingQuery, self) + if cached_value is NO_VALUE: + # keyerror? this is bigger than a keyerror... + raise KeyError() - if context.query is not self and hasattr(self, '_cache_region'): - # special logic called when the Query._execute_and_instances() - # method is called directly from the baked query - return self.get_value( - createfunc=lambda: list( - super_._execute_and_instances(context) + orm_result = loading.merge_frozen_result( + orm_context.session, + orm_context.statement, + cached_value, + load=False, ) - ) + return orm_result() + else: - return super_._execute_and_instances(context) + return None - def _get_cache_plus_key(self): - """Return a cache region plus key.""" - dogpile_region = self.cache_regions[self._cache_region.region] - if self._cache_region.cache_key: - key = self._cache_region.cache_key - else: - key = _key_from_query(self) - return dogpile_region, key + def invalidate(self, statement, parameters, opt): + """Invalidate the cache value represented by a statement.""" + + statement = statement.__clause_element__() - def invalidate(self): - """Invalidate the cache value represented by this Query.""" + dogpile_region = self.cache_regions[opt.region] - dogpile_region, cache_key = self._get_cache_plus_key() + cache_key = opt._generate_cache_key(statement, parameters, self) + dogpile_region.delete(cache_key) - def get_value(self, merge=True, createfunc=None, - expiration_time=None, ignore_expiration=False): - """Return the value from the cache for this query. - Raise KeyError if no value present and no - createfunc specified. - - """ - dogpile_region, cache_key = self._get_cache_plus_key() - - # ignore_expiration means, if the value is in the cache - # but is expired, return it anyway. This doesn't make sense - # with createfunc, which says, if the value is expired, generate - # a new value. - assert not ignore_expiration or not createfunc, \ - "Can't ignore expiration and also provide createfunc" - - if ignore_expiration or not createfunc: - cached_value = dogpile_region.get(cache_key, - expiration_time=expiration_time, - ignore_expiration=ignore_expiration) - else: - cached_value = dogpile_region.get_or_create( - cache_key, - createfunc, - expiration_time=expiration_time - ) - if cached_value is NO_VALUE: - raise KeyError(cache_key) - if merge: - cached_value = self.merge_result(cached_value, load=False) - return cached_value - - def set_value(self, value): - """Set the value in the cache for this query.""" - - dogpile_region, cache_key = self._get_cache_plus_key() - dogpile_region.set(cache_key, value) - - -def query_callable(regions=None, query_cls=CachingQuery): - def query(*arg, **kw): - return query_cls(regions, *arg, **kw) - return query - - -def _key_from_query(query, qualifier=None): - """Given a Query, create a cache key. - - There are many approaches to this; here we use the simplest, - which is to create an md5 hash of the text of the SQL statement, - combined with stringified versions of all the bound parameters - within it. There's a bit of a performance hit with - compiling out "query.statement" here; other approaches include - setting up an explicit cache key with a particular Query, - then combining that with the bound parameter values. - - """ - - stmt = query.with_labels().statement - compiled = stmt.compile() - params = compiled.params - - # here we return the key as a long string. our "key mangler" - # set up with the region will boil it down to an md5. - return " ".join( - [safe_str(compiled)] + - [safe_str(params[k]) for k in sorted(params)]) - - -class FromCache(MapperOption): +class FromCache(UserDefinedOption): """Specifies that a Query should load results from a cache.""" propagate_to_loaders = False - def __init__(self, region="sql_cache_short", cache_key=None): + def __init__( + self, + region=DEFAULT_REGION, + cache_key=None, + expiration_time=None, + ignore_expiration=False, + ): """Construct a new FromCache. :param region: the cache region. Should be a - region configured in the dictionary of dogpile - regions. + region configured in the dictionary of dogpile + regions. :param cache_key: optional. A string cache key - that will serve as the key to the query. Use this - if your query has a huge amount of parameters (such - as when using in_()) which correspond more simply to - some other identifier. + that will serve as the key to the query. Use this + if your query has a huge amount of parameters (such + as when using in_()) which correspond more simply to + some other identifier. """ self.region = region self.cache_key = cache_key + self.expiration_time = expiration_time + self.ignore_expiration = ignore_expiration - def process_query(self, query): - """Process a Query during normal loading operation.""" - query._cache_region = self + # this is not needed as of SQLAlchemy 1.4.28; + # UserDefinedOption classes no longer participate in the SQL + # compilation cache key + def _gen_cache_key(self, anon_map, bindparams): + return None + + def _generate_cache_key(self, statement, parameters, orm_cache): + """generate a cache key with which to key the results of a statement. + + This leverages the use of the SQL compilation cache key which is + repurposed as a SQL results key. + + """ + statement_cache_key = statement._generate_cache_key() + + key = statement_cache_key.to_offline_string( + orm_cache._statement_cache, statement, parameters + ) + repr(self.cache_key) + # print("here's our key...%s" % key) + return key -class RelationshipCache(MapperOption): +class RelationshipCache(FromCache): """Specifies that a Query as called within a "lazy load" - should load results from a cache.""" + should load results from a cache.""" propagate_to_loaders = True - def __init__(self, attribute, region="sql_cache_short", cache_key=None): + def __init__( + self, + attribute, + region=DEFAULT_REGION, + cache_key=None, + expiration_time=None, + ignore_expiration=False, + ): """Construct a new RelationshipCache. :param attribute: A Class.attribute which - indicates a particular class relationship() whose - lazy loader should be pulled from the cache. + indicates a particular class relationship() whose + lazy loader should be pulled from the cache. :param region: name of the cache region. :param cache_key: optional. A string cache key - that will serve as the key to the query, bypassing - the usual means of forming a key from the Query itself. + that will serve as the key to the query, bypassing + the usual means of forming a key from the Query itself. """ self.region = region self.cache_key = cache_key + self.expiration_time = expiration_time + self.ignore_expiration = ignore_expiration self._relationship_options = { (attribute.property.parent.class_, attribute.property.key): self } - def _generate_cache_key(self, path): - """Indicate to the lazy-loader strategy that a "baked" query - may be used by returning ``None``. - - If this method is omitted, the default implementation of - :class:`.MapperOption._generate_cache_key` takes place, which - returns ``False`` to disable the "baked" query from being used. - - .. versionadded:: 1.2.7 + def _process_orm_context(self, orm_context): + current_path = orm_context.loader_strategy_path - """ - return None - - def process_query_conditionally(self, query): - """Process a Query that is used within a lazy loader. - - (the process_query_conditionally() method is a SQLAlchemy - hook invoked only within lazyload.) - - """ - if query._current_path: - mapper, prop = query._current_path[-2:] + if current_path: + mapper, prop = current_path[-2:] key = prop.key for cls in mapper.class_.__mro__: if (cls, key) in self._relationship_options: - relationship_option = self._relationship_options[(cls, key)] - query._cache_region = relationship_option - break + relationship_option = self._relationship_options[ + (cls, key) + ] + return relationship_option def and_(self, option): """Chain another RelationshipCache option to this one. diff --git a/rhodecode/lib/rc_cache/__init__.py b/rhodecode/lib/rc_cache/__init__.py --- a/rhodecode/lib/rc_cache/__init__.py +++ b/rhodecode/lib/rc_cache/__init__.py @@ -45,7 +45,8 @@ from . import region_meta from .utils import ( get_default_cache_settings, backend_key_generator, get_or_create_region, clear_cache_namespace, make_region, InvalidationContext, - FreshRegionCache, ActiveRegionCache) + FreshRegionCache, ActiveRegionCache +) FILE_TREE_CACHE_VER = 'v4' diff --git a/rhodecode/lib/rc_cache/utils.py b/rhodecode/lib/rc_cache/utils.py --- a/rhodecode/lib/rc_cache/utils.py +++ b/rhodecode/lib/rc_cache/utils.py @@ -30,7 +30,6 @@ import rhodecode from rhodecode.lib.hash_utils import sha1 from rhodecode.lib.type_utils import str2bool from rhodecode.lib.str_utils import safe_bytes -from rhodecode.model.db import Session, CacheKey, IntegrityError from rhodecode.lib.rc_cache import cache_key_meta from rhodecode.lib.rc_cache import region_meta @@ -303,6 +302,8 @@ class InvalidationContext(object): self.compute_time = 0 def get_or_create_cache_obj(self, cache_type, invalidation_namespace=''): + from rhodecode.model.db import CacheKey + invalidation_namespace = invalidation_namespace or self.invalidation_namespace # fetch all cache keys for this namespace and convert them to a map to find if we # have specific cache_key object registered. We do this because we want to have @@ -347,6 +348,8 @@ class InvalidationContext(object): return FreshRegionCache(context=self, cache_data=cache_data) def __exit__(self, exc_type, exc_val, exc_tb): + from rhodecode.model.db import Session, IntegrityError + # save compute time self.compute_time = time.time() - self._start_time diff --git a/rhodecode/model/meta.py b/rhodecode/model/meta.py --- a/rhodecode/model/meta.py +++ b/rhodecode/model/meta.py @@ -25,7 +25,8 @@ SQLAlchemy Metadata and Session object from sqlalchemy.orm import declarative_base from sqlalchemy.orm import scoped_session, sessionmaker from sqlalchemy.orm import Session as SASession -from rhodecode.lib import caching_query +from rhodecode.lib.caching_query import ORMCache + __all__ = ['Base', 'Session', 'raw_query_executor'] @@ -36,11 +37,15 @@ from rhodecode.lib import caching_query # .options(FromCache("sqlalchemy_cache_type", "cachekey")) Session = scoped_session( sessionmaker( - query_cls=caching_query.query_callable(), expire_on_commit=True, ) ) +# pass empty regions so we can fetch it on-demand inside ORMCache +cache = ORMCache(regions={}) +cache.listen_on_session(Session) + + # The declarative Base Base = declarative_base()