rhodecode-enterprise-ce Files · rhodecode/lib/caching_query.py

readme: updated readme file.

marcink - - Load All Authors

File last commit:

r2883:f2837b35 default


                r2926:2123969c

default

Download file

             caching_query.py
        
                    298 lines
            
             | 10.9 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / rhodecode / lib / caching_query.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # -*- coding: utf-8 -*-

      # Copyright (C) 2010-2018 RhodeCode GmbH

      #

      # This program is free software: you can redistribute it and/or modify

      # it under the terms of the GNU Affero General Public License, version 3

      # (only), as published by the Free Software Foundation.

      #

      # This program is distributed in the hope that it will be useful,

      # but WITHOUT ANY WARRANTY; without even the implied warranty of

      # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

      # GNU General Public License for more details.

      #

      # You should have received a copy of the GNU Affero General Public License

      # along with this program.  If not, see <http://www.gnu.org/licenses/>.

      #

      # This program is dual-licensed. If you wish to learn more about the

      # RhodeCode Enterprise Edition, including its added features, Support services,

      # and proprietary license terms, please see https://rhodecode.com/licenses/

      """caching_query.py

      Represent functions and classes

      which allow the usage of Dogpile caching with SQLAlchemy.

      Introduces a query option called FromCache.

      The four new concepts introduced here are:

       * CachingQuery - a Query subclass that caches and

         retrieves results in/from dogpile.cache.

       * FromCache - a query option that establishes caching

         parameters on a Query

       * RelationshipCache - a variant of FromCache which is specific

         to a query invoked during a lazy load.

       * _key_from_query - builds a cache key from the SQL text

         and bound parameters of a Query.

      The rest of what's here are standard SQLAlchemy and

      dogpile.cache constructs.

      """

      from sqlalchemy.orm.interfaces import MapperOption

      from sqlalchemy.orm.query import Query

      from sqlalchemy.sql import visitors

      from dogpile.cache.api import NO_VALUE

      from rhodecode.lib.utils2 import safe_str

      class CachingQuery(Query):
          """Query subclass able to serve complete result sets from a dogpile
          cache region.

          A FromCache or RelationshipCache option attaches itself to the query
          as ``_cache_region``.  That object names a region (looked up in
          ``self.cache_regions``) and may carry an explicit cache key.  When the
          attribute is present the region is consulted first; if nothing is
          cached, the query is invoked normally and its results are cached.

          The FromCache and RelationshipCache mapper options are the "public"
          way of configuring this state on a CachingQuery.
          """

          def _get_region(self):
              """Return ``region_meta.dogpile_cache_regions`` from rc_cache."""
              # imported at call time rather than at module import time
              from rhodecode.lib.rc_cache import region_meta
              return region_meta.dogpile_cache_regions

          def __init__(self, regions, *args, **kw):
              """Set up the region lookup, then initialise the underlying Query.

              :param regions: lookup of region name -> dogpile region; any
                  falsy value selects whatever ``_get_region()`` returns.
              :param args: positional arguments passed to ``Query.__init__``.
              :param kw: keyword arguments passed to ``Query.__init__``.
              """
              if not regions:
                  regions = self._get_region()
              self.cache_regions = regions
              Query.__init__(self, *args, **kw)

          def __iter__(self):
              """Iterate results, going through dogpile when caching is set up.

              Without a ``_cache_region`` attribute this is the plain Query
              iteration.  With one, the result comes from ``get_value()``,
              using the materialised list of the plain iteration as the value
              to create on a cache miss.

              Note that this approach does *not* detach the loaded objects from
              the current session. If the cache backend is an in-process cache
              (like "memory") and lives beyond the scope of the current
              session's transaction, those objects may be expired. The method
              could be changed to expunge() each loaded item from the session
              before returning, so that cached items are not the same ones
              held by the current Session.
              """
              parent = super(CachingQuery, self)

              if not hasattr(self, '_cache_region'):
                  return parent.__iter__()

              def load_rows():
                  return list(parent.__iter__())

              return self.get_value(createfunc=load_rows)

          def _execute_and_instances(self, context):
              """Execute the query, going through dogpile when it is invoked
              directly from an external context.

              This override is needed to stay compatible with the "baked query"
              system used by default in some relationship loader scenarios.
              See also RelationshipCache._generate_cache_key, which enables
              the baked query to be used within lazy loads.

              .. versionadded:: 1.2.7
              """
              parent = super(CachingQuery, self)

              # caching applies only when the context belongs to another query
              # object and a cache option has been attached to this one
              if context.query is self or not hasattr(self, '_cache_region'):
                  return parent._execute_and_instances(context)

              def load_rows():
                  return list(parent._execute_and_instances(context))

              # reached when Query._execute_and_instances() is called
              # directly from the baked query
              return self.get_value(createfunc=load_rows)

          def _get_cache_plus_key(self):
              """Return ``(dogpile_region, cache_key)`` for this query.

              The region is looked up by the name stored on the attached cache
              option.  The option's explicit ``cache_key`` is used when it is
              truthy, otherwise the key is derived via ``_key_from_query``.
              """
              option = self._cache_region
              region = self.cache_regions[option.region]
              cache_key = option.cache_key or _key_from_query(self)
              return region, cache_key

          def invalidate(self):
              """Delete the cache entry represented by this Query."""
              region, key = self._get_cache_plus_key()
              region.delete(key)

          def get_value(self, merge=True, createfunc=None,
                        expiration_time=None, ignore_expiration=False):
              """Return the cached value for this query.

              :param merge: when true, the value is passed through
                  ``self.merge_result(value, load=False)`` before returning.
              :param createfunc: optional callable handed to the region's
                  ``get_or_create`` to produce the value.
              :param expiration_time: forwarded to the region call.
              :param ignore_expiration: forwarded to the region's ``get``;
                  must not be combined with ``createfunc``.
              :raises KeyError: if the region yields ``NO_VALUE``, i.e. nothing
                  is cached and no createfunc was specified.
              """
              region, key = self._get_cache_plus_key()

              # ignore_expiration means: hand back a cached value even though
              # it is expired.  createfunc means: regenerate an expired value.
              # The two requests contradict each other.
              assert not (ignore_expiration and createfunc), \
                  "Can't ignore expiration and also provide createfunc"

              if createfunc and not ignore_expiration:
                  value = region.get_or_create(
                      key, createfunc, expiration_time=expiration_time)
              else:
                  value = region.get(
                      key,
                      expiration_time=expiration_time,
                      ignore_expiration=ignore_expiration)

              if value is NO_VALUE:
                  raise KeyError(key)

              if not merge:
                  return value
              return self.merge_result(value, load=False)

          def set_value(self, value):
              """Store ``value`` in the cache under this query's key."""
              region, key = self._get_cache_plus_key()
              region.set(key, value)

      def query_callable(regions=None, query_cls=CachingQuery):
          """Return a factory that builds ``query_cls`` instances.

          The returned callable forwards all of its positional and keyword
          arguments to ``query_cls``, with ``regions`` inserted as the first
          positional argument.
          """
          def make_query(*positional, **named):
              return query_cls(regions, *positional, **named)

          return make_query

      def _key_from_query(query, qualifier=None):
          """Build a cache key string for ``query``.

          The key consists of the compiled SQL text of the labeled statement
          followed by the stringified values of all bound parameters (taken
          in sorted parameter-name order), joined by single spaces.  There is
          a bit of a performance hit in compiling the statement here; an
          alternative is to give the Query an explicit cache key.

          No hashing is done in this function.  ``qualifier`` is accepted but
          not used.
          """
          compiled = query.with_labels().statement.compile()
          bound = compiled.params

          # the key is returned as one long string; the "key mangler"
          # set up with the region will boil it down to an md5.
          pieces = [safe_str(compiled)]
          for name in sorted(bound):
              pieces.append(safe_str(bound[name]))
          return " ".join(pieces)

      class FromCache(MapperOption):
          """Mapper option requesting that a Query load its results from a
          cache."""

          propagate_to_loaders = False

          def __init__(self, region="sql_cache_short", cache_key=None):
              """Create the option.

              :param region: name of the cache region; should be one of the
                  regions configured in the dictionary of dogpile regions.
              :param cache_key: optional string used as the key for the query.
                  Useful when the query has a huge number of parameters (for
                  example with in_()) that correspond more simply to some
                  other identifier.
              """
              self.cache_key = cache_key
              self.region = region

          def process_query(self, query):
              """Attach this option to ``query`` as its ``_cache_region``
              during normal loading."""
              query._cache_region = self

      class RelationshipCache(MapperOption):
          """Mapper option requesting that a Query issued within a "lazy load"
          take its results from a cache."""

          propagate_to_loaders = True

          def __init__(self, attribute, region="sql_cache_short", cache_key=None):
              """Create the option for one relationship attribute.

              :param attribute: a Class.attribute naming the relationship()
                  whose lazy loader should be served from the cache.
              :param region: name of the cache region.
              :param cache_key: optional string key for the query, bypassing
                  the usual derivation of a key from the Query itself.
              """
              self.region = region
              self.cache_key = cache_key
              # keyed by (owning class, relationship name) so several options
              # can share one lookup table, see and_()
              owner = attribute.property.parent.class_
              name = attribute.property.key
              self._relationship_options = {(owner, name): self}

          def _generate_cache_key(self, path):
              """Return ``None`` to tell the lazy-loader strategy that a
              "baked" query may be used.

              If this method were omitted, the default
              :class:`.MapperOption._generate_cache_key` would return
              ``False`` and disable the "baked" query.

              .. versionadded:: 1.2.7
              """
              return None

          def process_query_conditionally(self, query):
              """Attach the matching cache option to a lazy-loader query.

              (process_query_conditionally() is a SQLAlchemy hook invoked only
              within lazyload.)  Nothing happens when the query has no current
              path.  Otherwise the classes in the mapper's MRO are checked in
              order, and the first one registered together with the
              relationship key supplies ``query._cache_region``.
              """
              path = query._current_path
              if not path:
                  return

              mapper, prop = path[-2:]
              key = prop.key
              registered = self._relationship_options

              for klass in mapper.class_.__mro__:
                  lookup = (klass, key)
                  if lookup in registered:
                      query._cache_region = registered[lookup]
                      break

          def and_(self, option):
              """Merge another RelationshipCache's entries into this option and
              return this option.

              Many RelationshipCache objects can be given to one Query
              separately, but chaining them allows a more efficient lookup
              during load.
              """
              combined = self._relationship_options
              combined.update(option._relationship_options)
              return self

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# -*- coding: utf-8 -*-

				# Copyright (C) 2010-2018 RhodeCode GmbH
				#
				# This program is free software: you can redistribute it and/or modify
				# it under the terms of the GNU Affero General Public License, version 3
				# (only), as published by the Free Software Foundation.
				#
				# This program is distributed in the hope that it will be useful,
				# but WITHOUT ANY WARRANTY; without even the implied warranty of
				# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				# GNU General Public License for more details.
				#
				# You should have received a copy of the GNU Affero General Public License
				# along with this program. If not, see <http://www.gnu.org/licenses/>.
				#
				# This program is dual-licensed. If you wish to learn more about the
				# RhodeCode Enterprise Edition, including its added features, Support services,
				# and proprietary license terms, please see https://rhodecode.com/licenses/

				"""caching_query.py

				Represent functions and classes
				which allow the usage of Dogpile caching with SQLAlchemy.
				Introduces a query option called FromCache.

				The four new concepts introduced here are:

				* CachingQuery - a Query subclass that caches and
				retrieves results in/from dogpile.cache.
				* FromCache - a query option that establishes caching
				parameters on a Query
				* RelationshipCache - a variant of FromCache which is specific
				to a query invoked during a lazy load.
				* _key_from_query - builds a cache key from the SQL text
				and bound parameters of a Query.

				The rest of what's here are standard SQLAlchemy and
				dogpile.cache constructs.

				"""
				from sqlalchemy.orm.interfaces import MapperOption
				from sqlalchemy.orm.query import Query
				from sqlalchemy.sql import visitors
				from dogpile.cache.api import NO_VALUE

				from rhodecode.lib.utils2 import safe_str


				class CachingQuery(Query):
				    """Query subclass able to serve complete result sets from a dogpile
				    cache region.

				    A FromCache or RelationshipCache option attaches itself to the query
				    as ``_cache_region``.  That object names a region (looked up in
				    ``self.cache_regions``) and may carry an explicit cache key.  When the
				    attribute is present the region is consulted first; if nothing is
				    cached, the query is invoked normally and its results are cached.

				    The FromCache and RelationshipCache mapper options are the "public"
				    way of configuring this state on a CachingQuery.
				    """

				    def _get_region(self):
				        """Return ``region_meta.dogpile_cache_regions`` from rc_cache."""
				        # imported at call time rather than at module import time
				        from rhodecode.lib.rc_cache import region_meta
				        return region_meta.dogpile_cache_regions

				    def __init__(self, regions, *args, **kw):
				        """Set up the region lookup, then initialise the underlying Query.

				        :param regions: lookup of region name -> dogpile region; any
				            falsy value selects whatever ``_get_region()`` returns.
				        :param args: positional arguments passed to ``Query.__init__``.
				        :param kw: keyword arguments passed to ``Query.__init__``.
				        """
				        self.cache_regions = regions or self._get_region()
				        # positional and keyword arguments are both forwarded
				        Query.__init__(self, *args, **kw)

				    def __iter__(self):
				        """Iterate results, going through dogpile when caching is set up.

				        Without a ``_cache_region`` attribute this is the plain Query
				        iteration.  With one, the result comes from ``get_value()``,
				        using the materialised list of the plain iteration as the value
				        to create on a cache miss.

				        Note that this approach does *not* detach the loaded objects from
				        the current session. If the cache backend is an in-process cache
				        (like "memory") and lives beyond the scope of the current
				        session's transaction, those objects may be expired. The method
				        could be changed to expunge() each loaded item from the session
				        before returning, so that cached items are not the same ones
				        held by the current Session.
				        """
				        super_ = super(CachingQuery, self)

				        if hasattr(self, '_cache_region'):
				            return self.get_value(
				                createfunc=lambda: list(super_.__iter__()))
				        return super_.__iter__()

				    def _execute_and_instances(self, context):
				        """Execute the query, going through dogpile when it is invoked
				        directly from an external context.

				        This override is needed to stay compatible with the "baked query"
				        system used by default in some relationship loader scenarios.
				        See also RelationshipCache._generate_cache_key, which enables
				        the baked query to be used within lazy loads.

				        .. versionadded:: 1.2.7
				        """
				        super_ = super(CachingQuery, self)

				        if context.query is not self and hasattr(self, '_cache_region'):
				            # reached when Query._execute_and_instances() is called
				            # directly from the baked query
				            return self.get_value(
				                createfunc=lambda: list(
				                    super_._execute_and_instances(context)))
				        return super_._execute_and_instances(context)

				    def _get_cache_plus_key(self):
				        """Return ``(dogpile_region, cache_key)`` for this query.

				        The region is looked up by the name stored on the attached cache
				        option.  The option's explicit ``cache_key`` is used when it is
				        truthy, otherwise the key is derived via ``_key_from_query``.
				        """
				        dogpile_region = self.cache_regions[self._cache_region.region]
				        if self._cache_region.cache_key:
				            key = self._cache_region.cache_key
				        else:
				            key = _key_from_query(self)
				        return dogpile_region, key

				    def invalidate(self):
				        """Delete the cache entry represented by this Query."""
				        dogpile_region, cache_key = self._get_cache_plus_key()
				        dogpile_region.delete(cache_key)

				    def get_value(self, merge=True, createfunc=None,
				                  expiration_time=None, ignore_expiration=False):
				        """Return the cached value for this query.

				        :param merge: when true, the value is passed through
				            ``self.merge_result(value, load=False)`` before returning.
				        :param createfunc: optional callable handed to the region's
				            ``get_or_create`` to produce the value.
				        :param expiration_time: forwarded to the region call.
				        :param ignore_expiration: forwarded to the region's ``get``;
				            must not be combined with ``createfunc``.
				        :raises KeyError: if the region yields ``NO_VALUE``, i.e. nothing
				            is cached and no createfunc was specified.
				        """
				        dogpile_region, cache_key = self._get_cache_plus_key()

				        # ignore_expiration means: hand back a cached value even though
				        # it is expired.  createfunc means: regenerate an expired value.
				        # The two requests contradict each other.
				        assert not ignore_expiration or not createfunc, \
				            "Can't ignore expiration and also provide createfunc"

				        if ignore_expiration or not createfunc:
				            cached_value = dogpile_region.get(
				                cache_key,
				                expiration_time=expiration_time,
				                ignore_expiration=ignore_expiration)
				        else:
				            cached_value = dogpile_region.get_or_create(
				                cache_key,
				                createfunc,
				                expiration_time=expiration_time)

				        if cached_value is NO_VALUE:
				            raise KeyError(cache_key)
				        if merge:
				            cached_value = self.merge_result(cached_value, load=False)
				        return cached_value

				    def set_value(self, value):
				        """Store ``value`` in the cache under this query's key."""
				        dogpile_region, cache_key = self._get_cache_plus_key()
				        dogpile_region.set(cache_key, value)


				def query_callable(regions=None, query_cls=CachingQuery):
				    """Return a factory that builds ``query_cls`` instances.

				    The returned callable forwards all of its positional and keyword
				    arguments to ``query_cls``, with ``regions`` inserted as the first
				    positional argument.
				    """
				    def query(*arg, **kw):
				        # accept any call shape, including keyword-only calls
				        return query_cls(regions, *arg, **kw)

				    return query


				def _key_from_query(query, qualifier=None):
				    """Build a cache key string for ``query``.

				    The key consists of the compiled SQL text of the labeled statement
				    followed by the stringified values of all bound parameters (taken
				    in sorted parameter-name order), joined by single spaces.  There is
				    a bit of a performance hit in compiling the statement here; an
				    alternative is to give the Query an explicit cache key.

				    No hashing is done in this function.  ``qualifier`` is accepted but
				    not used.
				    """
				    compiled = query.with_labels().statement.compile()
				    bound = compiled.params

				    # the key is returned as one long string; the "key mangler"
				    # set up with the region will boil it down to an md5.
				    pieces = [safe_str(compiled)]
				    for name in sorted(bound):
				        pieces.append(safe_str(bound[name]))
				    return " ".join(pieces)


				class FromCache(MapperOption):
				    """Mapper option requesting that a Query load its results from a
				    cache."""

				    propagate_to_loaders = False

				    def __init__(self, region="sql_cache_short", cache_key=None):
				        """Create the option.

				        :param region: name of the cache region; should be one of the
				            regions configured in the dictionary of dogpile regions.
				        :param cache_key: optional string used as the key for the query.
				            Useful when the query has a huge number of parameters (for
				            example with in_()) that correspond more simply to some
				            other identifier.
				        """
				        self.cache_key = cache_key
				        self.region = region

				    def process_query(self, query):
				        """Attach this option to ``query`` as its ``_cache_region``
				        during normal loading."""
				        query._cache_region = self


				class RelationshipCache(MapperOption):
				    """Mapper option requesting that a Query issued within a "lazy load"
				    take its results from a cache."""

				    propagate_to_loaders = True

				    def __init__(self, attribute, region="sql_cache_short", cache_key=None):
				        """Create the option for one relationship attribute.

				        :param attribute: a Class.attribute naming the relationship()
				            whose lazy loader should be served from the cache.
				        :param region: name of the cache region.
				        :param cache_key: optional string key for the query, bypassing
				            the usual derivation of a key from the Query itself.
				        """
				        self.region = region
				        self.cache_key = cache_key
				        # keyed by (owning class, relationship name) so several options
				        # can share one lookup table, see and_()
				        owner = attribute.property.parent.class_
				        name = attribute.property.key
				        self._relationship_options = {(owner, name): self}

				    def _generate_cache_key(self, path):
				        """Return ``None`` to tell the lazy-loader strategy that a
				        "baked" query may be used.

				        If this method were omitted, the default
				        :class:`.MapperOption._generate_cache_key` would return
				        ``False`` and disable the "baked" query.

				        .. versionadded:: 1.2.7
				        """
				        return None

				    def process_query_conditionally(self, query):
				        """Attach the matching cache option to a lazy-loader query.

				        (process_query_conditionally() is a SQLAlchemy hook invoked only
				        within lazyload.)  Nothing happens when the query has no current
				        path.  Otherwise the classes in the mapper's MRO are checked in
				        order, and the first one registered together with the
				        relationship key supplies ``query._cache_region``.
				        """
				        path = query._current_path
				        if not path:
				            return

				        mapper, prop = path[-2:]
				        key = prop.key
				        registered = self._relationship_options

				        for klass in mapper.class_.__mro__:
				            lookup = (klass, key)
				            if lookup in registered:
				                query._cache_region = registered[lookup]
				                break

				    def and_(self, option):
				        """Merge another RelationshipCache's entries into this option and
				        return this option.

				        Many RelationshipCache objects can be given to one Query
				        separately, but chaining them allows a more efficient lookup
				        during load.
				        """
				        combined = self._relationship_options
				        combined.update(option._relationship_options)
				        return self