@@ -23,6 +23,7 b' import logging'
 import time
 import typing
 import zlib
+import sqlite3
 
 from vcsserver.lib.rc_json import json
 from .lock import GenerationLock
@@ -37,6 +38,72 b' NO_VAL = -917'
 MODE_BINARY = 'BINARY'
 
 
+EVICTION_POLICY = {
+    'none': {
+        'evict': None,
+    },
+    'least-recently-stored': {
+        'evict': 'SELECT {fields} FROM archive_cache ORDER BY store_time',
+    },
+    'least-recently-used': {
+        'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_time',
+    },
+    'least-frequently-used': {
+        'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_count',
+    },
+}
+
+
+class DB:
+
+    def __init__(self):
+        self.connection = sqlite3.connect(':memory:')
+        self._init_db()
+
+    def _init_db(self):
+        qry = '''
+        CREATE TABLE IF NOT EXISTS archive_cache (
+            rowid INTEGER PRIMARY KEY,
+            key_file TEXT,
+            key_file_path TEXT,
+            filename TEXT,
+            full_path TEXT,
+            store_time REAL,
+            access_time REAL,
+            access_count INTEGER DEFAULT 0,
+            size INTEGER DEFAULT 0
+        )
+        '''
+
+        self.sql(qry)
+        self.connection.commit()
+
+    @property
+    def sql(self):
+        return self.connection.execute
+
+    def bulk_insert(self, rows):
+        qry = '''
+        INSERT INTO archive_cache (
+            rowid,
+            key_file,
+            key_file_path,
+            filename,
+            full_path,
+            store_time,
+            access_time,
+            access_count,
+            size
+        )
+        VALUES (
+            ?, ?, ?, ?, ?, ?, ?, ?, ?
+        )
+        '''
+        cursor = self.connection.cursor()
+        cursor.executemany(qry, rows)
+        self.connection.commit()
+
+
 class FileSystemCache:
 
     def __init__(self, index, directory, **settings):
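
A quick sketch of how EVICTION_POLICY and the in-memory DB compose (not part of the change; row values are illustrative and follow the archive_cache column order from _init_db):

db = DB()
db.bulk_insert([
    (1, 'a.key', '/cache/a.key', 'a.tar', '/cache/a.tar', 100.0, 120.0, 3, 2048),
    (2, 'b.key', '/cache/b.key', 'b.tar', '/cache/b.tar', 200.0, 110.0, 9, 4096),
])
# format the policy template with the columns we want back
qry = EVICTION_POLICY['least-recently-used']['evict'].format(fields='key_file, size')
print(db.sql(qry).fetchall())  # [('b.key', 4096), ('a.key', 2048)] -- oldest access_time sorts first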
@@ -77,10 +144,13 b' class FileSystemCache:'
         # STORE METADATA
         _metadata = {
             "version": "v1",
-            "timestamp": time.time(),
             "filename": filename,
             "full_path": full_path,
             "key_file": key_file,
+            "store_time": time.time(),
+            "access_count": 1,
+            "access_time": 0,
+            "size": 0
         }
         if metadata:
             _metadata.update(metadata)
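
With this change, a key file ends up holding JSON shaped roughly like the following sketch (the name and path values are hypothetical; size starts at 0 and is filled in once the archive is written):

key_file_contents = {
    "version": "v1",
    "filename": "tmpabc123.tar.gz",
    "full_path": "/archive_cache/shard_0/tmpabc123.tar.gz",
    "key_file": "/archive_cache/shard_0/some-key.key",
    "store_time": 1700000000.0,
    "access_count": 1,
    "access_time": 0,
    "size": 0,
}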
@@ -89,6 +159,7 b' class FileSystemCache:'
 
         iterator = iter(reader, b'')
         size = self._write_file(full_path, iterator, 'xb')
+        _metadata['size'] = size
 
         # after archive is finished, we create a key to save the presence of the binary file
         with open(key_file, 'wb') as f:
@@ -106,7 +177,15 b' class FileSystemCache:'
 
         filename = metadata['filename']
 
-        return open(os.path.join(self._directory, filename), 'rb'), metadata
+        try:
+            return open(os.path.join(self._directory, filename), 'rb'), metadata
+        finally:
+            # update usage stats: access count and last-accessed time
+            metadata["access_count"] = metadata.get("access_count", 0) + 1
+            metadata["access_time"] = time.time()
+
+            with open(key_file, 'wb') as f:
+                f.write(json.dumps(metadata))
 
     def random_filename(self):
         """Return filename and full-path tuple for file storage.
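
The try/finally in the read path relies on a subtle but well-defined Python rule: the return value is evaluated first, then the finally block runs before control reaches the caller, and since the returned metadata is the same dict the finally block mutates, the caller also sees the bumped stats. A tiny standalone demonstration of that ordering:

def read_and_touch():
    metadata = {'access_count': 0}
    try:
        return metadata  # the return value is fixed here...
    finally:
        metadata['access_count'] += 1  # ...but finally still runs before the caller resumes

assert read_and_touch()['access_count'] == 1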
@@ -168,6 +247,9 b' class FanoutCache:'
         self._count = settings.pop('cache_shards')
         self._locking_url = settings.pop('locking_url')
 
+        self._eviction_policy = settings['cache_eviction_policy']
+        self._cache_size_limit = settings['cache_size_limit']
+
         self._shards = tuple(
             FileSystemCache(
                 index=num,
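
Note the two new settings are read by indexing rather than popped, so they must be present. A hypothetical settings dict illustrating the expected keys (all values made up; the size limit is in bytes, matching the byte sizes summed during eviction):

settings = {
    'cache_shards': 8,
    'locking_url': 'redis://localhost:6379/0',
    'cache_eviction_policy': 'least-recently-stored',
    'cache_size_limit': 10 * 1024 ** 3,  # 10 GB
}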
@@ -209,6 +291,78 b' class FanoutCache:'
     def __contains__(self, item):
         return self.has_key(item)
 
+    def evict(self, policy=None, size_limit=None):
+        """
+        Remove old items based on the given conditions.
+
+        Explanation of the algorithm:
+        Iterate over each shard, and for each shard iterate over its .key files,
+        reading the metadata stored in them. This gives us the full list of keys
+        and cached archives, with their sizes, creation times, access times, and
+        access counts.
+
+        Store all of that in an in-memory DB so we can run different sorting
+        strategies easily. Summing the sizes is a single SUM SQL query.
+
+        Then run a sorting strategy based on the eviction policy, iterate over
+        the sorted keys, and remove entries until we are back under the overall
+        size limit.
+        """
+
+        policy = policy or self._eviction_policy
+        size_limit = size_limit or self._cache_size_limit
+
+        select_policy = EVICTION_POLICY[policy]['evict']
+
+        if select_policy is None:
+            return 0
+
+        db = DB()
+
+        data = []
+        cnt = 1
+        for shard in self._shards:
+            for key_file in os.listdir(shard._directory):
+                if key_file.endswith('.key'):
+                    key_file_path = os.path.join(shard._directory, key_file)
+                    with open(key_file_path, 'rb') as f:
+                        metadata = json.loads(f.read())
+
+                    # in case the size wasn't stored, re-calculate it from the file
+                    size = metadata.get('size')
+                    if not size:
+                        fn = metadata.get('full_path')
+                        size = os.stat(fn).st_size
+
+                    data.append([
+                        cnt,
+                        key_file,
+                        key_file_path,
+                        metadata.get('filename'),
+                        metadata.get('full_path'),
+                        metadata.get('store_time', 0),
+                        metadata.get('access_time', 0),
+                        metadata.get('access_count', 0),
+                        size,
+                    ])
+                    cnt += 1
+
+        # Insert bulk data using executemany
+        db.bulk_insert(data)
+
+        ((total_size,),) = db.sql('SELECT COALESCE(SUM(size), 0) FROM archive_cache').fetchall()
+
+        select_policy_qry = select_policy.format(fields='key_file_path, full_path, size')
+        sorted_keys = db.sql(select_policy_qry).fetchall()
+
+        for key, cached_file, size in sorted_keys:
+            # simulate removal impact BEFORE removal
+            total_size -= size
+            if total_size <= size_limit:
+                # we obtained what we wanted...
+                break
+
+            os.remove(cached_file)
+            os.remove(key)
+        return
+
 
 def get_archival_config(config):
 
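
A sketch of calling the new method, assuming d_cache is the FanoutCache returned by get_archival_cache_store(config); both arguments are optional and fall back to the configured values:

# trim the cache down to ~1 GB, dropping least-recently-used entries first
d_cache.evict(policy='least-recently-used', size_limit=1024 ** 3)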
@@ -255,4 +409,3 b' def get_archival_cache_store(config):'
     )
     cache_meta = d_cache
     return cache_meta
-
@@ -15,6 +15,8 b''
 # along with this program; if not, write to the Free Software Foundation,
 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
+import os
+
 
 class ArchiveCacheLock(Exception):
     pass
@@ -27,3 +29,43 b' def archive_iterator(_reader, block_size'
         if not data:
             break
         yield data
+
+
+def get_directory_statistics(start_path):
+    """
+    Walk start_path and gather per-directory file counts and sizes.
+
+    Usage:
+        total_files, total_size, directory_stats = get_directory_statistics(start_path)
+
+        print(f"Directory statistics for: {start_path}\n")
+        print(f"Total files: {total_files}")
+        print(f"Total size: {format_size(total_size)}\n")
+
+    :param start_path: root directory to scan
+    :return: tuple of (total_files, total_size, directory_stats)
+    """
+
+    total_files = 0
+    total_size = 0
+    directory_stats = {}
+
+    for dir_path, dir_names, file_names in os.walk(start_path):
+        dir_size = 0
+        file_count = len(file_names)
+
+        for file in file_names:
+            filepath = os.path.join(dir_path, file)
+            file_size = os.path.getsize(filepath)
+            dir_size += file_size
+
+        directory_stats[dir_path] = {'file_count': file_count, 'size': dir_size}
+        total_files += file_count
+        total_size += dir_size
+
+    return total_files, total_size, directory_stats
+
+
+def format_size(size):
+    # Convert size in bytes to a human-readable format (e.g., KB, MB, GB)
+    for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
+        if size < 1024:
+            return f"{size:.2f} {unit}"
+        size /= 1024
+    return f"{size:.2f} PB"
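
A sketch of the two helpers working together (the directory path is hypothetical):

total_files, total_size, directory_stats = get_directory_statistics('/tmp/archive_cache')
print(f"Total files: {total_files}")
print(f"Total size: {format_size(total_size)}")
for path, stats in sorted(directory_stats.items()):
    print(f"  {path}: {stats['file_count']} files, {format_size(stats['size'])}")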