@@ -24,6 +24,7 @@ import logging
 import time
 import typing
 import zlib
+import sqlite3
 
 from rhodecode.lib.ext_json import json
 from .lock import GenerationLock
@@ -38,6 +39,72 @@ NO_VAL = -917
 MODE_BINARY = 'BINARY'
 
 
+EVICTION_POLICY = {
+    'none': {
+        'evict': None,
+    },
+    'least-recently-stored': {
+        'evict': 'SELECT {fields} FROM archive_cache ORDER BY store_time',
+    },
+    'least-recently-used': {
+        'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_time',
+    },
+    'least-frequently-used': {
+        'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_count',
+    },
+}
+
+
+class DB:
+
+    def __init__(self):
+        self.connection = sqlite3.connect(':memory:')
+        self._init_db()
+
+    def _init_db(self):
+        qry = '''
+        CREATE TABLE IF NOT EXISTS archive_cache (
+            rowid INTEGER PRIMARY KEY,
+            key_file TEXT,
+            key_file_path TEXT,
+            filename TEXT,
+            full_path TEXT,
+            store_time REAL,
+            access_time REAL,
+            access_count INTEGER DEFAULT 0,
+            size INTEGER DEFAULT 0
+        )
+        '''
+
+        self.sql(qry)
+        self.connection.commit()
+
+    @property
+    def sql(self):
+        return self.connection.execute
+
+    def bulk_insert(self, rows):
+        qry = '''
+        INSERT INTO archive_cache (
+            rowid,
+            key_file,
+            key_file_path,
+            filename,
+            full_path,
+            store_time,
+            access_time,
+            access_count,
+            size
+        )
+        VALUES (
+            ?, ?, ?, ?, ?, ?, ?, ?, ?
+        )
+        '''
+        cursor = self.connection.cursor()
+        cursor.executemany(qry, rows)
+        self.connection.commit()
+
+
 class FileSystemCache:
 
     def __init__(self, index, directory, **settings):
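
To see the moving parts of the new DB helper in isolation, here is a minimal, self-contained sketch of the same pattern: an in-memory SQLite table of cache entries that is bulk-filled and then ordered by an eviction-policy query. It uses plain sqlite3 with a trimmed-down column set and made-up sample rows; it illustrates the approach and is not code from this changeset.

import sqlite3

connection = sqlite3.connect(':memory:')
connection.execute('''
    CREATE TABLE archive_cache (
        rowid INTEGER PRIMARY KEY,
        key_file TEXT,
        full_path TEXT,
        store_time REAL,
        access_time REAL,
        access_count INTEGER DEFAULT 0,
        size INTEGER DEFAULT 0
    )
''')

# two made-up entries: an old, rarely used archive and a fresh, hot one
rows = [
    (1, 'aaa.key', '/cache/aaa.tar.gz', 1000.0, 1000.0, 1, 500),
    (2, 'bbb.key', '/cache/bbb.tar.gz', 2000.0, 9000.0, 40, 300),
]
connection.executemany(
    'INSERT INTO archive_cache VALUES (?, ?, ?, ?, ?, ?, ?)', rows)
connection.commit()

# summing the sizes is a single SQL query, as evict() does below
((total_size,),) = connection.execute(
    'SELECT COALESCE(SUM(size), 0) FROM archive_cache').fetchall()
assert total_size == 800

# the eviction policies differ only in their ORDER BY column
qry = 'SELECT {fields} FROM archive_cache ORDER BY store_time'.format(
    fields='key_file, size')
print(connection.execute(qry).fetchall())
# [('aaa.key', 500), ('bbb.key', 300)]  oldest stored comes first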
@@ -225,9 +292,76 @@ class FanoutCache:
     def __contains__(self, item):
         return self.has_key(item)
 
-    def evict(self):
-        """Remove old items based on the conditions"""
-        # TODO: Implement this...
+    def evict(self, policy=None, size_limit=None):
+        """
+        Remove old items based on the given conditions.
+
+        How this works:
+        iterate over each shard, and for each shard iterate over its .key files,
+        reading the metadata stored in them. This gives us a full list of keys,
+        cached archives, their sizes, creation times, access times and access
+        counts.
+
+        Store all of that in an in-memory DB, so we can run different sorting
+        strategies easily. Summing the sizes is a single SUM SQL query.
+
+        Then run the sorting strategy chosen by the eviction policy, iterate
+        over the sorted keys, and remove entries until we are back under the size limit.
+        """
+
+        policy = policy or self._eviction_policy
+        size_limit = size_limit or self._cache_size_limit
+
+        select_policy = EVICTION_POLICY[policy]['evict']
+
+        if select_policy is None:
+            return 0
+
+        db = DB()
+
+        data = []
+        cnt = 1
+        for shard in self._shards:
+            for key_file in os.listdir(shard._directory):
+                if key_file.endswith('.key'):
+                    key_file_path = os.path.join(shard._directory, key_file)
+                    with open(key_file_path, 'rb') as f:
+                        metadata = json.loads(f.read())
+                    # in case we don't have the size stored, re-calculate it
+                    size = metadata.get('size')
+                    if not size:
+                        size = os.stat(metadata.get('full_path')).st_size
+
+                    data.append([
+                        cnt,
+                        key_file,
+                        key_file_path,
+                        metadata.get('filename'),
+                        metadata.get('full_path'),
+                        metadata.get('store_time', 0),
+                        metadata.get('access_time', 0),
+                        metadata.get('access_count', 0),
+                        size,
+                    ])
+                    cnt += 1
+
+        # Insert bulk data using executemany
+        db.bulk_insert(data)
+
+        ((total_size,),) = db.sql('SELECT COALESCE(SUM(size), 0) FROM archive_cache').fetchall()
+
+        select_policy_qry = select_policy.format(fields='key_file_path, full_path, size')
+        sorted_keys = db.sql(select_policy_qry).fetchall()
+
+        for key, cached_file, size in sorted_keys:
+            # account for the removal impact BEFORE the actual removal
+            total_size -= size
+            os.remove(cached_file)
+            os.remove(key)
+
+            if total_size <= size_limit:
+                # we are back under the limit, we can stop...
+                break
         return
 
 
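With this change, eviction can be triggered explicitly, and the per-instance defaults (self._eviction_policy and self._cache_size_limit) can be overridden per call. A hypothetical usage sketch follows; `cache` stands for an already-constructed FanoutCache instance, whose setup is outside the scope of this diff.

# hypothetical: `cache` is an existing FanoutCache instance
# trim the cache down to roughly 1 GB, dropping the least-recently-used
# archives first; evict() returns 0 early if the policy is 'none'
cache.evict(policy='least-recently-used', size_limit=1024 ** 3)

# or rely on the defaults configured on the instance
cache.evict()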