##// END OF EJS Templates
feat(archive-cache): implemented eviction policy logic
super-admin -
r5423:7155ef47 default
parent child Browse files
Show More
@@ -24,6 +24,7 b' import logging'
24 import time
24 import time
25 import typing
25 import typing
26 import zlib
26 import zlib
27 import sqlite3
27
28
28 from rhodecode.lib.ext_json import json
29 from rhodecode.lib.ext_json import json
29 from .lock import GenerationLock
30 from .lock import GenerationLock
@@ -38,6 +39,72 b' NO_VAL = -917'
38 MODE_BINARY = 'BINARY'
39 MODE_BINARY = 'BINARY'
39
40
40
41
# Maps an eviction-policy name to the SQL used to order cache entries for
# removal.  ``{fields}`` is substituted with the selected columns at query
# time; a ``None`` query means eviction is disabled for that policy.
EVICTION_POLICY = {
    'none': {'evict': None},
    'least-recently-stored': {
        'evict': 'SELECT {fields} FROM archive_cache ORDER BY store_time',
    },
    'least-recently-used': {
        'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_time',
    },
    'least-frequently-used': {
        'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_count',
    },
}
56
57
class DB:
    """
    Tiny in-memory sqlite helper used to sort cache-key metadata by
    different eviction strategies.

    All key-file metadata is bulk-loaded into a single ``archive_cache``
    table; the eviction policies then reduce to plain ``ORDER BY`` queries
    and size accounting to a ``SUM(size)``.
    """

    def __init__(self):
        self.connection = sqlite3.connect(':memory:')
        self._init_db()

    def _init_db(self):
        # Explicit ``rowid`` column so bulk_insert can assign stable ids.
        qry = '''
        CREATE TABLE IF NOT EXISTS archive_cache (
            rowid INTEGER PRIMARY KEY,
            key_file TEXT,
            key_file_path TEXT,
            filename TEXT,
            full_path TEXT,
            store_time REAL,
            access_time REAL,
            access_count INTEGER DEFAULT 0,
            size INTEGER DEFAULT 0
        )
        '''

        self.sql(qry)
        self.connection.commit()

    @property
    def sql(self):
        """Shortcut for ``connection.execute`` — returns a cursor."""
        return self.connection.execute

    def bulk_insert(self, rows):
        """
        Insert *rows* (iterables matching the archive_cache columns) in a
        single ``executemany`` call and commit.
        """
        qry = '''
        INSERT INTO archive_cache (
            rowid,
            key_file,
            key_file_path,
            filename,
            full_path,
            store_time,
            access_time,
            access_count,
            size
        )
        VALUES (
            ?, ?, ?, ?, ?, ?, ?, ?, ?
        )
        '''
        cursor = self.connection.cursor()
        try:
            cursor.executemany(qry, rows)
            self.connection.commit()
        finally:
            # don't leak the cursor if executemany raises
            cursor.close()

    def close(self):
        """Release the underlying sqlite connection."""
        self.connection.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
106
107
41 class FileSystemCache:
108 class FileSystemCache:
42
109
43 def __init__(self, index, directory, **settings):
110 def __init__(self, index, directory, **settings):
@@ -225,9 +292,76 b' class FanoutCache:'
225 def __contains__(self, item):
292 def __contains__(self, item):
226 return self.has_key(item)
293 return self.has_key(item)
227
294
228 def evict(self):
295 def evict(self, policy=None, size_limit=None):
229 """Remove old items based on the conditions"""
296 """
230 # TODO: Implement this...
297 Remove old items based on the conditions
298
299
300 explanation of this algo:
301 iterate over each shard, then for each shard iterate over the .key files
302 read the key files metadata stored. This gives us a full list of keys, cached_archived, their size and
303 access data, time creation, and access counts.
304
305 Store that into a memory DB so we can run different sorting strategies easily.
306 Summing the size is a sum sql query.
307
308 Then we run a sorting strategy based on eviction policy.
309 We iterate over sorted keys, and remove each checking if we hit the overall limit.
310 """
311
312 policy = policy or self._eviction_policy
313 size_limit = size_limit or self._cache_size_limit
314
315 select_policy = EVICTION_POLICY[policy]['evict']
316
317 if select_policy is None:
318 return 0
319
320 db = DB()
321
322 data = []
323 cnt = 1
324 for shard in self._shards:
325 for key_file in os.listdir(shard._directory):
326 if key_file.endswith('.key'):
327 key_file_path = os.path.join(shard._directory, key_file)
328 with open(key_file_path, 'rb') as f:
329 metadata = json.loads(f.read())
330 # in case we don't have size re-calc it...
331 if not metadata.get('size'):
332 fn = metadata.get('full_path')
333 size = os.stat(fn).st_size
334
335 data.append([
336 cnt,
337 key_file,
338 key_file_path,
339 metadata.get('filename'),
340 metadata.get('full_path'),
341 metadata.get('store_time', 0),
342 metadata.get('access_time', 0),
343 metadata.get('access_count', 0),
344 metadata.get('size', size),
345 ])
346 cnt += 1
347
348 # Insert bulk data using executemany
349 db.bulk_insert(data)
350
351 ((total_size,),) = db.sql('SELECT COALESCE(SUM(size), 0) FROM archive_cache').fetchall()
352
353 select_policy_qry = select_policy.format(fields='key_file_path, full_path, size')
354 sorted_keys = db.sql(select_policy_qry).fetchall()
355
356 for key, cached_file, size in sorted_keys:
357 # simulate removal impact BEFORE removal
358 total_size -= size
359 if total_size <= size_limit:
360 # we obtained what we wanted...
361 break
362
363 os.remove(cached_file)
364 os.remove(key)
231 return
365 return
232
366
233
367
General Comments 0
You need to be logged in to leave comments. Login now