archive-cache: synced with CE lib
super-admin
r1242:8380b87c default
@@ -23,6 +23,7 @@ import logging
 import time
 import typing
 import zlib
+import sqlite3
 
 from vcsserver.lib.rc_json import json
 from .lock import GenerationLock
@@ -37,6 +38,72 @@ NO_VAL = -917
 MODE_BINARY = 'BINARY'
 
 
+EVICTION_POLICY = {
+    'none': {
+        'evict': None,
+    },
+    'least-recently-stored': {
+        'evict': 'SELECT {fields} FROM archive_cache ORDER BY store_time',
+    },
+    'least-recently-used': {
+        'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_time',
+    },
+    'least-frequently-used': {
+        'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_count',
+    },
+}
+
+
+class DB:
+
+    def __init__(self):
+        self.connection = sqlite3.connect(':memory:')
+        self._init_db()
+
+    def _init_db(self):
+        qry = '''
+        CREATE TABLE IF NOT EXISTS archive_cache (
+            rowid INTEGER PRIMARY KEY,
+            key_file TEXT,
+            key_file_path TEXT,
+            filename TEXT,
+            full_path TEXT,
+            store_time REAL,
+            access_time REAL,
+            access_count INTEGER DEFAULT 0,
+            size INTEGER DEFAULT 0
+        )
+        '''
+
+        self.sql(qry)
+        self.connection.commit()
+
+    @property
+    def sql(self):
+        return self.connection.execute
+
+    def bulk_insert(self, rows):
+        qry = '''
+        INSERT INTO archive_cache (
+            rowid,
+            key_file,
+            key_file_path,
+            filename,
+            full_path,
+            store_time,
+            access_time,
+            access_count,
+            size
+        )
+        VALUES (
+            ?, ?, ?, ?, ?, ?, ?, ?, ?
+        )
+        '''
+        cursor = self.connection.cursor()
+        cursor.executemany(qry, rows)
+        self.connection.commit()
+
+
 class FileSystemCache:
 
     def __init__(self, index, directory, **settings):
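The EVICTION_POLICY map and the in-memory DB helper introduced above are what the new FanoutCache.evict() method further down uses to pick removal candidates. A minimal sketch of how the two fit together, assuming the DB class and EVICTION_POLICY dict from this change are in scope; the row values are invented for illustration:

# each row mirrors the archive_cache columns:
# rowid, key_file, key_file_path, filename, full_path, store_time, access_time, access_count, size
rows = [
    (1, 'a.key', '/cache/shard_0/a.key', 'a.tar.gz', '/cache/shard_0/a.tar.gz', 100.0, 150.0, 3, 2048),
    (2, 'b.key', '/cache/shard_1/b.key', 'b.tar.gz', '/cache/shard_1/b.tar.gz', 90.0, 200.0, 1, 4096),
]

db = DB()
db.bulk_insert(rows)

# 'least-recently-stored' orders by store_time, so the oldest stored archives come first
qry = EVICTION_POLICY['least-recently-stored']['evict'].format(fields='key_file_path, full_path, size')
for key_file_path, full_path, size in db.sql(qry).fetchall():
    print(key_file_path, full_path, size)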
@@ -77,10 +144,13 @@ class FileSystemCache:
         # STORE METADATA
         _metadata = {
             "version": "v1",
-            "timestamp": time.time(),
             "filename": filename,
             "full_path": full_path,
             "key_file": key_file,
+            "store_time": time.time(),
+            "access_count": 1,
+            "access_time": 0,
+            "size": 0
         }
         if metadata:
             _metadata.update(metadata)
@@ -89,6 +159,7 @@ class FileSystemCache:
 
         iterator = iter(reader, b'')
         size = self._write_file(full_path, iterator, 'xb')
+        metadata['size'] = size
 
         # after archive is finished, we create a key to save the presence of the binary file
         with open(key_file, 'wb') as f:
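Because the key file is written with json.dumps of the metadata, every stored archive now leaves a small JSON sidecar next to the binary, carrying the fields the eviction code reads back later. A rough sketch of what such a .key payload could look like under this change; the filename and paths are invented for illustration:

import json
import time

_metadata = {
    "version": "v1",
    "filename": "archive-example.tar.gz",                             # hypothetical archive name
    "full_path": "/srv/archive_cache/shard_0/archive-example.tar.gz",
    "key_file": "/srv/archive_cache/shard_0/archive-example.key",
    "store_time": time.time(),
    "access_count": 1,
    "access_time": 0,
    "size": 2048,  # filled in from _write_file() once the archive bytes are written
}
print(json.dumps(_metadata))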
@@ -106,7 +177,15 @@ class FileSystemCache:
 
         filename = metadata['filename']
 
-        return open(os.path.join(self._directory, filename), 'rb'), metadata
+        try:
+            return open(os.path.join(self._directory, filename), 'rb'), metadata
+        finally:
+            # update usage stats, count and accessed
+            metadata["access_count"] = metadata.get("access_count", 0) + 1
+            metadata["access_time"] = time.time()
+
+            with open(key_file, 'wb') as f:
+                f.write(json.dumps(metadata))
 
     def random_filename(self):
         """Return filename and full-path tuple for file storage.
@@ -168,6 +247,9 @@ class FanoutCache:
         self._count = settings.pop('cache_shards')
         self._locking_url = settings.pop('locking_url')
 
+        self._eviction_policy = settings['cache_eviction_policy']
+        self._cache_size_limit = settings['cache_size_limit']
+
         self._shards = tuple(
             FileSystemCache(
                 index=num,
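The two new attributes read the eviction policy and the overall size cap straight from the cache settings. The exact wiring happens in get_archival_config() / get_archival_cache_store() (not fully shown in this diff), but as an assumed illustration, the settings a FanoutCache now consumes look roughly like this; the concrete values are examples only:

settings = {
    'cache_shards': 8,                                  # number of FileSystemCache shards
    'locking_url': 'redis://localhost:6379/0',          # assumed value, used for the GenerationLock
    'cache_eviction_policy': 'least-recently-stored',   # one of the EVICTION_POLICY keys
    'cache_size_limit': 10 * 1024 ** 3,                 # assumed to be the overall cap in bytes
}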
@@ -209,6 +291,78 @@ class FanoutCache:
     def __contains__(self, item):
         return self.has_key(item)
 
+    def evict(self, policy=None, size_limit=None):
+        """
+        Remove old items based on the given conditions.
+
+
+        Explanation of the algorithm:
+        Iterate over each shard, and for each shard iterate over its .key files.
+        Read the metadata stored in the key files. This gives us a full list of keys, cached archives, their
+        size, access data, creation time, and access counts.
+
+        Store all of that in an in-memory DB, so we can easily run different sorting strategies on it.
+        Summing the sizes is a single SUM sql query.
+
+        Then we run a sorting strategy based on the eviction policy, iterate over the sorted keys,
+        and remove entries until we drop below the overall size limit.
+        """
+
+        policy = policy or self._eviction_policy
+        size_limit = size_limit or self._cache_size_limit
+
+        select_policy = EVICTION_POLICY[policy]['evict']
+
+        if select_policy is None:
+            return 0
+
+        db = DB()
+
+        data = []
+        cnt = 1
+        for shard in self._shards:
+            for key_file in os.listdir(shard._directory):
+                if key_file.endswith('.key'):
+                    key_file_path = os.path.join(shard._directory, key_file)
+                    with open(key_file_path, 'rb') as f:
+                        metadata = json.loads(f.read())
+                    # use the stored size; in case we don't have it, re-calculate it from the file on disk
+                    size = metadata.get('size')
+                    if not size:
+                        size = os.stat(metadata.get('full_path')).st_size
+
+                    data.append([
+                        cnt,
+                        key_file,
+                        key_file_path,
+                        metadata.get('filename'),
+                        metadata.get('full_path'),
+                        metadata.get('store_time', 0),
+                        metadata.get('access_time', 0),
+                        metadata.get('access_count', 0),
+                        size,
+                    ])
+                    cnt += 1
+
+        # Insert bulk data using executemany
+        db.bulk_insert(data)
+
+        ((total_size,),) = db.sql('SELECT COALESCE(SUM(size), 0) FROM archive_cache').fetchall()
+
+        select_policy_qry = select_policy.format(fields='key_file_path, full_path, size')
+        sorted_keys = db.sql(select_policy_qry).fetchall()
+
+        for key, cached_file, size in sorted_keys:
+            # simulate removal impact BEFORE removal
+            total_size -= size
+            if total_size <= size_limit:
+                # we obtained what we wanted...
+                break
+
+            os.remove(cached_file)
+            os.remove(key)
+        return
+
 
 def get_archival_config(config):
 
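With the policy queries, the in-memory DB, and the per-key metadata in place, trimming the cache comes down to a single call on the FanoutCache. A minimal usage sketch, assuming d_cache is the FanoutCache instance returned by get_archival_cache_store(config); the explicit arguments simply override the configured defaults:

# use the configured cache_eviction_policy / cache_size_limit
d_cache.evict()

# or override both for a one-off, more aggressive trim (example values)
d_cache.evict(policy='least-recently-used', size_limit=5 * 1024 ** 3)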
@@ -255,4 +409,3 @@ def get_archival_cache_store(config):
     )
     cache_meta = d_cache
     return cache_meta
-
@@ -15,6 +15,8 @@
 # along with this program; if not, write to the Free Software Foundation,
 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
+import os
+
 
 class ArchiveCacheLock(Exception):
     pass
@@ -27,3 +29,43 @@ def archive_iterator(_reader, block_size
         if not data:
             break
         yield data
+
+
+def get_directory_statistics(start_path):
+    """
+    total_files, total_size, directory_stats = get_directory_statistics(start_path)
+
+    print(f"Directory statistics for: {start_path}\n")
+    print(f"Total files: {total_files}")
+    print(f"Total size: {format_size(total_size)}\n")
+
+    :param start_path:
+    :return:
+    """
+
+    total_files = 0
+    total_size = 0
+    directory_stats = {}
+
+    for dir_path, dir_names, file_names in os.walk(start_path):
+        dir_size = 0
+        file_count = len(file_names)
+
+        for file in file_names:
+            filepath = os.path.join(dir_path, file)
+            file_size = os.path.getsize(filepath)
+            dir_size += file_size
+
+        directory_stats[dir_path] = {'file_count': file_count, 'size': dir_size}
+        total_files += file_count
+        total_size += dir_size
+
+    return total_files, total_size, directory_stats
+
+
+def format_size(size):
+    # Convert size in bytes to a human-readable format (e.g., KB, MB, GB)
+    for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
+        if size < 1024:
+            return f"{size:.2f} {unit}"
+        size /= 1024
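The two helpers above are generic filesystem utilities; together they report how much disk the cache directories are using. A small usage sketch along the lines of the docstring above, assuming both functions are importable from this module and the path exists (the path itself is an example):

start_path = '/srv/archive_cache'
total_files, total_size, directory_stats = get_directory_statistics(start_path)

print(f"Directory statistics for: {start_path}\n")
print(f"Total files: {total_files}")
print(f"Total size: {format_size(total_size)}\n")

for dir_path, stats in sorted(directory_stats.items()):
    print(f"{dir_path}: {stats['file_count']} files, {format_size(stats['size'])}")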