merge: Resolved conflicts
r1266:6139e442 merge v5.1.0 stable
@@ -0,0 +1,79 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import logging
20
21 from .backends.fanout_cache import FileSystemFanoutCache
22 from .backends.objectstore_cache import ObjectStoreCache
23
24 from .utils import archive_iterator # noqa
25 from .lock import ArchiveCacheGenerationLock # noqa
26
27 log = logging.getLogger(__name__)
28
29
30 cache_meta = None
31
32
33 def includeme(config):
34 return # vcsserver gets its config from rhodecode on a remote call
35 # init our cache at start
36 settings = config.get_settings()
37 get_archival_cache_store(settings)
38
39
40 def get_archival_config(config):
41
42 final_config = {
43
44 }
45
46 for k, v in config.items():
47 if k.startswith('archive_cache'):
48 final_config[k] = v
49
50 return final_config
51
52
53 def get_archival_cache_store(config, always_init=False):
54
55 global cache_meta
56 if cache_meta is not None and not always_init:
57 return cache_meta
58
59 config = get_archival_config(config)
60 backend = config['archive_cache.backend.type']
61
62 archive_cache_locking_url = config['archive_cache.locking.url']
63
64 match backend:
65 case 'filesystem':
66 d_cache = FileSystemFanoutCache(
67 locking_url=archive_cache_locking_url,
68 **config
69 )
70 case 'objectstore':
71 d_cache = ObjectStoreCache(
72 locking_url=archive_cache_locking_url,
73 **config
74 )
75 case _:
76 raise ValueError(f'archive_cache.backend.type only supports "filesystem" or "objectstore", got: {backend}')
77
78 cache_meta = d_cache
79 return cache_meta
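
For orientation, here is a minimal sketch of how a caller might initialize this module's entry point. The settings keys mirror the archive_cache.* options consumed above; the concrete values (paths, URLs, sizes) are illustrative assumptions, not shipped defaults.

    from vcsserver.lib.archive_cache import get_archival_cache_store

    # illustrative settings; keys match what get_archival_config() filters on,
    # values are placeholders for this sketch
    settings = {
        'archive_cache.backend.type': 'filesystem',
        'archive_cache.locking.url': 'redis://redis:6379/0',
        'archive_cache.filesystem.store_dir': '/var/opt/archive_cache',  # placeholder
        'archive_cache.filesystem.cache_shards': '8',
        'archive_cache.filesystem.eviction_policy': 'least-recently-stored',
        'archive_cache.filesystem.cache_size_gb': '10',
        'archive_cache.filesystem.retry': 'false',
        'archive_cache.filesystem.retry_attempts': '0',
        'archive_cache.filesystem.retry_backoff': '1',
    }

    d_cache = get_archival_cache_store(settings)  # memoized in the module-global cache_meta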
@@ -0,0 +1,17 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
@@ -0,0 +1,372 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import os
20 import functools
21 import logging
22 import typing
23 import time
24 import zlib
25
26 from ...ext_json import json
27 from ..utils import StatsDB, NOT_GIVEN, ShardFileReader, EVICTION_POLICY, format_size
28 from ..lock import GenerationLock
29
30 log = logging.getLogger(__name__)
31
32
33 class BaseShard:
34 storage_type: str = ''
35 fs = None
36
37 @classmethod
38 def hash(cls, key):
39 """Compute portable hash for `key`.
40
41 :param key: key to hash
42 :return: hash value
43
44 """
45 mask = 0xFFFFFFFF
46 return zlib.adler32(key.encode('utf-8')) & mask # noqa
47
48 def _write_file(self, full_path, read_iterator, mode):
49 raise NotImplementedError
50
51 def _get_keyfile(self, key):
52 raise NotImplementedError
53
54 def random_filename(self):
55 raise NotImplementedError
56
57 def store(self, *args, **kwargs):
58 raise NotImplementedError
59
60 def _store(self, key, value_reader, metadata, mode):
61 (filename, # hash-name
62 full_path # full-path/hash-name
63 ) = self.random_filename()
64
65 key_file, key_file_path = self._get_keyfile(key)
66
67 # STORE METADATA
68 _metadata = {
69 "version": "v1",
70
71 "key_file": key_file, # this is the .key.json file storing meta
72 "key_file_path": key_file_path, # full path to key_file
73 "archive_key": key, # original name we stored archive under, e.g my-archive.zip
74 "archive_filename": filename, # the actual filename we stored that file under
75 "archive_full_path": full_path,
76
77 "store_time": time.time(),
78 "access_count": 0,
79 "access_time": 0,
80
81 "size": 0
82 }
83 if metadata:
84 _metadata.update(metadata)
85
86 read_iterator = iter(functools.partial(value_reader.read, 2**22), b'')
87 size, sha256 = self._write_file(full_path, read_iterator, mode)
88 _metadata['size'] = size
89 _metadata['sha256'] = sha256
90
91 # after the archive is written, we create a key file that records the presence of the binary file
92 with self.fs.open(key_file_path, 'wb') as f:
93 f.write(json.dumps(_metadata))
94
95 return key, filename, size, _metadata
96
97 def fetch(self, *args, **kwargs):
98 raise NotImplementedError
99
100 def _fetch(self, key, retry, retry_attempts, retry_backoff,
101 presigned_url_expires: int = 0) -> tuple[ShardFileReader, dict]:
102 if retry is NOT_GIVEN:
103 retry = False
104 if retry_attempts is NOT_GIVEN:
105 retry_attempts = 0
106
107 if retry and retry_attempts > 0:
108 for attempt in range(1, retry_attempts + 1):
109 if key in self:
110 break
111 # we didn't find the key, wait retry_backoff N seconds, and re-check
112 time.sleep(retry_backoff)
113
114 if key not in self:
115 log.error(f'requested key={key} not found in {self} retry={retry}, attempts={retry_attempts}')
116 raise KeyError(key)
117
118 key_file, key_file_path = self._get_keyfile(key)
119 with self.fs.open(key_file_path, 'rb') as f:
120 metadata = json.loads(f.read())
121
122 archive_path = metadata['archive_full_path']
123 if presigned_url_expires and presigned_url_expires > 0:
124 metadata['url'] = self.fs.url(archive_path, expires=presigned_url_expires)
125
126 try:
127 return ShardFileReader(self.fs.open(archive_path, 'rb')), metadata
128 finally:
129 # update usage stats, count and accessed
130 metadata["access_count"] = metadata.get("access_count", 0) + 1
131 metadata["access_time"] = time.time()
132 log.debug('Updated %s with access snapshot, access_count=%s access_time=%s',
133 key_file, metadata['access_count'], metadata['access_time'])
134 with self.fs.open(key_file_path, 'wb') as f:
135 f.write(json.dumps(metadata))
136
137 def remove(self, *args, **kwargs):
138 raise NotImplementedError
139
140 def _remove(self, key):
141 if key not in self:
142 log.error(f'requested key={key} not found in {self}')
143 raise KeyError(key)
144
145 key_file, key_file_path = self._get_keyfile(key)
146 with self.fs.open(key_file_path, 'rb') as f:
147 metadata = json.loads(f.read())
148
149 archive_path = metadata['archive_full_path']
150 self.fs.rm(archive_path)
151 self.fs.rm(key_file_path)
152 return 1
153
154 @property
155 def storage_medium(self):
156 return getattr(self, self.storage_type)
157
158 @property
159 def key_suffix(self):
160 return 'key.json'
161
162 def __contains__(self, key):
163 """Return `True` if `key` matching item is found in cache.
164
165 :param key: key matching item
166 :return: True if key matching item
167
168 """
169 key_file, key_file_path = self._get_keyfile(key)
170 return self.fs.exists(key_file_path)
171
172
173 class BaseCache:
174 _locking_url: str = ''
175 _storage_path: str = ''
176 _config: dict = {}
177 retry = False
178 retry_attempts: int = 0
179 retry_backoff: int | float = 1
180 _shards = tuple()
181 shard_cls = BaseShard
182 # define the presigned url expiration, 0 == disabled
183 presigned_url_expires: int = 0
184
185 def __contains__(self, key):
186 """Return `True` if `key` matching item is found in cache.
187
188 :param key: key matching item
189 :return: True if key matching item
190
191 """
192 return self.has_key(key)
193
194 def __repr__(self):
195 return f'<{self.__class__.__name__}(storage={self._storage_path})>'
196
197 @classmethod
198 def gb_to_bytes(cls, gb):
199 return gb * (1024 ** 3)
200
201 @property
202 def storage_path(self):
203 return self._storage_path
204
205 @classmethod
206 def get_stats_db(cls):
207 return StatsDB()
208
209 def get_conf(self, key, pop=False):
210 if key not in self._config:
211 raise ValueError(f"No configuration key '{key}', please make sure it exists in archive_cache config")
212 val = self._config[key]
213 if pop:
214 del self._config[key]
215 return val
216
217 def _get_shard(self, key) -> shard_cls:
218 index = self._hash(key) % self._shard_count
219 shard = self._shards[index]
220 return shard
221
222 def _get_size(self, shard, archive_path):
223 raise NotImplementedError
224
225 def store(self, key, value_reader, metadata=None):
226 shard = self._get_shard(key)
227 return shard.store(key, value_reader, metadata)
228
229 def fetch(self, key, retry=NOT_GIVEN, retry_attempts=NOT_GIVEN) -> tuple[typing.BinaryIO, dict]:
230 """
231 Return file handle corresponding to `key` from specific shard cache.
232 """
233 if retry is NOT_GIVEN:
234 retry = self.retry
235 if retry_attempts is NOT_GIVEN:
236 retry_attempts = self.retry_attempts
237 retry_backoff = self.retry_backoff
238 presigned_url_expires = self.presigned_url_expires
239
240 shard = self._get_shard(key)
241 return shard.fetch(key, retry=retry,
242 retry_attempts=retry_attempts,
243 retry_backoff=retry_backoff,
244 presigned_url_expires=presigned_url_expires)
245
246 def remove(self, key):
247 shard = self._get_shard(key)
248 return shard.remove(key)
249
250 def has_key(self, archive_key):
251 """Return `True` if `key` matching item is found in cache.
252
253 :param archive_key: key for item, this is a unique archive name we want to store data under. e.g my-archive-svn.zip
254 :return: True if key is found
255
256 """
257 shard = self._get_shard(archive_key)
258 return archive_key in shard
259
260 def iter_keys(self):
261 for shard in self._shards:
262 if shard.fs.exists(shard.storage_medium):
263 for path, _dirs, _files in shard.fs.walk(shard.storage_medium):
264 for key_file_path in _files:
265 if key_file_path.endswith(shard.key_suffix):
266 yield shard, key_file_path
267
268 def get_lock(self, lock_key):
269 return GenerationLock(lock_key, self._locking_url)
270
271 def evict(self, policy=None, size_limit=None) -> dict:
272 """
273 Remove old items based on the conditions
274
275
276 explanation of this algo:
277 iterate over each shard, then for each shard iterate over the .key files
278 read the key files metadata stored. This gives us a full list of keys, cached_archived, their size and
279 access data, time creation, and access counts.
280
281 Store that into a memory DB in order we can run different sorting strategies easily.
282 Summing the size is a sum sql query.
283
284 Then we run a sorting strategy based on eviction policy.
285 We iterate over sorted keys, and remove each checking if we hit the overall limit.
286 """
287 removal_info = {
288 "removed_items": 0,
289 "removed_size": 0
290 }
291 policy = policy or self._eviction_policy
292 size_limit = size_limit or self._cache_size_limit
293
294 select_policy = EVICTION_POLICY[policy]['evict']
295
296 log.debug('Running eviction policy \'%s\', and checking for size limit: %s',
297 policy, format_size(size_limit))
298
299 if select_policy is None:
300 return removal_info
301
302 db = self.get_stats_db()
303
304 data = []
305 cnt = 1
306
307 for shard, key_file in self.iter_keys():
308 with shard.fs.open(os.path.join(shard.storage_medium, key_file), 'rb') as f:
309 metadata = json.loads(f.read())
310
311 key_file_path = os.path.join(shard.storage_medium, key_file)
312
313 archive_key = metadata['archive_key']
314 archive_path = metadata['archive_full_path']
315
316 size = metadata.get('size')
317 if not size:
318 # in case the size wasn't stored in the metadata, re-calculate it
319 size = self._get_size(shard, archive_path)
320
321 data.append([
322 cnt,
323 key_file,
324 key_file_path,
325 archive_key,
326 archive_path,
327 metadata.get('store_time', 0),
328 metadata.get('access_time', 0),
329 metadata.get('access_count', 0),
330 size,
331 ])
332 cnt += 1
333
334 # Insert bulk data using executemany
335 db.bulk_insert(data)
336
337 total_size = db.get_total_size()
338 log.debug('Analyzed %s keys, occupying: %s, running eviction to match %s',
339 len(data), format_size(total_size), format_size(size_limit))
340
341 removed_items = 0
342 removed_size = 0
343 for key_file, archive_key, size in db.get_sorted_keys(select_policy):
344 # simulate removal impact BEFORE removal
345 total_size -= size
346
347 if total_size <= size_limit:
348 # we obtained what we wanted...
349 break
350
351 self.remove(archive_key)
352 removed_items += 1
353 removed_size += size
354 removal_info['removed_items'] = removed_items
355 removal_info['removed_size'] = removed_size
356 log.debug('Removed %s cache archives, and reduced size by: %s',
357 removed_items, format_size(removed_size))
358 return removal_info
359
360 def get_statistics(self):
361 total_files = 0
362 total_size = 0
363 meta = {}
364
365 for shard, key_file in self.iter_keys():
366 json_key = f"{shard.storage_medium}/{key_file}"
367 with shard.fs.open(json_key, 'rb') as f:
368 total_files += 1
369 metadata = json.loads(f.read())
370 total_size += metadata['size']
371
372 return total_files, total_size, meta
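
A minimal usage sketch of the store/fetch API that BaseCache defines above, assuming d_cache is an already-initialized backend (see the package __init__ earlier); the key name and in-memory payload are illustrative.

    import io

    archive_key = 'my-archive.zip'                     # illustrative key
    payload = io.BytesIO(b'...zip bytes...')           # anything exposing .read()

    key, filename, size, metadata = d_cache.store(
        archive_key, payload, metadata={'commit_id': 'abc123'})  # commit_id is illustrative

    if archive_key in d_cache:                         # __contains__ -> has_key -> shard lookup
        reader, metadata = d_cache.fetch(archive_key)  # ShardFileReader plus the .key.json metadata
        data = reader.read()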
@@ -0,0 +1,177 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import codecs
20 import hashlib
21 import logging
22 import os
23 import typing
24
25 import fsspec
26
27 from .base import BaseCache, BaseShard
28 from ..utils import ShardFileReader, NOT_GIVEN
29 from ...type_utils import str2bool
30
31 log = logging.getLogger(__name__)
32
33
34 class FileSystemShard(BaseShard):
35
36 def __init__(self, index, directory, directory_folder, fs, **settings):
37 self._index: int = index
38 self._directory: str = directory
39 self._directory_folder: str = directory_folder
40 self.storage_type: str = 'directory'
41
42 self.fs = fs
43
44 @property
45 def directory(self) -> str:
46 """Cache directory final path."""
47 return os.path.join(self._directory, self._directory_folder)
48
49 def _get_keyfile(self, archive_key) -> tuple[str, str]:
50 key_file: str = f'{archive_key}.{self.key_suffix}'
51 return key_file, os.path.join(self.directory, key_file)
52
53 def _get_writer(self, path, mode):
54 for count in range(1, 11):
55 try:
56 # Another cache may have deleted the directory before
57 # the file could be opened.
58 return self.fs.open(path, mode)
59 except OSError:
60 if count == 10:
61 # Give up after 10 tries to open the file.
62 raise
63 continue
64
65 def _write_file(self, full_path, iterator, mode):
66
67 # ensure dir exists
68 destination, _ = os.path.split(full_path)
69 if not self.fs.exists(destination):
70 self.fs.makedirs(destination)
71
72 writer = self._get_writer(full_path, mode)
73
74 digest = hashlib.sha256()
75 with writer:
76 size = 0
77 for chunk in iterator:
78 size += len(chunk)
79 digest.update(chunk)
80 writer.write(chunk)
81 writer.flush()
82 # Get the file descriptor
83 fd = writer.fileno()
84
85 # Sync the file descriptor to disk, helps with NFS cases...
86 os.fsync(fd)
87 sha256 = digest.hexdigest()
88 log.debug('written new archive cache under %s, sha256: %s', full_path, sha256)
89 return size, sha256
90
91 def store(self, key, value_reader, metadata: dict | None = None):
92 return self._store(key, value_reader, metadata, mode='xb')
93
94 def fetch(self, key, retry=NOT_GIVEN,
95 retry_attempts=NOT_GIVEN, retry_backoff=1, **kwargs) -> tuple[ShardFileReader, dict]:
96 return self._fetch(key, retry, retry_attempts, retry_backoff)
97
98 def remove(self, key):
99 return self._remove(key)
100
101 def random_filename(self):
102 """Return filename and full-path tuple for file storage.
103
104 Filename will be a randomly generated 28 character hexadecimal string
105 with ".archive_cache" suffixed. Two levels of sub-directories will be used to
106 reduce the size of directories. On older filesystems, lookups in
107 directories with many files may be slow.
108 """
109
110 hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8')
111
112 archive_name = hex_name[4:] + '.archive_cache'
113 filename = f"{hex_name[:2]}/{hex_name[2:4]}/{archive_name}"
114
115 full_path = os.path.join(self.directory, filename)
116 return archive_name, full_path
117
118 def __repr__(self):
119 return f'{self.__class__.__name__}(index={self._index}, dir={self.directory})'
120
121
122 class FileSystemFanoutCache(BaseCache):
123 shard_name: str = 'shard_{:03d}'
124 shard_cls = FileSystemShard
125
126 def __init__(self, locking_url, **settings):
127 """
128 Initialize file system cache instance.
129
130 :param str locking_url: redis url for a lock
131 :param settings: settings dict
132
133 """
134 self._locking_url = locking_url
135 self._config = settings
136 cache_dir = self.get_conf('archive_cache.filesystem.store_dir')
137 directory = str(cache_dir)
138 directory = os.path.expanduser(directory)
139 directory = os.path.expandvars(directory)
140 self._directory = directory
141 self._storage_path = directory # common path for all from BaseCache
142
143 self._shard_count = int(self.get_conf('archive_cache.filesystem.cache_shards', pop=True))
144 if self._shard_count < 1:
145 raise ValueError('cache_shards must be 1 or more')
146
147 self._eviction_policy = self.get_conf('archive_cache.filesystem.eviction_policy', pop=True)
148 self._cache_size_limit = self.gb_to_bytes(int(self.get_conf('archive_cache.filesystem.cache_size_gb')))
149
150 self.retry = str2bool(self.get_conf('archive_cache.filesystem.retry', pop=True))
151 self.retry_attempts = int(self.get_conf('archive_cache.filesystem.retry_attempts', pop=True))
152 self.retry_backoff = int(self.get_conf('archive_cache.filesystem.retry_backoff', pop=True))
153
154 log.debug('Initializing %s archival cache instance', self)
155 fs = fsspec.filesystem('file')
156 # check if it's ok to write, and re-create the archive cache main dir
157 # if it doesn't exist yet.
158 # Shard sub-directories (shard_000, shard_001, ...) are not created here;
159 # each FileSystemShard lazily creates its own directory on the first
160 # write, see FileSystemShard._write_file.
161 if not fs.exists(self._directory):
162 fs.makedirs(self._directory, exist_ok=True)
163
164 self._shards = tuple(
165 self.shard_cls(
166 index=num,
167 directory=directory,
168 directory_folder=self.shard_name.format(num),
169 fs=fs,
170 **settings,
171 )
172 for num in range(self._shard_count)
173 )
174 self._hash = self._shards[0].hash
175
176 def _get_size(self, shard, archive_path):
177 return os.stat(archive_path).st_size
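
To illustrate the fanout layout implemented above: a key hashes to one of the shard directories, and the stored file lands two sub-directory levels deep inside that shard. A sketch using the same portable hash as BaseShard.hash; the paths shown are examples.

    import zlib

    def shard_index(key: str, shard_count: int = 8) -> int:
        # identical to BaseShard.hash(key) % shard_count
        return (zlib.adler32(key.encode('utf-8')) & 0xFFFFFFFF) % shard_count

    # 'my-archive.zip' might map to shard_003; with a random 32-char hex name
    # 'ab12...', the archive would be stored under:
    #   <store_dir>/shard_003/ab/12/<hex_name[4:]>.archive_cache
    # and its presence marker next to it:
    #   <store_dir>/shard_003/my-archive.zip.key.json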
@@ -0,0 +1,173 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import codecs
20 import hashlib
21 import logging
22 import os
23 import typing
24
25 import fsspec
26
27 from .base import BaseCache, BaseShard
28 from ..utils import ShardFileReader, NOT_GIVEN
29 from ...type_utils import str2bool
30
31 log = logging.getLogger(__name__)
32
33
34 class S3Shard(BaseShard):
35
36 def __init__(self, index, bucket, bucket_folder, fs, **settings):
37 self._index: int = index
38 self._bucket_folder: str = bucket_folder
39 self.storage_type: str = 'bucket'
40 self._bucket_main: str = bucket
41
42 self.fs = fs
43
44 @property
45 def bucket(self) -> str:
46 """Cache bucket final path."""
47 return os.path.join(self._bucket_main, self._bucket_folder)
48
49 def _get_keyfile(self, archive_key) -> tuple[str, str]:
50 key_file: str = f'{archive_key}-{self.key_suffix}'
51 return key_file, os.path.join(self.bucket, key_file)
52
53 def _get_writer(self, path, mode):
54 return self.fs.open(path, 'wb')
55
56 def _write_file(self, full_path, iterator, mode):
57
58 # ensure folder in bucket exists
59 destination = self.bucket
60 if not self.fs.exists(destination):
61 self.fs.mkdir(destination, s3_additional_kwargs={})
62
63 writer = self._get_writer(full_path, mode)
64
65 digest = hashlib.sha256()
66 with writer:
67 size = 0
68 for chunk in iterator:
69 size += len(chunk)
70 digest.update(chunk)
71 writer.write(chunk)
72
73 sha256 = digest.hexdigest()
74 log.debug('written new archive cache under %s, sha256: %s', full_path, sha256)
75 return size, sha256
76
77 def store(self, key, value_reader, metadata: dict | None = None):
78 return self._store(key, value_reader, metadata, mode='wb')
79
80 def fetch(self, key, retry=NOT_GIVEN,
81 retry_attempts=NOT_GIVEN, retry_backoff=1,
82 presigned_url_expires: int = 0) -> tuple[ShardFileReader, dict]:
83 return self._fetch(key, retry, retry_attempts, retry_backoff, presigned_url_expires=presigned_url_expires)
84
85 def remove(self, key):
86 return self._remove(key)
87
88 def random_filename(self):
89 """Return filename and full-path tuple for file storage.
90
91 Filename will be a randomly generated 28 character hexadecimal string
92 with ".archive_cache" suffixed. Two levels of sub-directories will be used to
93 reduce the size of directories. On older filesystems, lookups in
94 directories with many files may be slow.
95 """
96
97 hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8')
98
99 archive_name = hex_name[4:] + '.archive_cache'
100 filename = f"{hex_name[:2]}-{hex_name[2:4]}-{archive_name}"
101
102 full_path = os.path.join(self.bucket, filename)
103 return archive_name, full_path
104
105 def __repr__(self):
106 return f'{self.__class__.__name__}(index={self._index}, bucket={self.bucket})'
107
108
109 class ObjectStoreCache(BaseCache):
110 shard_name: str = 'shard-{:03d}'
111 shard_cls = S3Shard
112
113 def __init__(self, locking_url, **settings):
114 """
115 Initialize objectstore cache instance.
116
117 :param str locking_url: redis url for a lock
118 :param settings: settings dict
119
120 """
121 self._locking_url = locking_url
122 self._config = settings
123
124 objectstore_url = self.get_conf('archive_cache.objectstore.url')
125 self._storage_path = objectstore_url # common path for all from BaseCache
126
127 self._shard_count = int(self.get_conf('archive_cache.objectstore.bucket_shards', pop=True))
128 if self._shard_count < 1:
129 raise ValueError('cache_shards must be 1 or more')
130
131 self._bucket = settings.pop('archive_cache.objectstore.bucket')
132 if not self._bucket:
133 raise ValueError('archive_cache.objectstore.bucket needs to have a value')
134
135 self._eviction_policy = self.get_conf('archive_cache.objectstore.eviction_policy', pop=True)
136 self._cache_size_limit = self.gb_to_bytes(int(self.get_conf('archive_cache.objectstore.cache_size_gb')))
137
138 self.retry = str2bool(self.get_conf('archive_cache.objectstore.retry', pop=True))
139 self.retry_attempts = int(self.get_conf('archive_cache.objectstore.retry_attempts', pop=True))
140 self.retry_backoff = int(self.get_conf('archive_cache.objectstore.retry_backoff', pop=True))
141
142 endpoint_url = settings.pop('archive_cache.objectstore.url')
143 key = settings.pop('archive_cache.objectstore.key')
144 secret = settings.pop('archive_cache.objectstore.secret')
145 region = settings.pop('archive_cache.objectstore.region')
146
147 log.debug('Initializing %s archival cache instance', self)
148
149 fs = fsspec.filesystem(
150 's3', anon=False, endpoint_url=endpoint_url, key=key, secret=secret, client_kwargs={'region_name': region}
151 )
152
153 # init main bucket
154 if not fs.exists(self._bucket):
155 fs.mkdir(self._bucket)
156
157 self._shards = tuple(
158 self.shard_cls(
159 index=num,
160 bucket=self._bucket,
161 bucket_folder=self.shard_name.format(num),
162 fs=fs,
163 **settings,
164 )
165 for num in range(self._shard_count)
166 )
167 self._hash = self._shards[0].hash
168
169 def _get_size(self, shard, archive_path):
170 return shard.fs.info(archive_path)['size']
171
172 def set_presigned_url_expiry(self, val: int) -> None:
173 self.presigned_url_expires = val
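
The same sketch for the objectstore backend: the keys below are the archive_cache.objectstore.* options consumed above, while the endpoint, credentials, and sizes are placeholder assumptions.

    from vcsserver.lib.archive_cache import get_archival_cache_store

    settings = {
        'archive_cache.backend.type': 'objectstore',
        'archive_cache.locking.url': 'redis://redis:6379/0',
        'archive_cache.objectstore.url': 'http://s3-endpoint:9000',  # placeholder
        'archive_cache.objectstore.key': 's3-key',                   # placeholder
        'archive_cache.objectstore.secret': 's3-secret',             # placeholder
        'archive_cache.objectstore.region': 'eu-central-1',          # placeholder
        'archive_cache.objectstore.bucket': 'rhodecode-archive-cache',
        'archive_cache.objectstore.bucket_shards': '8',
        'archive_cache.objectstore.eviction_policy': 'least-recently-stored',
        'archive_cache.objectstore.cache_size_gb': '10',
        'archive_cache.objectstore.retry': 'false',
        'archive_cache.objectstore.retry_attempts': '0',
        'archive_cache.objectstore.retry_backoff': '1',
    }

    d_cache = get_archival_cache_store(settings)
    d_cache.set_presigned_url_expiry(3600)  # have fetch() add a presigned 'url' to the metadata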
@@ -0,0 +1,62 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import redis
20 from .._vendor import redis_lock
21
22
23 class ArchiveCacheGenerationLock(Exception):
24 pass
25
26
27 class GenerationLock:
28 """
29 Locking mechanism that detects if a lock is acquired
30
31 with GenerationLock(lock_key):
32 compute_archive()
33 """
34 lock_timeout = 7200
35
36 def __init__(self, lock_key, url):
37 self.lock_key = lock_key
38 self._create_client(url)
39 self.lock = self.get_lock()
40
41 def _create_client(self, url):
42 connection_pool = redis.ConnectionPool.from_url(url)
43 self.writer_client = redis.StrictRedis(
44 connection_pool=connection_pool
45 )
46 self.reader_client = self.writer_client
47
48 def get_lock(self):
49 return redis_lock.Lock(
50 redis_client=self.writer_client,
51 name=self.lock_key,
52 expire=self.lock_timeout,
53 strict=True
54 )
55
56 def __enter__(self):
57 acquired = self.lock.acquire(blocking=False)
58 if not acquired:
59 raise ArchiveCacheGenerationLock('Failed to create a lock')
60
61 def __exit__(self, exc_type, exc_val, exc_tb):
62 self.lock.release()
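
Usage sketch for the lock above; the lock key and the compute_archive() step are illustrative, and the import path follows the package layout shown earlier.

    from vcsserver.lib.archive_cache.lock import ArchiveCacheGenerationLock, GenerationLock

    lock = GenerationLock(lock_key='archive-gen:my-archive.zip', url='redis://redis:6379/0')
    try:
        with lock:
            compute_archive()  # hypothetical generation step guarded by the lock
    except ArchiveCacheGenerationLock:
        # another worker holds the lock; back off and fetch from the cache instead
        pass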
@@ -0,0 +1,134 b''
1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import sqlite3
20 import s3fs.core
21
22 NOT_GIVEN = -917
23
24
25 EVICTION_POLICY = {
26 'none': {
27 'evict': None,
28 },
29 'least-recently-stored': {
30 'evict': 'SELECT {fields} FROM archive_cache ORDER BY store_time',
31 },
32 'least-recently-used': {
33 'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_time',
34 },
35 'least-frequently-used': {
36 'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_count',
37 },
38 }
39
40
41 def archive_iterator(_reader, block_size: int = 4096 * 512):
42 # default block_size: 4096 * 512 = 2MB
43 while 1:
44 data = _reader.read(block_size)
45 if not data:
46 break
47 yield data
48
49
50 def format_size(size):
51 # Convert size in bytes to a human-readable format (e.g., KB, MB, GB)
52 for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
53 if size < 1024:
54 return f"{size:.2f} {unit}"
55 size /= 1024
56
57
58 class StatsDB:
59
60 def __init__(self):
61 self.connection = sqlite3.connect(':memory:')
62 self._init_db()
63
64 def _init_db(self):
65 qry = '''
66 CREATE TABLE IF NOT EXISTS archive_cache (
67 rowid INTEGER PRIMARY KEY,
68 key_file TEXT,
69 key_file_path TEXT,
70 archive_key TEXT,
71 archive_path TEXT,
72 store_time REAL,
73 access_time REAL,
74 access_count INTEGER DEFAULT 0,
75 size INTEGER DEFAULT 0
76 )
77 '''
78
79 self.sql(qry)
80 self.connection.commit()
81
82 @property
83 def sql(self):
84 return self.connection.execute
85
86 def bulk_insert(self, rows):
87 qry = '''
88 INSERT INTO archive_cache (
89 rowid,
90 key_file,
91 key_file_path,
92 archive_key,
93 archive_path,
94 store_time,
95 access_time,
96 access_count,
97 size
98 )
99 VALUES (
100 ?, ?, ?, ?, ?, ?, ?, ?, ?
101 )
102 '''
103 cursor = self.connection.cursor()
104 cursor.executemany(qry, rows)
105 self.connection.commit()
106
107 def get_total_size(self):
108 qry = 'SELECT COALESCE(SUM(size), 0) FROM archive_cache'
109 ((total_size,),) = self.sql(qry).fetchall()
110 return total_size
111
112 def get_sorted_keys(self, select_policy):
113 select_policy_qry = select_policy.format(fields='key_file, archive_key, size')
114 return self.sql(select_policy_qry).fetchall()
115
116
117 class ShardFileReader:
118
119 def __init__(self, file_like_reader):
120 self._file_like_reader = file_like_reader
121
122 def __getattr__(self, item):
123 if isinstance(self._file_like_reader, s3fs.core.S3File):
124 match item:
125 case 'name':
126 # s3fs's S3File doesn't expose the 'name' attribute we rely on; map it to full_name
127 return self._file_like_reader.full_name
128 case _:
129 return getattr(self._file_like_reader, item)
130 else:
131 return getattr(self._file_like_reader, item)
132
133 def __repr__(self):
134 return f'<{self.__class__.__name__}={self._file_like_reader}>'
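
A small round-trip through the helpers above, showing how the eviction code feeds StatsDB and reads back sorted keys; the row values are made up for the demo.

    db = StatsDB()
    db.bulk_insert([
        # rowid, key_file, key_file_path, archive_key, archive_path,
        # store_time, access_time, access_count, size
        (1, 'a.zip.key.json', '/store/a.zip.key.json', 'a.zip',
         '/store/aa/bb/x.archive_cache', 1000.0, 2000.0, 5, 1024),
        (2, 'b.zip.key.json', '/store/b.zip.key.json', 'b.zip',
         '/store/cc/dd/y.archive_cache', 1500.0, 1600.0, 1, 2048),
    ])

    print(format_size(db.get_total_size()))  # '3.00 KB'

    select_policy = EVICTION_POLICY['least-recently-used']['evict']
    for key_file, archive_key, size in db.get_sorted_keys(select_policy):
        # yields b.zip first (access_time 1600.0), then a.zip (2000.0)
        ...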
@@ -0,0 +1,111 b''
1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
18 import logging
19 import redis
20
21 from ..lib import rc_cache
22 from ..lib.ext_json import json
23
24
25 log = logging.getLogger(__name__)
26
27 redis_client = None
28
29
30 class RedisTxnClient:
31
32 def __init__(self, url):
33 self.url = url
34 self._create_client(url)
35
36 def _create_client(self, url):
37 connection_pool = redis.ConnectionPool.from_url(url)
38 self.writer_client = redis.StrictRedis(
39 connection_pool=connection_pool
40 )
41 self.reader_client = self.writer_client
42
43 def set(self, key, value):
44 self.writer_client.set(key, value)
45
46 def get(self, key):
47 return self.reader_client.get(key)
48
49 def delete(self, key):
50 self.writer_client.delete(key)
51
52
53 def get_redis_client(url=''):
54
55 global redis_client
56 if redis_client is not None:
57 return redis_client
58 if not url:
59 from vcsserver import CONFIG
60 url = CONFIG['vcs.svn.redis_conn']
61 redis_client = RedisTxnClient(url)
62 return redis_client
63
64
65 def get_txn_id_data_key(repo_path, svn_txn_id):
66 log.debug('svn-txn-id: %s, obtaining data path', svn_txn_id)
67 repo_key = rc_cache.utils.compute_key_from_params(repo_path)
68 final_key = f'{repo_key}.{svn_txn_id}.svn_txn_id'
69 log.debug('computed final key: %s', final_key)
70
71 return final_key
72
73
74 def store_txn_id_data(repo_path, svn_txn_id, data_dict):
75 log.debug('svn-txn-id: %s, storing data', svn_txn_id)
76
77 if not svn_txn_id:
78 log.warning('Cannot store txn_id because it is empty')
79 return
80
81 redis_conn = get_redis_client()
82
83 store_key = get_txn_id_data_key(repo_path, svn_txn_id)
84 store_data = json.dumps(data_dict)
85 redis_conn.set(store_key, store_data)
86
87
88 def get_txn_id_from_store(repo_path, svn_txn_id, rm_on_read=False):
89 """
90 Reads txn_id from store and if present returns the data for callback manager
91 """
92 log.debug('svn-txn-id: %s, retrieving data', svn_txn_id)
93 redis_conn = get_redis_client()
94
95 store_key = get_txn_id_data_key(repo_path, svn_txn_id)
96 data = {}
97
98 raw_data = 'not-set'
99 try:
100 raw_data = redis_conn.get(store_key)
101 if not raw_data:
102 raise ValueError(f'Failed to get txn_id metadata from store: {store_key}')
103 data = json.loads(raw_data)
104 except Exception:
105 log.exception('Failed to get txn_id metadata: %s', raw_data)
106
107 if rm_on_read:
108 log.debug('Cleaning up txn_id at %s', store_key)
109 redis_conn.delete(store_key)
110
111 return data
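
A usage sketch of the two store helpers above; it assumes a reachable Redis at vcs.svn.redis_conn (or an explicit url passed to get_redis_client), and the repo path, txn id, and payload are illustrative.

    store_txn_id_data('/repos/my-repo', 'txn-123', {'username': 'admin'})

    data = get_txn_id_from_store('/repos/my-repo', 'txn-123', rm_on_read=True)
    # data == {'username': 'admin'}; rm_on_read deletes the key after a successful read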
@@ -1,5 +1,5 b''
1 1 [bumpversion]
2 current_version = 5.0.3
2 current_version = 5.1.0
3 3 message = release: Bump version {current_version} to {new_version}
4 4
5 5 [bumpversion:file:vcsserver/VERSION]
@@ -42,7 +42,6 b' test-only:'
42 42 ruff-check:
43 43 ruff check --ignore F401 --ignore I001 --ignore E402 --ignore E501 --ignore F841 --exclude rhodecode/lib/dbmigrate --exclude .eggs --exclude .dev .
44 44
45
46 45 .PHONY: pip-packages
47 46 ## Show outdated packages
48 47 pip-packages:
@@ -63,14 +62,23 b' dev-sh:'
63 62 sudo apt-get install -y zsh carapace-bin
64 63 rm -rf /home/rhodecode/.oh-my-zsh
65 64 curl https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh | sh
66 echo "source <(carapace _carapace)" > /home/rhodecode/.zsrc
67 PROMPT='%(?.%F{green}√.%F{red}?%?)%f %B%F{240}%1~%f%b %# ' zsh
65 @echo "source <(carapace _carapace)" > /home/rhodecode/.zsrc
66 @echo "${RC_DEV_CMD_HELP}"
67 @PROMPT='%(?.%F{green}√.%F{red}?%?)%f %B%F{240}%1~%f%b %# ' zsh
68
69
70 .PHONY: dev-cleanup
71 ## Cleanup: pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
72 dev-cleanup:
73 pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
74 rm -rf /tmp/*
68 75
69 76
70 77 .PHONY: dev-env
71 78 ## make dev-env based on the requirements files and install develop of packages
72 79 ## Cleanup: pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
73 80 dev-env:
81 sudo -u root chown rhodecode:rhodecode /home/rhodecode/.cache/pip/
74 82 pip install build virtualenv
75 83 pip wheel --wheel-dir=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
76 84 pip install --no-index --find-links=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
@@ -84,16 +92,13 b' sh:'
84 92 make dev-sh
85 93
86 94
87 .PHONY: dev-srv
88 ## run develop server instance, docker exec -it $(docker ps -q --filter 'name=dev-enterprise-ce') /bin/bash
89 dev-srv:
90 pserve --reload .dev/dev.ini
95 ## Allows changes of workers e.g make dev-srv-g workers=2
96 workers?=1
91 97
92
93 .PHONY: dev-srv-g
94 ## run gunicorn multi process workers
95 dev-srv-g:
96 gunicorn --workers=4 --paste .dev/dev.ini --bind=0.0.0.0:10010 --config=.dev/gunicorn_config.py
98 .PHONY: dev-srv
99 ## run gunicorn web server with reloader, use workers=N to set multiworker mode
100 dev-srv:
101 gunicorn --paste=.dev/dev.ini --bind=0.0.0.0:10010 --config=.dev/gunicorn_config.py --reload --workers=$(workers)
97 102
98 103
99 104 # Default command on calling make
@@ -1,40 +1,23 b''
1 #
2 1
3 2 ; #################################
4 3 ; RHODECODE VCSSERVER CONFIGURATION
5 4 ; #################################
6 5
7 6 [server:main]
8 ; COMMON HOST/IP CONFIG
7 ; COMMON HOST/IP CONFIG, This applies mostly to develop setup,
8 ; Host port for gunicorn are controlled by gunicorn_conf.py
9 9 host = 0.0.0.0
10 10 port = 10010
11 11
12 ; ##################################################
13 ; WAITRESS WSGI SERVER - Recommended for Development
14 ; ##################################################
15
16 ; use server type
17 use = egg:waitress#main
18
19 ; number of worker threads
20 threads = 5
21
22 ; MAX BODY SIZE 100GB
23 max_request_body_size = 107374182400
24
25 ; Use poll instead of select, fixes file descriptors limits problems.
26 ; May not work on old windows systems.
27 asyncore_use_poll = true
28
29 12
30 13 ; ###########################
31 14 ; GUNICORN APPLICATION SERVER
32 15 ; ###########################
33 16
34 ; run with gunicorn --paste rhodecode.ini
17 ; run with gunicorn --config gunicorn_conf.py --paste vcsserver.ini
35 18
36 19 ; Module to use, this setting shouldn't be changed
37 #use = egg:gunicorn#main
20 use = egg:gunicorn#main
38 21
39 22 [app:main]
40 23 ; The %(here)s variable will be replaced with the absolute path of parent directory
@@ -78,10 +61,14 b' debugtoolbar.exclude_prefixes ='
78 61 ; default locale used by VCS systems
79 62 #locale = en_US.UTF-8
80 63
81 ; path to binaries for vcsserver, it should be set by the installer
64 ; path to binaries (hg,git,svn) for vcsserver, it should be set by the installer
82 65 ; at installation time, e.g /home/user/.rccontrol/vcsserver-1/profile/bin
83 ; it can also be a path to nix-build output in case of development
84 core.binary_dir = ""
66 ; or /usr/local/bin/rhodecode_bin/vcs_bin
67 core.binary_dir =
68
69 ; Redis connection settings for svn integrations logic
70 ; This connection string needs to be the same on ce and vcsserver
71 vcs.svn.redis_conn = redis://redis:6379/0
85 72
86 73 ; Custom exception store path, defaults to TMPDIR
87 74 ; This is used to store exception from RhodeCode in shared directory
@@ -1,12 +1,12 b''
1 #
2 1
3 2 ; #################################
4 3 ; RHODECODE VCSSERVER CONFIGURATION
5 4 ; #################################
6 5
7 6 [server:main]
8 ; COMMON HOST/IP CONFIG
9 host = 127.0.0.1
7 ; COMMON HOST/IP CONFIG, This applies mostly to develop setup,
8 ; Host port for gunicorn are controlled by gunicorn_conf.py
9 host = 0.0.0.0
10 10 port = 10010
11 11
12 12
@@ -14,7 +14,7 b' port = 10010'
14 14 ; GUNICORN APPLICATION SERVER
15 15 ; ###########################
16 16
17 ; run with gunicorn --paste rhodecode.ini
17 ; run with gunicorn --config gunicorn_conf.py --paste vcsserver.ini
18 18
19 19 ; Module to use, this setting shouldn't be changed
20 20 use = egg:gunicorn#main
@@ -41,10 +41,14 b' use = egg:rhodecode-vcsserver'
41 41 ; default locale used by VCS systems
42 42 #locale = en_US.UTF-8
43 43
44 ; path to binaries for vcsserver, it should be set by the installer
44 ; path to binaries (hg,git,svn) for vcsserver, it should be set by the installer
45 45 ; at installation time, e.g /home/user/.rccontrol/vcsserver-1/profile/bin
46 ; it can also be a path to nix-build output in case of development
47 core.binary_dir = ""
46 ; or /usr/local/bin/rhodecode_bin/vcs_bin
47 core.binary_dir =
48
49 ; Redis connection settings for svn integrations logic
50 ; This connection string needs to be the same on ce and vcsserver
51 vcs.svn.redis_conn = redis://redis:6379/0
48 52
49 53 ; Custom exception store path, defaults to TMPDIR
50 54 ; This is used to store exception from RhodeCode in shared directory
@@ -20,28 +20,25 b' celery==5.3.6'
20 20 vine==5.1.0
21 21 python-dateutil==2.8.2
22 22 six==1.16.0
23 tzdata==2023.4
23 tzdata==2024.1
24 24 vine==5.1.0
25 25 contextlib2==21.6.0
26 cov-core==1.15.0
27 coverage==7.2.3
28 diskcache==5.6.3
29 dogpile.cache==1.3.0
26 dogpile.cache==1.3.3
30 27 decorator==5.1.1
31 28 stevedore==5.1.0
32 29 pbr==5.11.1
33 30 dulwich==0.21.6
34 31 urllib3==1.26.14
32 fsspec==2024.6.0
35 33 gunicorn==21.2.0
36 packaging==23.1
37 hg-evolve==11.0.2
34 packaging==24.0
35 hg-evolve==11.1.3
38 36 importlib-metadata==6.0.0
39 37 zipp==3.15.0
40 mercurial==6.3.3
41 mock==5.0.2
38 mercurial==6.7.4
42 39 more-itertools==9.1.0
43 msgpack==1.0.7
44 orjson==3.9.13
40 msgpack==1.0.8
41 orjson==3.10.3
45 42 psutil==5.9.8
46 43 py==1.11.0
47 44 pygit2==1.13.3
@@ -59,10 +56,38 b' pyramid==2.0.2'
59 56 venusian==3.0.0
60 57 webob==1.8.7
61 58 zope.deprecation==5.0.0
62 zope.interface==6.1.0
63 redis==5.0.1
59 zope.interface==6.3.0
60 redis==5.0.4
64 61 async-timeout==4.0.3
65 62 repoze.lru==0.7
63 s3fs==2024.6.0
64 aiobotocore==2.13.0
65 aiohttp==3.9.5
66 aiosignal==1.3.1
67 frozenlist==1.4.1
68 attrs==22.2.0
69 frozenlist==1.4.1
70 multidict==6.0.5
71 yarl==1.9.4
72 idna==3.4
73 multidict==6.0.5
74 aioitertools==0.11.0
75 botocore==1.34.106
76 jmespath==1.0.1
77 python-dateutil==2.8.2
78 six==1.16.0
79 urllib3==1.26.14
80 wrapt==1.16.0
81 aiohttp==3.9.5
82 aiosignal==1.3.1
83 frozenlist==1.4.1
84 attrs==22.2.0
85 frozenlist==1.4.1
86 multidict==6.0.5
87 yarl==1.9.4
88 idna==3.4
89 multidict==6.0.5
90 fsspec==2024.6.0
66 91 scandir==1.10.0
67 92 setproctitle==1.3.3
68 93 subvertpy==0.11.0
@@ -1,42 +1,45 b''
1 1 # test related requirements
2
3 cov-core==1.15.0
4 coverage==7.2.3
5 mock==5.0.2
6 py==1.11.0
7 pytest-cov==4.0.0
8 coverage==7.2.3
9 pytest==7.3.1
10 attrs==22.2.0
2 mock==5.1.0
3 pytest-cov==4.1.0
4 coverage==7.4.3
5 pytest==8.1.1
11 6 iniconfig==2.0.0
12 packaging==23.1
13 pluggy==1.0.0
7 packaging==24.0
8 pluggy==1.4.0
9 pytest-env==1.1.3
10 pytest==8.1.1
11 iniconfig==2.0.0
12 packaging==24.0
13 pluggy==1.4.0
14 14 pytest-profiling==1.7.0
15 15 gprof2dot==2022.7.29
16 pytest==7.3.1
17 attrs==22.2.0
16 pytest==8.1.1
18 17 iniconfig==2.0.0
19 packaging==23.1
20 pluggy==1.0.0
18 packaging==24.0
19 pluggy==1.4.0
21 20 six==1.16.0
22 pytest-runner==6.0.0
23 pytest-sugar==0.9.7
24 packaging==23.1
25 pytest==7.3.1
26 attrs==22.2.0
21 pytest-rerunfailures==13.0
22 packaging==24.0
23 pytest==8.1.1
27 24 iniconfig==2.0.0
28 packaging==23.1
29 pluggy==1.0.0
30 termcolor==2.3.0
31 pytest-timeout==2.1.0
32 pytest==7.3.1
33 attrs==22.2.0
25 packaging==24.0
26 pluggy==1.4.0
27 pytest-runner==6.0.1
28 pytest-sugar==1.0.0
29 packaging==24.0
30 pytest==8.1.1
34 31 iniconfig==2.0.0
35 packaging==23.1
36 pluggy==1.0.0
32 packaging==24.0
33 pluggy==1.4.0
34 termcolor==2.4.0
35 pytest-timeout==2.3.1
36 pytest==8.1.1
37 iniconfig==2.0.0
38 packaging==24.0
39 pluggy==1.4.0
37 40 webtest==3.0.0
38 beautifulsoup4==4.11.2
39 soupsieve==2.4
41 beautifulsoup4==4.12.3
42 soupsieve==2.5
40 43 waitress==3.0.0
41 44 webob==1.8.7
42 45
@@ -1,1 +1,1 b''
1 5.0.3 No newline at end of file
1 5.1.0 No newline at end of file
@@ -20,12 +20,12 b' import tempfile'
20 20 import logging
21 21 import urllib.parse
22 22
23 from vcsserver.lib.rc_cache.archive_cache import get_archival_cache_store
23 from vcsserver.lib.archive_cache import get_archival_cache_store
24 24
25 25 from vcsserver import exceptions
26 26 from vcsserver.exceptions import NoContentException
27 27 from vcsserver.hgcompat import archival
28 from vcsserver.str_utils import safe_bytes
28 from vcsserver.lib.str_utils import safe_bytes
29 29 from vcsserver.lib.exc_tracking import format_exc
30 30 log = logging.getLogger(__name__)
31 31
@@ -120,8 +120,7 b' def store_archive_in_cache(node_walker, '
120 120 d_cache = get_archival_cache_store(config=cache_config)
121 121
122 122 if archive_key in d_cache:
123 with d_cache as d_cache_reader:
124 reader, tag = d_cache_reader.get(archive_key, read=True, tag=True, retry=True)
123 reader, metadata = d_cache.fetch(archive_key)
125 124 return reader.name
126 125
127 126 archive_tmp_path = safe_bytes(tempfile.mkstemp()[1])
@@ -139,6 +138,7 b' def store_archive_in_cache(node_walker, '
139 138
140 139 for f in node_walker(commit_id, archive_at_path):
141 140 f_path = os.path.join(safe_bytes(archive_dir_name), safe_bytes(f.path).lstrip(b'/'))
141
142 142 try:
143 143 archiver.addfile(f_path, f.mode, f.is_link, f.raw_bytes())
144 144 except NoContentException:
@@ -146,32 +146,26 b' def store_archive_in_cache(node_walker, '
146 146 # directories which are not supported by archiver
147 147 archiver.addfile(os.path.join(f_path, b'.dir'), f.mode, f.is_link, b'')
148 148
149 if write_metadata:
150 149 metadata = dict([
151 150 ('commit_id', commit_id),
152 151 ('mtime', mtime),
153 152 ])
154 153 metadata.update(extra_metadata)
155
154 if write_metadata:
156 155 meta = [safe_bytes(f"{f_name}:{value}") for f_name, value in metadata.items()]
157 156 f_path = os.path.join(safe_bytes(archive_dir_name), b'.archival.txt')
158 157 archiver.addfile(f_path, 0o644, False, b'\n'.join(meta))
159 158
160 159 archiver.done()
161 160
162 # ensure set & get are atomic
163 with d_cache.transact():
164
165 161 with open(archive_tmp_path, 'rb') as archive_file:
166 add_result = d_cache.set(archive_key, archive_file, read=True, tag='db-name', retry=True)
162 add_result = d_cache.store(archive_key, archive_file, metadata=metadata)
167 163 if not add_result:
168 164 log.error('Failed to store cache for key=%s', archive_key)
169 165
170 166 os.remove(archive_tmp_path)
171 167
172 reader, tag = d_cache.get(archive_key, read=True, tag=True, retry=True)
173 if not reader:
174 raise AssertionError(f'empty reader on key={archive_key} added={add_result}')
168 reader, metadata = d_cache.fetch(archive_key)
175 169
176 170 return reader.name
177 171
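
The hunk above replaces the old diskcache-style get/set/transact calls with the archive_cache API; condensed, the new call pattern (names taken from the diff) is:

    d_cache = get_archival_cache_store(config=cache_config)

    if archive_key in d_cache:
        reader, metadata = d_cache.fetch(archive_key)
        return reader.name

    # ... write the archive into archive_tmp_path ...

    with open(archive_tmp_path, 'rb') as archive_file:
        d_cache.store(archive_key, archive_file, metadata=metadata)

    reader, metadata = d_cache.fetch(archive_key)
    return reader.name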
@@ -24,7 +24,7 b' import logging'
24 24 import tempfile
25 25 import logging.config
26 26
27 from vcsserver.type_utils import str2bool, aslist
27 from vcsserver.lib.type_utils import str2bool, aslist
28 28
29 29 log = logging.getLogger(__name__)
30 30
@@ -24,10 +24,10 b' from pyramid.httpexceptions import ('
24 24 HTTPBadRequest, HTTPNotImplemented, HTTPNotFound, HTTPForbidden,
25 25 HTTPUnprocessableEntity)
26 26
27 from vcsserver.lib.rc_json import json
27 from vcsserver.lib.ext_json import json
28 28 from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
29 29 from vcsserver.git_lfs.utils import safe_result, get_cython_compat_decorator
30 from vcsserver.str_utils import safe_int
30 from vcsserver.lib.str_utils import safe_int
31 31
32 32 log = logging.getLogger(__name__)
33 33
@@ -19,8 +19,8 b' import os'
19 19 import pytest
20 20 from webtest.app import TestApp as WebObTestApp
21 21
22 from vcsserver.lib.rc_json import json
23 from vcsserver.str_utils import safe_bytes
22 from vcsserver.lib.ext_json import json
23 from vcsserver.lib.str_utils import safe_bytes
24 24 from vcsserver.git_lfs.app import create_app
25 25 from vcsserver.git_lfs.lib import LFSOidStore
26 26
@@ -17,7 +17,7 b''
17 17
18 18 import os
19 19 import pytest
20 from vcsserver.str_utils import safe_bytes
20 from vcsserver.lib.str_utils import safe_bytes
21 21 from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
22 22
23 23
@@ -24,7 +24,7 b' from mercurial import demandimport'
24 24
25 25 # patch demandimport, due to bug in mercurial when it always triggers
26 26 # demandimport.enable()
27 from vcsserver.str_utils import safe_bytes
27 from vcsserver.lib.str_utils import safe_bytes
28 28
29 29 demandimport.enable = lambda *args, **kwargs: 1
30 30
@@ -56,7 +56,7 b' from mercurial.node import bin, hex'
56 56 from mercurial.encoding import tolocal
57 57 from mercurial.discovery import findcommonoutgoing
58 58 from mercurial.hg import peer
59 from mercurial.httppeer import makepeer
59 from mercurial.httppeer import make_peer
60 60 from mercurial.utils.urlutil import url as hg_url
61 61 from mercurial.scmutil import revrange, revsymbol
62 62 from mercurial.node import nullrev
@@ -25,7 +25,7 b' import pkg_resources'
25 25
26 26 import vcsserver
27 27 import vcsserver.settings
28 from vcsserver.str_utils import safe_bytes
28 from vcsserver.lib.str_utils import safe_bytes
29 29
30 30 log = logging.getLogger(__name__)
31 31
@@ -87,8 +87,16 b' def install_git_hooks(repo_path, bare, e'
87 87
88 88 if _rhodecode_hook or force_create:
89 89 log.debug('writing git %s hook file at %s !', h_type, _hook_file)
90 env_expand = str([
91 ('RC_INI_FILE', vcsserver.CONFIG['__file__']),
92 ('RC_CORE_BINARY_DIR', vcsserver.settings.BINARY_DIR),
93 ('RC_GIT_EXECUTABLE', vcsserver.settings.GIT_EXECUTABLE()),
94 ('RC_SVN_EXECUTABLE', vcsserver.settings.SVN_EXECUTABLE()),
95 ('RC_SVNLOOK_EXECUTABLE', vcsserver.settings.SVNLOOK_EXECUTABLE()),
96 ])
90 97 try:
91 98 with open(_hook_file, 'wb') as f:
99 template = template.replace(b'_OS_EXPAND_', safe_bytes(env_expand))
92 100 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
93 101 template = template.replace(b'_DATE_', safe_bytes(timestamp))
94 102 template = template.replace(b'_ENV_', safe_bytes(executable))
@@ -141,17 +149,17 b' def install_svn_hooks(repo_path, executa'
141 149 log.debug('writing svn %s hook file at %s !', h_type, _hook_file)
142 150
143 151 env_expand = str([
152 ('RC_INI_FILE', vcsserver.CONFIG['__file__']),
144 153 ('RC_CORE_BINARY_DIR', vcsserver.settings.BINARY_DIR),
145 154 ('RC_GIT_EXECUTABLE', vcsserver.settings.GIT_EXECUTABLE()),
146 155 ('RC_SVN_EXECUTABLE', vcsserver.settings.SVN_EXECUTABLE()),
147 156 ('RC_SVNLOOK_EXECUTABLE', vcsserver.settings.SVNLOOK_EXECUTABLE()),
148
149 157 ])
150 158 try:
151 159 with open(_hook_file, 'wb') as f:
160 template = template.replace(b'_OS_EXPAND_', safe_bytes(env_expand))
152 161 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
153 162 template = template.replace(b'_DATE_', safe_bytes(timestamp))
154 template = template.replace(b'_OS_EXPAND_', safe_bytes(env_expand))
155 163 template = template.replace(b'_ENV_', safe_bytes(executable))
156 164 template = template.replace(b'_PATH_', safe_bytes(path))
157 165
@@ -1,4 +1,5 b''
1 1 #!_ENV_
2
2 3 import os
3 4 import sys
4 5 path_adjust = [_PATH_]
@@ -6,6 +7,11 b' path_adjust = [_PATH_]'
6 7 if path_adjust:
7 8 sys.path = path_adjust
8 9
10 # special trick to pass in some information from rc to hooks
11 # mod_dav strips ALL env vars and we can't even access things like PATH
12 for env_k, env_v in _OS_EXPAND_:
13 os.environ[env_k] = env_v
14
9 15 try:
10 16 from vcsserver import hooks
11 17 except ImportError:
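
To illustrate the _OS_EXPAND_ trick used above: install_git_hooks/install_svn_hooks substitute str() of a list of (name, value) tuples into the template, so the rendered hook iterates a literal list. A sketch of the rendered result, with placeholder paths:

    # rendered hook after template substitution (paths are placeholders):
    for env_k, env_v in [
        ('RC_INI_FILE', '/etc/rhodecode/vcsserver.ini'),
        ('RC_CORE_BINARY_DIR', '/usr/local/bin/rhodecode_bin/vcs_bin'),
        ('RC_GIT_EXECUTABLE', '/usr/bin/git'),
        ('RC_SVN_EXECUTABLE', '/usr/bin/svn'),
        ('RC_SVNLOOK_EXECUTABLE', '/usr/bin/svnlook'),
    ]:
        os.environ[env_k] = env_v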
@@ -30,11 +36,13 b' def main():'
30 36
31 37 repo_path = os.getcwd()
32 38 push_data = sys.stdin.readlines()
33 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
39
34 40 # os.environ is modified here by a subprocess call that
35 41 # runs git and later git executes this hook.
36 42 # Environ gets some additional info from rhodecode system
37 43 # like IP or username from basic-auth
44
45 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
38 46 try:
39 47 result = hooks.git_post_receive(repo_path, push_data, os.environ)
40 48 sys.exit(result)
@@ -1,4 +1,5 b''
1 1 #!_ENV_
2
2 3 import os
3 4 import sys
4 5 path_adjust = [_PATH_]
@@ -6,6 +7,11 b' path_adjust = [_PATH_]'
6 7 if path_adjust:
7 8 sys.path = path_adjust
8 9
10 # special trick to pass in some information from rc to hooks
11 # mod_dav strips ALL env vars and we can't even access things like PATH
12 for env_k, env_v in _OS_EXPAND_:
13 os.environ[env_k] = env_v
14
9 15 try:
10 16 from vcsserver import hooks
11 17 except ImportError:
@@ -30,11 +36,13 b' def main():'
30 36
31 37 repo_path = os.getcwd()
32 38 push_data = sys.stdin.readlines()
33 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
39
34 40 # os.environ is modified here by a subprocess call that
35 41 # runs git and later git executes this hook.
36 42 # Environ gets some additional info from rhodecode system
37 43 # like IP or username from basic-auth
44
45 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
38 46 try:
39 47 result = hooks.git_pre_receive(repo_path, push_data, os.environ)
40 48 sys.exit(result)
@@ -7,6 +7,11 b' path_adjust = [_PATH_]'
7 7 if path_adjust:
8 8 sys.path = path_adjust
9 9
10 # special trick to pass in some information from rc to hooks
11 # mod_dav strips ALL env vars and we can't even access things like PATH
12 for env_k, env_v in _OS_EXPAND_:
13 os.environ[env_k] = env_v
14
10 15 try:
11 16 from vcsserver import hooks
12 17 except ImportError:
@@ -33,13 +38,13 b' def main():'
33 38
34 39 if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_SVN_HOOKS'):
35 40 sys.exit(0)
36 repo_path = os.getcwd()
41 cwd_repo_path = os.getcwd()
37 42 push_data = sys.argv[1:]
38 43
39 44 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
40 45
41 46 try:
42 result = hooks.svn_post_commit(repo_path, push_data, os.environ)
47 result = hooks.svn_post_commit(cwd_repo_path, push_data, os.environ)
43 48 sys.exit(result)
44 49 except Exception as error:
45 50 # TODO: johbo: Improve handling of this special case
@@ -7,6 +7,11 b' path_adjust = [_PATH_]'
7 7 if path_adjust:
8 8 sys.path = path_adjust
9 9
10 # special trick to pass in some information from rc to hooks
11 # mod_dav strips ALL env vars and we can't even access things like PATH
12 for env_k, env_v in _OS_EXPAND_:
13 os.environ[env_k] = env_v
14
10 15 try:
11 16 from vcsserver import hooks
12 17 except ImportError:
@@ -37,13 +42,12 b' def main():'
37 42
38 43 if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_SVN_HOOKS'):
39 44 sys.exit(0)
40 repo_path = os.getcwd()
45 cwd_repo_path = os.getcwd()
41 46 push_data = sys.argv[1:]
42 47
43 48 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
44
45 49 try:
46 result = hooks.svn_pre_commit(repo_path, push_data, os.environ)
50 result = hooks.svn_pre_commit(cwd_repo_path, push_data, os.environ)
47 51 sys.exit(result)
48 52 except Exception as error:
49 53 # TODO: johbo: Improve handling of this special case
@@ -31,9 +31,10 b' from celery import Celery'
31 31 import mercurial.scmutil
32 32 import mercurial.node
33 33
34 from vcsserver.lib.rc_json import json
35 34 from vcsserver import exceptions, subprocessio, settings
36 from vcsserver.str_utils import ascii_str, safe_str
35 from vcsserver.lib.ext_json import json
36 from vcsserver.lib.str_utils import ascii_str, safe_str
37 from vcsserver.lib.svn_txn_utils import get_txn_id_from_store
37 38 from vcsserver.remote.git_remote import Repository
38 39
39 40 celery_app = Celery('__vcsserver__')
@@ -95,9 +96,9 b' class HooksCeleryClient:'
95 96 celery_app.config_from_object({
96 97 'broker_url': queue, 'result_backend': backend,
97 98 'broker_connection_retry_on_startup': True,
98 'task_serializer': 'msgpack',
99 'task_serializer': 'json',
99 100 'accept_content': ['json', 'msgpack'],
100 'result_serializer': 'msgpack',
101 'result_serializer': 'json',
101 102 'result_accept_content': ['json', 'msgpack']
102 103 })
103 104 self.celery_app = celery_app
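The serializer switch above reads as a compatibility move: tasks are now produced as JSON, while both `accept_content` lists still include msgpack so messages queued by an older vcsserver are not rejected during a rolling upgrade. A standalone sketch of the same configuration, with placeholder broker/backend URLs:

    from celery import Celery

    app = Celery('__vcsserver__')
    app.config_from_object({
        'broker_url': 'redis://redis:6379/0',         # placeholder
        'result_backend': 'redis://redis:6379/0',     # placeholder
        'broker_connection_retry_on_startup': True,
        'task_serializer': 'json',                    # produce JSON from now on
        'result_serializer': 'json',
        'accept_content': ['json', 'msgpack'],        # still consume old msgpack
        'result_accept_content': ['json', 'msgpack'],
    })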
@@ -293,20 +294,28 b' def _get_hg_env(old_rev, new_rev, txnid,'
293 294 return [(k, v) for k, v in env.items()]
294 295
295 296
297 def _get_ini_settings(ini_file):
298 from vcsserver.http_main import sanitize_settings_and_apply_defaults
299 from vcsserver.lib.config_utils import get_app_config_lightweight, configure_and_store_settings
300
301 global_config = {'__file__': ini_file}
302 ini_settings = get_app_config_lightweight(ini_file)
303 sanitize_settings_and_apply_defaults(global_config, ini_settings)
304 configure_and_store_settings(global_config, ini_settings)
305
306 return ini_settings
307
308
296 309 def _fix_hooks_executables(ini_path=''):
297 310 """
298 311 This is a trick to set proper settings.EXECUTABLE paths for certain execution patterns,
299 312 especially for subversion, where hooks strip the entire env and calling just the 'svn' command
300 313 will most likely fail because svn is not on PATH
301 314 """
302 from vcsserver.http_main import sanitize_settings_and_apply_defaults
303 from vcsserver.lib.config_utils import get_app_config_lightweight
304
315 # set defaults in case we can't read from the ini_file
305 316 core_binary_dir = settings.BINARY_DIR or '/usr/local/bin/rhodecode_bin/vcs_bin'
306 317 if ini_path:
307
308 ini_settings = get_app_config_lightweight(ini_path)
309 ini_settings = sanitize_settings_and_apply_defaults({'__file__': ini_path}, ini_settings)
318 ini_settings = _get_ini_settings(ini_path)
310 319 core_binary_dir = ini_settings['core.binary_dir']
311 320
312 321 settings.BINARY_DIR = core_binary_dir
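With RC_INI_FILE restored into the environment by the `_OS_EXPAND_` loop, the hook entry points below pass a real ini path into this function instead of relying on the baked-in binary dir. A short sketch of the expected call pattern (the surrounding scaffolding is illustrative):

    import os

    # inside a hook's main(): prefer the ini file handed over by rhodecode,
    # fall back to the hardcoded defaults when it is absent
    ini_file = os.environ.get('RC_INI_FILE') or ''
    _fix_hooks_executables(ini_file)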
@@ -570,7 +579,7 b' def git_pre_receive(unused_repo_path, re'
570 579 rev_data = _parse_git_ref_lines(revision_lines)
571 580 if 'push' not in extras['hooks']:
572 581 return 0
573 _fix_hooks_executables()
582 _fix_hooks_executables(env.get('RC_INI_FILE'))
574 583
575 584 empty_commit_id = '0' * 40
576 585
@@ -616,7 +625,7 b' def git_post_receive(unused_repo_path, r'
616 625 if 'push' not in extras['hooks']:
617 626 return 0
618 627
619 _fix_hooks_executables()
628 _fix_hooks_executables(env.get('RC_INI_FILE'))
620 629
621 630 rev_data = _parse_git_ref_lines(revision_lines)
622 631
@@ -720,37 +729,8 b' def git_post_receive(unused_repo_path, r'
720 729 return status_code
721 730
722 731
723 def _get_extras_from_txn_id(path, txn_id):
724 _fix_hooks_executables()
725
726 extras = {}
727 try:
728 cmd = [settings.SVNLOOK_EXECUTABLE(), 'pget',
729 '-t', txn_id,
730 '--revprop', path, 'rc-scm-extras']
731 stdout, stderr = subprocessio.run_command(
732 cmd, env=os.environ.copy())
733 extras = json.loads(base64.urlsafe_b64decode(stdout))
734 except Exception:
735 log.exception('Failed to extract extras info from txn_id')
736
737 return extras
738
739
740 def _get_extras_from_commit_id(commit_id, path):
741 _fix_hooks_executables()
742
743 extras = {}
744 try:
745 cmd = [settings.SVNLOOK_EXECUTABLE(), 'pget',
746 '-r', commit_id,
747 '--revprop', path, 'rc-scm-extras']
748 stdout, stderr = subprocessio.run_command(
749 cmd, env=os.environ.copy())
750 extras = json.loads(base64.urlsafe_b64decode(stdout))
751 except Exception:
752 log.exception('Failed to extract extras info from commit_id')
753
732 def get_extras_from_txn_id(repo_path, txn_id):
733 extras = get_txn_id_from_store(repo_path, txn_id)
754 734 return extras
755 735
756 736
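The old `svnlook pget` subprocess round-trip is gone; extras are now read back from data persisted at transaction time via `store_txn_id_data` (see the svn_remote hunk further down). A rough model of that store, assuming the Redis backend implied by the new `vcs.svn.redis_conn` setting; the key layout and TTL are assumptions, only the function names come from this commit:

    import json
    import redis

    client = redis.Redis.from_url('redis://redis:6379/0')  # vcs.svn.redis_conn

    def store_txn_id_data(repo_path: str, txn_id: str, data: dict) -> None:
        # persist hook extras keyed by repo path + transaction id (assumed layout)
        client.set(f'rc_svn_txn:{repo_path}:{txn_id}', json.dumps(data), ex=3600)

    def get_txn_id_from_store(repo_path: str, txn_id: str) -> dict:
        raw = client.get(f'rc_svn_txn:{repo_path}:{txn_id}')
        return json.loads(raw) if raw else {}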
@@ -763,9 +743,17 b' def svn_pre_commit(repo_path, commit_dat'
763 743 if env.get('RC_SCM_DATA'):
764 744 extras = json.loads(env['RC_SCM_DATA'])
765 745 else:
746 ini_path = env.get('RC_INI_FILE')
747 if ini_path:
748 _get_ini_settings(ini_path)
766 749 # fallback method to read from TXN-ID stored data
767 extras = _get_extras_from_txn_id(path, txn_id)
750 extras = get_extras_from_txn_id(path, txn_id)
751
768 752 if not extras:
753 raise ValueError('SVN-PRE-COMMIT: Failed to extract context data (extras) for hook execution')
754
755 if extras.get('rc_internal_commit'):
756 # special marker for an internal commit; we don't call the hooks client
769 757 return 0
770 758
771 759 extras['hook_type'] = 'pre_commit'
@@ -802,9 +790,17 b' def svn_post_commit(repo_path, commit_da'
802 790 if env.get('RC_SCM_DATA'):
803 791 extras = json.loads(env['RC_SCM_DATA'])
804 792 else:
793 ini_path = env.get('RC_INI_FILE')
794 if ini_path:
795 _get_ini_settings(ini_path)
805 796 # fallback method to read from TXN-ID stored data
806 extras = _get_extras_from_commit_id(commit_id, path)
807 if not extras:
797 extras = get_extras_from_txn_id(path, txn_id)
798
799 if not extras and txn_id:
800 raise ValueError('SVN-POST-COMMIT: Failed to extract context data (extras) for hook execution')
801
802 if extras.get('rc_internal_commit'):
803 # special marker for an internal commit; we don't call the hooks client
808 804 return 0
809 805
810 806 extras['hook_type'] = 'post_commit'
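Both SVN hooks now resolve extras the same way: prefer RC_SCM_DATA from the environment, bootstrap settings from RC_INI_FILE when falling back, then read the extras stored for the transaction id. Condensed into one hypothetical helper (`resolve_extras` is not part of this commit; it leans on the `_get_ini_settings` and `get_extras_from_txn_id` helpers introduced above):

    import json

    def resolve_extras(env: dict, repo_path: str, txn_id: str) -> dict:
        # normal path: rhodecode passes the full context via RC_SCM_DATA
        if env.get('RC_SCM_DATA'):
            return json.loads(env['RC_SCM_DATA'])
        # fallback: bootstrap settings from the ini file, then read the
        # extras stored for this transaction id when it was created
        if ini_path := env.get('RC_INI_FILE'):
            _get_ini_settings(ini_path)
        return get_extras_from_txn_id(repo_path, txn_id)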
@@ -37,20 +37,23 b' from pyramid.wsgi import wsgiapp'
37 37 from pyramid.response import Response
38 38
39 39 from vcsserver.base import BytesEnvelope, BinaryEnvelope
40 from vcsserver.lib.rc_json import json
40
41 41 from vcsserver.config.settings_maker import SettingsMaker
42 from vcsserver.str_utils import safe_int
43 from vcsserver.lib.statsd_client import StatsdClient
42
44 43 from vcsserver.tweens.request_wrapper import get_headers_call_context
45 44
46 import vcsserver
47 from vcsserver import remote_wsgi, scm_app, settings, hgpatches
45 from vcsserver import remote_wsgi, scm_app, hgpatches
46 from vcsserver.server import VcsServer
48 47 from vcsserver.git_lfs.app import GIT_LFS_CONTENT_TYPE, GIT_LFS_PROTO_PAT
49 48 from vcsserver.echo_stub import remote_wsgi as remote_wsgi_stub
50 49 from vcsserver.echo_stub.echo_app import EchoApp
51 50 from vcsserver.exceptions import HTTPRepoLocked, HTTPRepoBranchProtected
52 51 from vcsserver.lib.exc_tracking import store_exception, format_exc
53 from vcsserver.server import VcsServer
52 from vcsserver.lib.str_utils import safe_int
53 from vcsserver.lib.statsd_client import StatsdClient
54 from vcsserver.lib.ext_json import json
55 from vcsserver.lib.config_utils import configure_and_store_settings
56
54 57
55 58 strict_vcs = True
56 59
@@ -94,8 +97,7 b' log = logging.getLogger(__name__)'
94 97 try:
95 98 locale.setlocale(locale.LC_ALL, '')
96 99 except locale.Error as e:
97 log.error(
98 'LOCALE ERROR: failed to set LC_ALL, fallback to LC_ALL=C, org error: %s', e)
100 log.error('LOCALE ERROR: failed to set LC_ALL, fallback to LC_ALL=C, org error: %s', e)
99 101 os.environ['LC_ALL'] = 'C'
100 102
101 103
@@ -233,7 +235,7 b' class HTTPApplication:'
233 235
234 236 self.global_config = global_config
235 237 self.config.include('vcsserver.lib.rc_cache')
236 self.config.include('vcsserver.lib.rc_cache.archive_cache')
238 self.config.include('vcsserver.lib.archive_cache')
237 239
238 240 settings_locale = settings.get('locale', '') or 'en_US.UTF-8'
239 241 vcs = VCS(locale_conf=settings_locale, cache_config=settings)
@@ -248,25 +250,10 b' class HTTPApplication:'
248 250 log.warning("Using EchoApp for VCS operations.")
249 251 self.remote_wsgi = remote_wsgi_stub
250 252
251 self._configure_settings(global_config, settings)
253 configure_and_store_settings(global_config, settings)
252 254
253 255 self._configure()
254 256
255 def _configure_settings(self, global_config, app_settings):
256 """
257 Configure the settings module.
258 """
259 settings_merged = global_config.copy()
260 settings_merged.update(app_settings)
261
262 binary_dir = app_settings['core.binary_dir']
263
264 settings.BINARY_DIR = binary_dir
265
266 # Store the settings to make them available to other modules.
267 vcsserver.PYRAMID_SETTINGS = settings_merged
268 vcsserver.CONFIG = settings_merged
269
270 257 def _configure(self):
271 258 self.config.add_renderer(name='msgpack', factory=self._msgpack_renderer_factory)
272 259
@@ -715,6 +702,8 b' def sanitize_settings_and_apply_defaults'
715 702 'core.binary_dir', '/usr/local/bin/rhodecode_bin/vcs_bin',
716 703 default_when_empty=True, parser='string:noquote')
717 704
705 settings_maker.make_setting('vcs.svn.redis_conn', 'redis://redis:6379/0')
706
718 707 temp_store = tempfile.gettempdir()
719 708 default_cache_dir = os.path.join(temp_store, 'rc_cache')
720 709 # save default, cache dir, and use it for all backends later.
@@ -3,7 +3,6 b' This library is provided to allow standa'
3 3 to output log data as JSON formatted strings
4 4 '''
5 5 import logging
6 import json
7 6 import re
8 7 from datetime import date, datetime, time, tzinfo, timedelta
9 8 import traceback
@@ -13,13 +12,8 b' from inspect import istraceback'
13 12
14 13 from collections import OrderedDict
15 14
16
17 def _inject_req_id(record, *args, **kwargs):
18 return record
19
20
21 ExceptionAwareFormatter = logging.Formatter
22
15 from ...logging_formatter import _inject_req_id, ExceptionAwareFormatter
16 from ...ext_json import sjson as json
23 17
24 18 ZERO = timedelta(0)
25 19 HOUR = timedelta(hours=1)
@@ -111,11 +111,12 b' class Lock:'
111 111 extend_script = None
112 112 reset_script = None
113 113 reset_all_script = None
114 blocking = None
114 115
115 116 _lock_renewal_interval: float
116 117 _lock_renewal_thread: Union[threading.Thread, None]
117 118
118 def __init__(self, redis_client, name, expire=None, id=None, auto_renewal=False, strict=True, signal_expire=1000):
119 def __init__(self, redis_client, name, expire=None, id=None, auto_renewal=False, strict=True, signal_expire=1000, blocking=True):
119 120 """
120 121 :param redis_client:
121 122 An instance of :class:`~StrictRedis`.
@@ -143,6 +144,9 b' class Lock:'
143 144 If set ``True`` then the ``redis_client`` needs to be an instance of ``redis.StrictRedis``.
144 145 :param signal_expire:
145 146 Advanced option to override signal list expiration in milliseconds. Increase it for very slow clients. Default: ``1000``.
147 :param blocking:
148 Boolean value specifying whether the lock should block or not.
149 Used in the `__enter__` method.
146 150 """
147 151 if strict and not isinstance(redis_client, StrictRedis):
148 152 raise ValueError("redis_client must be instance of StrictRedis. "
@@ -179,6 +183,8 b' class Lock:'
179 183 else None)
180 184 self._lock_renewal_thread = None
181 185
186 self.blocking = blocking
187
182 188 self.register_scripts(redis_client)
183 189
184 190 @classmethod
@@ -342,9 +348,11 b' class Lock:'
342 348 loggers["refresh.exit"].debug("Renewal thread for Lock(%r) exited.", self._name)
343 349
344 350 def __enter__(self):
345 acquired = self.acquire(blocking=True)
351 acquired = self.acquire(blocking=self.blocking)
346 352 if not acquired:
353 if self.blocking:
347 354 raise AssertionError(f"Lock({self._name}) wasn't acquired, but blocking=True was used!")
355 raise NotAcquired(f"Lock({self._name}) is not acquired or it already expired.")
348 356 return self
349 357
350 358 def __exit__(self, exc_type=None, exc_value=None, traceback=None):
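The new `blocking` flag changes the context-manager contract: with `blocking=False`, a contended lock no longer blocks, and instead of the AssertionError it raises `NotAcquired`. A usage sketch with `Lock` and `NotAcquired` from this module and a placeholder Redis URL and lock name:

    from redis import StrictRedis

    client = StrictRedis.from_url('redis://redis:6379/0')
    try:
        with Lock(client, 'archive-generation', expire=60, blocking=False):
            ...  # do the work while holding the lock
    except NotAcquired:
        pass  # another worker holds it; skip or retry later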
@@ -14,7 +14,7 b" log = logging.getLogger('rhodecode.stats"
14 14
15 15 def statsd_config(config, prefix='statsd.'):
16 16 _config = {}
17 for key in config.keys():
17 for key in list(config.keys()):
18 18 if key.startswith(prefix):
19 19 _config[key[len(prefix):]] = config[key]
20 20 return _config
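The `list()` snapshot matters because iterating a live `keys()` view while the mapping changes size raises RuntimeError; copying the keys up front makes the loop safe even if the settings dict is mutated mid-iteration. A minimal reproduction of the failure mode this guards against:

    config = {'statsd.enabled': 'true', 'statsd.statsd_host': 'localhost'}
    for key in list(config.keys()):   # without list(): RuntimeError on pop
        if key.startswith('statsd.'):
            config.pop(key)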
@@ -24,9 +24,10 b' def client_from_config(configuration, pr'
24 24 from pyramid.settings import asbool
25 25
26 26 _config = statsd_config(configuration, prefix)
27 statsd_flag = _config.get('enabled')
27 28 statsd_enabled = asbool(_config.pop('enabled', False))
28 29 if not statsd_enabled:
29 log.debug('statsd client not enabled by statsd.enabled = flag, skipping...')
30 log.debug('statsd client not enabled by statsd.enabled = %s flag, skipping...', statsd_flag)
30 31 return
31 32
32 33 host = _config.pop('statsd_host', HOST)
@@ -16,6 +16,8 b''
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18 import os
19 import vcsserver
20 import vcsserver.settings
19 21
20 22
21 23 def get_config(ini_path, **kwargs):
@@ -38,3 +40,19 b' def get_app_config(ini_path):'
38 40 """
39 41 from paste.deploy.loadwsgi import appconfig
40 42 return appconfig(f'config:{ini_path}', relative_to=os.getcwd())
43
44
45 def configure_and_store_settings(global_config, app_settings):
46 """
47 Configure the settings module.
48 """
49 settings_merged = global_config.copy()
50 settings_merged.update(app_settings)
51
52 binary_dir = app_settings['core.binary_dir']
53
54 vcsserver.settings.BINARY_DIR = binary_dir
55
56 # Store the settings to make them available to other modules.
57 vcsserver.PYRAMID_SETTINGS = settings_merged
58 vcsserver.CONFIG = settings_merged
@@ -1,2 +1,14 b''
1 # use orjson by default
1 import json as stdlib_json
2
3 try:
4 # we keep simplejson to retain dump() functionality,
5 # which orjson doesn't support
6 import simplejson as sjson
7 except ImportError:
8 sjson = stdlib_json
9
10 try:
11 import orjson
2 12 import orjson as json
13 except ImportError:
14 json = stdlib_json
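One subtlety of this fallback chain: `orjson.dumps()` returns bytes, while the stdlib and simplejson return str, so call sites have to tolerate both; and `dump()` (to a file object) only exists on the simplejson/stdlib side, which is why `sjson` is kept. Illustrative usage:

    from vcsserver.lib.ext_json import json, sjson

    payload = json.dumps({'a': 1})  # bytes under orjson, str under the fallbacks
    text = payload.decode() if isinstance(payload, bytes) else payload

    with open('/tmp/out.json', 'w') as f:
        sjson.dump({'a': 1}, f)     # dump() is the reason sjson sticks around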
@@ -37,11 +37,19 b' COLORS = {'
37 37 }
38 38
39 39
40 def _inject_req_id(record, *args, **kwargs):
41 return record
42
43
44 class ExceptionAwareFormatter(logging.Formatter):
45 pass
46
47
40 48 class ColorFormatter(logging.Formatter):
41 49
42 50 def format(self, record):
43 51 """
44 Change record's levelname to use with COLORS enum
52 Changes record's levelname to use with COLORS enum
45 53 """
46 54 def_record = super().format(record)
47 55
@@ -51,3 +59,5 b' class ColorFormatter(logging.Formatter):'
51 59
52 60 colored_record = ''.join([start, def_record, end])
53 61 return colored_record
62
63
@@ -20,7 +20,7 b' import logging'
20 20
21 21 from repoze.lru import LRUCache
22 22
23 from vcsserver.str_utils import safe_str
23 from vcsserver.lib.str_utils import safe_str
24 24
25 25 log = logging.getLogger(__name__)
26 26
@@ -38,8 +38,8 b' from dogpile.cache.backends.file import '
38 38 from dogpile.cache.util import memoized_property
39 39
40 40 from vcsserver.lib.memory_lru_dict import LRUDict, LRUDictDebug
41 from vcsserver.str_utils import safe_bytes, safe_str
42 from vcsserver.type_utils import str2bool
41 from vcsserver.lib.str_utils import safe_bytes, safe_str
42 from vcsserver.lib.type_utils import str2bool
43 43
44 44 _default_max_size = 1024
45 45
@@ -26,8 +26,8 b' from dogpile.cache import CacheRegion'
26 26
27 27
28 28 from vcsserver.utils import sha1
29 from vcsserver.str_utils import safe_bytes
30 from vcsserver.type_utils import str2bool # noqa :required by imports from .utils
29 from vcsserver.lib.str_utils import safe_bytes
30 from vcsserver.lib.type_utils import str2bool # noqa :required by imports from .utils
31 31
32 32 from . import region_meta
33 33
@@ -155,4 +155,4 b' def splitnewlines(text: bytes):'
155 155 lines.pop()
156 156 else:
157 157 lines[-1] = lines[-1][:-1]
158 return lines
\ No newline at end of file
158 return lines
@@ -24,7 +24,7 b' from svn import ra'
24 24
25 25 from mercurial import error
26 26
27 from vcsserver.str_utils import safe_bytes
27 from vcsserver.lib.str_utils import safe_bytes
28 28
29 29 core.svn_config_ensure(None)
30 30 svn_config = core.svn_config_get_config(None)
1 NO CONTENT: file renamed from vcsserver/type_utils.py to vcsserver/lib/type_utils.py
@@ -25,9 +25,9 b' import dulwich.protocol'
25 25 from dulwich.protocol import CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K
26 26 from webob import Request, Response, exc
27 27
28 from vcsserver.lib.rc_json import json
28 from vcsserver.lib.ext_json import json
29 29 from vcsserver import hooks, subprocessio
30 from vcsserver.str_utils import ascii_bytes
30 from vcsserver.lib.str_utils import ascii_bytes
31 31
32 32
33 33 log = logging.getLogger(__name__)
@@ -40,7 +40,7 b' from dulwich.repo import Repo as Dulwich'
40 40
41 41 import rhodecode
42 42 from vcsserver import exceptions, settings, subprocessio
43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str, splitnewlines
43 from vcsserver.lib.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str, splitnewlines
44 44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
45 45 from vcsserver.hgcompat import (
46 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
@@ -551,6 +551,13 b' class GitRemote(RemoteBase):'
551 551 return _branch(context_uid, repo_id, commit_id)
552 552
553 553 @reraise_safe_exceptions
554 def delete_branch(self, wire, branch_name):
555 repo_init = self._factory.repo_libgit2(wire)
556 with repo_init as repo:
557 if branch := repo.lookup_branch(branch_name):
558 branch.delete()
559
560 @reraise_safe_exceptions
554 561 def commit_branches(self, wire, commit_id):
555 562 cache_on, context_uid, repo_id = self._cache_on(wire)
556 563 region = self._region(wire)
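The new `delete_branch` in plain pygit2 terms (assuming `repo_libgit2` wraps a pygit2 Repository, as the name suggests): `lookup_branch` returns None for a missing branch, so the walrus guard makes deletion a silent no-op in that case. Path and branch name below are placeholders:

    import pygit2

    repo = pygit2.Repository('/path/to/repo')
    if branch := repo.lookup_branch('feature-x'):
        branch.delete()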
@@ -31,6 +31,7 b' from mercurial import unionrepo'
31 31 from mercurial import verify
32 32 from mercurial import repair
33 33 from mercurial.error import AmbiguousPrefixLookupError
34 from mercurial.utils.urlutil import path as hg_path
34 35
35 36 import vcsserver
36 37 from vcsserver import exceptions
@@ -54,7 +55,7 b' from vcsserver.hgcompat import ('
54 55 hg_url as url_parser,
55 56 httpbasicauthhandler,
56 57 httpdigestauthhandler,
57 makepeer,
58 make_peer,
58 59 instance,
59 60 match,
60 61 memctx,
@@ -77,7 +78,7 b' from vcsserver.hgcompat import ('
77 78 patternmatcher,
78 79 hgext_strip,
79 80 )
80 from vcsserver.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes, convert_to_str
81 from vcsserver.lib.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes, convert_to_str
81 82 from vcsserver.vcs_base import RemoteBase
82 83 from vcsserver.config import hooks as hooks_config
83 84 from vcsserver.lib.exc_tracking import format_exc
@@ -85,7 +86,7 b' from vcsserver.lib.exc_tracking import f'
85 86 log = logging.getLogger(__name__)
86 87
87 88
88 def make_ui_from_config(repo_config):
89 def make_ui_from_config(repo_config, interactive=True):
89 90
90 91 class LoggingUI(ui.ui):
91 92
@@ -143,6 +144,7 b' def make_ui_from_config(repo_config):'
143 144 log.debug('Explicitly disable largefiles extension for repo.')
144 145 baseui.setconfig(b'extensions', b'largefiles', b'!')
145 146
147 baseui.setconfig(b'ui', b'interactive', b'true' if interactive else b'false')
146 148 return baseui
147 149
148 150
@@ -521,8 +523,10 b' class HgRemote(RemoteBase):'
521 523 # check for pure hg repos
522 524 log.debug(
523 525 "Verifying if URL is a Mercurial repository: %s", obfuscated_uri)
524 ui = make_ui_from_config(config)
525 peer_checker = makepeer(ui, safe_bytes(url))
526 # Create repo path with custom mercurial path object
527 ui = make_ui_from_config(config, interactive=False)
528 repo_path = hg_path(ui=ui, rawloc=safe_bytes(url))
529 peer_checker = make_peer(ui, repo_path, False)
526 530 peer_checker.lookup(b'tip')
527 531 except Exception as e:
528 532 log.warning("URL is not a valid Mercurial repository: %s",
@@ -800,12 +804,12 b' class HgRemote(RemoteBase):'
800 804 repo = self._factory.repo(wire)
801 805
802 806 # Disable any prompts for this repo
803 repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
807 repo.ui.setconfig(b'ui', b'interactive', b'false', b'-y')
804 808
805 809 bookmarks = list(dict(repo._bookmarks).keys())
806 810 remote = peer(repo, {}, safe_bytes(url))
807 811 # Disable any prompts for this remote
808 remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
812 remote.ui.setconfig(b'ui', b'interactive', b'false', b'-y')
809 813
810 814 return exchange.push(
811 815 repo, remote, newbranch=True, bookmarks=bookmarks).cgresult
@@ -1017,11 +1021,11 b' class HgRemote(RemoteBase):'
1017 1021 def pull(self, wire, url, commit_ids=None):
1018 1022 repo = self._factory.repo(wire)
1019 1023 # Disable any prompts for this repo
1020 repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
1024 repo.ui.setconfig(b'ui', b'interactive', b'false', b'-y')
1021 1025
1022 1026 remote = peer(repo, {}, safe_bytes(url))
1023 1027 # Disable any prompts for this remote
1024 remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
1028 remote.ui.setconfig(b'ui', b'interactive', b'false', b'-y')
1025 1029
1026 1030 if commit_ids:
1027 1031 commit_ids = [bin(commit_id) for commit_id in commit_ids]
@@ -1038,7 +1042,7 b' class HgRemote(RemoteBase):'
1038 1042
1039 1043 # Mercurial internally has a lot of logic that checks ONLY if
1040 1044 # option is defined, we just pass those if they are defined then
1041 opts = {}
1045 opts = {"remote_hidden": False}
1042 1046
1043 1047 if bookmark:
1044 1048 opts['bookmark'] = [safe_bytes(x) for x in bookmark] \
@@ -1100,7 +1104,7 b' class HgRemote(RemoteBase):'
1100 1104 # case of merge conflicts or different sub repository sources. By
1101 1105 # setting the interactive flag to `False` mercurial doesn't prompt the
1102 1106 # user but instead uses a default value.
1103 repo.ui.setconfig(b'ui', b'interactive', False)
1107 repo.ui.setconfig(b'ui', b'interactive', b'false')
1104 1108 commands.merge(baseui, repo, rev=safe_bytes(revision))
1105 1109
1106 1110 @reraise_safe_exceptions
@@ -1112,7 +1116,7 b' class HgRemote(RemoteBase):'
1112 1116 # case of merge conflicts or different sub repository sources. By
1113 1117 # setting the interactive flag to `False` mercurial doesn't prompt the
1114 1118 # user but instead uses a default value.
1115 repo.ui.setconfig(b'ui', b'interactive', False)
1119 repo.ui.setconfig(b'ui', b'interactive', b'false')
1116 1120 ms = hg_merge.mergestate(repo)
1117 1121 return [x for x in ms.unresolved()]
1118 1122
@@ -1133,7 +1137,7 b' class HgRemote(RemoteBase):'
1133 1137 # case of merge conflicts or different sub repository sources. By
1134 1138 # setting the interactive flag to `False` mercurial doesn't prompt the
1135 1139 # user but instead uses a default value.
1136 repo.ui.setconfig(b'ui', b'interactive', False)
1140 repo.ui.setconfig(b'ui', b'interactive', b'false')
1137 1141
1138 1142 rebase_kws = dict(
1139 1143 keep=not abort,
@@ -28,7 +28,6 b' import urllib.parse'
28 28 import urllib.error
29 29 import traceback
30 30
31
32 31 import svn.client # noqa
33 32 import svn.core # noqa
34 33 import svn.delta # noqa
@@ -47,10 +46,11 b' from vcsserver.base import ('
47 46 BinaryEnvelope,
48 47 )
49 48 from vcsserver.exceptions import NoContentException
50 from vcsserver.str_utils import safe_str, safe_bytes
51 from vcsserver.type_utils import assert_bytes
52 49 from vcsserver.vcs_base import RemoteBase
50 from vcsserver.lib.str_utils import safe_str, safe_bytes
51 from vcsserver.lib.type_utils import assert_bytes
53 52 from vcsserver.lib.svnremoterepo import svnremoterepo
53 from vcsserver.lib.svn_txn_utils import store_txn_id_data
54 54
55 55 log = logging.getLogger(__name__)
56 56
@@ -503,6 +503,11 b' class SvnRemote(RemoteBase):'
503 503 for node in removed:
504 504 TxnNodeProcessor(node, txn_root).remove()
505 505
506 svn_txn_id = safe_str(svn.fs.svn_fs_txn_name(txn))
507 full_repo_path = wire['path']
508 txn_id_data = {'svn_txn_id': svn_txn_id, 'rc_internal_commit': True}
509
510 store_txn_id_data(full_repo_path, svn_txn_id, txn_id_data)
506 511 commit_id = svn.repos.fs_commit_txn(repo, txn)
507 512
508 513 if timestamp:
@@ -27,7 +27,7 b' import mercurial.hgweb.hgweb_mod'
27 27 import webob.exc
28 28
29 29 from vcsserver import pygrack, exceptions, settings, git_lfs
30 from vcsserver.str_utils import ascii_bytes, safe_bytes
30 from vcsserver.lib.str_utils import ascii_bytes, safe_bytes
31 31
32 32 log = logging.getLogger(__name__)
33 33
@@ -136,6 +136,9 b' def make_hg_ui_from_config(repo_config):'
136 136 # make our hgweb quiet so it doesn't print output
137 137 baseui.setconfig(b'ui', b'quiet', b'true')
138 138
139 # use POST requests with args instead of GET with headers; fixes issues with big repos that have lots of branches
140 baseui.setconfig(b'experimental', b'httppostargs', b'true')
141
139 142 return baseui
140 143
141 144
@@ -28,7 +28,7 b' import logging'
28 28 import subprocess
29 29 import threading
30 30
31 from vcsserver.str_utils import safe_str
31 from vcsserver.lib.str_utils import safe_str
32 32
33 33 log = logging.getLogger(__name__)
34 34
@@ -26,7 +26,7 b' import mock'
26 26 import pytest
27 27
28 28 from vcsserver.hooks import HooksHttpClient
29 from vcsserver.lib.rc_json import json
29 from vcsserver.lib.ext_json import json
30 30 from vcsserver import hooks
31 31
32 32
@@ -24,7 +24,7 b' import tempfile'
24 24 from vcsserver import hook_utils
25 25 from vcsserver.hook_utils import set_permissions_if_needed, HOOKS_DIR_MODE, HOOKS_FILE_MODE
26 26 from vcsserver.tests.fixture import no_newline_id_generator
27 from vcsserver.str_utils import safe_bytes
27 from vcsserver.lib.str_utils import safe_bytes
28 28 from vcsserver.utils import AttributeDict
29 29
30 30
@@ -26,7 +26,7 b' import webtest'
26 26
27 27 from vcsserver import hooks, pygrack
28 28
29 from vcsserver.str_utils import ascii_bytes
29 from vcsserver.lib.str_utils import ascii_bytes
30 30
31 31
32 32 @pytest.fixture()
@@ -25,7 +25,7 b' import pytest'
25 25 import webtest
26 26
27 27 from vcsserver import scm_app
28 from vcsserver.str_utils import ascii_bytes
28 from vcsserver.lib.str_utils import ascii_bytes
29 29
30 30
31 31 def test_hg_does_not_accept_invalid_cmd(tmpdir):
@@ -22,7 +22,7 b' import sys'
22 22 import pytest
23 23
24 24 from vcsserver import subprocessio
25 from vcsserver.str_utils import ascii_bytes
25 from vcsserver.lib.str_utils import ascii_bytes
26 26
27 27
28 28 class FileLikeObj: # pragma: no cover
@@ -20,7 +20,7 b' import mock'
20 20 import pytest
21 21 import sys
22 22
23 from vcsserver.str_utils import ascii_bytes
23 from vcsserver.lib.str_utils import ascii_bytes
24 24
25 25
26 26 class MockPopen:
@@ -16,7 +16,7 b''
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import pytest
19 from vcsserver.str_utils import ascii_bytes, ascii_str, convert_to_str
19 from vcsserver.lib.str_utils import ascii_bytes, ascii_str, convert_to_str
20 20
21 21
22 22 @pytest.mark.parametrize('given, expected', [
@@ -19,7 +19,7 b' import wsgiref.simple_server'
19 19 import wsgiref.validate
20 20
21 21 from vcsserver import wsgi_app_caller
22 from vcsserver.str_utils import ascii_bytes, safe_str
22 from vcsserver.lib.str_utils import ascii_bytes, safe_str
23 23
24 24
25 25 @wsgiref.validate.validator
@@ -21,7 +21,7 b' import time'
21 21 import msgpack
22 22
23 23 import vcsserver
24 from vcsserver.str_utils import safe_str
24 from vcsserver.lib.str_utils import safe_str
25 25
26 26 log = logging.getLogger(__name__)
27 27
@@ -23,7 +23,7 b' import io'
23 23 import logging
24 24 import os
25 25
26 from vcsserver.str_utils import ascii_bytes
26 from vcsserver.lib.str_utils import ascii_bytes
27 27
28 28 log = logging.getLogger(__name__)
29 29
1 NO CONTENT: file was removed