# HG changeset patch
# User Andrii Verbytskyi
# Date 2024-07-18 16:04:53
# Node ID 6139e442fab926a8474d498e0290c0a5f572cf49
# Parent 480987d93adf23a48fada91a53c4595a3137672c
# Parent e9334ba1fc588ea5ad2a5d33832a9a66cedbea7c
merge: Resolved conflicts

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 5.0.3
+current_version = 5.1.0
 message = release: Bump version {current_version} to {new_version}
 
 [bumpversion:file:vcsserver/VERSION]
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -42,7 +42,6 @@ test-only:
 ruff-check:
 	ruff check --ignore F401 --ignore I001 --ignore E402 --ignore E501 --ignore F841 --exclude rhodecode/lib/dbmigrate --exclude .eggs --exclude .dev .
-
 
 .PHONY: pip-packages
 ## Show outdated packages
 pip-packages:
@@ -63,14 +62,23 @@ dev-sh:
 	sudo apt-get install -y zsh carapace-bin
 	rm -rf /home/rhodecode/.oh-my-zsh
 	curl https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh | sh
-	echo "source <(carapace _carapace)" > /home/rhodecode/.zsrc
-	PROMPT='%(?.%F{green}√.%F{red}?%?)%f %B%F{240}%1~%f%b %# ' zsh
+	@echo "source <(carapace _carapace)" > /home/rhodecode/.zsrc
+	@echo "${RC_DEV_CMD_HELP}"
+	@PROMPT='%(?.%F{green}√.%F{red}?%?)%f %B%F{240}%1~%f%b %# ' zsh
+
+
+.PHONY: dev-cleanup
+## Cleanup: pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
+dev-cleanup:
+	pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
+	rm -rf /tmp/*
 
 .PHONY: dev-env
 ## make dev-env based on the requirements files and install develop of packages
 ## Cleanup: pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
 dev-env:
+	sudo -u root chown rhodecode:rhodecode /home/rhodecode/.cache/pip/
 	pip install build virtualenv
 	pip wheel --wheel-dir=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
 	pip install --no-index --find-links=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
@@ -84,16 +92,13 @@ sh:
 	make dev-sh
 
-.PHONY: dev-srv
-## run develop server instance, docker exec -it $(docker ps -q --filter 'name=dev-enterprise-ce') /bin/bash
-dev-srv:
-	pserve --reload .dev/dev.ini
+## Allows changes of workers e.g make dev-srv-g workers=2
+workers?=1
 
-
-.PHONY: dev-srv-g
-## run gunicorn multi process workers
-dev-srv-g:
-	gunicorn --workers=4 --paste .dev/dev.ini --bind=0.0.0.0:10010 --config=.dev/gunicorn_config.py
+.PHONY: dev-srv
+## run gunicorn web server with reloader, use workers=N to set multiworker mode
+dev-srv:
+	gunicorn --paste=.dev/dev.ini --bind=0.0.0.0:10010 --config=.dev/gunicorn_config.py --reload --workers=$(workers)
 
 
 # Default command on calling make
diff --git a/configs/development.ini b/configs/development.ini
--- a/configs/development.ini
+++ b/configs/development.ini
@@ -1,40 +1,23 @@
-#
 ; #################################
 ; RHODECODE VCSSERVER CONFIGURATION
 ; #################################
 
 [server:main]
-; COMMON HOST/IP CONFIG
+; COMMON HOST/IP CONFIG, This applies mostly to develop setup,
+; Host port for gunicorn are controlled by gunicorn_conf.py
 host = 0.0.0.0
 port = 10010
 
-; ##################################################
-; WAITRESS WSGI SERVER - Recommended for Development
-; ##################################################
-
-; use server type
-use = egg:waitress#main
-
-; number of worker threads
-threads = 5
-
-; MAX BODY SIZE 100GB
-max_request_body_size = 107374182400
-
-; Use poll instead of select, fixes file descriptors limits problems.
-; May not work on old windows systems.
-asyncore_use_poll = true
-
 ; ###########################
 ; GUNICORN APPLICATION SERVER
 ; ###########################
 
-; run with gunicorn --paste rhodecode.ini
+; run with gunicorn --config gunicorn_conf.py --paste vcsserver.ini
 
 ; Module to use, this setting shouldn't be changed
-#use = egg:gunicorn#main
+use = egg:gunicorn#main
 
 [app:main]
 ; The %(here)s variable will be replaced with the absolute path of parent directory
@@ -78,10 +61,14 @@ debugtoolbar.exclude_prefixes =
 ; default locale used by VCS systems
 #locale = en_US.UTF-8
 
-; path to binaries for vcsserver, it should be set by the installer
+; path to binaries (hg,git,svn) for vcsserver, it should be set by the installer
 ; at installation time, e.g /home/user/.rccontrol/vcsserver-1/profile/bin
-; it can also be a path to nix-build output in case of development
-core.binary_dir = ""
+; or /usr/local/bin/rhodecode_bin/vcs_bin
+core.binary_dir =
+
+; Redis connection settings for svn integrations logic
+; This connection string needs to be the same on ce and vcsserver
+vcs.svn.redis_conn = redis://redis:6379/0
 
 ; Custom exception store path, defaults to TMPDIR
 ; This is used to store exception from RhodeCode in shared directory
diff --git a/configs/production.ini b/configs/production.ini
--- a/configs/production.ini
+++ b/configs/production.ini
@@ -1,12 +1,12 @@
-#
 ; #################################
 ; RHODECODE VCSSERVER CONFIGURATION
 ; #################################
 
 [server:main]
-; COMMON HOST/IP CONFIG
-host = 127.0.0.1
+; COMMON HOST/IP CONFIG, This applies mostly to develop setup,
+; Host port for gunicorn are controlled by gunicorn_conf.py
+host = 0.0.0.0
 port = 10010
 
 
@@ -14,7 +14,7 @@ port = 10010
 ; GUNICORN APPLICATION SERVER
 ; ###########################
 
-; run with gunicorn --paste rhodecode.ini
+; run with gunicorn --config gunicorn_conf.py --paste vcsserver.ini
 
 ; Module to use, this setting shouldn't be changed
 use = egg:gunicorn#main
@@ -41,10 +41,14 @@ use = egg:rhodecode-vcsserver
 ; default locale used by VCS systems
 #locale = en_US.UTF-8
 
-; path to binaries for vcsserver, it should be set by the installer
+; path to binaries (hg,git,svn) for vcsserver, it should be set by the installer
 ; at installation time, e.g /home/user/.rccontrol/vcsserver-1/profile/bin
-; it can also be a path to nix-build output in case of development
-core.binary_dir = ""
+; or /usr/local/bin/rhodecode_bin/vcs_bin
+core.binary_dir =
+
+; Redis connection settings for svn integrations logic
+; This connection string needs to be the same on ce and vcsserver
+vcs.svn.redis_conn = redis://redis:6379/0
 
 ; Custom exception store path, defaults to TMPDIR
 ; This is used to store exception from RhodeCode in shared directory
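
Both ini files introduce the same `vcs.svn.redis_conn` knob. A connection string in this form is what redis-py consumes directly; a minimal sketch of that (the actual consumer lives in `vcsserver.lib.svn_txn_utils`, which is not part of this diff, so treat this purely as an illustration):

```python
import redis

# the same URL that both CE and vcsserver must share (value from the ini above)
svn_redis_conn = "redis://redis:6379/0"

client = redis.Redis.from_url(svn_redis_conn)
client.ping()  # raises redis.exceptions.ConnectionError when the server is unreachable
```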
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,28 +20,25 @@ celery==5.3.6
     vine==5.1.0
     python-dateutil==2.8.2
         six==1.16.0
-    tzdata==2023.4
+    tzdata==2024.1
     vine==5.1.0
 contextlib2==21.6.0
-cov-core==1.15.0
-    coverage==7.2.3
-diskcache==5.6.3
-dogpile.cache==1.3.0
+dogpile.cache==1.3.3
     decorator==5.1.1
     stevedore==5.1.0
         pbr==5.11.1
 dulwich==0.21.6
     urllib3==1.26.14
+fsspec==2024.6.0
 gunicorn==21.2.0
-    packaging==23.1
-hg-evolve==11.0.2
+    packaging==24.0
+hg-evolve==11.1.3
 importlib-metadata==6.0.0
     zipp==3.15.0
-mercurial==6.3.3
-mock==5.0.2
+mercurial==6.7.4
 more-itertools==9.1.0
-msgpack==1.0.7
-orjson==3.9.13
+msgpack==1.0.8
+orjson==3.10.3
 psutil==5.9.8
 py==1.11.0
 pygit2==1.13.3
@@ -59,10 +56,38 @@ pyramid==2.0.2
     venusian==3.0.0
     webob==1.8.7
     zope.deprecation==5.0.0
-    zope.interface==6.1.0
-redis==5.0.1
+    zope.interface==6.3.0
+redis==5.0.4
     async-timeout==4.0.3
 repoze.lru==0.7
+s3fs==2024.6.0
+    aiobotocore==2.13.0
+        aiohttp==3.9.5
+            aiosignal==1.3.1
+                frozenlist==1.4.1
+            attrs==22.2.0
+            frozenlist==1.4.1
+            multidict==6.0.5
+            yarl==1.9.4
+                idna==3.4
+                multidict==6.0.5
+        aioitertools==0.11.0
+        botocore==1.34.106
+            jmespath==1.0.1
+            python-dateutil==2.8.2
+                six==1.16.0
+            urllib3==1.26.14
+        wrapt==1.16.0
+    aiohttp==3.9.5
+        aiosignal==1.3.1
+            frozenlist==1.4.1
+        attrs==22.2.0
+        frozenlist==1.4.1
+        multidict==6.0.5
+        yarl==1.9.4
+            idna==3.4
+            multidict==6.0.5
+    fsspec==2024.6.0
 scandir==1.10.0
 setproctitle==1.3.3
 subvertpy==0.11.0
diff --git a/requirements_test.txt b/requirements_test.txt
--- a/requirements_test.txt
+++ b/requirements_test.txt
@@ -1,42 +1,45 @@
 # test related requirements
-
-cov-core==1.15.0
-    coverage==7.2.3
-mock==5.0.2
-py==1.11.0
-pytest-cov==4.0.0
-    coverage==7.2.3
-    pytest==7.3.1
-        attrs==22.2.0
+mock==5.1.0
+pytest-cov==4.1.0
+    coverage==7.4.3
+    pytest==8.1.1
         iniconfig==2.0.0
-        packaging==23.1
-        pluggy==1.0.0
+        packaging==24.0
+        pluggy==1.4.0
+pytest-env==1.1.3
+    pytest==8.1.1
+        iniconfig==2.0.0
+        packaging==24.0
+        pluggy==1.4.0
 pytest-profiling==1.7.0
     gprof2dot==2022.7.29
-    pytest==7.3.1
-        attrs==22.2.0
+    pytest==8.1.1
         iniconfig==2.0.0
-        packaging==23.1
-        pluggy==1.0.0
+        packaging==24.0
+        pluggy==1.4.0
     six==1.16.0
-pytest-runner==6.0.0
-pytest-sugar==0.9.7
-    packaging==23.1
-    pytest==7.3.1
-        attrs==22.2.0
+pytest-rerunfailures==13.0
+    packaging==24.0
+    pytest==8.1.1
         iniconfig==2.0.0
-        packaging==23.1
-        pluggy==1.0.0
-    termcolor==2.3.0
-pytest-timeout==2.1.0
-    pytest==7.3.1
-        attrs==22.2.0
+        packaging==24.0
+        pluggy==1.4.0
+pytest-runner==6.0.1
+pytest-sugar==1.0.0
+    packaging==24.0
+    pytest==8.1.1
+        iniconfig==2.0.0
+        packaging==24.0
+        pluggy==1.4.0
+    termcolor==2.4.0
+pytest-timeout==2.3.1
+    pytest==8.1.1
+        iniconfig==2.0.0
+        packaging==24.0
+        pluggy==1.4.0
 webtest==3.0.0
-    beautifulsoup4==4.11.2
-        soupsieve==2.4
+    beautifulsoup4==4.12.3
+        soupsieve==2.5
     waitress==3.0.0
     webob==1.8.7
diff --git a/vcsserver/VERSION b/vcsserver/VERSION
--- a/vcsserver/VERSION
+++ b/vcsserver/VERSION
@@ -1,1 +1,1 @@
-5.0.3
\ No newline at end of file
+5.1.0
\ No newline at end of file
diff --git a/vcsserver/base.py b/vcsserver/base.py
--- a/vcsserver/base.py
+++ b/vcsserver/base.py
@@ -20,12 +20,12 @@ import tempfile
 import logging
 import urllib.parse
 
-from vcsserver.lib.rc_cache.archive_cache import get_archival_cache_store
+from vcsserver.lib.archive_cache import get_archival_cache_store
 
 from vcsserver import exceptions
 from vcsserver.exceptions import NoContentException
 from vcsserver.hgcompat import archival
-from vcsserver.str_utils import safe_bytes
+from vcsserver.lib.str_utils import safe_bytes
 from vcsserver.lib.exc_tracking import format_exc
 
 log = logging.getLogger(__name__)
@@ -120,9 +120,8 @@ def store_archive_in_cache(node_walker,
     d_cache = get_archival_cache_store(config=cache_config)
 
     if archive_key in d_cache:
-        with d_cache as d_cache_reader:
-            reader, tag = d_cache_reader.get(archive_key, read=True, tag=True, retry=True)
-        return reader.name
+        reader, metadata = d_cache.fetch(archive_key)
+        return reader.name
 
     archive_tmp_path = safe_bytes(tempfile.mkstemp()[1])
     log.debug('Creating new temp archive in %s', archive_tmp_path)
@@ -139,6 +138,7 @@ def store_archive_in_cache(node_walker,
     for f in node_walker(commit_id, archive_at_path):
         f_path = os.path.join(safe_bytes(archive_dir_name), safe_bytes(f.path).lstrip(b'/'))
+
         try:
             archiver.addfile(f_path, f.mode, f.is_link, f.raw_bytes())
         except NoContentException:
@@ -146,34 +146,28 @@ def store_archive_in_cache(node_walker,
             # directories which are not supported by archiver
             archiver.addfile(os.path.join(f_path, b'.dir'), f.mode, f.is_link, b'')
 
+    metadata = dict([
+        ('commit_id', commit_id),
+        ('mtime', mtime),
+    ])
+    metadata.update(extra_metadata)
     if write_metadata:
-        metadata = dict([
-            ('commit_id', commit_id),
-            ('mtime', mtime),
-        ])
-        metadata.update(extra_metadata)
-
         meta = [safe_bytes(f"{f_name}:{value}") for f_name, value in metadata.items()]
         f_path = os.path.join(safe_bytes(archive_dir_name), b'.archival.txt')
         archiver.addfile(f_path, 0o644, False, b'\n'.join(meta))
 
     archiver.done()
 
-    # ensure set & get are atomic
-    with d_cache.transact():
-
-        with open(archive_tmp_path, 'rb') as archive_file:
-            add_result = d_cache.set(archive_key, archive_file, read=True, tag='db-name', retry=True)
-        if not add_result:
-            log.error('Failed to store cache for key=%s', archive_key)
+    with open(archive_tmp_path, 'rb') as archive_file:
+        add_result = d_cache.store(archive_key, archive_file, metadata=metadata)
+    if not add_result:
+        log.error('Failed to store cache for key=%s', archive_key)
 
-        os.remove(archive_tmp_path)
+    os.remove(archive_tmp_path)
 
-        reader, tag = d_cache.get(archive_key, read=True, tag=True, retry=True)
-        if not reader:
-            raise AssertionError(f'empty reader on key={archive_key} added={add_result}')
+    reader, metadata = d_cache.fetch(archive_key)
 
-        return reader.name
+    return reader.name
 
 
 class BinaryEnvelope:
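
The `vcsserver/base.py` hunks above replace the old diskcache-style `get`/`set`/`transact` calls with the new `store`/`fetch`/`__contains__` API defined later in this patch. A minimal sketch of the resulting calling convention; `cache_config`, `archive_key`, `tmp_archive_path`, `commit_id` and `mtime` are assumed inputs:

```python
from vcsserver.lib.archive_cache import get_archival_cache_store

# assumption: `cache_config` carries the archive_cache.* settings and
# `archive_key` is the unique archive name, e.g. 'my-archive.zip'
d_cache = get_archival_cache_store(config=cache_config)  # cached module-level singleton

if archive_key in d_cache:
    reader, metadata = d_cache.fetch(archive_key)  # ShardFileReader + the stored .key.json dict
    archive_path = reader.name
else:
    with open(tmp_archive_path, 'rb') as archive_file:  # hypothetical temp file
        d_cache.store(archive_key, archive_file,
                      metadata={'commit_id': commit_id, 'mtime': mtime})
```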
diff --git a/vcsserver/config/settings_maker.py b/vcsserver/config/settings_maker.py
--- a/vcsserver/config/settings_maker.py
+++ b/vcsserver/config/settings_maker.py
@@ -24,7 +24,7 @@ import logging
 import tempfile
 import logging.config
 
-from vcsserver.type_utils import str2bool, aslist
+from vcsserver.lib.type_utils import str2bool, aslist
 
 log = logging.getLogger(__name__)
diff --git a/vcsserver/git_lfs/app.py b/vcsserver/git_lfs/app.py
--- a/vcsserver/git_lfs/app.py
+++ b/vcsserver/git_lfs/app.py
@@ -24,10 +24,10 @@ from pyramid.httpexceptions import (
     HTTPBadRequest, HTTPNotImplemented, HTTPNotFound, HTTPForbidden,
     HTTPUnprocessableEntity)
 
-from vcsserver.lib.rc_json import json
+from vcsserver.lib.ext_json import json
 from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
 from vcsserver.git_lfs.utils import safe_result, get_cython_compat_decorator
-from vcsserver.str_utils import safe_int
+from vcsserver.lib.str_utils import safe_int
 
 log = logging.getLogger(__name__)
diff --git a/vcsserver/git_lfs/tests/test_lfs_app.py b/vcsserver/git_lfs/tests/test_lfs_app.py
--- a/vcsserver/git_lfs/tests/test_lfs_app.py
+++ b/vcsserver/git_lfs/tests/test_lfs_app.py
@@ -19,8 +19,8 @@ import os
 import pytest
 from webtest.app import TestApp as WebObTestApp
 
-from vcsserver.lib.rc_json import json
-from vcsserver.str_utils import safe_bytes
+from vcsserver.lib.ext_json import json
+from vcsserver.lib.str_utils import safe_bytes
 from vcsserver.git_lfs.app import create_app
 from vcsserver.git_lfs.lib import LFSOidStore
diff --git a/vcsserver/git_lfs/tests/test_lib.py b/vcsserver/git_lfs/tests/test_lib.py
--- a/vcsserver/git_lfs/tests/test_lib.py
+++ b/vcsserver/git_lfs/tests/test_lib.py
@@ -17,7 +17,7 @@ import os
 
 import pytest
 
-from vcsserver.str_utils import safe_bytes
+from vcsserver.lib.str_utils import safe_bytes
 from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
diff --git a/vcsserver/hgcompat.py b/vcsserver/hgcompat.py
--- a/vcsserver/hgcompat.py
+++ b/vcsserver/hgcompat.py
@@ -24,7 +24,7 @@ from mercurial import demandimport
 
 # patch demandimport, due to bug in mercurial when it always triggers
 # demandimport.enable()
-from vcsserver.str_utils import safe_bytes
+from vcsserver.lib.str_utils import safe_bytes
 
 demandimport.enable = lambda *args, **kwargs: 1
 
@@ -56,7 +56,7 @@ from mercurial.node import bin, hex
 from mercurial.encoding import tolocal
 from mercurial.discovery import findcommonoutgoing
 from mercurial.hg import peer
-from mercurial.httppeer import makepeer
+from mercurial.httppeer import make_peer
 from mercurial.utils.urlutil import url as hg_url
 from mercurial.scmutil import revrange, revsymbol
 from mercurial.node import nullrev
diff --git a/vcsserver/hook_utils/__init__.py b/vcsserver/hook_utils/__init__.py
--- a/vcsserver/hook_utils/__init__.py
+++ b/vcsserver/hook_utils/__init__.py
@@ -25,7 +25,7 @@ import pkg_resources
 
 import vcsserver
 import vcsserver.settings
-from vcsserver.str_utils import safe_bytes
+from vcsserver.lib.str_utils import safe_bytes
 
 log = logging.getLogger(__name__)
@@ -87,8 +87,16 @@ def install_git_hooks(repo_path, bare, e
         if _rhodecode_hook or force_create:
             log.debug('writing git %s hook file at %s !', h_type, _hook_file)
+            env_expand = str([
+                ('RC_INI_FILE', vcsserver.CONFIG['__file__']),
+                ('RC_CORE_BINARY_DIR', vcsserver.settings.BINARY_DIR),
+                ('RC_GIT_EXECUTABLE', vcsserver.settings.GIT_EXECUTABLE()),
+                ('RC_SVN_EXECUTABLE', vcsserver.settings.SVN_EXECUTABLE()),
+                ('RC_SVNLOOK_EXECUTABLE', vcsserver.settings.SVNLOOK_EXECUTABLE()),
+            ])
             try:
                 with open(_hook_file, 'wb') as f:
+                    template = template.replace(b'_OS_EXPAND_', safe_bytes(env_expand))
                     template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
                     template = template.replace(b'_DATE_', safe_bytes(timestamp))
                     template = template.replace(b'_ENV_', safe_bytes(executable))
@@ -141,17 +149,17 @@ def install_svn_hooks(repo_path, executa
             log.debug('writing svn %s hook file at %s !', h_type, _hook_file)
 
             env_expand = str([
+                ('RC_INI_FILE', vcsserver.CONFIG['__file__']),
                 ('RC_CORE_BINARY_DIR', vcsserver.settings.BINARY_DIR),
                 ('RC_GIT_EXECUTABLE', vcsserver.settings.GIT_EXECUTABLE()),
                 ('RC_SVN_EXECUTABLE', vcsserver.settings.SVN_EXECUTABLE()),
                 ('RC_SVNLOOK_EXECUTABLE', vcsserver.settings.SVNLOOK_EXECUTABLE()),
-
             ])
             try:
                 with open(_hook_file, 'wb') as f:
+                    template = template.replace(b'_OS_EXPAND_', safe_bytes(env_expand))
                     template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
                     template = template.replace(b'_DATE_', safe_bytes(timestamp))
-                    template = template.replace(b'_OS_EXPAND_', safe_bytes(env_expand))
                     template = template.replace(b'_ENV_', safe_bytes(executable))
                     template = template.replace(b'_PATH_', safe_bytes(path))
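
The `_OS_EXPAND_` mechanism is worth spelling out: `env_expand` is the *source text* of a Python list of tuples, spliced verbatim into the hook template, so the generated hook script contains a list literal it can iterate even when its environment arrives empty. A small self-contained sketch (the paths are made up for illustration):

```python
# what install_git_hooks()/install_svn_hooks() embed; paths here are made up
env_expand = str([
    ('RC_INI_FILE', '/etc/rhodecode/vcsserver.ini'),
    ('RC_CORE_BINARY_DIR', '/usr/local/bin/rhodecode_bin/vcs_bin'),
])

# stand-in for the hook template: the placeholder is replaced by the source
# text of the list, so the written hook file contains a plain list literal
template = b"for env_k, env_v in _OS_EXPAND_:\n    os.environ[env_k] = env_v\n"
print(template.replace(b'_OS_EXPAND_', env_expand.encode('utf-8')).decode('utf-8'))
```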
diff --git a/vcsserver/hook_utils/hook_templates/git_post_receive.py.tmpl b/vcsserver/hook_utils/hook_templates/git_post_receive.py.tmpl
--- a/vcsserver/hook_utils/hook_templates/git_post_receive.py.tmpl
+++ b/vcsserver/hook_utils/hook_templates/git_post_receive.py.tmpl
@@ -1,4 +1,5 @@
 #!_ENV_
+
 import os
 import sys
 path_adjust = [_PATH_]
@@ -6,6 +7,11 @@ path_adjust = [_PATH_]
 if path_adjust:
     sys.path = path_adjust
 
+# special trick to pass in some information from rc to hooks
+# mod_dav strips ALL env vars and we can't even access things like PATH
+for env_k, env_v in _OS_EXPAND_:
+    os.environ[env_k] = env_v
+
 try:
     from vcsserver import hooks
 except ImportError:
@@ -30,11 +36,13 @@ def main():
     repo_path = os.getcwd()
     push_data = sys.stdin.readlines()
-    os.environ['RC_HOOK_VER'] = RC_HOOK_VER
+
     # os.environ is modified here by a subprocess call that
     # runs git and later git executes this hook.
     # Environ gets some additional info from rhodecode system
     # like IP or username from basic-auth
+
+    os.environ['RC_HOOK_VER'] = RC_HOOK_VER
     try:
         result = hooks.git_post_receive(repo_path, push_data, os.environ)
         sys.exit(result)
diff --git a/vcsserver/hook_utils/hook_templates/git_pre_receive.py.tmpl b/vcsserver/hook_utils/hook_templates/git_pre_receive.py.tmpl
--- a/vcsserver/hook_utils/hook_templates/git_pre_receive.py.tmpl
+++ b/vcsserver/hook_utils/hook_templates/git_pre_receive.py.tmpl
@@ -1,4 +1,5 @@
 #!_ENV_
+
 import os
 import sys
 path_adjust = [_PATH_]
@@ -6,6 +7,11 @@ path_adjust = [_PATH_]
 if path_adjust:
     sys.path = path_adjust
 
+# special trick to pass in some information from rc to hooks
+# mod_dav strips ALL env vars and we can't even access things like PATH
+for env_k, env_v in _OS_EXPAND_:
+    os.environ[env_k] = env_v
+
 try:
     from vcsserver import hooks
 except ImportError:
@@ -30,11 +36,13 @@ def main():
     repo_path = os.getcwd()
     push_data = sys.stdin.readlines()
-    os.environ['RC_HOOK_VER'] = RC_HOOK_VER
+
     # os.environ is modified here by a subprocess call that
     # runs git and later git executes this hook.
     # Environ gets some additional info from rhodecode system
     # like IP or username from basic-auth
+
+    os.environ['RC_HOOK_VER'] = RC_HOOK_VER
     try:
         result = hooks.git_pre_receive(repo_path, push_data, os.environ)
         sys.exit(result)
diff --git a/vcsserver/hook_utils/hook_templates/svn_post_commit_hook.py.tmpl b/vcsserver/hook_utils/hook_templates/svn_post_commit_hook.py.tmpl
--- a/vcsserver/hook_utils/hook_templates/svn_post_commit_hook.py.tmpl
+++ b/vcsserver/hook_utils/hook_templates/svn_post_commit_hook.py.tmpl
@@ -7,6 +7,11 @@ path_adjust = [_PATH_]
 if path_adjust:
     sys.path = path_adjust
 
+# special trick to pass in some information from rc to hooks
+# mod_dav strips ALL env vars and we can't even access things like PATH
+for env_k, env_v in _OS_EXPAND_:
+    os.environ[env_k] = env_v
+
 try:
     from vcsserver import hooks
 except ImportError:
@@ -33,13 +38,13 @@ def main():
     if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_SVN_HOOKS'):
         sys.exit(0)
 
-    repo_path = os.getcwd()
+    cwd_repo_path = os.getcwd()
     push_data = sys.argv[1:]
 
     os.environ['RC_HOOK_VER'] = RC_HOOK_VER
 
     try:
-        result = hooks.svn_post_commit(repo_path, push_data, os.environ)
+        result = hooks.svn_post_commit(cwd_repo_path, push_data, os.environ)
         sys.exit(result)
     except Exception as error:
         # TODO: johbo: Improve handling of this special case
diff --git a/vcsserver/hook_utils/hook_templates/svn_pre_commit_hook.py.tmpl b/vcsserver/hook_utils/hook_templates/svn_pre_commit_hook.py.tmpl
--- a/vcsserver/hook_utils/hook_templates/svn_pre_commit_hook.py.tmpl
+++ b/vcsserver/hook_utils/hook_templates/svn_pre_commit_hook.py.tmpl
@@ -7,6 +7,11 @@ path_adjust = [_PATH_]
 if path_adjust:
     sys.path = path_adjust
 
+# special trick to pass in some information from rc to hooks
+# mod_dav strips ALL env vars and we can't even access things like PATH
+for env_k, env_v in _OS_EXPAND_:
+    os.environ[env_k] = env_v
+
 try:
     from vcsserver import hooks
 except ImportError:
@@ -37,13 +42,12 @@ def main():
     if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_SVN_HOOKS'):
         sys.exit(0)
 
-    repo_path = os.getcwd()
+    cwd_repo_path = os.getcwd()
    push_data = sys.argv[1:]
 
     os.environ['RC_HOOK_VER'] = RC_HOOK_VER
-
     try:
-        result = hooks.svn_pre_commit(repo_path, push_data, os.environ)
+        result = hooks.svn_pre_commit(cwd_repo_path, push_data, os.environ)
         sys.exit(result)
     except Exception as error:
         # TODO: johbo: Improve handling of this special case
diff --git a/vcsserver/hooks.py b/vcsserver/hooks.py
--- a/vcsserver/hooks.py
+++ b/vcsserver/hooks.py
@@ -31,9 +31,10 @@ from celery import Celery
 import mercurial.scmutil
 import mercurial.node
 
-from vcsserver.lib.rc_json import json
 from vcsserver import exceptions, subprocessio, settings
-from vcsserver.str_utils import ascii_str, safe_str
+from vcsserver.lib.ext_json import json
+from vcsserver.lib.str_utils import ascii_str, safe_str
+from vcsserver.lib.svn_txn_utils import get_txn_id_from_store
 from vcsserver.remote.git_remote import Repository
 
 celery_app = Celery('__vcsserver__')
@@ -95,9 +96,9 @@ class HooksCeleryClient:
         celery_app.config_from_object({
             'broker_url': queue, 'result_backend': backend,
             'broker_connection_retry_on_startup': True,
-            'task_serializer': 'msgpack',
+            'task_serializer': 'json',
             'accept_content': ['json', 'msgpack'],
-            'result_serializer': 'msgpack',
+            'result_serializer': 'json',
             'result_accept_content': ['json', 'msgpack']
         })
         self.celery_app = celery_app
@@ -293,20 +294,28 @@ def _get_hg_env(old_rev, new_rev, txnid,
     return [(k, v) for k, v in env.items()]
 
 
+def _get_ini_settings(ini_file):
+    from vcsserver.http_main import sanitize_settings_and_apply_defaults
+    from vcsserver.lib.config_utils import get_app_config_lightweight, configure_and_store_settings
+
+    global_config = {'__file__': ini_file}
+    ini_settings = get_app_config_lightweight(ini_file)
+    sanitize_settings_and_apply_defaults(global_config, ini_settings)
+    configure_and_store_settings(global_config, ini_settings)
+
+    return ini_settings
+
+
 def _fix_hooks_executables(ini_path=''):
     """
     This is a trick to set proper settings.EXECUTABLE paths for certain execution patterns
     especially for subversion where hooks strip entire env, and calling just 'svn'
    command will most likely fail because svn is not on PATH
     """
-    from vcsserver.http_main import sanitize_settings_and_apply_defaults
-    from vcsserver.lib.config_utils import get_app_config_lightweight
-
+    # set defaults, in case we can't read from ini_file
     core_binary_dir = settings.BINARY_DIR or '/usr/local/bin/rhodecode_bin/vcs_bin'
     if ini_path:
-
-        ini_settings = get_app_config_lightweight(ini_path)
-        ini_settings = sanitize_settings_and_apply_defaults({'__file__': ini_path}, ini_settings)
+        ini_settings = _get_ini_settings(ini_path)
         core_binary_dir = ini_settings['core.binary_dir']
 
     settings.BINARY_DIR = core_binary_dir
@@ -570,7 +579,7 @@ def git_pre_receive(unused_repo_path, re
     rev_data = _parse_git_ref_lines(revision_lines)
     if 'push' not in extras['hooks']:
         return 0
-    _fix_hooks_executables()
+    _fix_hooks_executables(env.get('RC_INI_FILE'))
 
     empty_commit_id = '0' * 40
 
@@ -616,7 +625,7 @@ def git_post_receive(unused_repo_path, r
     if 'push' not in extras['hooks']:
         return 0
 
-    _fix_hooks_executables()
+    _fix_hooks_executables(env.get('RC_INI_FILE'))
 
     rev_data = _parse_git_ref_lines(revision_lines)
 
@@ -720,37 +729,8 @@ def git_post_receive(unused_repo_path, r
     return status_code
 
 
-def _get_extras_from_txn_id(path, txn_id):
-    _fix_hooks_executables()
-
-    extras = {}
-    try:
-        cmd = [settings.SVNLOOK_EXECUTABLE(), 'pget',
-               '-t', txn_id,
-               '--revprop', path, 'rc-scm-extras']
-        stdout, stderr = subprocessio.run_command(
-            cmd, env=os.environ.copy())
-        extras = json.loads(base64.urlsafe_b64decode(stdout))
-    except Exception:
-        log.exception('Failed to extract extras info from txn_id')
-
-    return extras
-
-
-def _get_extras_from_commit_id(commit_id, path):
-    _fix_hooks_executables()
-
-    extras = {}
-    try:
-        cmd = [settings.SVNLOOK_EXECUTABLE(), 'pget',
-               '-r', commit_id,
-               '--revprop', path, 'rc-scm-extras']
-        stdout, stderr = subprocessio.run_command(
-            cmd, env=os.environ.copy())
-        extras = json.loads(base64.urlsafe_b64decode(stdout))
-    except Exception:
-        log.exception('Failed to extract extras info from commit_id')
-
+def get_extras_from_txn_id(repo_path, txn_id):
+    extras = get_txn_id_from_store(repo_path, txn_id)
     return extras
 
 
@@ -763,10 +743,18 @@ def svn_pre_commit(repo_path, commit_dat
     if env.get('RC_SCM_DATA'):
         extras = json.loads(env['RC_SCM_DATA'])
     else:
+        ini_path = env.get('RC_INI_FILE')
+        if ini_path:
+            _get_ini_settings(ini_path)
         # fallback method to read from TXN-ID stored data
-        extras = _get_extras_from_txn_id(path, txn_id)
-        if not extras:
-            return 0
+        extras = get_extras_from_txn_id(path, txn_id)
+
+    if not extras:
+        raise ValueError('SVN-PRE-COMMIT: Failed to extract context data in called extras for hook execution')
+
+    if extras.get('rc_internal_commit'):
+        # special marker for internal commit, we don't call hooks client
+        return 0
 
     extras['hook_type'] = 'pre_commit'
     extras['commit_ids'] = [txn_id]
@@ -802,10 +790,18 @@ def svn_post_commit(repo_path, commit_da
     if env.get('RC_SCM_DATA'):
         extras = json.loads(env['RC_SCM_DATA'])
     else:
+        ini_path = env.get('RC_INI_FILE')
+        if ini_path:
+            _get_ini_settings(ini_path)
         # fallback method to read from TXN-ID stored data
-        extras = _get_extras_from_commit_id(commit_id, path)
-        if not extras:
-            return 0
+        extras = get_extras_from_txn_id(path, txn_id)
+
+    if not extras and txn_id:
+        raise ValueError('SVN-POST-COMMIT: Failed to extract context data in called extras for hook execution')
+
+    if extras.get('rc_internal_commit'):
+        # special marker for internal commit, we don't call hooks client
+        return 0
 
     extras['hook_type'] = 'post_commit'
     extras['commit_ids'] = [commit_id]
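
Note the serializer switch in `HooksCeleryClient`: tasks and results now travel as json while both json and msgpack stay in the accepted content types, which keeps mixed-version peers working during upgrades. An equivalent standalone configuration, with placeholder broker/backend URLs:

```python
from celery import Celery

celery_app = Celery('__vcsserver__')
celery_app.config_from_object({
    'broker_url': 'redis://redis:6379/0',       # placeholder
    'result_backend': 'redis://redis:6379/0',   # placeholder
    'broker_connection_retry_on_startup': True,
    'task_serializer': 'json',                  # was msgpack before this patch
    'accept_content': ['json', 'msgpack'],      # both stay accepted
    'result_serializer': 'json',
    'result_accept_content': ['json', 'msgpack'],
})
```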
diff --git a/vcsserver/http_main.py b/vcsserver/http_main.py
--- a/vcsserver/http_main.py
+++ b/vcsserver/http_main.py
@@ -37,20 +37,23 @@ from pyramid.wsgi import wsgiapp
 from pyramid.response import Response
 
 from vcsserver.base import BytesEnvelope, BinaryEnvelope
-from vcsserver.lib.rc_json import json
+
 from vcsserver.config.settings_maker import SettingsMaker
-from vcsserver.str_utils import safe_int
-from vcsserver.lib.statsd_client import StatsdClient
+
 from vcsserver.tweens.request_wrapper import get_headers_call_context
 
-import vcsserver
-from vcsserver import remote_wsgi, scm_app, settings, hgpatches
+from vcsserver import remote_wsgi, scm_app, hgpatches
+from vcsserver.server import VcsServer
 from vcsserver.git_lfs.app import GIT_LFS_CONTENT_TYPE, GIT_LFS_PROTO_PAT
 from vcsserver.echo_stub import remote_wsgi as remote_wsgi_stub
 from vcsserver.echo_stub.echo_app import EchoApp
 from vcsserver.exceptions import HTTPRepoLocked, HTTPRepoBranchProtected
 from vcsserver.lib.exc_tracking import store_exception, format_exc
-from vcsserver.server import VcsServer
+from vcsserver.lib.str_utils import safe_int
+from vcsserver.lib.statsd_client import StatsdClient
+from vcsserver.lib.ext_json import json
+from vcsserver.lib.config_utils import configure_and_store_settings
+
 
 strict_vcs = True
@@ -94,8 +97,7 @@ log = logging.getLogger(__name__)
 try:
     locale.setlocale(locale.LC_ALL, '')
 except locale.Error as e:
-    log.error(
-        'LOCALE ERROR: failed to set LC_ALL, fallback to LC_ALL=C, org error: %s', e)
+    log.error('LOCALE ERROR: failed to set LC_ALL, fallback to LC_ALL=C, org error: %s', e)
     os.environ['LC_ALL'] = 'C'
 
@@ -233,7 +235,7 @@ class HTTPApplication:
         self.global_config = global_config
         self.config.include('vcsserver.lib.rc_cache')
-        self.config.include('vcsserver.lib.rc_cache.archive_cache')
+        self.config.include('vcsserver.lib.archive_cache')
 
         settings_locale = settings.get('locale', '') or 'en_US.UTF-8'
         vcs = VCS(locale_conf=settings_locale, cache_config=settings)
@@ -248,25 +250,10 @@ class HTTPApplication:
             log.warning("Using EchoApp for VCS operations.")
             self.remote_wsgi = remote_wsgi_stub
 
-        self._configure_settings(global_config, settings)
+        configure_and_store_settings(global_config, settings)
 
         self._configure()
 
-    def _configure_settings(self, global_config, app_settings):
-        """
-        Configure the settings module.
-        """
-        settings_merged = global_config.copy()
-        settings_merged.update(app_settings)
-
-        binary_dir = app_settings['core.binary_dir']
-
-        settings.BINARY_DIR = binary_dir
-
-        # Store the settings to make them available to other modules.
-        vcsserver.PYRAMID_SETTINGS = settings_merged
-        vcsserver.CONFIG = settings_merged
-
     def _configure(self):
         self.config.add_renderer(name='msgpack', factory=self._msgpack_renderer_factory)
@@ -715,6 +702,8 @@ def sanitize_settings_and_apply_defaults
         'core.binary_dir', '/usr/local/bin/rhodecode_bin/vcs_bin',
         default_when_empty=True, parser='string:noquote')
 
+    settings_maker.make_setting('vcs.svn.redis_conn', 'redis://redis:6379/0')
+
     temp_store = tempfile.gettempdir()
     default_cache_dir = os.path.join(temp_store, 'rc_cache')
     # save default, cache dir, and use it for all backends later.
diff --git a/vcsserver/lib/_vendor/jsonlogger/__init__.py b/vcsserver/lib/_vendor/jsonlogger/__init__.py
--- a/vcsserver/lib/_vendor/jsonlogger/__init__.py
+++ b/vcsserver/lib/_vendor/jsonlogger/__init__.py
@@ -3,7 +3,6 @@ This library is provided to allow standa
 to output log data as JSON formatted strings
 '''
 import logging
-import json
 import re
 from datetime import date, datetime, time, tzinfo, timedelta
 import traceback
@@ -13,13 +12,8 @@ from inspect import istraceback
 
 from collections import OrderedDict
 
-
-def _inject_req_id(record, *args, **kwargs):
-    return record
-
-
-ExceptionAwareFormatter = logging.Formatter
-
+from ...logging_formatter import _inject_req_id, ExceptionAwareFormatter
+from ...ext_json import sjson as json
 
 ZERO = timedelta(0)
 HOUR = timedelta(hours=1)
diff --git a/vcsserver/lib/_vendor/redis_lock/__init__.py b/vcsserver/lib/_vendor/redis_lock/__init__.py
--- a/vcsserver/lib/_vendor/redis_lock/__init__.py
+++ b/vcsserver/lib/_vendor/redis_lock/__init__.py
@@ -111,11 +111,12 @@ class Lock:
     extend_script = None
     reset_script = None
     reset_all_script = None
+    blocking = None
 
     _lock_renewal_interval: float
     _lock_renewal_thread: Union[threading.Thread, None]
 
-    def __init__(self, redis_client, name, expire=None, id=None, auto_renewal=False, strict=True, signal_expire=1000):
+    def __init__(self, redis_client, name, expire=None, id=None, auto_renewal=False, strict=True, signal_expire=1000, blocking=True):
         """
         :param redis_client:
             An instance of :class:`~StrictRedis`.
@@ -143,6 +144,9 @@ class Lock:
             If set ``True`` then the ``redis_client`` needs to be an instance of ``redis.StrictRedis``.
         :param signal_expire:
             Advanced option to override signal list expiration in milliseconds. Increase it for very slow clients. Default: ``1000``.
+        :param blocking:
+            Boolean value specifying whether lock should be blocking or not.
+            Used in `__enter__` method.
         """
         if strict and not isinstance(redis_client, StrictRedis):
             raise ValueError("redis_client must be instance of StrictRedis. "
@@ -179,6 +183,8 @@ class Lock:
                                        else None)
         self._lock_renewal_thread = None
 
+        self.blocking = blocking
+
         self.register_scripts(redis_client)
 
     @classmethod
@@ -342,9 +348,11 @@ class Lock:
             loggers["refresh.exit"].debug("Renewal thread for Lock(%r) exited.", self._name)
 
     def __enter__(self):
-        acquired = self.acquire(blocking=True)
+        acquired = self.acquire(blocking=self.blocking)
         if not acquired:
-            raise AssertionError(f"Lock({self._name}) wasn't acquired, but blocking=True was used!")
+            if self.blocking:
+                raise AssertionError(f"Lock({self._name}) wasn't acquired, but blocking=True was used!")
+            raise NotAcquired(f"Lock({self._name}) is not acquired or it already expired.")
         return self
 
     def __exit__(self, exc_type=None, exc_value=None, traceback=None):
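
With the new `blocking` flag a context-managed lock can fail fast instead of waiting. A short usage sketch; the client setup and lock name are illustrative, and `NotAcquired` is assumed to be importable from the vendored module as the diff implies:

```python
import redis
from vcsserver.lib._vendor import redis_lock

redis_client = redis.StrictRedis.from_url('redis://redis:6379/0')  # illustrative

lock = redis_lock.Lock(redis_client, name='my-lock', expire=60, blocking=False)
try:
    with lock:
        pass  # critical section; entered only if the lock was free
except redis_lock.NotAcquired:
    pass  # fail fast: somebody else holds the lock
```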
diff --git a/vcsserver/lib/_vendor/statsd/__init__.py b/vcsserver/lib/_vendor/statsd/__init__.py
--- a/vcsserver/lib/_vendor/statsd/__init__.py
+++ b/vcsserver/lib/_vendor/statsd/__init__.py
@@ -14,7 +14,7 @@ log = logging.getLogger('rhodecode.stats
 
 def statsd_config(config, prefix='statsd.'):
     _config = {}
-    for key in config.keys():
+    for key in list(config.keys()):
         if key.startswith(prefix):
             _config[key[len(prefix):]] = config[key]
     return _config
@@ -24,9 +24,10 @@ def client_from_config(configuration, pr
     from pyramid.settings import asbool
 
     _config = statsd_config(configuration, prefix)
+    statsd_flag = _config.get('enabled')
     statsd_enabled = asbool(_config.pop('enabled', False))
     if not statsd_enabled:
-        log.debug('statsd client not enabled by statsd.enabled = flag, skipping...')
+        log.debug('statsd client not enabled by statsd.enabled = %s flag, skipping...', statsd_flag)
         return
 
     host = _config.pop('statsd_host', HOST)
diff --git a/vcsserver/lib/archive_cache/__init__.py b/vcsserver/lib/archive_cache/__init__.py
new file mode 100644
--- /dev/null
+++ b/vcsserver/lib/archive_cache/__init__.py
@@ -0,0 +1,79 @@
+# Copyright (C) 2015-2024 RhodeCode GmbH
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3
+# (only), as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# This program is dual-licensed. If you wish to learn more about the
+# RhodeCode Enterprise Edition, including its added features, Support services,
+# and proprietary license terms, please see https://rhodecode.com/licenses/
+
+import logging
+
+from .backends.fanout_cache import FileSystemFanoutCache
+from .backends.objectstore_cache import ObjectStoreCache
+
+from .utils import archive_iterator  # noqa
+from .lock import ArchiveCacheGenerationLock  # noqa
+
+log = logging.getLogger(__name__)
+
+
+cache_meta = None
+
+
+def includeme(config):
+    return  # vcsserver gets its config from rhodecode on a remote call
+    # init our cache at start
+    settings = config.get_settings()
+    get_archival_cache_store(settings)
+
+
+def get_archival_config(config):
+
+    final_config = {
+
+    }
+
+    for k, v in config.items():
+        if k.startswith('archive_cache'):
+            final_config[k] = v
+
+    return final_config
+
+
+def get_archival_cache_store(config, always_init=False):
+
+    global cache_meta
+    if cache_meta is not None and not always_init:
+        return cache_meta
+
+    config = get_archival_config(config)
+    backend = config['archive_cache.backend.type']
+
+    archive_cache_locking_url = config['archive_cache.locking.url']
+
+    match backend:
+        case 'filesystem':
+            d_cache = FileSystemFanoutCache(
+                locking_url=archive_cache_locking_url,
+                **config
+            )
+        case 'objectstore':
+            d_cache = ObjectStoreCache(
+                locking_url=archive_cache_locking_url,
+                **config
+            )
+        case _:
+            raise ValueError(f'archive_cache.backend.type only supports "filesystem" or "objectstore" got {backend} ')
+
+    cache_meta = d_cache
+    return cache_meta
diff --git a/vcsserver/lib/archive_cache/backends/__init__.py b/vcsserver/lib/archive_cache/backends/__init__.py
new file mode 100644
--- /dev/null
+++ b/vcsserver/lib/archive_cache/backends/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2015-2024 RhodeCode GmbH
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3
+# (only), as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# This program is dual-licensed. If you wish to learn more about the
+# RhodeCode Enterprise Edition, including its added features, Support services,
+# and proprietary license terms, please see https://rhodecode.com/licenses/
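
For orientation, `get_archival_cache_store` filters the settings for `archive_cache.*` keys and dispatches on the backend type. A sketch of a filesystem-backed configuration; the key names are the ones read by `FileSystemFanoutCache` below, the values are examples, and the eviction-policy name is an assumption, since the valid names live in `archive_cache/utils.py`, which is cut off at the end of this section:

```python
from vcsserver.lib.archive_cache import get_archival_cache_store

settings = {
    'archive_cache.backend.type': 'filesystem',
    'archive_cache.locking.url': 'redis://redis:6379/1',
    'archive_cache.filesystem.store_dir': '/var/opt/rc_archive_cache',
    'archive_cache.filesystem.cache_shards': '8',
    'archive_cache.filesystem.cache_size_gb': '10',
    'archive_cache.filesystem.eviction_policy': 'least-recently-stored',  # assumed name
    'archive_cache.filesystem.retry': 'false',
    'archive_cache.filesystem.retry_attempts': '10',
    'archive_cache.filesystem.retry_backoff': '1',
}
d_cache = get_archival_cache_store(settings)
```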
diff --git a/vcsserver/lib/archive_cache/backends/base.py b/vcsserver/lib/archive_cache/backends/base.py
new file mode 100644
--- /dev/null
+++ b/vcsserver/lib/archive_cache/backends/base.py
@@ -0,0 +1,372 @@
+# Copyright (C) 2015-2024 RhodeCode GmbH
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3
+# (only), as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# This program is dual-licensed. If you wish to learn more about the
+# RhodeCode Enterprise Edition, including its added features, Support services,
+# and proprietary license terms, please see https://rhodecode.com/licenses/
+
+import os
+import functools
+import logging
+import typing
+import time
+import zlib
+
+from ...ext_json import json
+from ..utils import StatsDB, NOT_GIVEN, ShardFileReader, EVICTION_POLICY, format_size
+from ..lock import GenerationLock
+
+log = logging.getLogger(__name__)
+
+
+class BaseShard:
+    storage_type: str = ''
+    fs = None
+
+    @classmethod
+    def hash(cls, key):
+        """Compute portable hash for `key`.
+
+        :param key: key to hash
+        :return: hash value
+
+        """
+        mask = 0xFFFFFFFF
+        return zlib.adler32(key.encode('utf-8')) & mask  # noqa
+
+    def _write_file(self, full_path, read_iterator, mode):
+        raise NotImplementedError
+
+    def _get_keyfile(self, key):
+        raise NotImplementedError
+
+    def random_filename(self):
+        raise NotImplementedError
+
+    def store(self, *args, **kwargs):
+        raise NotImplementedError
+
+    def _store(self, key, value_reader, metadata, mode):
+        (filename,  # hash-name
+         full_path  # full-path/hash-name
+         ) = self.random_filename()
+
+        key_file, key_file_path = self._get_keyfile(key)
+
+        # STORE METADATA
+        _metadata = {
+            "version": "v1",
+
+            "key_file": key_file,  # this is the .key.json file storing meta
+            "key_file_path": key_file_path,  # full path to key_file
+            "archive_key": key,  # original name we stored archive under, e.g my-archive.zip
+            "archive_filename": filename,  # the actual filename we stored that file under
+            "archive_full_path": full_path,
+
+            "store_time": time.time(),
+            "access_count": 0,
+            "access_time": 0,
+
+            "size": 0
+        }
+        if metadata:
+            _metadata.update(metadata)
+
+        read_iterator = iter(functools.partial(value_reader.read, 2**22), b'')
+        size, sha256 = self._write_file(full_path, read_iterator, mode)
+        _metadata['size'] = size
+        _metadata['sha256'] = sha256
+
+        # after archive is finished, we create a key to save the presence of the binary file
+        with self.fs.open(key_file_path, 'wb') as f:
+            f.write(json.dumps(_metadata))
+
+        return key, filename, size, _metadata
+
+    def fetch(self, *args, **kwargs):
+        raise NotImplementedError
+
+    def _fetch(self, key, retry, retry_attempts, retry_backoff,
+               presigned_url_expires: int = 0) -> tuple[ShardFileReader, dict]:
+        if retry is NOT_GIVEN:
+            retry = False
+        if retry_attempts is NOT_GIVEN:
+            retry_attempts = 0
+
+        if retry and retry_attempts > 0:
+            for attempt in range(1, retry_attempts + 1):
+                if key in self:
+                    break
+                # we didn't find the key, wait retry_backoff N seconds, and re-check
+                time.sleep(retry_backoff)
+
+        if key not in self:
+            log.exception(f'requested key={key} not found in {self} retry={retry}, attempts={retry_attempts}')
+            raise KeyError(key)
+
+        key_file, key_file_path = self._get_keyfile(key)
+        with self.fs.open(key_file_path, 'rb') as f:
+            metadata = json.loads(f.read())
+
+        archive_path = metadata['archive_full_path']
+        if presigned_url_expires and presigned_url_expires > 0:
+            metadata['url'] = self.fs.url(archive_path, expires=presigned_url_expires)
+
+        try:
+            return ShardFileReader(self.fs.open(archive_path, 'rb')), metadata
+        finally:
+            # update usage stats, count and accessed
+            metadata["access_count"] = metadata.get("access_count", 0) + 1
+            metadata["access_time"] = time.time()
+            log.debug('Updated %s with access snapshot, access_count=%s access_time=%s',
+                      key_file, metadata['access_count'], metadata['access_time'])
+            with self.fs.open(key_file_path, 'wb') as f:
+                f.write(json.dumps(metadata))
+
+    def remove(self, *args, **kwargs):
+        raise NotImplementedError
+
+    def _remove(self, key):
+        if key not in self:
+            log.exception(f'requested key={key} not found in {self}')
+            raise KeyError(key)
+
+        key_file, key_file_path = self._get_keyfile(key)
+        with self.fs.open(key_file_path, 'rb') as f:
+            metadata = json.loads(f.read())
+
+        archive_path = metadata['archive_full_path']
+        self.fs.rm(archive_path)
+        self.fs.rm(key_file_path)
+        return 1
+
+    @property
+    def storage_medium(self):
+        return getattr(self, self.storage_type)
+
+    @property
+    def key_suffix(self):
+        return 'key.json'
+
+    def __contains__(self, key):
+        """Return `True` if `key` matching item is found in cache.
+
+        :param key: key matching item
+        :return: True if key matching item
+
+        """
+        key_file, key_file_path = self._get_keyfile(key)
+        return self.fs.exists(key_file_path)
+
+
+class BaseCache:
+    _locking_url: str = ''
+    _storage_path: str = ''
+    _config: dict = {}
+    retry = False
+    retry_attempts: int = 0
+    retry_backoff: int | float = 1
+    _shards = tuple()
+    shard_cls = BaseShard
+    # define the presigned url expiration, 0 == disabled
+    presigned_url_expires: int = 0
+
+    def __contains__(self, key):
+        """Return `True` if `key` matching item is found in cache.
+
+        :param key: key matching item
+        :return: True if key matching item
+
+        """
+        return self.has_key(key)
+
+    def __repr__(self):
+        return f'<{self.__class__.__name__}(storage={self._storage_path})>'
+
+    @classmethod
+    def gb_to_bytes(cls, gb):
+        return gb * (1024 ** 3)
+
+    @property
+    def storage_path(self):
+        return self._storage_path
+
+    @classmethod
+    def get_stats_db(cls):
+        return StatsDB()
+
+    def get_conf(self, key, pop=False):
+        if key not in self._config:
+            raise ValueError(f"No configuration key '{key}', please make sure it exists in archive_cache config")
+        val = self._config[key]
+        if pop:
+            del self._config[key]
+        return val
+
+    def _get_shard(self, key) -> shard_cls:
+        index = self._hash(key) % self._shard_count
+        shard = self._shards[index]
+        return shard
+
+    def _get_size(self, shard, archive_path):
+        raise NotImplementedError
+
+    def store(self, key, value_reader, metadata=None):
+        shard = self._get_shard(key)
+        return shard.store(key, value_reader, metadata)
+
+    def fetch(self, key, retry=NOT_GIVEN, retry_attempts=NOT_GIVEN) -> tuple[typing.BinaryIO, dict]:
+        """
+        Return file handle corresponding to `key` from specific shard cache.
+        """
+        if retry is NOT_GIVEN:
+            retry = self.retry
+        if retry_attempts is NOT_GIVEN:
+            retry_attempts = self.retry_attempts
+        retry_backoff = self.retry_backoff
+        presigned_url_expires = self.presigned_url_expires
+
+        shard = self._get_shard(key)
+        return shard.fetch(key, retry=retry,
+                           retry_attempts=retry_attempts,
+                           retry_backoff=retry_backoff,
+                           presigned_url_expires=presigned_url_expires)
+
+    def remove(self, key):
+        shard = self._get_shard(key)
+        return shard.remove(key)
+
+    def has_key(self, archive_key):
+        """Return `True` if `key` matching item is found in cache.
+
+        :param archive_key: key for item, this is a unique archive name we want to store data under. e.g my-archive-svn.zip
+        :return: True if key is found
+
+        """
+        shard = self._get_shard(archive_key)
+        return archive_key in shard
+
+    def iter_keys(self):
+        for shard in self._shards:
+            if shard.fs.exists(shard.storage_medium):
+                for path, _dirs, _files in shard.fs.walk(shard.storage_medium):
+                    for key_file_path in _files:
+                        if key_file_path.endswith(shard.key_suffix):
+                            yield shard, key_file_path
+
+    def get_lock(self, lock_key):
+        return GenerationLock(lock_key, self._locking_url)
+
+    def evict(self, policy=None, size_limit=None) -> dict:
+        """
+        Remove old items based on the conditions
+
+
+        explanation of this algo:
+        iterate over each shard, then for each shard iterate over the .key files
+        read the key files metadata stored. This gives us a full list of keys, cached_archived, their size and
+        access data, time creation, and access counts.
+
+        Store that into a memory DB in order we can run different sorting strategies easily.
+        Summing the size is a sum sql query.
+
+        Then we run a sorting strategy based on eviction policy.
+        We iterate over sorted keys, and remove each checking if we hit the overall limit.
+        """
+        removal_info = {
+            "removed_items": 0,
+            "removed_size": 0
+        }
+        policy = policy or self._eviction_policy
+        size_limit = size_limit or self._cache_size_limit
+
+        select_policy = EVICTION_POLICY[policy]['evict']
+
+        log.debug('Running eviction policy \'%s\', and checking for size limit: %s',
+                  policy, format_size(size_limit))
+
+        if select_policy is None:
+            return removal_info
+
+        db = self.get_stats_db()
+
+        data = []
+        cnt = 1
+
+        for shard, key_file in self.iter_keys():
+            with shard.fs.open(os.path.join(shard.storage_medium, key_file), 'rb') as f:
+                metadata = json.loads(f.read())
+
+            key_file_path = os.path.join(shard.storage_medium, key_file)
+
+            archive_key = metadata['archive_key']
+            archive_path = metadata['archive_full_path']
+
+            size = metadata.get('size')
+            if not size:
+                # in case we don't have size re-calc it...
+                size = self._get_size(shard, archive_path)
+
+            data.append([
+                cnt,
+                key_file,
+                key_file_path,
+                archive_key,
+                archive_path,
+                metadata.get('store_time', 0),
+                metadata.get('access_time', 0),
+                metadata.get('access_count', 0),
+                size,
+            ])
+            cnt += 1
+
+        # Insert bulk data using executemany
+        db.bulk_insert(data)
+
+        total_size = db.get_total_size()
+        log.debug('Analyzed %s keys, occupying: %s, running eviction to match %s',
+                  len(data), format_size(total_size), format_size(size_limit))
+
+        removed_items = 0
+        removed_size = 0
+        for key_file, archive_key, size in db.get_sorted_keys(select_policy):
+            # simulate removal impact BEFORE removal
+            total_size -= size
+
+            if total_size <= size_limit:
+                # we obtained what we wanted...
+                break
+
+            self.remove(archive_key)
+            removed_items += 1
+            removed_size += size
+        removal_info['removed_items'] = removed_items
+        removal_info['removed_size'] = removed_size
+        log.debug('Removed %s cache archives, and reduced size by: %s',
+                  removed_items, format_size(removed_size))
+        return removal_info
+
+    def get_statistics(self):
+        total_files = 0
+        total_size = 0
+        meta = {}
+
+        for shard, key_file in self.iter_keys():
+            json_key = f"{shard.storage_medium}/{key_file}"
+            with shard.fs.open(json_key, 'rb') as f:
+                total_files += 1
+                metadata = json.loads(f.read())
+                total_size += metadata['size']
+
+        return total_files, total_size, meta
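
The shard routing in `BaseCache._get_shard` is deliberately simple: the portable adler32 hash from `BaseShard.hash`, modulo the shard count. A tiny standalone illustration (shard count of 8 chosen arbitrarily):

```python
import zlib

def shard_index(key: str, shard_count: int = 8) -> int:
    """Same portable hash as BaseShard.hash: adler32 masked to 32 bits."""
    return (zlib.adler32(key.encode('utf-8')) & 0xFFFFFFFF) % shard_count

print(shard_index('my-archive-svn.zip'))  # stable shard number in range(8)
```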
diff --git a/vcsserver/lib/archive_cache/backends/fanout_cache.py b/vcsserver/lib/archive_cache/backends/fanout_cache.py
new file mode 100644
--- /dev/null
+++ b/vcsserver/lib/archive_cache/backends/fanout_cache.py
@@ -0,0 +1,177 @@
+# Copyright (C) 2015-2024 RhodeCode GmbH
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3
+# (only), as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# This program is dual-licensed. If you wish to learn more about the
+# RhodeCode Enterprise Edition, including its added features, Support services,
+# and proprietary license terms, please see https://rhodecode.com/licenses/
+
+import codecs
+import hashlib
+import logging
+import os
+import typing
+
+import fsspec
+
+from .base import BaseCache, BaseShard
+from ..utils import ShardFileReader, NOT_GIVEN
+from ...type_utils import str2bool
+
+log = logging.getLogger(__name__)
+
+
+class FileSystemShard(BaseShard):
+
+    def __init__(self, index, directory, directory_folder, fs, **settings):
+        self._index: int = index
+        self._directory: str = directory
+        self._directory_folder: str = directory_folder
+        self.storage_type: str = 'directory'
+
+        self.fs = fs
+
+    @property
+    def directory(self) -> str:
+        """Cache directory final path."""
+        return os.path.join(self._directory, self._directory_folder)
+
+    def _get_keyfile(self, archive_key) -> tuple[str, str]:
+        key_file: str = f'{archive_key}.{self.key_suffix}'
+        return key_file, os.path.join(self.directory, key_file)
+
+    def _get_writer(self, path, mode):
+        for count in range(1, 11):
+            try:
+                # Another cache may have deleted the directory before
+                # the file could be opened.
+                return self.fs.open(path, mode)
+            except OSError:
+                if count == 10:
+                    # Give up after 10 tries to open the file.
+                    raise
+                continue
+
+    def _write_file(self, full_path, iterator, mode):
+
+        # ensure dir exists
+        destination, _ = os.path.split(full_path)
+        if not self.fs.exists(destination):
+            self.fs.makedirs(destination)
+
+        writer = self._get_writer(full_path, mode)
+
+        digest = hashlib.sha256()
+        with writer:
+            size = 0
+            for chunk in iterator:
+                size += len(chunk)
+                digest.update(chunk)
+                writer.write(chunk)
+            writer.flush()
+            # Get the file descriptor
+            fd = writer.fileno()
+
+            # Sync the file descriptor to disk, helps with NFS cases...
+            os.fsync(fd)
+        sha256 = digest.hexdigest()
+        log.debug('written new archive cache under %s, sha256: %s', full_path, sha256)
+        return size, sha256
+
+    def store(self, key, value_reader, metadata: dict | None = None):
+        return self._store(key, value_reader, metadata, mode='xb')
+
+    def fetch(self, key, retry=NOT_GIVEN,
+              retry_attempts=NOT_GIVEN, retry_backoff=1, **kwargs) -> tuple[ShardFileReader, dict]:
+        return self._fetch(key, retry, retry_attempts, retry_backoff)
+
+    def remove(self, key):
+        return self._remove(key)
+
+    def random_filename(self):
+        """Return filename and full-path tuple for file storage.
+
+        Filename will be a randomly generated 28 character hexadecimal string
+        with ".archive_cache" suffixed. Two levels of sub-directories will be used to
+        reduce the size of directories. On older filesystems, lookups in
+        directories with many files may be slow.
+        """
+
+        hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8')
+
+        archive_name = hex_name[4:] + '.archive_cache'
+        filename = f"{hex_name[:2]}/{hex_name[2:4]}/{archive_name}"
+
+        full_path = os.path.join(self.directory, filename)
+        return archive_name, full_path
+
+    def __repr__(self):
+        return f'{self.__class__.__name__}(index={self._index}, dir={self.directory})'
+
+
+class FileSystemFanoutCache(BaseCache):
+    shard_name: str = 'shard_{:03d}'
+    shard_cls = FileSystemShard
+
+    def __init__(self, locking_url, **settings):
+        """
+        Initialize file system cache instance.
+
+        :param str locking_url: redis url for a lock
+        :param settings: settings dict
+
+        """
+        self._locking_url = locking_url
+        self._config = settings
+        cache_dir = self.get_conf('archive_cache.filesystem.store_dir')
+        directory = str(cache_dir)
+        directory = os.path.expanduser(directory)
+        directory = os.path.expandvars(directory)
+        self._directory = directory
+        self._storage_path = directory  # common path for all from BaseCache
+
+        self._shard_count = int(self.get_conf('archive_cache.filesystem.cache_shards', pop=True))
+        if self._shard_count < 1:
+            raise ValueError('cache_shards must be 1 or more')
+
+        self._eviction_policy = self.get_conf('archive_cache.filesystem.eviction_policy', pop=True)
+        self._cache_size_limit = self.gb_to_bytes(int(self.get_conf('archive_cache.filesystem.cache_size_gb')))
+
+        self.retry = str2bool(self.get_conf('archive_cache.filesystem.retry', pop=True))
+        self.retry_attempts = int(self.get_conf('archive_cache.filesystem.retry_attempts', pop=True))
+        self.retry_backoff = int(self.get_conf('archive_cache.filesystem.retry_backoff', pop=True))
+
+        log.debug('Initializing %s archival cache instance', self)
+        fs = fsspec.filesystem('file')
+        # check if it's ok to write, and re-create the archive cache main dir
+        # A directory is the virtual equivalent of a physical file cabinet.
+        # In other words, it's a container for organizing digital data.
+        # Unlike a folder, which can only store files, a directory can store files,
+        # subdirectories, and other directories.
+        if not fs.exists(self._directory):
+            fs.makedirs(self._directory, exist_ok=True)
+
+        self._shards = tuple(
+            self.shard_cls(
+                index=num,
+                directory=directory,
+                directory_folder=self.shard_name.format(num),
+                fs=fs,
+                **settings,
+            )
+            for num in range(self._shard_count)
+        )
+        self._hash = self._shards[0].hash
+
+    def _get_size(self, shard, archive_path):
+        return os.stat(archive_path).st_size
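
The fan-out layout produced by `random_filename` is easy to see in isolation; the same path construction as above, runnable on its own:

```python
import codecs
import os

hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8')  # 32 hex chars
archive_name = hex_name[4:] + '.archive_cache'                   # 28 chars + suffix
print(f"{hex_name[:2]}/{hex_name[2:4]}/{archive_name}")          # e.g. ab/cd/<name>
```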
diff --git a/vcsserver/lib/archive_cache/backends/objectstore_cache.py b/vcsserver/lib/archive_cache/backends/objectstore_cache.py
new file mode 100644
--- /dev/null
+++ b/vcsserver/lib/archive_cache/backends/objectstore_cache.py
@@ -0,0 +1,173 @@
+# Copyright (C) 2015-2024 RhodeCode GmbH
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3
+# (only), as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# This program is dual-licensed. If you wish to learn more about the
+# RhodeCode Enterprise Edition, including its added features, Support services,
+# and proprietary license terms, please see https://rhodecode.com/licenses/
+
+import codecs
+import hashlib
+import logging
+import os
+import typing
+
+import fsspec
+
+from .base import BaseCache, BaseShard
+from ..utils import ShardFileReader, NOT_GIVEN
+from ...type_utils import str2bool
+
+log = logging.getLogger(__name__)
+
+
+class S3Shard(BaseShard):
+
+    def __init__(self, index, bucket, bucket_folder, fs, **settings):
+        self._index: int = index
+        self._bucket_folder: str = bucket_folder
+        self.storage_type: str = 'bucket'
+        self._bucket_main: str = bucket
+
+        self.fs = fs
+
+    @property
+    def bucket(self) -> str:
+        """Cache bucket final path."""
+        return os.path.join(self._bucket_main, self._bucket_folder)
+
+    def _get_keyfile(self, archive_key) -> tuple[str, str]:
+        key_file: str = f'{archive_key}-{self.key_suffix}'
+        return key_file, os.path.join(self.bucket, key_file)
+
+    def _get_writer(self, path, mode):
+        return self.fs.open(path, 'wb')
+
+    def _write_file(self, full_path, iterator, mode):
+
+        # ensure folder in bucket exists
+        destination = self.bucket
+        if not self.fs.exists(destination):
+            self.fs.mkdir(destination, s3_additional_kwargs={})
+
+        writer = self._get_writer(full_path, mode)
+
+        digest = hashlib.sha256()
+        with writer:
+            size = 0
+            for chunk in iterator:
+                size += len(chunk)
+                digest.update(chunk)
+                writer.write(chunk)
+
+        sha256 = digest.hexdigest()
+        log.debug('written new archive cache under %s, sha256: %s', full_path, sha256)
+        return size, sha256
+
+    def store(self, key, value_reader, metadata: dict | None = None):
+        return self._store(key, value_reader, metadata, mode='wb')
+
+    def fetch(self, key, retry=NOT_GIVEN,
+              retry_attempts=NOT_GIVEN, retry_backoff=1,
+              presigned_url_expires: int = 0) -> tuple[ShardFileReader, dict]:
+        return self._fetch(key, retry, retry_attempts, retry_backoff, presigned_url_expires=presigned_url_expires)
+
+    def remove(self, key):
+        return self._remove(key)
+
+    def random_filename(self):
+        """Return filename and full-path tuple for file storage.
+
+        Filename will be a randomly generated 28 character hexadecimal string
+        with ".archive_cache" suffixed. Two levels of sub-directories will be used to
+        reduce the size of directories. On older filesystems, lookups in
+        directories with many files may be slow.
+        """
+
+        hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8')
+
+        archive_name = hex_name[4:] + '.archive_cache'
+        filename = f"{hex_name[:2]}-{hex_name[2:4]}-{archive_name}"
+
+        full_path = os.path.join(self.bucket, filename)
+        return archive_name, full_path
+
+    def __repr__(self):
+        return f'{self.__class__.__name__}(index={self._index}, bucket={self.bucket})'
+
+
+class ObjectStoreCache(BaseCache):
+    shard_name: str = 'shard-{:03d}'
+    shard_cls = S3Shard
+
+    def __init__(self, locking_url, **settings):
+        """
+        Initialize objectstore cache instance.
+
+        :param str locking_url: redis url for a lock
+        :param settings: settings dict
+
+        """
+        self._locking_url = locking_url
+        self._config = settings
+
+        objectstore_url = self.get_conf('archive_cache.objectstore.url')
+        self._storage_path = objectstore_url  # common path for all from BaseCache
+
+        self._shard_count = int(self.get_conf('archive_cache.objectstore.bucket_shards', pop=True))
+        if self._shard_count < 1:
+            raise ValueError('cache_shards must be 1 or more')
+
+        self._bucket = settings.pop('archive_cache.objectstore.bucket')
+        if not self._bucket:
+            raise ValueError('archive_cache.objectstore.bucket needs to have a value')
+
+        self._eviction_policy = self.get_conf('archive_cache.objectstore.eviction_policy', pop=True)
+        self._cache_size_limit = self.gb_to_bytes(int(self.get_conf('archive_cache.objectstore.cache_size_gb')))
+
+        self.retry = str2bool(self.get_conf('archive_cache.objectstore.retry', pop=True))
+        self.retry_attempts = int(self.get_conf('archive_cache.objectstore.retry_attempts', pop=True))
+        self.retry_backoff = int(self.get_conf('archive_cache.objectstore.retry_backoff', pop=True))
+
+        endpoint_url = settings.pop('archive_cache.objectstore.url')
+        key = settings.pop('archive_cache.objectstore.key')
+        secret = settings.pop('archive_cache.objectstore.secret')
+        region = settings.pop('archive_cache.objectstore.region')
+
+        log.debug('Initializing %s archival cache instance', self)
+
+        fs = fsspec.filesystem(
+            's3', anon=False, endpoint_url=endpoint_url, key=key, secret=secret, client_kwargs={'region_name': region}
+        )
+
+        # init main bucket
+        if not fs.exists(self._bucket):
+            fs.mkdir(self._bucket)
+
+        self._shards = tuple(
+            self.shard_cls(
+                index=num,
+                bucket=self._bucket,
+                bucket_folder=self.shard_name.format(num),
+                fs=fs,
+                **settings,
+            )
+            for num in range(self._shard_count)
+        )
+        self._hash = self._shards[0].hash
+
+    def _get_size(self, shard, archive_path):
+        return shard.fs.info(archive_path)['size']
+
+    def set_presigned_url_expiry(self, val: int) -> None:
+        self.presigned_url_expires = val
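
The objectstore backend drives all I/O through fsspec's s3 protocol, backed by the s3fs/aiobotocore pins added to requirements.txt above. A minimal sketch of the same filesystem handle against a self-hosted endpoint; URL, credentials and bucket name are placeholders:

```python
import fsspec

fs = fsspec.filesystem(
    's3', anon=False,
    endpoint_url='http://minio:9000',       # placeholder, e.g. a MinIO endpoint
    key='access-key', secret='secret-key',  # placeholders
    client_kwargs={'region_name': 'us-east-1'},
)

if not fs.exists('archive-cache'):
    fs.mkdir('archive-cache')
with fs.open('archive-cache/shard-000/example.bin', 'wb') as f:
    f.write(b'payload')
```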
+
+        Filename will be a randomly generated 28 character hexadecimal string
+        with ".archive_cache" suffixed. The two leading hex-char pairs are kept
+        as dash-separated prefixes of the object key itself: object stores use
+        a flat namespace, so no sub-directories are created.
+        """
+
+        hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8')
+
+        archive_name = hex_name[4:] + '.archive_cache'
+        filename = f"{hex_name[:2]}-{hex_name[2:4]}-{archive_name}"
+
+        full_path = os.path.join(self.bucket, filename)
+        return archive_name, full_path
+
+    def __repr__(self):
+        return f'{self.__class__.__name__}(index={self._index}, bucket={self.bucket})'
+
+
+class ObjectStoreCache(BaseCache):
+    shard_name: str = 'shard-{:03d}'
+    shard_cls = S3Shard
+
+    def __init__(self, locking_url, **settings):
+        """
+        Initialize objectstore cache instance.
+
+        :param str locking_url: redis url for a lock
+        :param settings: settings dict
+
+        """
+        self._locking_url = locking_url
+        self._config = settings
+
+        objectstore_url = self.get_conf('archive_cache.objectstore.url')
+        self._storage_path = objectstore_url  # common path for all from BaseCache
+
+        self._shard_count = int(self.get_conf('archive_cache.objectstore.bucket_shards', pop=True))
+        if self._shard_count < 1:
+            raise ValueError('bucket_shards must be 1 or more')
+
+        self._bucket = settings.pop('archive_cache.objectstore.bucket')
+        if not self._bucket:
+            raise ValueError('archive_cache.objectstore.bucket needs to have a value')
+
+        self._eviction_policy = self.get_conf('archive_cache.objectstore.eviction_policy', pop=True)
+        self._cache_size_limit = self.gb_to_bytes(int(self.get_conf('archive_cache.objectstore.cache_size_gb')))
+
+        self.retry = str2bool(self.get_conf('archive_cache.objectstore.retry', pop=True))
+        self.retry_attempts = int(self.get_conf('archive_cache.objectstore.retry_attempts', pop=True))
+        self.retry_backoff = int(self.get_conf('archive_cache.objectstore.retry_backoff', pop=True))
+
+        endpoint_url = settings.pop('archive_cache.objectstore.url')
+        key = settings.pop('archive_cache.objectstore.key')
+        secret = settings.pop('archive_cache.objectstore.secret')
+        region = settings.pop('archive_cache.objectstore.region')
+
+        log.debug('Initializing %s archival cache instance', self)
+
+        fs = fsspec.filesystem(
+            's3', anon=False, endpoint_url=endpoint_url, key=key, secret=secret, client_kwargs={'region_name': region}
+        )
+
+        # init main bucket
+        if not fs.exists(self._bucket):
+            fs.mkdir(self._bucket)
+
+        self._shards = tuple(
+            self.shard_cls(
+                index=num,
+                bucket=self._bucket,
+                bucket_folder=self.shard_name.format(num),
+                fs=fs,
+                **settings,
+            )
+            for num in range(self._shard_count)
+        )
+        self._hash = self._shards[0].hash
+
+    def _get_size(self, shard, archive_path):
+        return shard.fs.info(archive_path)['size']
+
+    def set_presigned_url_expiry(self, val: int) -> None:
+        self.presigned_url_expires = val
diff --git a/vcsserver/lib/archive_cache/lock.py b/vcsserver/lib/archive_cache/lock.py
new file mode 100644
--- /dev/null
+++ b/vcsserver/lib/archive_cache/lock.py
@@ -0,0 +1,62 @@
+# Copyright (C) 2015-2024 RhodeCode GmbH
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3
+# (only), as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# This program is dual-licensed. If you wish to learn more about the
+# RhodeCode Enterprise Edition, including its added features, Support services,
+# and proprietary license terms, please see https://rhodecode.com/licenses/
+
+import redis
+from .._vendor import redis_lock
+
+
+class ArchiveCacheGenerationLock(Exception):
+    pass
+
+
+class GenerationLock:
+    """
+    Locking mechanism that ensures only one worker generates an archive at a
+    time; entering the context raises ArchiveCacheGenerationLock if the lock
+    is already held:
+
+    with GenerationLock(lock_key, url):
+        compute_archive()
+    """
+    lock_timeout = 7200
+
+    def __init__(self, lock_key, url):
+        self.lock_key = lock_key
+        self._create_client(url)
+        self.lock = self.get_lock()
+
+    def _create_client(self, url):
+        connection_pool = redis.ConnectionPool.from_url(url)
+        self.writer_client = redis.StrictRedis(
+            connection_pool=connection_pool
+        )
+        self.reader_client = self.writer_client
+
+    def get_lock(self):
+        return redis_lock.Lock(
+            redis_client=self.writer_client,
+            name=self.lock_key,
+            expire=self.lock_timeout,
+            strict=True
+        )
+
+    def __enter__(self):
+        acquired = self.lock.acquire(blocking=False)
+        if not acquired:
+            raise ArchiveCacheGenerationLock('Failed to create a lock')
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.lock.release()
diff --git a/vcsserver/lib/archive_cache/utils.py b/vcsserver/lib/archive_cache/utils.py
new file mode 100644
--- /dev/null
+++ b/vcsserver/lib/archive_cache/utils.py
@@ -0,0 +1,134 @@
+# Copyright (C) 2015-2024 RhodeCode GmbH
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3
+# (only), as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# This program is dual-licensed. If you wish to learn more about the
+# RhodeCode Enterprise Edition, including its added features, Support services,
+# and proprietary license terms, please see https://rhodecode.com/licenses/
+
+import sqlite3
+import s3fs.core
+
+NOT_GIVEN = -917
+
+
+EVICTION_POLICY = {
+    'none': {
+        'evict': None,
+    },
+    'least-recently-stored': {
+        'evict': 'SELECT {fields} FROM archive_cache ORDER BY store_time',
+    },
+    'least-recently-used': {
+        'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_time',
+    },
+    'least-frequently-used': {
+        'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_count',
+    },
+}
+
+
+def archive_iterator(_reader, block_size: int = 4096 * 512):
+    # 4096 * 512 = 2MB default read block size
+    while True:
+        data = _reader.read(block_size)
+        if not data:
+            break
+        yield data
+
+
+def format_size(size):
+    # Convert size in bytes to a human-readable format (e.g., KB, MB, GB)
+    for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
+        if size < 1024:
+            return f"{size:.2f} {unit}"
+        size /= 1024
+    return f"{size:.2f} PB"
+
+
+class StatsDB:
+
+    def __init__(self):
+        self.connection = sqlite3.connect(':memory:')
+        self._init_db()
+
+    def _init_db(self):
+        qry = '''
+        CREATE TABLE IF NOT EXISTS archive_cache (
+            rowid INTEGER PRIMARY KEY,
+            key_file TEXT,
+            key_file_path TEXT,
+            archive_key TEXT,
+            archive_path TEXT,
+            store_time REAL,
+            access_time REAL,
+            access_count INTEGER DEFAULT 0,
+            size INTEGER DEFAULT 0
+        )
+        '''
+
+        self.sql(qry)
+        self.connection.commit()
+
+    @property
+    def sql(self):
+        return self.connection.execute
+
+    def bulk_insert(self, rows):
+        qry = '''
+        INSERT INTO archive_cache (
+            rowid,
+            key_file,
+            key_file_path,
+            archive_key,
+            archive_path,
+            store_time,
+            access_time,
+            access_count,
+            size
+        )
+        VALUES (
+            ?, ?, ?, ?, ?, ?, ?, ?, ?
+        )
+        '''
+        cursor = self.connection.cursor()
+        cursor.executemany(qry, rows)
+        self.connection.commit()
+
+    def get_total_size(self):
+        qry = 'SELECT COALESCE(SUM(size), 0) FROM archive_cache'
+        ((total_size,),) = self.sql(qry).fetchall()
+        return total_size
+
+    def get_sorted_keys(self, select_policy):
+        select_policy_qry = select_policy.format(fields='key_file, archive_key, size')
+        return self.sql(select_policy_qry).fetchall()
+
+
+class ShardFileReader:
+
+    def __init__(self, file_like_reader):
+        self._file_like_reader = file_like_reader
+
+    def __getattr__(self, item):
+        if isinstance(self._file_like_reader, s3fs.core.S3File):
+            match item:
+                case 'name':
+                    # S3 FileWrapper doesn't support the name attribute, and we use it
+                    return self._file_like_reader.full_name
+                case _:
+                    return getattr(self._file_like_reader, item)
+        else:
+            return getattr(self._file_like_reader, item)
+
+    def __repr__(self):
+        return f'<{self.__class__.__name__}={self._file_like_reader}>'
diff --git a/vcsserver/lib/config_utils.py b/vcsserver/lib/config_utils.py
--- a/vcsserver/lib/config_utils.py
+++ b/vcsserver/lib/config_utils.py
@@ -16,6 +16,8 @@
 # RhodeCode Enterprise Edition, including its added features, Support services,
 # and proprietary license terms, please see https://rhodecode.com/licenses/
 import os
+import vcsserver
+import vcsserver.settings
 
 
 def get_config(ini_path, **kwargs):
@@ -38,3 +40,19 @@ def get_app_config(ini_path):
     """
     from paste.deploy.loadwsgi import appconfig
    return appconfig(f'config:{ini_path}', relative_to=os.getcwd())
+
+
+def configure_and_store_settings(global_config, app_settings):
+    """
+    Configure the settings module.
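+
+    Merges ``global_config`` with ``app_settings``, records ``core.binary_dir``
+    in ``vcsserver.settings``, and exposes the merged dict as
+    ``vcsserver.CONFIG`` and ``vcsserver.PYRAMID_SETTINGS`` so other modules
+    can read the configuration at runtime.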
+ """ + settings_merged = global_config.copy() + settings_merged.update(app_settings) + + binary_dir = app_settings['core.binary_dir'] + + vcsserver.settings.BINARY_DIR = binary_dir + + # Store the settings to make them available to other modules. + vcsserver.PYRAMID_SETTINGS = settings_merged + vcsserver.CONFIG = settings_merged diff --git a/vcsserver/lib/rc_json.py b/vcsserver/lib/ext_json.py rename from vcsserver/lib/rc_json.py rename to vcsserver/lib/ext_json.py --- a/vcsserver/lib/rc_json.py +++ b/vcsserver/lib/ext_json.py @@ -1,2 +1,14 @@ -# use orjson by default -import orjson as json +import json as stdlib_json + +try: + # we keep simplejson for having dump functionality still + # orjson doesn't support it + import simplejson as sjson +except ImportError: + sjson = stdlib_json + +try: + import orjson + import orjson as json +except ImportError: + json = stdlib_json diff --git a/vcsserver/lib/logging_formatter.py b/vcsserver/lib/logging_formatter.py --- a/vcsserver/lib/logging_formatter.py +++ b/vcsserver/lib/logging_formatter.py @@ -37,11 +37,19 @@ COLORS = { } +def _inject_req_id(record, *args, **kwargs): + return record + + +class ExceptionAwareFormatter(logging.Formatter): + pass + + class ColorFormatter(logging.Formatter): def format(self, record): """ - Change record's levelname to use with COLORS enum + Changes record's levelname to use with COLORS enum """ def_record = super().format(record) @@ -51,3 +59,5 @@ class ColorFormatter(logging.Formatter): colored_record = ''.join([start, def_record, end]) return colored_record + + diff --git a/vcsserver/lib/memory_lru_dict.py b/vcsserver/lib/memory_lru_dict.py --- a/vcsserver/lib/memory_lru_dict.py +++ b/vcsserver/lib/memory_lru_dict.py @@ -20,7 +20,7 @@ import logging from repoze.lru import LRUCache -from vcsserver.str_utils import safe_str +from vcsserver.lib.str_utils import safe_str log = logging.getLogger(__name__) diff --git a/vcsserver/lib/rc_cache/archive_cache.py b/vcsserver/lib/rc_cache/archive_cache.py deleted file mode 100644 --- a/vcsserver/lib/rc_cache/archive_cache.py +++ /dev/null @@ -1,87 +0,0 @@ -# RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2023 RhodeCode GmbH -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -import logging -import os -import diskcache -from diskcache import RLock - -log = logging.getLogger(__name__) - -cache_meta = None - - -class ReentrantLock(RLock): - def __enter__(self): - reentrant_lock_key = self._key - - log.debug('Acquire ReentrantLock(key=%s) for archive cache generation...', reentrant_lock_key) - #self.acquire() - log.debug('Lock for key=%s acquired', reentrant_lock_key) - - def __exit__(self, *exc_info): - #self.release() - pass - - -def get_archival_config(config): - - final_config = { - 'archive_cache.eviction_policy': 'least-frequently-used' - } - - for k, v in config.items(): - if k.startswith('archive_cache'): - final_config[k] = v - - return final_config - - -def get_archival_cache_store(config): - - global cache_meta - if cache_meta is not None: - return cache_meta - - config = get_archival_config(config) - - archive_cache_dir = config['archive_cache.store_dir'] - archive_cache_size_gb = config['archive_cache.cache_size_gb'] - archive_cache_shards = config['archive_cache.cache_shards'] - archive_cache_eviction_policy = config['archive_cache.eviction_policy'] - - log.debug('Initializing archival cache instance under %s', archive_cache_dir) - - # check if it's ok to write, and re-create the archive cache - if not os.path.isdir(archive_cache_dir): - os.makedirs(archive_cache_dir, exist_ok=True) - - d_cache = diskcache.FanoutCache( - archive_cache_dir, shards=archive_cache_shards, - cull_limit=0, # manual eviction required - size_limit=archive_cache_size_gb * 1024 * 1024 * 1024, - eviction_policy=archive_cache_eviction_policy, - timeout=30 - ) - cache_meta = d_cache - return cache_meta - - -def includeme(config): - # init our cache at start, for vcsserver we don't init at runtime - # because our cache config is sent via wire on make archive call, this call just lazy-enables the client - return diff --git a/vcsserver/lib/rc_cache/backends.py b/vcsserver/lib/rc_cache/backends.py --- a/vcsserver/lib/rc_cache/backends.py +++ b/vcsserver/lib/rc_cache/backends.py @@ -38,8 +38,8 @@ from dogpile.cache.backends.file import from dogpile.cache.util import memoized_property from vcsserver.lib.memory_lru_dict import LRUDict, LRUDictDebug -from vcsserver.str_utils import safe_bytes, safe_str -from vcsserver.type_utils import str2bool +from vcsserver.lib.str_utils import safe_bytes, safe_str +from vcsserver.lib.type_utils import str2bool _default_max_size = 1024 diff --git a/vcsserver/lib/rc_cache/utils.py b/vcsserver/lib/rc_cache/utils.py --- a/vcsserver/lib/rc_cache/utils.py +++ b/vcsserver/lib/rc_cache/utils.py @@ -26,8 +26,8 @@ from dogpile.cache import CacheRegion from vcsserver.utils import sha1 -from vcsserver.str_utils import safe_bytes -from vcsserver.type_utils import str2bool # noqa :required by imports from .utils +from vcsserver.lib.str_utils import safe_bytes +from vcsserver.lib.type_utils import str2bool # noqa :required by imports from .utils from . 
import region_meta
diff --git a/vcsserver/str_utils.py b/vcsserver/lib/str_utils.py
rename from vcsserver/str_utils.py
rename to vcsserver/lib/str_utils.py
--- a/vcsserver/str_utils.py
+++ b/vcsserver/lib/str_utils.py
@@ -155,4 +155,4 @@ def splitnewlines(text: bytes):
         lines.pop()
     else:
         lines[-1] = lines[-1][:-1]
-    return lines
\ No newline at end of file
+    return lines
diff --git a/vcsserver/lib/svn_txn_utils.py b/vcsserver/lib/svn_txn_utils.py
new file mode 100644
--- /dev/null
+++ b/vcsserver/lib/svn_txn_utils.py
@@ -0,0 +1,110 @@
+# RhodeCode VCSServer provides access to different vcs backends via network.
+# Copyright (C) 2014-2023 RhodeCode GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+import logging
+import redis
+
+from ..lib import rc_cache
+from ..lib.ext_json import json
+
+
+log = logging.getLogger(__name__)
+
+redis_client = None
+
+
+class RedisTxnClient:
+
+    def __init__(self, url):
+        self.url = url
+        self._create_client(url)
+
+    def _create_client(self, url):
+        connection_pool = redis.ConnectionPool.from_url(url)
+        self.writer_client = redis.StrictRedis(
+            connection_pool=connection_pool
+        )
+        self.reader_client = self.writer_client
+
+    def set(self, key, value):
+        self.writer_client.set(key, value)
+
+    def get(self, key):
+        return self.reader_client.get(key)
+
+    def delete(self, key):
+        self.writer_client.delete(key)
+
+
+def get_redis_client(url=''):
+
+    global redis_client
+    if redis_client is not None:
+        return redis_client
+    if not url:
+        from vcsserver import CONFIG
+        url = CONFIG['vcs.svn.redis_conn']
+    redis_client = RedisTxnClient(url)
+    return redis_client
+
+
+def get_txn_id_data_key(repo_path, svn_txn_id):
+    log.debug('svn-txn-id: %s, obtaining data path', svn_txn_id)
+    repo_key = rc_cache.utils.compute_key_from_params(repo_path)
+    final_key = f'{repo_key}.{svn_txn_id}.svn_txn_id'
+    log.debug('computed final key: %s', final_key)
+
+    return final_key
+
+
+def store_txn_id_data(repo_path, svn_txn_id, data_dict):
+    log.debug('svn-txn-id: %s, storing data', svn_txn_id)
+
+    if not svn_txn_id:
+        log.warning('Cannot store txn_id because it is empty')
+        return
+
+    redis_conn = get_redis_client()
+
+    store_key = get_txn_id_data_key(repo_path, svn_txn_id)
+    store_data = json.dumps(data_dict)
+    redis_conn.set(store_key, store_data)
+
+
+def get_txn_id_from_store(repo_path, svn_txn_id, rm_on_read=False):
+    """
+    Read txn_id metadata from the store and, if present, return the data
+    for the callback manager.
+    """
+    log.debug('svn-txn-id: %s, retrieving data', svn_txn_id)
+    redis_conn = get_redis_client()
+
+    store_key = get_txn_id_data_key(repo_path, svn_txn_id)
+    data = {}
+    raw_data = 'not-set'
+    try:
+        raw_data = redis_conn.get(store_key)
+        if not raw_data:
+            raise ValueError(f'Failed to get txn_id metadata, from store: {store_key}')
+        data = json.loads(raw_data)
+    except Exception:
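+        # raw_data still holds the 'not-set' sentinel if the GET itself
+        # failed, otherwise the payload that json.loads() rejected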
log.exception('Failed to get txn_id metadata: %s', raw_data) + + if rm_on_read: + log.debug('Cleaning up txn_id at %s', store_key) + redis_conn.delete(store_key) + + return data diff --git a/vcsserver/lib/svnremoterepo.py b/vcsserver/lib/svnremoterepo.py --- a/vcsserver/lib/svnremoterepo.py +++ b/vcsserver/lib/svnremoterepo.py @@ -24,7 +24,7 @@ from svn import ra from mercurial import error -from vcsserver.str_utils import safe_bytes +from vcsserver.lib.str_utils import safe_bytes core.svn_config_ensure(None) svn_config = core.svn_config_get_config(None) diff --git a/vcsserver/type_utils.py b/vcsserver/lib/type_utils.py rename from vcsserver/type_utils.py rename to vcsserver/lib/type_utils.py diff --git a/vcsserver/pygrack.py b/vcsserver/pygrack.py --- a/vcsserver/pygrack.py +++ b/vcsserver/pygrack.py @@ -25,9 +25,9 @@ import dulwich.protocol from dulwich.protocol import CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K from webob import Request, Response, exc -from vcsserver.lib.rc_json import json +from vcsserver.lib.ext_json import json from vcsserver import hooks, subprocessio -from vcsserver.str_utils import ascii_bytes +from vcsserver.lib.str_utils import ascii_bytes log = logging.getLogger(__name__) diff --git a/vcsserver/remote/git_remote.py b/vcsserver/remote/git_remote.py --- a/vcsserver/remote/git_remote.py +++ b/vcsserver/remote/git_remote.py @@ -40,7 +40,7 @@ from dulwich.repo import Repo as Dulwich import rhodecode from vcsserver import exceptions, settings, subprocessio -from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str, splitnewlines +from vcsserver.lib.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str, splitnewlines from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope from vcsserver.hgcompat import ( hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler) @@ -551,6 +551,13 @@ class GitRemote(RemoteBase): return _branch(context_uid, repo_id, commit_id) @reraise_safe_exceptions + def delete_branch(self, wire, branch_name): + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + if branch := repo.lookup_branch(branch_name): + branch.delete() + + @reraise_safe_exceptions def commit_branches(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) diff --git a/vcsserver/remote/hg_remote.py b/vcsserver/remote/hg_remote.py --- a/vcsserver/remote/hg_remote.py +++ b/vcsserver/remote/hg_remote.py @@ -31,6 +31,7 @@ from mercurial import unionrepo from mercurial import verify from mercurial import repair from mercurial.error import AmbiguousPrefixLookupError +from mercurial.utils.urlutil import path as hg_path import vcsserver from vcsserver import exceptions @@ -54,7 +55,7 @@ from vcsserver.hgcompat import ( hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler, - makepeer, + make_peer, instance, match, memctx, @@ -77,7 +78,7 @@ from vcsserver.hgcompat import ( patternmatcher, hgext_strip, ) -from vcsserver.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes, convert_to_str +from vcsserver.lib.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes, convert_to_str from vcsserver.vcs_base import RemoteBase from vcsserver.config import hooks as hooks_config from vcsserver.lib.exc_tracking import format_exc @@ -85,7 +86,7 @@ from vcsserver.lib.exc_tracking import f log = logging.getLogger(__name__) -def make_ui_from_config(repo_config): +def 
make_ui_from_config(repo_config, interactive=True): class LoggingUI(ui.ui): @@ -143,6 +144,7 @@ def make_ui_from_config(repo_config): log.debug('Explicitly disable largefiles extension for repo.') baseui.setconfig(b'extensions', b'largefiles', b'!') + baseui.setconfig(b'ui', b'interactive', b'true' if interactive else b'false') return baseui @@ -521,8 +523,10 @@ class HgRemote(RemoteBase): # check for pure hg repos log.debug( "Verifying if URL is a Mercurial repository: %s", obfuscated_uri) - ui = make_ui_from_config(config) - peer_checker = makepeer(ui, safe_bytes(url)) + # Create repo path with custom mercurial path object + ui = make_ui_from_config(config, interactive=False) + repo_path = hg_path(ui=ui, rawloc=safe_bytes(url)) + peer_checker = make_peer(ui, repo_path, False) peer_checker.lookup(b'tip') except Exception as e: log.warning("URL is not a valid Mercurial repository: %s", @@ -800,12 +804,12 @@ class HgRemote(RemoteBase): repo = self._factory.repo(wire) # Disable any prompts for this repo - repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y') + repo.ui.setconfig(b'ui', b'interactive', b'false', b'-y') bookmarks = list(dict(repo._bookmarks).keys()) remote = peer(repo, {}, safe_bytes(url)) # Disable any prompts for this remote - remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y') + remote.ui.setconfig(b'ui', b'interactive', b'false', b'-y') return exchange.push( repo, remote, newbranch=True, bookmarks=bookmarks).cgresult @@ -1017,11 +1021,11 @@ class HgRemote(RemoteBase): def pull(self, wire, url, commit_ids=None): repo = self._factory.repo(wire) # Disable any prompts for this repo - repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y') + repo.ui.setconfig(b'ui', b'interactive', b'false', b'-y') remote = peer(repo, {}, safe_bytes(url)) # Disable any prompts for this remote - remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y') + remote.ui.setconfig(b'ui', b'interactive', b'false', b'-y') if commit_ids: commit_ids = [bin(commit_id) for commit_id in commit_ids] @@ -1038,7 +1042,7 @@ class HgRemote(RemoteBase): # Mercurial internally has a lot of logic that checks ONLY if # option is defined, we just pass those if they are defined then - opts = {} + opts = {"remote_hidden": False} if bookmark: opts['bookmark'] = [safe_bytes(x) for x in bookmark] \ @@ -1100,7 +1104,7 @@ class HgRemote(RemoteBase): # case of merge conflicts or different sub repository sources. By # setting the interactive flag to `False` mercurial doesn't prompt the # used but instead uses a default value. - repo.ui.setconfig(b'ui', b'interactive', False) + repo.ui.setconfig(b'ui', b'interactive', b'false') commands.merge(baseui, repo, rev=safe_bytes(revision)) @reraise_safe_exceptions @@ -1112,7 +1116,7 @@ class HgRemote(RemoteBase): # case of merge conflicts or different sub repository sources. By # setting the interactive flag to `False` mercurial doesn't prompt the # used but instead uses a default value. - repo.ui.setconfig(b'ui', b'interactive', False) + repo.ui.setconfig(b'ui', b'interactive', b'false') ms = hg_merge.mergestate(repo) return [x for x in ms.unresolved()] @@ -1133,7 +1137,7 @@ class HgRemote(RemoteBase): # case of merge conflicts or different sub repository sources. By # setting the interactive flag to `False` mercurial doesn't prompt the # used but instead uses a default value. 
- repo.ui.setconfig(b'ui', b'interactive', False) + repo.ui.setconfig(b'ui', b'interactive', b'false') rebase_kws = dict( keep=not abort, diff --git a/vcsserver/remote/svn_remote.py b/vcsserver/remote/svn_remote.py --- a/vcsserver/remote/svn_remote.py +++ b/vcsserver/remote/svn_remote.py @@ -28,7 +28,6 @@ import urllib.parse import urllib.error import traceback - import svn.client # noqa import svn.core # noqa import svn.delta # noqa @@ -47,10 +46,11 @@ from vcsserver.base import ( BinaryEnvelope, ) from vcsserver.exceptions import NoContentException -from vcsserver.str_utils import safe_str, safe_bytes -from vcsserver.type_utils import assert_bytes from vcsserver.vcs_base import RemoteBase +from vcsserver.lib.str_utils import safe_str, safe_bytes +from vcsserver.lib.type_utils import assert_bytes from vcsserver.lib.svnremoterepo import svnremoterepo +from vcsserver.lib.svn_txn_utils import store_txn_id_data log = logging.getLogger(__name__) @@ -503,6 +503,11 @@ class SvnRemote(RemoteBase): for node in removed: TxnNodeProcessor(node, txn_root).remove() + svn_txn_id = safe_str(svn.fs.svn_fs_txn_name(txn)) + full_repo_path = wire['path'] + txn_id_data = {'svn_txn_id': svn_txn_id, 'rc_internal_commit': True} + + store_txn_id_data(full_repo_path, svn_txn_id, txn_id_data) commit_id = svn.repos.fs_commit_txn(repo, txn) if timestamp: diff --git a/vcsserver/scm_app.py b/vcsserver/scm_app.py --- a/vcsserver/scm_app.py +++ b/vcsserver/scm_app.py @@ -27,7 +27,7 @@ import mercurial.hgweb.hgweb_mod import webob.exc from vcsserver import pygrack, exceptions, settings, git_lfs -from vcsserver.str_utils import ascii_bytes, safe_bytes +from vcsserver.lib.str_utils import ascii_bytes, safe_bytes log = logging.getLogger(__name__) @@ -136,6 +136,9 @@ def make_hg_ui_from_config(repo_config): # make our hgweb quiet so it doesn't print output baseui.setconfig(b'ui', b'quiet', b'true') + # use POST requests with args instead of GET with headers - fixes issues with big repos with lots of branches + baseui.setconfig(b'experimental', b'httppostargs', b'true') + return baseui diff --git a/vcsserver/subprocessio.py b/vcsserver/subprocessio.py --- a/vcsserver/subprocessio.py +++ b/vcsserver/subprocessio.py @@ -28,7 +28,7 @@ import logging import subprocess import threading -from vcsserver.str_utils import safe_str +from vcsserver.lib.str_utils import safe_str log = logging.getLogger(__name__) diff --git a/vcsserver/tests/test_hooks.py b/vcsserver/tests/test_hooks.py --- a/vcsserver/tests/test_hooks.py +++ b/vcsserver/tests/test_hooks.py @@ -26,7 +26,7 @@ import mock import pytest from vcsserver.hooks import HooksHttpClient -from vcsserver.lib.rc_json import json +from vcsserver.lib.ext_json import json from vcsserver import hooks diff --git a/vcsserver/tests/test_install_hooks.py b/vcsserver/tests/test_install_hooks.py --- a/vcsserver/tests/test_install_hooks.py +++ b/vcsserver/tests/test_install_hooks.py @@ -24,7 +24,7 @@ import tempfile from vcsserver import hook_utils from vcsserver.hook_utils import set_permissions_if_needed, HOOKS_DIR_MODE, HOOKS_FILE_MODE from vcsserver.tests.fixture import no_newline_id_generator -from vcsserver.str_utils import safe_bytes +from vcsserver.lib.str_utils import safe_bytes from vcsserver.utils import AttributeDict diff --git a/vcsserver/tests/test_pygrack.py b/vcsserver/tests/test_pygrack.py --- a/vcsserver/tests/test_pygrack.py +++ b/vcsserver/tests/test_pygrack.py @@ -26,7 +26,7 @@ import webtest from vcsserver import hooks, pygrack -from vcsserver.str_utils import ascii_bytes 
+from vcsserver.lib.str_utils import ascii_bytes @pytest.fixture() diff --git a/vcsserver/tests/test_scm_app.py b/vcsserver/tests/test_scm_app.py --- a/vcsserver/tests/test_scm_app.py +++ b/vcsserver/tests/test_scm_app.py @@ -25,7 +25,7 @@ import pytest import webtest from vcsserver import scm_app -from vcsserver.str_utils import ascii_bytes +from vcsserver.lib.str_utils import ascii_bytes def test_hg_does_not_accept_invalid_cmd(tmpdir): diff --git a/vcsserver/tests/test_subprocessio.py b/vcsserver/tests/test_subprocessio.py --- a/vcsserver/tests/test_subprocessio.py +++ b/vcsserver/tests/test_subprocessio.py @@ -22,7 +22,7 @@ import sys import pytest from vcsserver import subprocessio -from vcsserver.str_utils import ascii_bytes +from vcsserver.lib.str_utils import ascii_bytes class FileLikeObj: # pragma: no cover diff --git a/vcsserver/tests/test_svn.py b/vcsserver/tests/test_svn.py --- a/vcsserver/tests/test_svn.py +++ b/vcsserver/tests/test_svn.py @@ -20,7 +20,7 @@ import mock import pytest import sys -from vcsserver.str_utils import ascii_bytes +from vcsserver.lib.str_utils import ascii_bytes class MockPopen: diff --git a/vcsserver/tests/test_utils.py b/vcsserver/tests/test_utils.py --- a/vcsserver/tests/test_utils.py +++ b/vcsserver/tests/test_utils.py @@ -16,7 +16,7 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA import pytest -from vcsserver.str_utils import ascii_bytes, ascii_str, convert_to_str +from vcsserver.lib.str_utils import ascii_bytes, ascii_str, convert_to_str @pytest.mark.parametrize('given, expected', [ diff --git a/vcsserver/tests/test_wsgi_app_caller.py b/vcsserver/tests/test_wsgi_app_caller.py --- a/vcsserver/tests/test_wsgi_app_caller.py +++ b/vcsserver/tests/test_wsgi_app_caller.py @@ -19,7 +19,7 @@ import wsgiref.simple_server import wsgiref.validate from vcsserver import wsgi_app_caller -from vcsserver.str_utils import ascii_bytes, safe_str +from vcsserver.lib.str_utils import ascii_bytes, safe_str @wsgiref.validate.validator diff --git a/vcsserver/tweens/request_wrapper.py b/vcsserver/tweens/request_wrapper.py --- a/vcsserver/tweens/request_wrapper.py +++ b/vcsserver/tweens/request_wrapper.py @@ -21,7 +21,7 @@ import time import msgpack import vcsserver -from vcsserver.str_utils import safe_str +from vcsserver.lib.str_utils import safe_str log = logging.getLogger(__name__) diff --git a/vcsserver/wsgi_app_caller.py b/vcsserver/wsgi_app_caller.py --- a/vcsserver/wsgi_app_caller.py +++ b/vcsserver/wsgi_app_caller.py @@ -23,7 +23,7 @@ import io import logging import os -from vcsserver.str_utils import ascii_bytes +from vcsserver.lib.str_utils import ascii_bytes log = logging.getLogger(__name__)
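
Editor's note: a minimal usage sketch (not part of the patch) of how the new
GenerationLock and archive_iterator above are meant to compose when a worker
generates and then streams an archive. ``d_cache``, ``archive_key``,
``create_archive`` and the redis URL are hypothetical stand-ins, and
``d_cache.fetch()`` assumes the shard-style fetch() shown earlier; a real
caller would also wait or retry when another worker holds the lock.

    from vcsserver.lib.archive_cache.lock import ArchiveCacheGenerationLock, GenerationLock
    from vcsserver.lib.archive_cache.utils import archive_iterator

    def stream_archive(d_cache, archive_key, create_archive):
        # create_archive: hypothetical callable that builds the archive and
        # stores it in d_cache under archive_key
        lock_url = 'redis://redis:6379/0'  # must match the cache locking_url
        try:
            with GenerationLock(f'archive_gen_{archive_key}', lock_url):
                create_archive()
        except ArchiveCacheGenerationLock:
            # another worker is already generating this archive; simplified
            # here to fall straight through to fetch()
            pass
        # fetch() returns a (ShardFileReader, metadata) tuple; stream the
        # archive back in 2MB blocks via archive_iterator
        reader, _metadata = d_cache.fetch(archive_key)
        yield from archive_iterator(reader)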