diff --git a/.bumpversion.cfg b/.bumpversion.cfg
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,6 +1,5 @@
 [bumpversion]
-current_version = 4.27.1
+current_version = 5.0.0
 message = release: Bump version {current_version} to {new_version}
 
 [bumpversion:file:vcsserver/VERSION]
-
diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -1,4 +1,5 @@
 syntax: glob
+
 *.orig
 *.pyc
 *.swp
@@ -19,8 +20,11 @@ syntax: regexp
 ^\.pydevproject$
 ^\.coverage$
 ^\.cache.*$
+^\.venv.*$
+^\.ruff_cache.*$
 ^\.rhodecode$
+
 ^.dev
 ^build/
 ^coverage\.xml$
diff --git a/MANIFEST.in b/MANIFEST.in
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -5,6 +5,9 @@ include *.txt
 # package extras
 include vcsserver/VERSION
 
+# all python files inside vcsserver
+graft vcsserver
+
 # all config files
 recursive-include configs *
 
@@ -14,3 +17,7 @@ recursive-include vcsserver/hook_utils/h
 # skip any tests files
 recursive-exclude vcsserver/tests *
+recursive-exclude docs/_build *
+recursive-exclude * __pycache__
+recursive-exclude * *.py[co]
+recursive-exclude * .*.sw[a-z]
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1,45 +1,139 @@
-.DEFAULT_GOAL := help
+# bash is required for pushd to work
+SHELL = /bin/bash
+
 # set by: PATH_TO_OUTDATED_PACKAGES=/some/path/outdated_packages.py
 OUTDATED_PACKAGES = ${PATH_TO_OUTDATED_PACKAGES}
 
 .PHONY: clean
-clean: ## full clean
+## Cleanup compiled and cached py files
+clean:
 	make test-clean
 	find . -type f \( -iname '*.c' -o -iname '*.pyc' -o -iname '*.so' -o -iname '*.orig' \) -exec rm '{}' ';'
+	find . -type d -name "build" -prune -exec rm -rf '{}' ';'
 
 .PHONY: test
-test: ## run test-clean and tests
+## run test-clean and tests
+test:
 	make test-clean
 	make test-only
 
-.PHONY:test-clean
-test-clean: ## run test-clean and tests
+.PHONY: test-clean
+## Cleanup test artifacts and coverage results
+test-clean:
 	rm -rf coverage.xml htmlcov junit.xml pylint.log result
 	find . -type d -name "__pycache__" -prune -exec rm -rf '{}' ';'
 	find . -type f \( -iname '.coverage.*' \) -exec rm '{}' ';'
 
 .PHONY: test-only
-test-only: ## run tests
+## Run tests only without cleanup
+test-only:
 	PYTHONHASHSEED=random \
 	py.test -x -vv -r xw -p no:sugar \
-	--cov=vcsserver --cov-report=term-missing --cov-report=html \
-	vcsserver
+	--cov-report=term-missing --cov-report=html \
+	--cov=vcsserver vcsserver
 
-.PHONY: generate-pkgs
-generate-pkgs: ## generate new python packages
-	nix-shell pkgs/shell-generate.nix --command "pip2nix generate --licenses"
+.PHONY: ruff-check
+## run a ruff analysis
+ruff-check:
+	ruff check --ignore F401 --ignore I001 --ignore E402 --ignore E501 --ignore F841 --exclude rhodecode/lib/dbmigrate --exclude .eggs --exclude .dev .
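[Editorial aside] A quick way to sanity-check the MANIFEST.in rules above. Assuming `python -m build` (wired to the `build` target further down) has produced an sdist under dist/, this illustrative snippet confirms that the grafted package files are included while tests and caches stay excluded:

    import glob
    import tarfile

    # pick an sdist produced by `python -m build`
    sdist = sorted(glob.glob("dist/*.tar.gz"))[-1]
    with tarfile.open(sdist) as tf:
        names = tf.getnames()

    # grafted content is present ...
    assert any(n.endswith("vcsserver/VERSION") for n in names)
    # ... excluded trees are not
    assert not any("vcsserver/tests/" in n for n in names)
    assert not any("__pycache__" in n for n in names)
    print(f"{sdist}: {len(names)} files, tests and caches excluded")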
.PHONY: pip-packages -pip-packages: ## show outdated packages +## Show outdated packages +pip-packages: python ${OUTDATED_PACKAGES} -.PHONY: help -help: - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-24s\033[0m %s\n", $$1, $$2}' +.PHONY: build +## Build sdist/egg +build: + python -m build + + +.PHONY: dev-sh +## make dev-sh +dev-sh: + sudo echo "deb [trusted=yes] https://apt.fury.io/rsteube/ /" | sudo tee -a "/etc/apt/sources.list.d/fury.list" + sudo apt-get update + sudo apt-get install -y zsh carapace-bin + rm -rf /home/rhodecode/.oh-my-zsh + curl https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh | sh + echo "source <(carapace _carapace)" > /home/rhodecode/.zsrc + PROMPT='%(?.%F{green}√.%F{red}?%?)%f %B%F{240}%1~%f%b %# ' zsh + + +.PHONY: dev-env +## make dev-env based on the requirements files and install develop of packages +## Cleanup: pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y +dev-env: + pip install build virtualenv + pip wheel --wheel-dir=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt + pip install --no-index --find-links=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt + pip install -e . + + +.PHONY: sh +## shortcut for make dev-sh dev-env +sh: + make dev-env + make dev-sh + + +.PHONY: dev-srv +## run develop server instance, docker exec -it $(docker ps -q --filter 'name=dev-enterprise-ce') /bin/bash +dev-srv: + pserve --reload .dev/dev.ini + + +.PHONY: dev-srv-g +## run gunicorn multi process workers +dev-srv-g: + gunicorn --workers=4 --paste .dev/dev.ini --bind=0.0.0.0:10010 --config=.dev/gunicorn_config.py + + +# Default command on calling make +.DEFAULT_GOAL := show-help + +.PHONY: show-help +show-help: + @echo "$$(tput bold)Available rules:$$(tput sgr0)" + @echo + @sed -n -e "/^## / { \ + h; \ + s/.*//; \ + :doc" \ + -e "H; \ + n; \ + s/^## //; \ + t doc" \ + -e "s/:.*//; \ + G; \ + s/\\n## /---/; \ + s/\\n/ /g; \ + p; \ + }" ${MAKEFILE_LIST} \ + | LC_ALL='C' sort --ignore-case \ + | awk -F '---' \ + -v ncol=$$(tput cols) \ + -v indent=19 \ + -v col_on="$$(tput setaf 6)" \ + -v col_off="$$(tput sgr0)" \ + '{ \ + printf "%s%*s%s ", col_on, -indent, $$1, col_off; \ + n = split($$2, words, " "); \ + line_length = ncol - indent; \ + for (i = 1; i <= n; i++) { \ + line_length -= length(words[i]) + 1; \ + if (line_length <= 0) { \ + line_length = ncol - indent - length(words[i]) - 1; \ + printf "\n%*s ", -indent, " "; \ + } \ + printf "%s ", words[i]; \ + } \ + printf "\n"; \ + }' diff --git a/configs/development.ini b/configs/development.ini --- a/configs/development.ini +++ b/configs/development.ini @@ -1,4 +1,4 @@ -## -*- coding: utf-8 -*- +# ; ################################# ; RHODECODE VCSSERVER CONFIGURATION @@ -7,7 +7,7 @@ [server:main] ; COMMON HOST/IP CONFIG host = 0.0.0.0 -port = 9900 +port = 10010 ; ################################################## ; WAITRESS WSGI SERVER - Recommended for Development @@ -31,85 +31,25 @@ asyncore_use_poll = true ; GUNICORN APPLICATION SERVER ; ########################### -; run with gunicorn --log-config rhodecode.ini --paste rhodecode.ini +; run with gunicorn --paste rhodecode.ini ; Module to use, this setting shouldn't be changed #use = egg:gunicorn#main -; Sets the number of process workers. More workers means more concurrent connections -; RhodeCode can handle at the same time. 
Each additional worker also it increases
-; memory usage as each has it's own set of caches.
-; Recommended value is (2 * NUMBER_OF_CPUS + 1), eg 2CPU = 5 workers, but no more
-; than 8-10 unless for really big deployments .e.g 700-1000 users.
-; `instance_id = *` must be set in the [app:main] section below (which is the default)
-; when using more than 1 worker.
-#workers = 2
-
-; Gunicorn access log level
-#loglevel = info
-
-; Process name visible in process list
-#proc_name = rhodecode_vcsserver
-
-; Type of worker class, one of `sync`, `gevent`
-; currently `sync` is the only option allowed.
-#worker_class = sync
-
-; The maximum number of simultaneous clients. Valid only for gevent
-#worker_connections = 10
-
-; Max number of requests that worker will handle before being gracefully restarted.
-; Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
-#max_requests = 1000
-#max_requests_jitter = 30
-
-; Amount of time a worker can spend with handling a request before it
-; gets killed and restarted. By default set to 21600 (6hrs)
-; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
-#timeout = 21600
-
-; The maximum size of HTTP request line in bytes.
-; 0 for unlimited
-#limit_request_line = 0
-
-; Limit the number of HTTP headers fields in a request.
-; By default this value is 100 and can't be larger than 32768.
-#limit_request_fields = 32768
-
-; Limit the allowed size of an HTTP request header field.
-; Value is a positive number or 0.
-; Setting it to 0 will allow unlimited header field sizes.
-#limit_request_field_size = 0
-
-; Timeout for graceful workers restart.
-; After receiving a restart signal, workers have this much time to finish
-; serving requests. Workers still alive after the timeout (starting from the
-; receipt of the restart signal) are force killed.
-; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
-#graceful_timeout = 3600
-
-# The number of seconds to wait for requests on a Keep-Alive connection.
-# Generally set in the 1-5 seconds range.
-#keepalive = 2
-
-; Maximum memory usage that each worker can use before it will receive a
-; graceful restart signal 0 = memory monitoring is disabled
-; Examples: 268435456 (256MB), 536870912 (512MB)
-; 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
-#memory_max_usage = 0
-
-; How often in seconds to check for memory usage for each gunicorn worker
-#memory_usage_check_interval = 60
-
-; Threshold value for which we don't recycle worker if GarbageCollection
-; frees up enough resources. Before each restart we try to run GC on worker
-; in case we get enough free memory after that, restart will not happen.
-#memory_usage_recovery_threshold = 0.8
-
-
 [app:main]
 ; The %(here)s variable will be replaced with the absolute path of parent directory
 ; of this file
+; Each option in the app:main can be overridden by an environment variable
+;
+;To override an option:
+;
+;RC_
+;Everything should be uppercase, . and - should be replaced by _.
+;For example, if you have these configuration settings:
+;rc_cache.repo_object.backend = foo
+;can be overridden by
+;export RC_CACHE_REPO_OBJECT_BACKEND=foo
+
 
 use = egg:rhodecode-vcsserver
 
@@ -133,13 +73,13 @@ debugtoolbar.exclude_prefixes =
 
 ; #################
 ; Pyramid default locales, we need this to be set
-pyramid.default_locale_name = en
+#pyramid.default_locale_name = en
 
 ; default locale used by VCS systems
-locale = en_US.UTF-8
+#locale = en_US.UTF-8
 
 ; path to binaries for vcsserver, it should be set by the installer
-; at installation time, e.g /home/user/vcsserver-1/profile/bin
+; at installation time, e.g /home/user/.rccontrol/vcsserver-1/profile/bin
 ; it can also be a path to nix-build output in case of development
 core.binary_dir = ""
 
@@ -153,21 +93,21 @@ core.binary_dir = ""
 
 ; Default cache dir for caches. Putting this into a ramdisk can boost performance.
 ; eg. /tmpfs/data_ramdisk, however this directory might require large amount of space
-cache_dir = %(here)s/data
+#cache_dir = %(here)s/data
 
 ; ***************************************
 ; `repo_object` cache, default file based
 ; ***************************************
 
 ; `repo_object` cache settings for vcs methods for repositories
-rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
+#rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
 
 ; cache auto-expires after N seconds
 ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
-rc_cache.repo_object.expiration_time = 2592000
+#rc_cache.repo_object.expiration_time = 2592000
 
 ; file cache store path. Defaults to `cache_dir =` value or tempdir if both values are not set
-#rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache.db
+#rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache_repo_object.db
 
 ; ***********************************************************
 ; `repo_object` cache with redis backend
@@ -194,16 +134,29 @@ rc_cache.repo_object.expiration_time = 2
 
 ; auto-renew lock to prevent stale locks, slower but safer. Use only if problems happen
 #rc_cache.repo_object.arguments.lock_auto_renewal = true
 
-; Statsd client config
+; Statsd client config, this is used to send metrics to statsd
+; We recommend setting statsd_exported and scraping the metrics using Prometheus
 #statsd.enabled = false
 #statsd.statsd_host = 0.0.0.0
 #statsd.statsd_port = 8125
 #statsd.statsd_prefix =
 #statsd.statsd_ipv6 = false
 
+; Configure logging automatically at server startup. Set to false
+; to use the custom logging config below.
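[Editorial aside] On the override convention documented at the top of [app:main] above (the `RC_` template line is truncated in the original): the implied ini-key to env-var mapping, sketched as a hypothetical helper. The real lookup lives inside rhodecode-vcsserver's config loading; this only illustrates the naming rule:

    def ini_key_to_env_var(key: str) -> str:
        # uppercase, replace "." and "-" with "_", ensure the RC_ prefix
        name = key.upper().replace(".", "_").replace("-", "_")
        return name if name.startswith("RC_") else "RC_" + name

    # matches the documented example:
    assert ini_key_to_env_var("rc_cache.repo_object.backend") == "RC_CACHE_REPO_OBJECT_BACKEND"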
+; RC_LOGGING_FORMATTER
+; RC_LOGGING_LEVEL
+; env variables can control the logging settings when autoconfigure is enabled
+
+#logging.autoconfigure = true
+
+; specify your own custom logging config file to configure logging
+#logging.logging_conf_file = /path/to/custom_logging.ini
+
 ; #####################
 ; LOGGING CONFIGURATION
 ; #####################
+
 [loggers]
 keys = root, vcsserver
 
@@ -211,7 +164,7 @@ keys = root, vcsserver
 keys = console
 
 [formatters]
-keys = generic
+keys = generic, json
 
 ; #######
 ; LOGGERS
 ; #######
@@ -226,7 +179,6 @@
 handlers =
 qualname = vcsserver
 propagate = 1
 
-
 ; ########
 ; HANDLERS
 ; ########
@@ -235,6 +187,8 @@ propagate = 1
 class = StreamHandler
 args = (sys.stderr, )
 level = DEBUG
+; To enable JSON formatted logs replace 'generic' with 'json'
+; This allows sending properly formatted logs to grafana loki or elasticsearch
 formatter = generic
 
 ; ##########
@@ -244,3 +198,7 @@ formatter = generic
 [formatter_generic]
 format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
 datefmt = %Y-%m-%d %H:%M:%S
+
+[formatter_json]
+format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
+class = vcsserver.lib._vendor.jsonlogger.JsonFormatter
diff --git a/configs/gunicorn_config.py b/configs/gunicorn_config.py
--- a/configs/gunicorn_config.py
+++ b/configs/gunicorn_config.py
@@ -11,6 +11,8 @@ import time
 import threading
 import traceback
 import random
+import socket
+import dataclasses
 
 from gunicorn.glogging import Logger
 
@@ -18,8 +20,14 @@ def get_workers():
     import multiprocessing
     return multiprocessing.cpu_count() * 2 + 1
 
-# GLOBAL
+
+bind = "127.0.0.1:10010"
+
+
+# Error logging output for gunicorn (-) is stdout
 errorlog = '-'
+
+# Access logging output for gunicorn (-) is stdout
 accesslog = '-'
 
@@ -29,12 +37,112 @@ accesslog = '-'
 
 worker_tmp_dir = None
 tmp_upload_dir = None
 
+# use port re-use logic
+#reuse_port = True
+
 # Custom log format
+#access_log_format = (
+#    '%(t)s %(p)s INFO [GNCRN] %(h)-15s rqt:%(L)s %(s)s %(b)-6s "%(m)s:%(U)s %(q)s" usr:%(u)s "%(f)s" "%(a)s"')
+
+# Loki format for easier parsing in Grafana
 access_log_format = (
-    '%(t)s %(p)s INFO [GNCRN] %(h)-15s rqt:%(L)s %(s)s %(b)-6s "%(m)s:%(U)s %(q)s" usr:%(u)s "%(f)s" "%(a)s"')
+    'time="%(t)s" pid=%(p)s level="INFO" type="[GNCRN]" ip="%(h)-15s" rqt="%(L)s" response_code="%(s)s" response_bytes="%(b)-6s" uri="%(m)s:%(U)s %(q)s" user=":%(u)s" user_agent="%(a)s"')
+
+# self-adjust workers based on CPU count, to use the maximum available CPU without over-allocating resources
+# workers = get_workers()
+
+# Gunicorn access log level
+loglevel = 'info'
+
+# Process name visible in a process list
+proc_name = "rhodecode_vcsserver"
+
+# Type of worker class, one of `sync`, `gevent` or `gthread`
+# currently `sync` is the only option allowed for vcsserver; for rhodecode all 3 are allowed
+# gevent:
+#   In this case, the maximum number of concurrent requests is (N workers * X worker_connections)
+#   e.g. workers=3 worker_connections=10 = 3*10, 30 concurrent requests can be handled
+# gthread:
+#   In this case, the maximum number of concurrent requests is (N workers * X threads)
+#   e.g. workers=3 threads=3 = 3*3, 9 concurrent requests can be handled
+worker_class = 'sync'
+
+# Sets the number of process workers. More workers means more concurrent connections
+# RhodeCode can handle at the same time. Each additional worker also increases
+# memory usage as each has its own set of caches.
+# The recommended value is (2 * NUMBER_OF_CPUS + 1), e.g. 2 CPUs = 5 workers, but no more
+# than 8-10 unless for huge deployments, e.g. 700-1000 users.
+# `instance_id = *` must be set in the [app:main] section below (which is the default)
+# when using more than 1 worker.
+workers = 2
+
+# Number of threads for the gthread worker class
+threads = 1
+
+# The maximum number of simultaneous clients. Valid only for gevent
+# In this case, the maximum number of concurrent requests is (N workers * X worker_connections)
+# e.g. workers=3 worker_connections=10 = 3*10, 30 concurrent requests can be handled
+worker_connections = 10
+
+# Max number of requests that worker will handle before being gracefully restarted.
+# Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
+max_requests = 2000
+max_requests_jitter = int(max_requests * 0.2)  # 20% of max_requests
+
+# The maximum number of pending connections.
+# Exceeding this number results in the client getting an error when attempting to connect.
+backlog = 64
 
-# self adjust workers based on CPU count
-# workers = get_workers()
+# The amount of time a worker can spend handling a request before it
+# gets killed and restarted. By default, set to 21600 (6hrs)
+# Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
+timeout = 21600
+
+# The maximum size of HTTP request line in bytes.
+# 0 for unlimited
+limit_request_line = 0
+
+# Limit the number of HTTP headers fields in a request.
+# By default this value is 100 and can't be larger than 32768.
+limit_request_fields = 32768
+
+# Limit the allowed size of an HTTP request header field.
+# Value is a positive number or 0.
+# Setting it to 0 will allow unlimited header field sizes.
+limit_request_field_size = 0
+
+# Timeout for graceful workers restart.
+# After receiving a restart signal, workers have this much time to finish
+# serving requests. Workers still alive after the timeout (starting from the
+# receipt of the restart signal) are force killed.
+# Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
+graceful_timeout = 21600
+
+# The number of seconds to wait for requests on a Keep-Alive connection.
+# Generally set in the 1-5 seconds range.
+keepalive = 2
+
+# Maximum memory usage that each worker can use before it will receive a
+# graceful restart signal 0 = memory monitoring is disabled
+# Examples: 268435456 (256MB), 536870912 (512MB)
+# 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
+# Dynamic formula: 1024 * 1024 * 256 == 256MB
+memory_max_usage = 0
+
+# How often in seconds to check for memory usage for each gunicorn worker
+memory_usage_check_interval = 60
+
+# Threshold value for which we don't recycle worker if GarbageCollection
+# frees up enough resources. Before each restart, we try to run GC on worker
+# in case we get enough free memory after that; restart will not happen.
+memory_usage_recovery_threshold = 0.8
+
+
+@dataclasses.dataclass
+class MemoryCheckConfig:
+    max_usage: int
+    check_interval: int
+    recovery_threshold: float
 
 
 def _get_process_rss(pid=None):
@@ -50,12 +158,9 @@ def _get_process_rss(pid=None):
 
 
 def _get_config(ini_path):
+    import configparser
 
     try:
-        import configparser
-    except ImportError:
-        import ConfigParser as configparser
-
-    try:
         config = configparser.RawConfigParser()
         config.read(ini_path)
         return config
@@ -63,8 +168,40 @@
         return None
 
 
-def _time_with_offset(memory_usage_check_interval):
-    return time.time() - random.randint(0, memory_usage_check_interval/2.0)
+def get_memory_usage_params(config=None):
+    # memory spec defaults
+    _memory_max_usage = memory_max_usage
+    _memory_usage_check_interval = memory_usage_check_interval
+    _memory_usage_recovery_threshold = memory_usage_recovery_threshold
+
+    if config:
+        ini_path = os.path.abspath(config)
+        conf = _get_config(ini_path)
+
+        section = 'server:main'
+        if conf and conf.has_section(section):
+
+            if conf.has_option(section, 'memory_max_usage'):
+                _memory_max_usage = conf.getint(section, 'memory_max_usage')
+
+            if conf.has_option(section, 'memory_usage_check_interval'):
+                _memory_usage_check_interval = conf.getint(section, 'memory_usage_check_interval')
+
+            if conf.has_option(section, 'memory_usage_recovery_threshold'):
+                _memory_usage_recovery_threshold = conf.getfloat(section, 'memory_usage_recovery_threshold')
+
+    _memory_max_usage = int(os.environ.get('RC_GUNICORN_MEMORY_MAX_USAGE', '')
+                            or _memory_max_usage)
+    _memory_usage_check_interval = int(os.environ.get('RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL', '')
+                                       or _memory_usage_check_interval)
+    _memory_usage_recovery_threshold = float(os.environ.get('RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD', '')
+                                             or _memory_usage_recovery_threshold)
+
+    return MemoryCheckConfig(_memory_max_usage, _memory_usage_check_interval, _memory_usage_recovery_threshold)
+
+
+def _time_with_offset(check_interval):
+    # randint needs integer bounds; int() avoids a float argument error
+    return time.time() - random.randint(0, int(check_interval / 2))
 
 
 def pre_fork(server, worker):
@@ -73,39 +210,27 @@
 
 
 def post_fork(server, worker):
-    # memory spec defaults
-    _memory_max_usage = 0
-    _memory_usage_check_interval = 60
-    _memory_usage_recovery_threshold = 0.8
-
-    ini_path = os.path.abspath(server.cfg.paste)
-    conf = _get_config(ini_path)
-
-    section = 'server:main'
-    if conf and conf.has_section(section):
+    memory_conf = get_memory_usage_params()
+    _memory_max_usage = memory_conf.max_usage
+    _memory_usage_check_interval = memory_conf.check_interval
+    _memory_usage_recovery_threshold = memory_conf.recovery_threshold
 
-        if conf.has_option(section, 'memory_max_usage'):
-            _memory_max_usage = conf.getint(section, 'memory_max_usage')
-
-        if conf.has_option(section, 'memory_usage_check_interval'):
-            _memory_usage_check_interval = conf.getint(section, 'memory_usage_check_interval')
-
-        if conf.has_option(section, 'memory_usage_recovery_threshold'):
-            _memory_usage_recovery_threshold = conf.getfloat(section, 'memory_usage_recovery_threshold')
-
-    worker._memory_max_usage = _memory_max_usage
-    worker._memory_usage_check_interval = _memory_usage_check_interval
-    worker._memory_usage_recovery_threshold = _memory_usage_recovery_threshold
+    worker._memory_max_usage = int(os.environ.get('RC_GUNICORN_MEMORY_MAX_USAGE', '')
+                                   or _memory_max_usage)
+    worker._memory_usage_check_interval = int(os.environ.get('RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL', '')
+                                              or _memory_usage_check_interval)
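    # Editorial note (not part of the original change): a non-empty
    # RC_GUNICORN_* env var short-circuits the `or` fallback above/below,
    # so e.g. RC_GUNICORN_MEMORY_MAX_USAGE=1073741824 caps every worker
    # at 1GB regardless of the module defaults or any ini values.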
worker._memory_usage_recovery_threshold = float(os.environ.get('RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD', '') + or _memory_usage_recovery_threshold) # register memory last check time, with some random offset so we don't recycle all # at once worker._last_memory_check_time = _time_with_offset(_memory_usage_check_interval) if _memory_max_usage: - server.log.info("[%-10s] WORKER spawned with max memory set at %s", worker.pid, + server.log.info("pid=[%-10s] WORKER spawned with max memory set at %s", worker.pid, _format_data_size(_memory_max_usage)) else: - server.log.info("[%-10s] WORKER spawned", worker.pid) + server.log.info("pid=[%-10s] WORKER spawned", worker.pid) def pre_exec(server): @@ -115,6 +240,9 @@ def pre_exec(server): def on_starting(server): server_lbl = '{} {}'.format(server.proc_name, server.address) server.log.info("Server %s is starting.", server_lbl) + server.log.info('Config:') + server.log.info(f"\n{server.cfg}") + server.log.info(get_memory_usage_params()) def when_ready(server): @@ -174,42 +302,45 @@ def _format_data_size(size, unit="B", pr def _check_memory_usage(worker): - memory_max_usage = worker._memory_max_usage - if not memory_max_usage: + _memory_max_usage = worker._memory_max_usage + if not _memory_max_usage: return - memory_usage_check_interval = worker._memory_usage_check_interval - memory_usage_recovery_threshold = memory_max_usage * worker._memory_usage_recovery_threshold + _memory_usage_check_interval = worker._memory_usage_check_interval + _memory_usage_recovery_threshold = memory_max_usage * worker._memory_usage_recovery_threshold elapsed = time.time() - worker._last_memory_check_time - if elapsed > memory_usage_check_interval: + if elapsed > _memory_usage_check_interval: mem_usage = _get_process_rss() - if mem_usage and mem_usage > memory_max_usage: + if mem_usage and mem_usage > _memory_max_usage: worker.log.info( "memory usage %s > %s, forcing gc", - _format_data_size(mem_usage), _format_data_size(memory_max_usage)) + _format_data_size(mem_usage), _format_data_size(_memory_max_usage)) # Try to clean it up by forcing a full collection. gc.collect() mem_usage = _get_process_rss() - if mem_usage > memory_usage_recovery_threshold: + if mem_usage > _memory_usage_recovery_threshold: # Didn't clean up enough, we'll have to terminate. 
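            # Editorial worked example (not part of the original change):
            # with memory_max_usage = 536870912 (512MB) and
            # memory_usage_recovery_threshold = 0.8, a worker whose RSS grew
            # past 512MB runs gc.collect(); if RSS still exceeds
            # 512MB * 0.8 = ~410MB afterwards, worker.alive is set False and
            # gunicorn spawns a replacement worker.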
worker.log.warning(
                "memory usage %s > %s after gc, quitting",
-                _format_data_size(mem_usage), _format_data_size(memory_max_usage))
+                _format_data_size(mem_usage), _format_data_size(_memory_max_usage))
 
             # This will cause worker to auto-restart itself
             worker.alive = False
 
     worker._last_memory_check_time = time.time()
 
 
 def worker_int(worker):
-    worker.log.info("[%-10s] worker received INT or QUIT signal", worker.pid)
+    worker.log.info("pid=[%-10s] worker received INT or QUIT signal", worker.pid)
 
-    # get traceback info, on worker crash
-    id2name = dict([(th.ident, th.name) for th in threading.enumerate()])
+    # get traceback info, when a worker crashes
+    def get_thread_id(t_id):
+        id2name = dict([(th.ident, th.name) for th in threading.enumerate()])
+        return id2name.get(t_id, "unknown_thread_id")
+
     code = []
-    for thread_id, stack in sys._current_frames().items():
+    for thread_id, stack in sys._current_frames().items():  # noqa
         code.append(
-            "\n# Thread: %s(%d)" % (id2name.get(thread_id, ""), thread_id))
+            "\n# Thread: %s(%d)" % (get_thread_id(thread_id), thread_id))
         for fname, lineno, name, line in traceback.extract_stack(stack):
             code.append('File: "%s", line %d, in %s' % (fname, lineno, name))
             if line:
@@ -218,15 +349,15 @@ def worker_int(worker):
 
 
 def worker_abort(worker):
-    worker.log.info("[%-10s] worker received SIGABRT signal", worker.pid)
+    worker.log.info("pid=[%-10s] worker received SIGABRT signal", worker.pid)
 
 
 def worker_exit(server, worker):
-    worker.log.info("[%-10s] worker exit", worker.pid)
+    worker.log.info("pid=[%-10s] worker exit", worker.pid)
 
 
 def child_exit(server, worker):
-    worker.log.info("[%-10s] worker child exit", worker.pid)
+    worker.log.info("pid=[%-10s] worker child exit", worker.pid)
 
 
 def pre_request(worker, req):
@@ -245,6 +376,76 @@ def post_request(worker, req, environ, r
     _check_memory_usage(worker)
 
 
+def _filter_proxy(ip):
+    """
+    IP addresses passed in headers can be a comma-separated list of multiple
+    IPs. Those comma separated IPs are appended by the various proxies in the
+    chain of request processing. The left-most entry is the original client;
+    that first IP is the only one we care about.
+
+    :param ip: ip string from headers
+    """
+    if ',' in ip:
+        _ips = ip.split(',')
+        _first_ip = _ips[0].strip()
+        return _first_ip
+    return ip
+
+
+def _filter_port(ip):
+    """
+    Removes a port from an ip; there are 4 main cases to handle here:
+    - ipv4 eg. 127.0.0.1
+    - ipv6 eg. ::1
+    - ipv4+port eg. 127.0.0.1:8080
+    - ipv6+port eg.
[::1]:8080 + + :param ip: + """ + def is_ipv6(ip_addr): + if hasattr(socket, 'inet_pton'): + try: + socket.inet_pton(socket.AF_INET6, ip_addr) + except socket.error: + return False + else: + return False + return True + + if ':' not in ip: # must be ipv4 pure ip + return ip + + if '[' in ip and ']' in ip: # ipv6 with port + return ip.split(']')[0][1:].lower() + + # must be ipv6 or ipv4 with port + if is_ipv6(ip): + return ip + else: + ip, _port = ip.split(':')[:2] # means ipv4+port + return ip + + +def get_ip_addr(environ): + proxy_key = 'HTTP_X_REAL_IP' + proxy_key2 = 'HTTP_X_FORWARDED_FOR' + def_key = 'REMOTE_ADDR' + + def _filters(x): + return _filter_port(_filter_proxy(x)) + + ip = environ.get(proxy_key) + if ip: + return _filters(ip) + + ip = environ.get(proxy_key2) + if ip: + return _filters(ip) + + ip = environ.get(def_key, '0.0.0.0') + return _filters(ip) + + class RhodeCodeLogger(Logger): """ Custom Logger that allows some customization that gunicorn doesn't allow @@ -258,8 +459,62 @@ class RhodeCodeLogger(Logger): def now(self): """ return date in RhodeCode Log format """ now = time.time() - msecs = int((now - long(now)) * 1000) + msecs = int((now - int(now)) * 1000) return time.strftime(self.datefmt, time.localtime(now)) + '.{0:03d}'.format(msecs) + def atoms(self, resp, req, environ, request_time): + """ Gets atoms for log formatting. + """ + status = resp.status + if isinstance(status, str): + status = status.split(None, 1)[0] + atoms = { + 'h': get_ip_addr(environ), + 'l': '-', + 'u': self._get_user(environ) or '-', + 't': self.now(), + 'r': "%s %s %s" % (environ['REQUEST_METHOD'], + environ['RAW_URI'], + environ["SERVER_PROTOCOL"]), + 's': status, + 'm': environ.get('REQUEST_METHOD'), + 'U': environ.get('PATH_INFO'), + 'q': environ.get('QUERY_STRING'), + 'H': environ.get('SERVER_PROTOCOL'), + 'b': getattr(resp, 'sent', None) is not None and str(resp.sent) or '-', + 'B': getattr(resp, 'sent', None), + 'f': environ.get('HTTP_REFERER', '-'), + 'a': environ.get('HTTP_USER_AGENT', '-'), + 'T': request_time.seconds, + 'D': (request_time.seconds * 1000000) + request_time.microseconds, + 'M': (request_time.seconds * 1000) + int(request_time.microseconds/1000), + 'L': "%d.%06d" % (request_time.seconds, request_time.microseconds), + 'p': "<%s>" % os.getpid() + } + + # add request headers + if hasattr(req, 'headers'): + req_headers = req.headers + else: + req_headers = req + + if hasattr(req_headers, "items"): + req_headers = req_headers.items() + + atoms.update({"{%s}i" % k.lower(): v for k, v in req_headers}) + + resp_headers = resp.headers + if hasattr(resp_headers, "items"): + resp_headers = resp_headers.items() + + # add response headers + atoms.update({"{%s}o" % k.lower(): v for k, v in resp_headers}) + + # add environ variables + environ_variables = environ.items() + atoms.update({"{%s}e" % k.lower(): v for k, v in environ_variables}) + + return atoms + logger_class = RhodeCodeLogger diff --git a/configs/logging.ini b/configs/logging.ini new file mode 100644 --- /dev/null +++ b/configs/logging.ini @@ -0,0 +1,53 @@ +; ##################### +; LOGGING CONFIGURATION +; ##################### +; Logging template, used for configure the logging +; some variables here are replaced by RhodeCode to default values + +[loggers] +keys = root, vcsserver + +[handlers] +keys = console + +[formatters] +keys = generic, json + +; ####### +; LOGGERS +; ####### +[logger_root] +level = NOTSET +handlers = console + +[logger_vcsserver] +level = $RC_LOGGING_LEVEL +handlers = +qualname = vcsserver 
+propagate = 1 + +; ######## +; HANDLERS +; ######## + +[handler_console] +class = StreamHandler +args = (sys.stderr, ) +level = $RC_LOGGING_LEVEL +; To enable JSON formatted logs replace generic with json +; This allows sending properly formatted logs to grafana loki or elasticsearch +#formatter = json +#formatter = generic +formatter = $RC_LOGGING_FORMATTER + +; ########## +; FORMATTERS +; ########## + +[formatter_generic] +format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %Y-%m-%d %H:%M:%S + +[formatter_json] +format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s +class = vcsserver.lib._vendor.jsonlogger.JsonFormatter diff --git a/configs/production.ini b/configs/production.ini --- a/configs/production.ini +++ b/configs/production.ini @@ -1,4 +1,4 @@ -## -*- coding: utf-8 -*- +# ; ################################# ; RHODECODE VCSSERVER CONFIGURATION @@ -7,102 +7,42 @@ [server:main] ; COMMON HOST/IP CONFIG host = 127.0.0.1 -port = 9900 +port = 10010 ; ########################### ; GUNICORN APPLICATION SERVER ; ########################### -; run with gunicorn --log-config rhodecode.ini --paste rhodecode.ini +; run with gunicorn --paste rhodecode.ini ; Module to use, this setting shouldn't be changed use = egg:gunicorn#main -; Sets the number of process workers. More workers means more concurrent connections -; RhodeCode can handle at the same time. Each additional worker also it increases -; memory usage as each has it's own set of caches. -; Recommended value is (2 * NUMBER_OF_CPUS + 1), eg 2CPU = 5 workers, but no more -; than 8-10 unless for really big deployments .e.g 700-1000 users. -; `instance_id = *` must be set in the [app:main] section below (which is the default) -; when using more than 1 worker. -workers = 2 - -; Gunicorn access log level -loglevel = info - -; Process name visible in process list -proc_name = rhodecode_vcsserver - -; Type of worker class, one of `sync`, `gevent` -; currently `sync` is the only option allowed. -worker_class = sync - -; The maximum number of simultaneous clients. Valid only for gevent -worker_connections = 10 - -; Max number of requests that worker will handle before being gracefully restarted. -; Prevents memory leaks, jitter adds variability so not all workers are restarted at once. -max_requests = 1000 -max_requests_jitter = 30 - -; Amount of time a worker can spend with handling a request before it -; gets killed and restarted. By default set to 21600 (6hrs) -; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h) -timeout = 21600 - -; The maximum size of HTTP request line in bytes. -; 0 for unlimited -limit_request_line = 0 - -; Limit the number of HTTP headers fields in a request. -; By default this value is 100 and can't be larger than 32768. -limit_request_fields = 32768 - -; Limit the allowed size of an HTTP request header field. -; Value is a positive number or 0. -; Setting it to 0 will allow unlimited header field sizes. -limit_request_field_size = 0 - -; Timeout for graceful workers restart. -; After receiving a restart signal, workers have this much time to finish -; serving requests. Workers still alive after the timeout (starting from the -; receipt of the restart signal) are force killed. -; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h) -graceful_timeout = 3600 - -# The number of seconds to wait for requests on a Keep-Alive connection. -# Generally set in the 1-5 seconds range. 
-keepalive = 2
-
-; Maximum memory usage that each worker can use before it will receive a
-; graceful restart signal 0 = memory monitoring is disabled
-; Examples: 268435456 (256MB), 536870912 (512MB)
-; 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
-memory_max_usage = 0
-
-; How often in seconds to check for memory usage for each gunicorn worker
-memory_usage_check_interval = 60
-
-; Threshold value for which we don't recycle worker if GarbageCollection
-; frees up enough resources. Before each restart we try to run GC on worker
-; in case we get enough free memory after that, restart will not happen.
-memory_usage_recovery_threshold = 0.8
-
-
 [app:main]
 ; The %(here)s variable will be replaced with the absolute path of parent directory
 ; of this file
+; Each option in the app:main can be overridden by an environment variable
+;
+;To override an option:
+;
+;RC_
+;Everything should be uppercase, . and - should be replaced by _.
+;For example, if you have these configuration settings:
+;rc_cache.repo_object.backend = foo
+;can be overridden by
+;export RC_CACHE_REPO_OBJECT_BACKEND=foo
+
 
 use = egg:rhodecode-vcsserver
 
 ; Pyramid default locales, we need this to be set
-pyramid.default_locale_name = en
+#pyramid.default_locale_name = en
 
 ; default locale used by VCS systems
-locale = en_US.UTF-8
+#locale = en_US.UTF-8
 
 ; path to binaries for vcsserver, it should be set by the installer
-; at installation time, e.g /home/user/vcsserver-1/profile/bin
+; at installation time, e.g /home/user/.rccontrol/vcsserver-1/profile/bin
 ; it can also be a path to nix-build output in case of development
 core.binary_dir = ""
 
@@ -116,21 +56,21 @@ core.binary_dir = ""
 
 ; Default cache dir for caches. Putting this into a ramdisk can boost performance.
 ; eg. /tmpfs/data_ramdisk, however this directory might require large amount of space
-cache_dir = %(here)s/data
+#cache_dir = %(here)s/data
 
 ; ***************************************
 ; `repo_object` cache, default file based
 ; ***************************************
 
 ; `repo_object` cache settings for vcs methods for repositories
-rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
+#rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
 
 ; cache auto-expires after N seconds
 ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
-rc_cache.repo_object.expiration_time = 2592000
+#rc_cache.repo_object.expiration_time = 2592000
 
 ; file cache store path. Defaults to `cache_dir =` value or tempdir if both values are not set
-#rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache.db
+#rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache_repo_object.db
 
 ; ***********************************************************
 ; `repo_object` cache with redis backend
@@ -157,16 +97,29 @@ rc_cache.repo_object.expiration_time = 2
 
 ; auto-renew lock to prevent stale locks, slower but safer. Use only if problems happen
 #rc_cache.repo_object.arguments.lock_auto_renewal = true
 
-; Statsd client config
+; Statsd client config, this is used to send metrics to statsd
+; We recommend setting statsd_exported and scraping the metrics using Prometheus
 #statsd.enabled = false
 #statsd.statsd_host = 0.0.0.0
 #statsd.statsd_port = 8125
 #statsd.statsd_prefix =
 #statsd.statsd_ipv6 = false
 
+; Configure logging automatically at server startup. Set to false
+; to use the custom logging config below.
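[Editorial aside] On the `json` formatter referenced in the logging sections of these configs: `vcsserver.lib._vendor.jsonlogger.JsonFormatter` appears to be a vendored python-json-logger (an assumption, not confirmed by this diff). Under that assumption, a minimal standalone equivalent using the upstream package would be:

    import logging
    import sys

    # stand-in for vcsserver.lib._vendor.jsonlogger (assumed vendored copy)
    from pythonjsonlogger import jsonlogger

    handler = logging.StreamHandler(sys.stderr)
    handler.setFormatter(jsonlogger.JsonFormatter(
        "%(levelname)s %(name)s %(message)s", timestamp=True))
    log = logging.getLogger("vcsserver")
    log.addHandler(handler)
    log.setLevel(logging.INFO)
    # emits one JSON object per line, which Loki/Elasticsearch can ingest
    log.info("server ready")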
+; RC_LOGGING_FORMATTER
+; RC_LOGGING_LEVEL
+; env variables can control the logging settings when autoconfigure is enabled
+
+#logging.autoconfigure = true
+
+; specify your own custom logging config file to configure logging
+#logging.logging_conf_file = /path/to/custom_logging.ini
+
 ; #####################
 ; LOGGING CONFIGURATION
 ; #####################
+
 [loggers]
 keys = root, vcsserver
 
@@ -174,7 +127,7 @@ keys = root, vcsserver
 keys = console
 
 [formatters]
-keys = generic
+keys = generic, json
 
 ; #######
 ; LOGGERS
 ; #######
@@ -184,12 +137,11 @@
 level = NOTSET
 handlers = console
 
 [logger_vcsserver]
-level = DEBUG
+level = INFO
 handlers =
 qualname = vcsserver
 propagate = 1
 
-
 ; ########
 ; HANDLERS
 ; ########
@@ -198,6 +150,8 @@ propagate = 1
 class = StreamHandler
 args = (sys.stderr, )
 level = INFO
+; To enable JSON formatted logs replace 'generic' with 'json'
+; This allows sending properly formatted logs to grafana loki or elasticsearch
 formatter = generic
 
 ; ##########
@@ -207,3 +161,7 @@ formatter = generic
 [formatter_generic]
 format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
 datefmt = %Y-%m-%d %H:%M:%S
+
+[formatter_json]
+format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
+class = vcsserver.lib._vendor.jsonlogger.JsonFormatter
diff --git a/vcsserver/tests/conftest.py b/conftest.py
rename from vcsserver/tests/conftest.py
rename to conftest.py
--- a/vcsserver/tests/conftest.py
+++ b/conftest.py
@@ -1,5 +1,5 @@
 # RhodeCode VCSServer provides access to different vcs backends via network.
-# Copyright (C) 2014-2020 RhodeCode GmbH
+# Copyright (C) 2014-2023 RhodeCode GmbH
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -16,13 +16,12 @@
 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
 import socket
-
 import pytest
 
 
 def pytest_addoption(parser):
     parser.addoption(
-        '--repeat', type=int, default=100,
+        '--perf-repeat-vcs', type=int, default=100,
         help="Number of repetitions in performance tests.")
 
 
@@ -34,13 +33,13 @@ def repeat(request):
     Slower calls may divide it by 10 or 100. It is chosen in a way so that the
     tests are not too slow in our default test suite.
     """
-    return request.config.getoption('--repeat')
+    return request.config.getoption('--perf-repeat-vcs')
 
 
 @pytest.fixture(scope='session')
 def vcsserver_port(request):
     port = get_available_port()
-    print('Using vcsserver port %s' % (port, ))
+    print(f'Using vcsserver port {port}')
     return port
diff --git a/default.nix b/default.nix
deleted file mode 100644
--- a/default.nix
+++ /dev/null
@@ -1,197 +0,0 @@
-# Nix environment for the community edition
-#
-# This shall be as lean as possible, just producing the rhodecode-vcsserver
-# derivation. For advanced tweaks to pimp up the development environment we use
-# "shell.nix" so that it does not have to clutter this file.
-
-args@
-{ system ? builtins.currentSystem
-, pythonPackages ? "python27Packages"
-, pythonExternalOverrides ? self: super: {}
-, doCheck ? false
-, ...
-}:
-
-let
-  pkgs_ = args.pkgs or (import <nixpkgs> { inherit system; });
-in
-
-let
-  pkgs = import <nixpkgs> {
-    overlays = [
-      (import ./pkgs/overlays.nix)
-    ];
-    inherit
-      (pkgs_)
-      system;
-  };
-
-  # Works with the new python-packages, still can fallback to the old
-  # variant.
-  basePythonPackagesUnfix = basePythonPackages.__unfix__ or (
-    self: basePythonPackages.override (a: { inherit self; }));
-
-  # Evaluates to the last segment of a file system path.
- basename = path: with pkgs.lib; last (splitString "/" path); - - # source code filter used as arugment to builtins.filterSource. - src-filter = path: type: with pkgs.lib; - let - ext = last (splitString "." path); - in - !builtins.elem (basename path) [ - ".git" ".hg" "__pycache__" ".eggs" ".idea" ".dev" - "node_modules" "node_binaries" - "build" "data" "result" "tmp"] && - !builtins.elem ext ["egg-info" "pyc"] && - # TODO: johbo: This check is wrong, since "path" contains an absolute path, - # it would still be good to restore it since we want to ignore "result-*". - !hasPrefix "result" path; - - sources = - let - inherit - (pkgs.lib) - all - isString - attrValues; - sourcesConfig = pkgs.config.rc.sources or {}; - in - # Ensure that sources are configured as strings. Using a path - # would result in a copy into the nix store. - assert all isString (attrValues sourcesConfig); - sourcesConfig; - - version = builtins.readFile "${rhodecode-vcsserver-src}/vcsserver/VERSION"; - rhodecode-vcsserver-src = builtins.filterSource src-filter ./.; - - pythonLocalOverrides = self: super: { - rhodecode-vcsserver = - let - releaseName = "RhodeCodeVCSServer-${version}"; - in super.rhodecode-vcsserver.override (attrs: { - inherit - doCheck - version; - - name = "rhodecode-vcsserver-${version}"; - releaseName = releaseName; - src = rhodecode-vcsserver-src; - dontStrip = true; # prevent strip, we don't need it. - - # expose following attributed outside - passthru = { - pythonPackages = self; - }; - - propagatedBuildInputs = - attrs.propagatedBuildInputs or [] ++ [ - pkgs.git - pkgs.subversion - ]; - - # set some default locale env variables - LC_ALL = "en_US.UTF-8"; - LOCALE_ARCHIVE = - if pkgs.stdenv.isLinux - then "${pkgs.glibcLocales}/lib/locale/locale-archive" - else ""; - - # Add bin directory to path so that tests can find 'vcsserver'. - preCheck = '' - export PATH="$out/bin:$PATH" - ''; - - # custom check phase for testing - checkPhase = '' - runHook preCheck - PYTHONHASHSEED=random py.test -vv -p no:sugar -r xw --cov-config=.coveragerc --cov=vcsserver --cov-report=term-missing vcsserver - runHook postCheck - ''; - - postCheck = '' - echo "Cleanup of vcsserver/tests" - rm -rf $out/lib/${self.python.libPrefix}/site-packages/vcsserver/tests - ''; - - postInstall = '' - echo "Writing vcsserver meta information for rccontrol to nix-support/rccontrol" - mkdir -p $out/nix-support/rccontrol - cp -v vcsserver/VERSION $out/nix-support/rccontrol/version - echo "DONE: vcsserver meta information for rccontrol written" - - mkdir -p $out/etc - cp configs/production.ini $out/etc - echo "DONE: saved vcsserver production.ini into $out/etc" - - # python based programs need to be wrapped - mkdir -p $out/bin - ln -s ${self.python}/bin/python $out/bin/ - ln -s ${self.gunicorn}/bin/gunicorn $out/bin/ - ln -s ${self.pyramid}/bin/prequest $out/bin/ - ln -s ${self.pyramid}/bin/pserve $out/bin/ - - # Symlink version control utilities - # We ensure that always the correct version is available as a symlink. - # So that users calling them via the profile path will always use the - # correct version. Wrapping is required so those can "import" - # vcsserver python hooks. 
- - ln -s ${pkgs.git}/bin/git $out/bin - ln -s ${self.mercurial}/bin/hg $out/bin - ln -s ${pkgs.subversion}/bin/svn* $out/bin - - echo "DONE: created symlinks into $out/bin" - DEPS="$out/bin/*" - - # wrap only dependency scripts, they require to have full PYTHONPATH set - # to be able to import all packages - for file in $DEPS; - do - wrapProgram $file \ - --prefix PATH : $PATH \ - --prefix PYTHONPATH : $PYTHONPATH \ - --set PYTHONHASHSEED random - done - - echo "DONE: vcsserver binary wrapping" - - ''; - - }); - }; - - basePythonPackages = with builtins; - if isAttrs pythonPackages then - pythonPackages - else - getAttr pythonPackages pkgs; - - pythonGeneratedPackages = import ./pkgs/python-packages.nix { - inherit - pkgs; - inherit - (pkgs) - fetchurl - fetchgit - fetchhg; - }; - - pythonVCSServerOverrides = import ./pkgs/python-packages-overrides.nix { - inherit - pkgs - basePythonPackages; - }; - - - # Apply all overrides and fix the final package set - myPythonPackagesUnfix = with pkgs.lib; - (extends pythonExternalOverrides - (extends pythonLocalOverrides - (extends pythonVCSServerOverrides - (extends pythonGeneratedPackages - basePythonPackagesUnfix)))); - - myPythonPackages = (pkgs.lib.fix myPythonPackagesUnfix); - -in myPythonPackages.rhodecode-vcsserver diff --git a/pip2nix.ini b/pip2nix.ini deleted file mode 100644 --- a/pip2nix.ini +++ /dev/null @@ -1,3 +0,0 @@ -[pip2nix] -requirements = ., -r ./requirements.txt, -r ./requirements_pinned.txt -output = ./pkgs/python-packages.nix diff --git a/pkgs/README.rst b/pkgs/README.rst deleted file mode 100644 --- a/pkgs/README.rst +++ /dev/null @@ -1,28 +0,0 @@ - -============================== - Generate the Nix expressions -============================== - -Details can be found in the repository of `RhodeCode Enterprise CE`_ inside of -the file `docs/contributing/dependencies.rst`. - -Start the environment as follows: - -.. code:: shell - - nix-shell pkgs/shell-generate.nix - - -Python dependencies -=================== - -.. code:: shell - - pip2nix generate --licenses - # or faster - nix-shell pkgs/shell-generate.nix --command "pip2nix generate --licenses" - - -.. Links - -.. 
_RhodeCode Enterprise CE: https://code.rhodecode.com/rhodecode-enterprise-ce diff --git a/pkgs/nix-common/pip2nix.nix b/pkgs/nix-common/pip2nix.nix deleted file mode 100755 --- a/pkgs/nix-common/pip2nix.nix +++ /dev/null @@ -1,17 +0,0 @@ -{ pkgs -, pythonPackages -}: - -rec { - pip2nix-src = pkgs.fetchzip { - url = https://github.com/johbo/pip2nix/archive/51e6fdae34d0e8ded9efeef7a8601730249687a6.tar.gz; - sha256 = "02a4jjgi7lsvf8mhrxsd56s9a3yg20081rl9bgc2m84w60v2gbz2"; - }; - - pip2nix = import pip2nix-src { - inherit - pkgs - pythonPackages; - }; - -} diff --git a/pkgs/overlays.nix b/pkgs/overlays.nix deleted file mode 100755 --- a/pkgs/overlays.nix +++ /dev/null @@ -1,83 +0,0 @@ -self: super: { - - # bump GIT version - git = - let - gitWithoutPerl = super.git.override { - #perlSupport = false; - }; - in - super.lib.overrideDerivation gitWithoutPerl (oldAttrs: { - - name = "git-2.30.0"; - src = self.fetchurl { - url = "https://www.kernel.org/pub/software/scm/git/git-2.30.0.tar.xz"; - sha256 = "06ad6dylgla34k9am7d5z8y3rryc8ln3ibq5z0d74rcm20hm0wsm"; - }; - - # patches come from: https://github.com/NixOS/nixpkgs/tree/master/pkgs/applications/version-management/git-and-tools/git - patches = [ - ./patches/git/docbook2texi.patch - ./patches/git/git-sh-i18n.patch - ./patches/git/ssh-path.patch - ./patches/git/git-send-email-honor-PATH.patch - ./patches/git/installCheck-path.patch - ]; - - #preInstallCheck = oldAttrs.preInstallCheck + '' - # disable_test t4129-apply-samemode - # disable_test t5324-split-commit-graph - #''; - - }); - - libgit2rc = super.lib.overrideDerivation super.libgit2 (oldAttrs: { - name = "libgit2-0.28.2"; - version = "0.28.2"; - - src = self.fetchFromGitHub { - owner = "libgit2"; - repo = "libgit2"; - rev = "v0.28.2"; - sha256 = "0cm8fvs05rj0baigs2133q5a0sm3pa234y8h6hmwhl2bz9xq3k4b"; - }; - - cmakeFlags = [ "-DTHREADSAFE=ON" "-DUSE_HTTPS=no"]; - - buildInputs = [ - super.zlib - super.libssh2 - super.openssl - super.curl - ]; - - - }); - - # Override subversion derivation to - # - activate python bindings - subversion = - let - subversionWithPython = super.subversion.override { - httpSupport = true; - pythonBindings = true; - python = self.python27Packages.python; - }; - in - super.lib.overrideDerivation subversionWithPython (oldAttrs: { - name = "subversion-1.13.0"; - src = self.fetchurl { - url = "https://archive.apache.org/dist/subversion/subversion-1.13.0.tar.gz"; - sha256 = "0cb9p7f5hg0l4k32hz8vmvy2r45igchq5sh4m366za5q0c649bfs"; - }; - - ## use internal lz4/utf8proc because it is stable and shipped with SVN - configureFlags = oldAttrs.configureFlags ++ [ - " --with-lz4=internal" - " --with-utf8proc=internal" - ]; - - }); - - -} diff --git a/pkgs/patches/configparser/pyproject.patch b/pkgs/patches/configparser/pyproject.patch deleted file mode 100644 --- a/pkgs/patches/configparser/pyproject.patch +++ /dev/null @@ -1,10 +0,0 @@ -diff -rup configparser-4.0.2-orig/pyproject.toml configparser-4.0.2/pyproject.toml ---- configparser-4.0.2-orig/pyproject.toml 2021-03-22 21:28:11.000000000 +0100 -+++ configparser-4.0.2/pyproject.toml 2021-03-22 21:28:11.000000000 +0100 -@@ -1,5 +1,5 @@ - [build-system] --requires = ["setuptools>=40.7", "wheel", "setuptools_scm>=1.15"] -+requires = ["setuptools<=42.0", "wheel", "setuptools_scm<6.0.0"] - build-backend = "setuptools.build_meta" - - [tool.black] diff --git a/pkgs/patches/dulwich/handle-dir-refs.patch b/pkgs/patches/dulwich/handle-dir-refs.patch deleted file mode 100644 --- a/pkgs/patches/dulwich/handle-dir-refs.patch +++ /dev/null 
@@ -1,15 +0,0 @@ -This patch allows handling directories inside the refs/heads. This was added in dulwich -0.19.X series - -diff -rup dulwich-0.13.0-orig/dulwich/refs.py dulwich-0.13.0/dulwich/refs.py ---- dulwich-0.13.0-orig/dulwich/refs.py 2018-10-09 09:42:38.182597268 +0200 -+++ dulwich-0.13.0/dulwich/refs.py 2018-10-09 09:43:39.057145566 +0200 -@@ -509,7 +509,7 @@ class DiskRefsContainer(RefsContainer): - # Read only the first 40 bytes - return header + f.read(40 - len(SYMREF)) - except IOError as e: -- if e.errno == errno.ENOENT: -+ if e.errno in (errno.ENOENT, errno.EISDIR): - return None - raise - diff --git a/pkgs/patches/git/docbook2texi.patch b/pkgs/patches/git/docbook2texi.patch deleted file mode 100644 --- a/pkgs/patches/git/docbook2texi.patch +++ /dev/null @@ -1,38 +0,0 @@ -This patch does two things: (1) use the right name for `docbook2texi', -and (2) make sure `gitman.info' isn't produced since it's broken (duplicate -node names). - -diff --git a/Documentation/Makefile b/Documentation/Makefile -index 26a2342bea..ceccd67ebb 100644 ---- a/Documentation/Makefile -+++ b/Documentation/Makefile -@@ -132,7 +132,7 @@ HTML_REPO = ../../git-htmldocs - - MAKEINFO = makeinfo - INSTALL_INFO = install-info --DOCBOOK2X_TEXI = docbook2x-texi -+DOCBOOK2X_TEXI = docbook2texi - DBLATEX = dblatex - ASCIIDOC_DBLATEX_DIR = /etc/asciidoc/dblatex - DBLATEX_COMMON = -p $(ASCIIDOC_DBLATEX_DIR)/asciidoc-dblatex.xsl -s $(ASCIIDOC_DBLATEX_DIR)/asciidoc-dblatex.sty -@@ -250,7 +250,7 @@ man1: $(DOC_MAN1) - man5: $(DOC_MAN5) - man7: $(DOC_MAN7) - --info: git.info gitman.info -+info: git.info - - pdf: user-manual.pdf - -@@ -266,10 +266,9 @@ install-man: man - - install-info: info - $(INSTALL) -d -m 755 $(DESTDIR)$(infodir) -- $(INSTALL) -m 644 git.info gitman.info $(DESTDIR)$(infodir) -+ $(INSTALL) -m 644 git.info $(DESTDIR)$(infodir) - if test -r $(DESTDIR)$(infodir)/dir; then \ - $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) git.info ;\ -- $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) gitman.info ;\ - else \ - echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \ - fi diff --git a/pkgs/patches/git/git-send-email-honor-PATH.patch b/pkgs/patches/git/git-send-email-honor-PATH.patch deleted file mode 100644 --- a/pkgs/patches/git/git-send-email-honor-PATH.patch +++ /dev/null @@ -1,28 +0,0 @@ -diff --git a/Documentation/git-send-email.txt b/Documentation/git-send-email.txt -index 1afe9fc858..05dd7c3a90 100644 ---- a/Documentation/git-send-email.txt -+++ b/Documentation/git-send-email.txt -@@ -215,8 +215,7 @@ a password is obtained using 'git-credential'. - specify a full pathname of a sendmail-like program instead; - the program must support the `-i` option. Default value can - be specified by the `sendemail.smtpServer` configuration -- option; the built-in default is to search for `sendmail` in -- `/usr/sbin`, `/usr/lib` and $PATH if such program is -+ option; the built-in default is to search in $PATH if such program is - available, falling back to `localhost` otherwise. 
- - --smtp-server-port=:: -diff --git a/git-send-email.perl b/git-send-email.perl -index 8eb63b5a2f..74a61d8213 100755 ---- a/git-send-email.perl -+++ b/git-send-email.perl -@@ -956,8 +956,7 @@ sub expand_one_alias { - } - - if (!defined $smtp_server) { -- my @sendmail_paths = qw( /usr/sbin/sendmail /usr/lib/sendmail ); -- push @sendmail_paths, map {"$_/sendmail"} split /:/, $ENV{PATH}; -+ my @sendmail_paths = map {"$_/sendmail"} split /:/, $ENV{PATH}; - foreach (@sendmail_paths) { - if (-x $_) { - $smtp_server = $_; diff --git a/pkgs/patches/git/git-sh-i18n.patch b/pkgs/patches/git/git-sh-i18n.patch deleted file mode 100644 --- a/pkgs/patches/git/git-sh-i18n.patch +++ /dev/null @@ -1,23 +0,0 @@ -diff --git a/git-sh-i18n.sh b/git-sh-i18n.sh -index e1d917fd27..e90f8e1414 100644 ---- a/git-sh-i18n.sh -+++ b/git-sh-i18n.sh -@@ -26,7 +26,7 @@ then - elif test -n "$GIT_INTERNAL_GETTEXT_TEST_FALLBACKS" - then - : no probing necessary --elif type gettext.sh >/dev/null 2>&1 -+elif type @gettext@/bin/gettext.sh >/dev/null 2>&1 - then - # GNU libintl's gettext.sh - GIT_INTERNAL_GETTEXT_SH_SCHEME=gnu -@@ -43,7 +43,8 @@ export GIT_INTERNAL_GETTEXT_SH_SCHEME - case "$GIT_INTERNAL_GETTEXT_SH_SCHEME" in - gnu) - # Use libintl's gettext.sh, or fall back to English if we can't. -- . gettext.sh -+ . @gettext@/bin/gettext.sh -+ export PATH=@gettext@/bin:$PATH - ;; - gettext_without_eval_gettext) - # Solaris has a gettext(1) but no eval_gettext(1) diff --git a/pkgs/patches/git/installCheck-path.patch b/pkgs/patches/git/installCheck-path.patch deleted file mode 100644 --- a/pkgs/patches/git/installCheck-path.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/t/test-lib.sh b/t/test-lib.sh -index 8665b0a9b6..8bb892b1af 100644 ---- a/t/test-lib.sh -+++ b/t/test-lib.sh -@@ -1227,7 +1227,7 @@ elif test -n "$GIT_TEST_INSTALLED" - then - GIT_EXEC_PATH=$($GIT_TEST_INSTALLED/git --exec-path) || - error "Cannot run git from $GIT_TEST_INSTALLED." 
-- PATH=$GIT_TEST_INSTALLED:$GIT_BUILD_DIR/t/helper:$PATH -+ PATH=$GIT_TEST_INSTALLED:$GIT_BUILD_DIR/t/helper:$GIT_BUILD_DIR:$PATH - GIT_EXEC_PATH=${GIT_TEST_EXEC_PATH:-$GIT_EXEC_PATH} - else # normal case, use ../bin-wrappers only unless $with_dashes: - if test -n "$no_bin_wrappers" diff --git a/pkgs/patches/git/ssh-path.patch b/pkgs/patches/git/ssh-path.patch deleted file mode 100644 --- a/pkgs/patches/git/ssh-path.patch +++ /dev/null @@ -1,26 +0,0 @@ -diff --git a/connect.c b/connect.c -index 4813f005ab..b3f12f3268 100644 ---- a/connect.c -+++ b/connect.c -@@ -1183,7 +1183,7 @@ static void fill_ssh_args(struct child_process *conn, const char *ssh_host, - - ssh = getenv("GIT_SSH"); - if (!ssh) -- ssh = "ssh"; -+ ssh = "@ssh@"; - variant = determine_ssh_variant(ssh, 0); - } - -diff --git a/git-gui/lib/remote_add.tcl b/git-gui/lib/remote_add.tcl -index 480a6b30d0..7817204241 100644 ---- a/git-gui/lib/remote_add.tcl -+++ b/git-gui/lib/remote_add.tcl -@@ -139,7 +139,7 @@ method _add {} { - # Parse the location - if { [regexp {(?:git\+)?ssh://([^/]+)(/.+)} $location xx host path] - || [regexp {([^:][^:]+):(.+)} $location xx host path]} { -- set ssh ssh -+ set ssh @ssh@ - if {[info exists env(GIT_SSH)]} { - set ssh $env(GIT_SSH) - } diff --git a/pkgs/patches/importlib_metadata/pyproject.patch b/pkgs/patches/importlib_metadata/pyproject.patch deleted file mode 100644 --- a/pkgs/patches/importlib_metadata/pyproject.patch +++ /dev/null @@ -1,7 +0,0 @@ -diff -rup importlib-metadata-1.6.0-orig/yproject.toml importlib-metadata-1.6.0/pyproject.toml ---- importlib-metadata-1.6.0-orig/yproject.toml 2021-03-22 22:10:33.000000000 +0100 -+++ importlib-metadata-1.6.0/pyproject.toml 2021-03-22 22:11:09.000000000 +0100 -@@ -1,3 +1,3 @@ - [build-system] --requires = ["setuptools>=30.3", "wheel", "setuptools_scm"] -+requires = ["setuptools<42.0", "wheel", "setuptools_scm<6.0.0"] diff --git a/pkgs/patches/pytest/setuptools.patch b/pkgs/patches/pytest/setuptools.patch deleted file mode 100644 --- a/pkgs/patches/pytest/setuptools.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff -rup pytest-4.6.5-orig/setup.py pytest-4.6.5/setup.py ---- pytest-4.6.5-orig/setup.py 2018-04-10 10:23:04.000000000 +0200 -+++ pytest-4.6.5/setup.py 2018-04-10 10:23:34.000000000 +0200 -@@ -24,7 +24,7 @@ INSTALL_REQUIRES = [ - def main(): - setup( - use_scm_version={"write_to": "src/_pytest/_version.py"}, -- setup_requires=["setuptools-scm", "setuptools>=40.0"], -+ setup_requires=["setuptools-scm<6.0.0", "setuptools<=42.0"], - package_dir={"": "src"}, - # fmt: off - extras_require={ \ No newline at end of file diff --git a/pkgs/patches/zipp/pyproject.patch b/pkgs/patches/zipp/pyproject.patch deleted file mode 100644 --- a/pkgs/patches/zipp/pyproject.patch +++ /dev/null @@ -1,10 +0,0 @@ -diff -rup zip-1.2.0-orig/pyproject.toml zip-1.2.0/pyproject.toml ---- zip-1.2.0-orig/pyproject.toml 2021-03-23 10:55:37.000000000 +0100 -+++ zip-1.2.0/pyproject.toml 2021-03-23 10:56:05.000000000 +0100 -@@ -1,5 +1,5 @@ - [build-system] --requires = ["setuptools>=34.4", "wheel", "setuptools_scm>=1.15"] -+requires = ["setuptools<42.0", "wheel", "setuptools_scm<6.0.0"] - build-backend = "setuptools.build_meta" - - [tool.black] diff --git a/pkgs/python-packages-overrides.nix b/pkgs/python-packages-overrides.nix deleted file mode 100644 --- a/pkgs/python-packages-overrides.nix +++ /dev/null @@ -1,126 +0,0 @@ -# Overrides for the generated python-packages.nix -# -# This function is intended to be used as an extension to the generated file -# python-packages.nix. 
The main objective is to add needed dependencies of C -# libraries and tweak the build instructions where needed. - -{ pkgs -, basePythonPackages -}: - -let - sed = "sed -i"; - -in - -self: super: { - - "cffi" = super."cffi".override (attrs: { - buildInputs = [ - pkgs.libffi - ]; - }); - - "ipython" = super."ipython".override (attrs: { - propagatedBuildInputs = attrs.propagatedBuildInputs ++ [ - self."setuptools-scm" - ]; - }); - - "gevent" = super."gevent".override (attrs: { - propagatedBuildInputs = attrs.propagatedBuildInputs ++ [ - # NOTE: (marcink) odd requirements from gevent aren not set properly, - # thus we need to inject psutil manually - self."psutil" - ]; - }); - - "hgsubversion" = super."hgsubversion".override (attrs: { - propagatedBuildInputs = attrs.propagatedBuildInputs ++ [ - pkgs.sqlite - #basePythonPackages.sqlite3 - self.mercurial - ]; - }); - - "subvertpy" = super."subvertpy".override (attrs: { - SVN_PREFIX = "${pkgs.subversion.dev}"; - propagatedBuildInputs = [ - pkgs.apr.dev - pkgs.aprutil - pkgs.subversion - ]; - }); - - "mercurial" = super."mercurial".override (attrs: { - propagatedBuildInputs = [ - # self.python.modules.curses - ]; - }); - - "dulwich" = super."dulwich".override (attrs: { - patches = [ - ./patches/dulwich/handle-dir-refs.patch - ]; - }); - - "pygit2" = super."pygit2".override (attrs: { - propagatedBuildInputs = attrs.propagatedBuildInputs ++ [ - pkgs.libffi - pkgs.libgit2rc - ]; - }); - - "pytest" = super."pytest".override (attrs: { - patches = [ - ./patches/pytest/setuptools.patch - ]; - }); - - "pytest-runner" = super."pytest-runner".override (attrs: { - propagatedBuildInputs = [ - self."setuptools-scm" - ]; - }); - - "py" = super."py".override (attrs: { - propagatedBuildInputs = [ - self."setuptools-scm" - ]; - }); - - "configparser" = super."configparser".override (attrs: { - patches = [ - ./patches/configparser/pyproject.patch - ]; - propagatedBuildInputs = [ - self."setuptools-scm" - ]; - }); - - "importlib-metadata" = super."importlib-metadata".override (attrs: { - - patches = [ - ./patches/importlib_metadata/pyproject.patch - ]; - - propagatedBuildInputs = attrs.propagatedBuildInputs ++ [ - self."setuptools-scm" - ]; - - }); - - "zipp" = super."zipp".override (attrs: { - patches = [ - ./patches/zipp/pyproject.patch - ]; - propagatedBuildInputs = attrs.propagatedBuildInputs ++ [ - self."setuptools-scm" - ]; - }); - - # Avoid that base packages screw up the build process - inherit (basePythonPackages) - setuptools; - -} diff --git a/pkgs/python-packages.nix b/pkgs/python-packages.nix deleted file mode 100644 --- a/pkgs/python-packages.nix +++ /dev/null @@ -1,1103 +0,0 @@ -# Generated by pip2nix 0.8.0.dev1 -# See https://github.com/johbo/pip2nix - -{ pkgs, fetchurl, fetchgit, fetchhg }: - -self: super: { - "atomicwrites" = super.buildPythonPackage { - name = "atomicwrites-1.3.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/ec/0f/cd484ac8820fed363b374af30049adc8fd13065720fd4f4c6be8a2309da7/atomicwrites-1.3.0.tar.gz"; - sha256 = "19ngcscdf3jsqmpcxn6zl5b6anmsajb6izp1smcd1n02midl9abm"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "attrs" = super.buildPythonPackage { - name = "attrs-19.3.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/98/c3/2c227e66b5e896e15ccdae2e00bbc69aa46e9a8ce8869cc5fa96310bf612/attrs-19.3.0.tar.gz"; - sha256 = "0wky4h28n7xnr6xv69p9z6kv8bzn50d10c3drmd9ds8gawbcxdzp"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; 
- }; - }; - "backports.shutil-get-terminal-size" = super.buildPythonPackage { - name = "backports.shutil-get-terminal-size-1.0.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/ec/9c/368086faa9c016efce5da3e0e13ba392c9db79e3ab740b763fe28620b18b/backports.shutil_get_terminal_size-1.0.0.tar.gz"; - sha256 = "107cmn7g3jnbkp826zlj8rrj19fam301qvaqf0f3905f5217lgki"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "beautifulsoup4" = super.buildPythonPackage { - name = "beautifulsoup4-4.6.3"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/88/df/86bffad6309f74f3ff85ea69344a078fc30003270c8df6894fca7a3c72ff/beautifulsoup4-4.6.3.tar.gz"; - sha256 = "041dhalzjciw6qyzzq7a2k4h1yvyk76xigp35hv5ibnn448ydy4h"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "cffi" = super.buildPythonPackage { - name = "cffi-1.12.3"; - doCheck = false; - propagatedBuildInputs = [ - self."pycparser" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/93/1a/ab8c62b5838722f29f3daffcc8d4bd61844aa9b5f437341cc890ceee483b/cffi-1.12.3.tar.gz"; - sha256 = "0x075521fxwv0mfp4cqzk7lvmw4n94bjw601qkcv314z5s182704"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "configobj" = super.buildPythonPackage { - name = "configobj-5.0.6"; - doCheck = false; - propagatedBuildInputs = [ - self."six" - ]; - src = fetchurl { - url = "https://code.rhodecode.com/upstream/configobj/artifacts/download/0-012de99a-b1e1-4f64-a5c0-07a98a41b324.tar.gz?md5=6a513f51fe04b2c18cf84c1395a7c626"; - sha256 = "0kqfrdfr14mw8yd8qwq14dv2xghpkjmd3yjsy8dfcbvpcc17xnxp"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal ]; - }; - }; - "configparser" = super.buildPythonPackage { - name = "configparser-4.0.2"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/16/4f/48975536bd488d3a272549eb795ac4a13a5f7fcdc8995def77fbef3532ee/configparser-4.0.2.tar.gz"; - sha256 = "1priacxym85yjcf68hh38w55nqswaxp71ryjyfdk222kg9l85ln7"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "contextlib2" = super.buildPythonPackage { - name = "contextlib2-0.6.0.post1"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/02/54/669207eb72e3d8ae8b38aa1f0703ee87a0e9f88f30d3c0a47bebdb6de242/contextlib2-0.6.0.post1.tar.gz"; - sha256 = "0bhnr2ac7wy5l85ji909gyljyk85n92w8pdvslmrvc8qih4r1x01"; - }; - meta = { - license = [ pkgs.lib.licenses.psfl ]; - }; - }; - "cov-core" = super.buildPythonPackage { - name = "cov-core-1.15.0"; - doCheck = false; - propagatedBuildInputs = [ - self."coverage" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/4b/87/13e75a47b4ba1be06f29f6d807ca99638bedc6b57fa491cd3de891ca2923/cov-core-1.15.0.tar.gz"; - sha256 = "0k3np9ymh06yv1ib96sb6wfsxjkqhmik8qfsn119vnhga9ywc52a"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "coverage" = super.buildPythonPackage { - name = "coverage-4.5.4"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/85/d5/818d0e603685c4a613d56f065a721013e942088047ff1027a632948bdae6/coverage-4.5.4.tar.gz"; - sha256 = "0p0j4di6h8k6ica7jwwj09azdcg4ycxq60i9qsskmsg94cd9yzg0"; - }; - meta = { - license = [ pkgs.lib.licenses.asl20 ]; - }; - }; - "decorator" = super.buildPythonPackage { - name = "decorator-4.1.2"; - doCheck = false; - src = fetchurl { - url = 
"https://files.pythonhosted.org/packages/bb/e0/f6e41e9091e130bf16d4437dabbac3993908e4d6485ecbc985ef1352db94/decorator-4.1.2.tar.gz"; - sha256 = "1d8npb11kxyi36mrvjdpcjij76l5zfyrz2f820brf0l0rcw4vdkw"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal { fullName = "new BSD License"; } ]; - }; - }; - "dogpile.cache" = super.buildPythonPackage { - name = "dogpile.cache-0.9.0"; - doCheck = false; - propagatedBuildInputs = [ - self."decorator" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/ac/6a/9ac405686a94b7f009a20a50070a5786b0e1aedc707b88d40d0c4b51a82e/dogpile.cache-0.9.0.tar.gz"; - sha256 = "0sr1fn6b4k5bh0cscd9yi8csqxvj4ngzildav58x5p694mc86j5k"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal ]; - }; - }; - "dogpile.core" = super.buildPythonPackage { - name = "dogpile.core-0.4.1"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/0e/77/e72abc04c22aedf874301861e5c1e761231c288b5de369c18be8f4b5c9bb/dogpile.core-0.4.1.tar.gz"; - sha256 = "0xpdvg4kr1isfkrh1rfsh7za4q5a5s6l2kf9wpvndbwf3aqjyrdy"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal ]; - }; - }; - "dulwich" = super.buildPythonPackage { - name = "dulwich-0.13.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/84/95/732d280eee829dacc954e8109f97b47abcadcca472c2ab013e1635eb4792/dulwich-0.13.0.tar.gz"; - sha256 = "0f1jwvrh549c4rgavkn3wizrch904s73s4fmrxykxy9cw8s57lwf"; - }; - meta = { - license = [ pkgs.lib.licenses.gpl2Plus ]; - }; - }; - "enum34" = super.buildPythonPackage { - name = "enum34-1.1.10"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/11/c4/2da1f4952ba476677a42f25cd32ab8aaf0e1c0d0e00b89822b835c7e654c/enum34-1.1.10.tar.gz"; - sha256 = "0j7ji699fwswm4vg6w1v07fkbf8dkzdm6gfh88jvs5nqgr3sgrnc"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal ]; - }; - }; - "funcsigs" = super.buildPythonPackage { - name = "funcsigs-1.0.2"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/94/4a/db842e7a0545de1cdb0439bb80e6e42dfe82aaeaadd4072f2263a4fbed23/funcsigs-1.0.2.tar.gz"; - sha256 = "0l4g5818ffyfmfs1a924811azhjj8ax9xd1cffr1mzd3ycn0zfx7"; - }; - meta = { - license = [ { fullName = "ASL"; } pkgs.lib.licenses.asl20 ]; - }; - }; - "gevent" = super.buildPythonPackage { - name = "gevent-1.5.0"; - doCheck = false; - propagatedBuildInputs = [ - self."greenlet" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/5a/79/2c63d385d017b5dd7d70983a463dfd25befae70c824fedb857df6e72eff2/gevent-1.5.0.tar.gz"; - sha256 = "0aac3d4vhv5n4rsb6cqzq0d1xx9immqz4fmpddw35yxkwdc450dj"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "gprof2dot" = super.buildPythonPackage { - name = "gprof2dot-2017.9.19"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/9d/36/f977122502979f3dfb50704979c9ed70e6b620787942b089bf1af15f5aba/gprof2dot-2017.9.19.tar.gz"; - sha256 = "17ih23ld2nzgc3xwgbay911l6lh96jp1zshmskm17n1gg2i7mg6f"; - }; - meta = { - license = [ { fullName = "GNU Lesser General Public License v3 or later (LGPLv3+)"; } { fullName = "LGPL"; } ]; - }; - }; - "greenlet" = super.buildPythonPackage { - name = "greenlet-0.4.15"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/f8/e8/b30ae23b45f69aa3f024b46064c0ac8e5fcb4f22ace0dca8d6f9c8bbe5e7/greenlet-0.4.15.tar.gz"; - sha256 = "1g4g1wwc472ds89zmqlpyan3fbnzpa8qm48z3z1y6mlk44z485ll"; - }; - meta = { - 
license = [ pkgs.lib.licenses.mit ]; - }; - }; - "gunicorn" = super.buildPythonPackage { - name = "gunicorn-19.9.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/47/52/68ba8e5e8ba251e54006a49441f7ccabca83b6bef5aedacb4890596c7911/gunicorn-19.9.0.tar.gz"; - sha256 = "1wzlf4xmn6qjirh5w81l6i6kqjnab1n1qqkh7zsj1yb6gh4n49ps"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "hg-evolve" = super.buildPythonPackage { - name = "hg-evolve-9.1.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/20/36/5a6655975aa0c663be91098d31a0b24841acad44fe896aa2bdee77c6b883/hg-evolve-9.1.0.tar.gz"; - sha256 = "1mna81cmzxxn7s2nwz3g1xgdjlcc1axkvfmwg7gjqghwn3pdraps"; - }; - meta = { - license = [ { fullName = "GPLv2+"; } ]; - }; - }; - "hgsubversion" = super.buildPythonPackage { - name = "hgsubversion-1.9.3"; - doCheck = false; - propagatedBuildInputs = [ - self."mercurial" - self."subvertpy" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/a3/53/6d205e641f3e09abcf1ddaed66e5e4b20da22d0145566d440a02c9e35f0d/hgsubversion-1.9.3.tar.gz"; - sha256 = "0nymcjlch8c4zjbncrs30p2nrbylsf25g3h6mr0zzzxr141h3sig"; - }; - meta = { - license = [ pkgs.lib.licenses.gpl1 ]; - }; - }; - "hupper" = super.buildPythonPackage { - name = "hupper-1.10.2"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/41/24/ea90fef04706e54bd1635c05c50dc9cf87cda543c59303a03e7aa7dda0ce/hupper-1.10.2.tar.gz"; - sha256 = "0am0p6g5cz6xmcaf04xq8q6dzdd9qz0phj6gcmpsckf2mcyza61q"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "importlib-metadata" = super.buildPythonPackage { - name = "importlib-metadata-1.6.0"; - doCheck = false; - propagatedBuildInputs = [ - self."zipp" - self."pathlib2" - self."contextlib2" - self."configparser" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/b4/1b/baab42e3cd64c9d5caac25a9d6c054f8324cdc38975a44d600569f1f7158/importlib_metadata-1.6.0.tar.gz"; - sha256 = "07icyggasn38yv2swdrd8z6i0plazmc9adavsdkbqqj91j53ll9l"; - }; - meta = { - license = [ pkgs.lib.licenses.asl20 ]; - }; - }; - "ipdb" = super.buildPythonPackage { - name = "ipdb-0.13.2"; - doCheck = false; - propagatedBuildInputs = [ - self."setuptools" - self."ipython" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/2c/bb/a3e1a441719ebd75c6dac8170d3ddba884b7ee8a5c0f9aefa7297386627a/ipdb-0.13.2.tar.gz"; - sha256 = "0jcd849rx30y3wcgzsqbn06v0yjlzvb9x3076q0yxpycdwm1ryvp"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal ]; - }; - }; - "ipython" = super.buildPythonPackage { - name = "ipython-5.1.0"; - doCheck = false; - propagatedBuildInputs = [ - self."setuptools" - self."decorator" - self."pickleshare" - self."simplegeneric" - self."traitlets" - self."prompt-toolkit" - self."pygments" - self."pexpect" - self."backports.shutil-get-terminal-size" - self."pathlib2" - self."pexpect" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/89/63/a9292f7cd9d0090a0f995e1167f3f17d5889dcbc9a175261719c513b9848/ipython-5.1.0.tar.gz"; - sha256 = "0qdrf6aj9kvjczd5chj1my8y2iq09am9l8bb2a1334a52d76kx3y"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal ]; - }; - }; - "ipython-genutils" = super.buildPythonPackage { - name = "ipython-genutils-0.2.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/e8/69/fbeffffc05236398ebfcfb512b6d2511c622871dca1746361006da310399/ipython_genutils-0.2.0.tar.gz"; - sha256 
= "1a4bc9y8hnvq6cp08qs4mckgm6i6ajpndp4g496rvvzcfmp12bpb"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal ]; - }; - }; - "mako" = super.buildPythonPackage { - name = "mako-1.1.0"; - doCheck = false; - propagatedBuildInputs = [ - self."markupsafe" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/b0/3c/8dcd6883d009f7cae0f3157fb53e9afb05a0d3d33b3db1268ec2e6f4a56b/Mako-1.1.0.tar.gz"; - sha256 = "0jqa3qfpykyn4fmkn0kh6043sfls7br8i2bsdbccazcvk9cijsd3"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "markupsafe" = super.buildPythonPackage { - name = "markupsafe-1.1.1"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/b9/2e/64db92e53b86efccfaea71321f597fa2e1b2bd3853d8ce658568f7a13094/MarkupSafe-1.1.1.tar.gz"; - sha256 = "0sqipg4fk7xbixqd8kq6rlkxj664d157bdwbh93farcphf92x1r9"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal pkgs.lib.licenses.bsd3 ]; - }; - }; - "mercurial" = super.buildPythonPackage { - name = "mercurial-5.1.1"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/22/39/e1a95f6048aa0785b82f5faad8281ae7320894a635cb4a57e19479639c92/mercurial-5.1.1.tar.gz"; - sha256 = "17z42rfjdkrks4grzgac66nfh285zf1pwxd2zwx1p71pw2jqpz1m"; - }; - meta = { - license = [ pkgs.lib.licenses.gpl1 pkgs.lib.licenses.gpl2Plus ]; - }; - }; - "mock" = super.buildPythonPackage { - name = "mock-3.0.5"; - doCheck = false; - propagatedBuildInputs = [ - self."six" - self."funcsigs" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/2e/ab/4fe657d78b270aa6a32f027849513b829b41b0f28d9d8d7f8c3d29ea559a/mock-3.0.5.tar.gz"; - sha256 = "1hrp6j0yrx2xzylfv02qa8kph661m6yq4p0mc8fnimch9j4psrc3"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal { fullName = "OSI Approved :: BSD License"; } ]; - }; - }; - "more-itertools" = super.buildPythonPackage { - name = "more-itertools-5.0.0"; - doCheck = false; - propagatedBuildInputs = [ - self."six" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/dd/26/30fc0d541d9fdf55faf5ba4b0fd68f81d5bd2447579224820ad525934178/more-itertools-5.0.0.tar.gz"; - sha256 = "1r12cm6mcdwdzz7d47a6g4l437xsvapdlgyhqay3i2nrlv03da9q"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "msgpack-python" = super.buildPythonPackage { - name = "msgpack-python-0.5.6"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/8a/20/6eca772d1a5830336f84aca1d8198e5a3f4715cd1c7fc36d3cc7f7185091/msgpack-python-0.5.6.tar.gz"; - sha256 = "16wh8qgybmfh4pjp8vfv78mdlkxfmcasg78lzlnm6nslsfkci31p"; - }; - meta = { - license = [ pkgs.lib.licenses.asl20 ]; - }; - }; - "packaging" = super.buildPythonPackage { - name = "packaging-20.3"; - doCheck = false; - propagatedBuildInputs = [ - self."pyparsing" - self."six" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/65/37/83e3f492eb52d771e2820e88105f605335553fe10422cba9d256faeb1702/packaging-20.3.tar.gz"; - sha256 = "18xpablq278janh03bai9xd4kz9b0yfp6vflazn725ns9x3jna9w"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal { fullName = "BSD or Apache License, Version 2.0"; } pkgs.lib.licenses.asl20 ]; - }; - }; - "pastedeploy" = super.buildPythonPackage { - name = "pastedeploy-2.1.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/c4/e9/972a1c20318b3ae9edcab11a6cef64308fbae5d0d45ab52c6f8b2b8f35b8/PasteDeploy-2.1.0.tar.gz"; - sha256 = 
"16qsq5y6mryslmbp5pn35x4z8z3ndp5rpgl42h226879nrw9hmg7"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "pathlib2" = super.buildPythonPackage { - name = "pathlib2-2.3.5"; - doCheck = false; - propagatedBuildInputs = [ - self."six" - self."scandir" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/94/d8/65c86584e7e97ef824a1845c72bbe95d79f5b306364fa778a3c3e401b309/pathlib2-2.3.5.tar.gz"; - sha256 = "0s4qa8c082fdkb17izh4mfgwrjd1n5pya18wvrbwqdvvb5xs9nbc"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "pexpect" = super.buildPythonPackage { - name = "pexpect-4.8.0"; - doCheck = false; - propagatedBuildInputs = [ - self."ptyprocess" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/e5/9b/ff402e0e930e70467a7178abb7c128709a30dfb22d8777c043e501bc1b10/pexpect-4.8.0.tar.gz"; - sha256 = "032cg337h8awydgypz6f4wx848lw8dyrj4zy988x0lyib4ws8rgw"; - }; - meta = { - license = [ pkgs.lib.licenses.isc { fullName = "ISC License (ISCL)"; } ]; - }; - }; - "pickleshare" = super.buildPythonPackage { - name = "pickleshare-0.7.5"; - doCheck = false; - propagatedBuildInputs = [ - self."pathlib2" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/d8/b6/df3c1c9b616e9c0edbc4fbab6ddd09df9535849c64ba51fcb6531c32d4d8/pickleshare-0.7.5.tar.gz"; - sha256 = "1jmghg3c53yp1i8cm6pcrm280ayi8621rwyav9fac7awjr3kss47"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "plaster" = super.buildPythonPackage { - name = "plaster-1.0"; - doCheck = false; - propagatedBuildInputs = [ - self."setuptools" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/37/e1/56d04382d718d32751017d32f351214384e529b794084eee20bb52405563/plaster-1.0.tar.gz"; - sha256 = "1hy8k0nv2mxq94y5aysk6hjk9ryb4bsd13g83m60hcyzxz3wflc3"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "plaster-pastedeploy" = super.buildPythonPackage { - name = "plaster-pastedeploy-0.7"; - doCheck = false; - propagatedBuildInputs = [ - self."pastedeploy" - self."plaster" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/99/69/2d3bc33091249266a1bd3cf24499e40ab31d54dffb4a7d76fe647950b98c/plaster_pastedeploy-0.7.tar.gz"; - sha256 = "1zg7gcsvc1kzay1ry5p699rg2qavfsxqwl17mqxzr0gzw6j9679r"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "pluggy" = super.buildPythonPackage { - name = "pluggy-0.13.1"; - doCheck = false; - propagatedBuildInputs = [ - self."importlib-metadata" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/f8/04/7a8542bed4b16a65c2714bf76cf5a0b026157da7f75e87cc88774aa10b14/pluggy-0.13.1.tar.gz"; - sha256 = "1c35qyhvy27q9ih9n899f3h4sdnpgq027dbiilly2qb5cvgarchm"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "prompt-toolkit" = super.buildPythonPackage { - name = "prompt-toolkit-1.0.18"; - doCheck = false; - propagatedBuildInputs = [ - self."six" - self."wcwidth" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/c5/64/c170e5b1913b540bf0c8ab7676b21fdd1d25b65ddeb10025c6ca43cccd4c/prompt_toolkit-1.0.18.tar.gz"; - sha256 = "09h1153wgr5x2ny7ds0w2m81n3bb9j8hjb8sjfnrg506r01clkyx"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal ]; - }; - }; - "psutil" = super.buildPythonPackage { - name = "psutil-5.7.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/c4/b8/3512f0e93e0db23a71d82485ba256071ebef99b227351f0f5540f744af41/psutil-5.7.0.tar.gz"; - sha256 = 
"03jykdi3dgf1cdal9bv4fq9zjvzj9l9bs99gi5ar81sdl5nc2pk8"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal ]; - }; - }; - "ptyprocess" = super.buildPythonPackage { - name = "ptyprocess-0.6.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/7d/2d/e4b8733cf79b7309d84c9081a4ab558c89d8c89da5961bf4ddb050ca1ce0/ptyprocess-0.6.0.tar.gz"; - sha256 = "1h4lcd3w5nrxnsk436ar7fwkiy5rfn5wj2xwy9l0r4mdqnf2jgwj"; - }; - meta = { - license = [ ]; - }; - }; - "py" = super.buildPythonPackage { - name = "py-1.8.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/f1/5a/87ca5909f400a2de1561f1648883af74345fe96349f34f737cdfc94eba8c/py-1.8.0.tar.gz"; - sha256 = "0lsy1gajva083pzc7csj1cvbmminb7b4l6a0prdzyb3fd829nqyw"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "pycparser" = super.buildPythonPackage { - name = "pycparser-2.20"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/0f/86/e19659527668d70be91d0369aeaa055b4eb396b0f387a4f92293a20035bd/pycparser-2.20.tar.gz"; - sha256 = "1w0m3xvlrzq4lkbvd1ngfm8mdw64r1yxy6n7djlw6qj5d0km6ird"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal ]; - }; - }; - "pygit2" = super.buildPythonPackage { - name = "pygit2-0.28.2"; - doCheck = false; - propagatedBuildInputs = [ - self."cffi" - self."six" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/4c/64/88c2a4eb2d22ca1982b364f41ff5da42d61de791d7eb68140e7f8f7eb721/pygit2-0.28.2.tar.gz"; - sha256 = "11kzj5mjkspvplnpdb6bj8dcj6rgmkk986k8hjcklyg5yaxkz32d"; - }; - meta = { - license = [ { fullName = "GPLv2 with linking exception"; } ]; - }; - }; - "pygments" = super.buildPythonPackage { - name = "pygments-2.4.2"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/7e/ae/26808275fc76bf2832deb10d3a3ed3107bc4de01b85dcccbe525f2cd6d1e/Pygments-2.4.2.tar.gz"; - sha256 = "15v2sqm5g12bqa0c7wikfh9ck2nl97ayizy1hpqhmws5gqalq748"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal ]; - }; - }; - "pyparsing" = super.buildPythonPackage { - name = "pyparsing-2.4.7"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/c1/47/dfc9c342c9842bbe0036c7f763d2d6686bcf5eb1808ba3e170afdb282210/pyparsing-2.4.7.tar.gz"; - sha256 = "1hgc8qrbq1ymxbwfbjghv01fm3fbpjwpjwi0bcailxxzhf3yq0y2"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "pyramid" = super.buildPythonPackage { - name = "pyramid-1.10.4"; - doCheck = false; - propagatedBuildInputs = [ - self."hupper" - self."plaster" - self."plaster-pastedeploy" - self."setuptools" - self."translationstring" - self."venusian" - self."webob" - self."zope.deprecation" - self."zope.interface" - self."repoze.lru" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/c2/43/1ae701c9c6bb3a434358e678a5e72c96e8aa55cf4cb1d2fa2041b5dd38b7/pyramid-1.10.4.tar.gz"; - sha256 = "0rkxs1ajycg2zh1c94xlmls56mx5m161sn8112skj0amza6cn36q"; - }; - meta = { - license = [ { fullName = "Repoze Public License"; } { fullName = "BSD-derived (http://www.repoze.org/LICENSE.txt)"; } ]; - }; - }; - "pyramid-mako" = super.buildPythonPackage { - name = "pyramid-mako-1.1.0"; - doCheck = false; - propagatedBuildInputs = [ - self."pyramid" - self."mako" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/63/7b/5e2af68f675071a6bad148c1c393928f0ef5fcd94e95cbf53b89d6471a83/pyramid_mako-1.1.0.tar.gz"; - sha256 = 
"1qj0m091mnii86j2q1d82yir22nha361rvhclvg3s70z8iiwhrh0"; - }; - meta = { - license = [ { fullName = "Repoze Public License"; } { fullName = "BSD-derived (http://www.repoze.org/LICENSE.txt)"; } ]; - }; - }; - "pytest" = super.buildPythonPackage { - name = "pytest-4.6.5"; - doCheck = false; - propagatedBuildInputs = [ - self."py" - self."six" - self."packaging" - self."attrs" - self."atomicwrites" - self."pluggy" - self."importlib-metadata" - self."wcwidth" - self."funcsigs" - self."pathlib2" - self."more-itertools" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/2a/c6/1d1f32f6a5009900521b12e6560fb6b7245b0d4bc3fb771acd63d10e30e1/pytest-4.6.5.tar.gz"; - sha256 = "0iykwwfp4h181nd7rsihh2120b0rkawlw7rvbl19sgfspncr3hwg"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "pytest-cov" = super.buildPythonPackage { - name = "pytest-cov-2.7.1"; - doCheck = false; - propagatedBuildInputs = [ - self."pytest" - self."coverage" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/bb/0f/3db7ff86801883b21d5353b258c994b1b8e2abbc804e2273b8d0fd19004b/pytest-cov-2.7.1.tar.gz"; - sha256 = "0filvmmyqm715azsl09ql8hy2x7h286n6d8z5x42a1wpvvys83p0"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal pkgs.lib.licenses.mit ]; - }; - }; - "pytest-profiling" = super.buildPythonPackage { - name = "pytest-profiling-1.7.0"; - doCheck = false; - propagatedBuildInputs = [ - self."six" - self."pytest" - self."gprof2dot" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/39/70/22a4b33739f07f1732a63e33bbfbf68e0fa58cfba9d200e76d01921eddbf/pytest-profiling-1.7.0.tar.gz"; - sha256 = "0abz9gi26jpcfdzgsvwad91555lpgdc8kbymicmms8k2fqa8z4wk"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "pytest-runner" = super.buildPythonPackage { - name = "pytest-runner-5.1"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/d9/6d/4b41a74b31720e25abd4799be72d54811da4b4d0233e38b75864dcc1f7ad/pytest-runner-5.1.tar.gz"; - sha256 = "0ykfcnpp8c22winj63qzc07l5axwlc9ikl8vn05sc32gv3417815"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "pytest-sugar" = super.buildPythonPackage { - name = "pytest-sugar-0.9.2"; - doCheck = false; - propagatedBuildInputs = [ - self."pytest" - self."termcolor" - self."packaging" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/55/59/f02f78d1c80f7e03e23177f60624c8106d4f23d124c921df103f65692464/pytest-sugar-0.9.2.tar.gz"; - sha256 = "1asq7yc4g8bx2sn7yy974mhc9ywvaihasjab4inkirdwn9s7mn7w"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal ]; - }; - }; - "pytest-timeout" = super.buildPythonPackage { - name = "pytest-timeout-1.3.3"; - doCheck = false; - propagatedBuildInputs = [ - self."pytest" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/13/48/7a166eaa29c1dca6cc253e3ba5773ff2e4aa4f567c1ea3905808e95ac5c1/pytest-timeout-1.3.3.tar.gz"; - sha256 = "1cczcjhw4xx5sjkhxlhc5c1bkr7x6fcyx12wrnvwfckshdvblc2a"; - }; - meta = { - license = [ pkgs.lib.licenses.mit { fullName = "DFSG approved"; } ]; - }; - }; - "redis" = super.buildPythonPackage { - name = "redis-3.5.3"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/b3/17/1e567ff78c83854e16b98694411fe6e08c3426af866ad11397cddceb80d3/redis-3.5.3.tar.gz"; - sha256 = "0e7e0cfca8660dea8b7d5cd8c4f6c5e29e11f31158c0b0ae91a397f00e5a05a2"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "repoze.lru" = 
super.buildPythonPackage { - name = "repoze.lru-0.7"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/12/bc/595a77c4b5e204847fdf19268314ef59c85193a9dc9f83630fc459c0fee5/repoze.lru-0.7.tar.gz"; - sha256 = "0xzz1aw2smy8hdszrq8yhnklx6w1r1mf55061kalw3iq35gafa84"; - }; - meta = { - license = [ { fullName = "Repoze Public License"; } { fullName = "BSD-derived (http://www.repoze.org/LICENSE.txt)"; } ]; - }; - }; - "rhodecode-vcsserver" = super.buildPythonPackage { - name = "rhodecode-vcsserver-4.27.1"; - buildInputs = [ - self."pytest" - self."py" - self."pytest-cov" - self."pytest-sugar" - self."pytest-runner" - self."pytest-profiling" - self."pytest-timeout" - self."gprof2dot" - self."mock" - self."cov-core" - self."coverage" - self."webtest" - self."beautifulsoup4" - self."configobj" - ]; - doCheck = true; - propagatedBuildInputs = [ - self."configobj" - self."dogpile.cache" - self."dogpile.core" - self."decorator" - self."dulwich" - self."hgsubversion" - self."hg-evolve" - self."mako" - self."markupsafe" - self."mercurial" - self."msgpack-python" - self."pastedeploy" - self."pyramid" - self."pyramid-mako" - self."pygit2" - self."repoze.lru" - self."redis" - self."simplejson" - self."subprocess32" - self."subvertpy" - self."six" - self."translationstring" - self."webob" - self."zope.deprecation" - self."zope.interface" - self."gevent" - self."greenlet" - self."gunicorn" - self."waitress" - self."ipdb" - self."ipython" - self."pytest" - self."py" - self."pytest-cov" - self."pytest-sugar" - self."pytest-runner" - self."pytest-profiling" - self."pytest-timeout" - self."gprof2dot" - self."mock" - self."cov-core" - self."coverage" - self."webtest" - self."beautifulsoup4" - ]; - src = ./.; - meta = { - license = [ { fullName = "GPL V3"; } { fullName = "GNU General Public License v3 or later (GPLv3+)"; } ]; - }; - }; - "scandir" = super.buildPythonPackage { - name = "scandir-1.10.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/df/f5/9c052db7bd54d0cbf1bc0bb6554362bba1012d03e5888950a4f5c5dadc4e/scandir-1.10.0.tar.gz"; - sha256 = "1bkqwmf056pkchf05ywbnf659wqlp6lljcdb0y88wr9f0vv32ijd"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal { fullName = "New BSD License"; } ]; - }; - }; - "setproctitle" = super.buildPythonPackage { - name = "setproctitle-1.1.10"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/5a/0d/dc0d2234aacba6cf1a729964383e3452c52096dc695581248b548786f2b3/setproctitle-1.1.10.tar.gz"; - sha256 = "163kplw9dcrw0lffq1bvli5yws3rngpnvrxrzdw89pbphjjvg0v2"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal ]; - }; - }; - "setuptools" = super.buildPythonPackage { - name = "setuptools-44.1.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/ed/7b/bbf89ca71e722b7f9464ebffe4b5ee20a9e5c9a555a56e2d3914bb9119a6/setuptools-44.1.0.zip"; - sha256 = "1jja896zvd1ppccnjbhkgagxbwchgq6vfamp6qn1hvywq6q9cjkr"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - - "setuptools-scm" = super.buildPythonPackage { - name = "setuptools-scm-3.5.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/b2/f7/60a645aae001a2e06cf4b8db2fba9d9f36b8fd378f10647e3e218b61b74b/setuptools_scm-3.5.0.tar.gz"; - sha256 = "5bdf21a05792903cafe7ae0c9501182ab52497614fa6b1750d9dbae7b60c1a87"; - }; - meta = { - license = [ pkgs.lib.licenses.psfl ]; - }; - }; - - "simplegeneric" = super.buildPythonPackage { - name = 
"simplegeneric-0.8.1"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/3d/57/4d9c9e3ae9a255cd4e1106bb57e24056d3d0709fc01b2e3e345898e49d5b/simplegeneric-0.8.1.zip"; - sha256 = "0wwi1c6md4vkbcsfsf8dklf3vr4mcdj4mpxkanwgb6jb1432x5yw"; - }; - meta = { - license = [ pkgs.lib.licenses.zpl21 ]; - }; - }; - "simplejson" = super.buildPythonPackage { - name = "simplejson-3.16.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/e3/24/c35fb1c1c315fc0fffe61ea00d3f88e85469004713dab488dee4f35b0aff/simplejson-3.16.0.tar.gz"; - sha256 = "19cws1syk8jzq2pw43878dv6fjkb0ifvjpx0i9aajix6kc9jkwxi"; - }; - meta = { - license = [ { fullName = "Academic Free License (AFL)"; } pkgs.lib.licenses.mit ]; - }; - }; - "six" = super.buildPythonPackage { - name = "six-1.11.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/16/d8/bc6316cf98419719bd59c91742194c111b6f2e85abac88e496adefaf7afe/six-1.11.0.tar.gz"; - sha256 = "1scqzwc51c875z23phj48gircqjgnn3af8zy2izjwmnlxrxsgs3h"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "subprocess32" = super.buildPythonPackage { - name = "subprocess32-3.5.4"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/32/c8/564be4d12629b912ea431f1a50eb8b3b9d00f1a0b1ceff17f266be190007/subprocess32-3.5.4.tar.gz"; - sha256 = "17f7mvwx2271s1wrl0qac3wjqqnrqag866zs3qc8v5wp0k43fagb"; - }; - meta = { - license = [ pkgs.lib.licenses.psfl ]; - }; - }; - "subvertpy" = super.buildPythonPackage { - name = "subvertpy-0.10.1"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/9d/76/99fa82affce75f5ac0f7dbe513796c3f37311ace0c68e1b063683b4f9b99/subvertpy-0.10.1.tar.gz"; - sha256 = "061ncy9wjz3zyv527avcrdyk0xygyssyy7p1644nhzhwp8zpybij"; - }; - meta = { - license = [ pkgs.lib.licenses.lgpl21Plus pkgs.lib.licenses.gpl2Plus ]; - }; - }; - "termcolor" = super.buildPythonPackage { - name = "termcolor-1.1.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/8a/48/a76be51647d0eb9f10e2a4511bf3ffb8cc1e6b14e9e4fab46173aa79f981/termcolor-1.1.0.tar.gz"; - sha256 = "0fv1vq14rpqwgazxg4981904lfyp84mnammw7y046491cv76jv8x"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "traitlets" = super.buildPythonPackage { - name = "traitlets-4.3.3"; - doCheck = false; - propagatedBuildInputs = [ - self."ipython-genutils" - self."six" - self."decorator" - self."enum34" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/75/b0/43deb021bc943f18f07cbe3dac1d681626a48997b7ffa1e7fb14ef922b21/traitlets-4.3.3.tar.gz"; - sha256 = "1xsrwgivpkxlbr4dfndfsi098s29yqgswgjc1qqn69yxklvfw8yh"; - }; - meta = { - license = [ pkgs.lib.licenses.bsdOriginal ]; - }; - }; - "translationstring" = super.buildPythonPackage { - name = "translationstring-1.3"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/5e/eb/bee578cc150b44c653b63f5ebe258b5d0d812ddac12497e5f80fcad5d0b4/translationstring-1.3.tar.gz"; - sha256 = "0bdpcnd9pv0131dl08h4zbcwmgc45lyvq3pa224xwan5b3x4rr2f"; - }; - meta = { - license = [ { fullName = "BSD-like (http://repoze.org/license.html)"; } ]; - }; - }; - "venusian" = super.buildPythonPackage { - name = "venusian-1.2.0"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/7e/6f/40a9d43ac77cb51cb62be5b5662d170f43f8037bdc4eab56336c4ca92bb7/venusian-1.2.0.tar.gz"; - sha256 = 
"0ghyx66g8ikx9nx1mnwqvdcqm11i1vlq0hnvwl50s48bp22q5v34"; - }; - meta = { - license = [ { fullName = "BSD-derived (http://www.repoze.org/LICENSE.txt)"; } ]; - }; - }; - "waitress" = super.buildPythonPackage { - name = "waitress-1.3.1"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/a6/e6/708da7bba65898e5d759ade8391b1077e49d07be0b0223c39f5be04def56/waitress-1.3.1.tar.gz"; - sha256 = "1iysl8ka3l4cdrr0r19fh1cv28q41mwpvgsb81ji7k4shkb0k3i7"; - }; - meta = { - license = [ pkgs.lib.licenses.zpl21 ]; - }; - }; - "wcwidth" = super.buildPythonPackage { - name = "wcwidth-0.1.9"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/25/9d/0acbed6e4a4be4fc99148f275488580968f44ddb5e69b8ceb53fc9df55a0/wcwidth-0.1.9.tar.gz"; - sha256 = "1wf5ycjx8s066rdvr0fgz4xds9a8zhs91c4jzxvvymm1c8l8cwzf"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "webob" = super.buildPythonPackage { - name = "webob-1.8.5"; - doCheck = false; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/9d/1a/0c89c070ee2829c934cb6c7082287c822e28236a4fcf90063e6be7c35532/WebOb-1.8.5.tar.gz"; - sha256 = "11khpzaxc88q31v25ic330gsf56fwmbdc9b30br8mvp0fmwspah5"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "webtest" = super.buildPythonPackage { - name = "webtest-2.0.34"; - doCheck = false; - propagatedBuildInputs = [ - self."six" - self."webob" - self."waitress" - self."beautifulsoup4" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/2c/74/a0e63feee438735d628631e2b70d82280276a930637ac535479e5fad9427/WebTest-2.0.34.tar.gz"; - sha256 = "0x1y2c8z4fmpsny4hbp6ka37si2g10r5r2jwxhvv5mx7g3blq4bi"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "zipp" = super.buildPythonPackage { - name = "zipp-1.2.0"; - doCheck = false; - propagatedBuildInputs = [ - self."contextlib2" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/78/08/d52f0ea643bc1068d6dc98b412f4966a9b63255d20911a23ac3220c033c4/zipp-1.2.0.tar.gz"; - sha256 = "1c91lnv1bxjimh8as27hz7bghsjkkbxn1d37xq7in9c82iai0167"; - }; - meta = { - license = [ pkgs.lib.licenses.mit ]; - }; - }; - "zope.deprecation" = super.buildPythonPackage { - name = "zope.deprecation-4.4.0"; - doCheck = false; - propagatedBuildInputs = [ - self."setuptools" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/34/da/46e92d32d545dd067b9436279d84c339e8b16de2ca393d7b892bc1e1e9fd/zope.deprecation-4.4.0.tar.gz"; - sha256 = "1pz2cv7gv9y1r3m0bdv7ks1alagmrn5msm5spwdzkb2by0w36i8d"; - }; - meta = { - license = [ pkgs.lib.licenses.zpl21 ]; - }; - }; - "zope.interface" = super.buildPythonPackage { - name = "zope.interface-4.6.0"; - doCheck = false; - propagatedBuildInputs = [ - self."setuptools" - ]; - src = fetchurl { - url = "https://files.pythonhosted.org/packages/4e/d0/c9d16bd5b38de44a20c6dc5d5ed80a49626fafcb3db9f9efdc2a19026db6/zope.interface-4.6.0.tar.gz"; - sha256 = "1rgh2x3rcl9r0v0499kf78xy86rnmanajf4ywmqb943wpk50sg8v"; - }; - meta = { - license = [ pkgs.lib.licenses.zpl21 ]; - }; - }; - -### Test requirements - - -} diff --git a/pkgs/shell-generate.nix b/pkgs/shell-generate.nix deleted file mode 100755 --- a/pkgs/shell-generate.nix +++ /dev/null @@ -1,42 +0,0 @@ -{ pkgs ? (import {}) -, pythonPackages ? 
"python27Packages" -}: - -with pkgs.lib; - -let _pythonPackages = pythonPackages; in -let - pythonPackages = getAttr _pythonPackages pkgs; - - pip2nix = import ./nix-common/pip2nix.nix { - inherit - pkgs - pythonPackages; - }; - -in - -pkgs.stdenv.mkDerivation { - name = "pip2nix-generated"; - buildInputs = [ - pip2nix.pip2nix - pythonPackages.pip-tools - pkgs.apr - pkgs.aprutil - pkgs.libffi - ]; - - shellHook = '' - runHook preShellHook - echo "Setting SVN_* variables" - export SVN_LIBRARY_PATH=${pkgs.subversion}/lib - export SVN_HEADER_PATH=${pkgs.subversion.dev}/include - runHook postShellHook - ''; - - preShellHook = '' - echo "Starting Generate Shell" - # Custom prompt to distinguish from other dev envs. - export PS1="\n\[\033[1;32m\][Generate-shell:\w]$\[\033[0m\] " - ''; -} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,73 @@ +[build-system] +requires = ["setuptools>=61.0.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "rhodecode-vcsserver" +description = "Version Control System Server for RhodeCode" +authors = [ + {name = "RhodeCode GmbH", email = "support@rhodecode.com"}, +] + +license = {text = "GPL V3"} +requires-python = ">=3.10" +dynamic = ["version", "readme", "dependencies", "optional-dependencies"] +classifiers = [ + 'Development Status :: 6 - Mature', + 'Intended Audience :: Developers', + 'Operating System :: OS Independent', + 'Topic :: Software Development :: Version Control', + 'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)', + 'Programming Language :: Python :: 3.10', +] + +[project.entry-points."paste.app_factory"] +main = "vcsserver.http_main:main" + + +[tool.setuptools] +packages = ["vcsserver"] + +[tool.setuptools.dynamic] +readme = {file = ["README.rst"], content-type = "text/rst"} +version = {file = "vcsserver/VERSION"} +dependencies = {file = ["requirements.txt"]} +optional-dependencies.tests = {file = ["requirements_test.txt"]} + +[tool.ruff] + +select = [ + # Pyflakes + "F", + # Pycodestyle + "E", + "W", + # isort + "I001" +] + +ignore = [ + "E501", # line too long, handled by black +] + +# Same as Black. +line-length = 120 + +[tool.ruff.isort] + +known-first-party = ["vcsserver"] + +[tool.ruff.format] + +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" + diff --git a/release.nix b/release.nix deleted file mode 100644 --- a/release.nix +++ /dev/null @@ -1,22 +0,0 @@ -# This file defines how to "build" for packaging. - -{ pkgs ? import {} -, system ? builtins.currentSystem -, doCheck ? 
false -}: - -let - vcsserver = import ./default.nix { - inherit - doCheck - system; - - # disable checkPhase for build - checkPhase = '' - ''; - - }; - -in { - build = vcsserver; -} diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,48 +1,77 @@ -## dependencies - -# our custom configobj -https://code.rhodecode.com/upstream/configobj/artifacts/download/0-012de99a-b1e1-4f64-a5c0-07a98a41b324.tar.gz?md5=6a513f51fe04b2c18cf84c1395a7c626#egg=configobj==5.0.6 - -dogpile.cache==0.9.0 -dogpile.core==0.4.1 -decorator==4.1.2 -dulwich==0.13.0 -hgsubversion==1.9.3 -hg-evolve==9.1.0 -mako==1.1.0 -markupsafe==1.1.1 -mercurial==5.1.1 -msgpack-python==0.5.6 - -pastedeploy==2.1.0 -pyramid==1.10.4 -pyramid-mako==1.1.0 -pygit2==0.28.2 +# deps, generated via pipdeptree --exclude setuptools,wheel,pipdeptree,pip -f | tr '[:upper:]' '[:lower:]' +async-timeout==4.0.3 +atomicwrites==1.4.1 +celery==5.3.6 + billiard==4.2.0 + click==8.1.3 + click-didyoumean==0.3.0 + click==8.1.3 + click-plugins==1.1.1 + click==8.1.3 + click-repl==0.2.0 + click==8.1.3 + prompt-toolkit==3.0.38 + wcwidth==0.2.6 + six==1.16.0 + kombu==5.3.5 + amqp==5.2.0 + vine==5.1.0 + vine==5.1.0 + python-dateutil==2.8.2 + six==1.16.0 + tzdata==2023.4 + vine==5.1.0 +contextlib2==21.6.0 +cov-core==1.15.0 + coverage==7.2.3 +diskcache==5.6.3 +dogpile.cache==1.3.0 + decorator==5.1.1 + stevedore==5.1.0 + pbr==5.11.1 +dulwich==0.21.6 + urllib3==1.26.14 +gunicorn==21.2.0 + packaging==23.1 +hg-evolve==11.0.2 +importlib-metadata==6.0.0 + zipp==3.15.0 +mercurial==6.3.3 +mock==5.0.2 +more-itertools==9.1.0 +msgpack==1.0.7 +orjson==3.9.13 +psutil==5.9.8 +py==1.11.0 +pygit2==1.13.3 + cffi==1.16.0 + pycparser==2.21 +pygments==2.15.1 +pyparsing==3.1.1 +pyramid==2.0.2 + hupper==1.12 + plaster==1.1.2 + plaster-pastedeploy==1.0.1 + pastedeploy==3.1.0 + plaster==1.1.2 + translationstring==1.4 + venusian==3.0.0 + webob==1.8.7 + zope.deprecation==5.0.0 + zope.interface==6.1.0 +redis==5.0.1 + async-timeout==4.0.3 repoze.lru==0.7 -redis==3.5.3 -simplejson==3.16.0 -subprocess32==3.5.4 -subvertpy==0.10.1 +scandir==1.10.0 +setproctitle==1.3.3 +subvertpy==0.11.0 +waitress==3.0.0 +wcwidth==0.2.6 -six==1.11.0 -translationstring==1.3 -webob==1.8.5 -zope.deprecation==4.4.0 -zope.interface==4.6.0 - -## http servers -gevent==1.5.0 -greenlet==0.4.15 -gunicorn==19.9.0 -waitress==1.3.1 - -## debug -ipdb==0.13.2 -ipython==5.1.0 ## test related requirements --r requirements_test.txt +#-r requirements_test.txt ## uncomment to add the debug libraries #-r requirements_debug.txt diff --git a/requirements_debug.txt b/requirements_debug.txt --- a/requirements_debug.txt +++ b/requirements_debug.txt @@ -1,8 +1,28 @@ ## special libraries we could extend the requirements.txt file with to add some -## custom libraries useful for debug and memory tracing - -## uncomment inclusion of this file in requirements.txt run make generate-pkgs and nix-shell +## custom libraries usefull for debug and memory tracing objgraph memory-profiler pympler + +## debug +ipdb +ipython +rich + +# format +flake8 +ruff + +pipdeptree==2.7.1 +invoke==2.0.0 +bumpversion==0.6.0 +bump2version==1.0.1 + +docutils-stubs +types-redis +types-requests==2.31.0.6 +types-sqlalchemy +types-psutil +types-pycurl +types-ujson diff --git a/requirements_pinned.txt b/requirements_pinned.txt deleted file mode 100644 --- a/requirements_pinned.txt +++ /dev/null @@ -1,18 +0,0 @@ -# contains not directly required libraries we want to pin the version. 
- -atomicwrites==1.3.0 -attrs==19.3.0 -contextlib2==0.6.0.post1 -cffi==1.12.3 -hupper==1.10.2 -importlib-metadata==1.6.0 -packaging==20.3 -pathlib2==2.3.5 -pygments==2.4.2 -pyparsing==2.4.7 -psutil==5.7.0 -pluggy==0.13.1 -scandir==1.10.0 -setproctitle==1.1.10 -venusian==1.2.0 -wcwidth==0.1.9 diff --git a/requirements_test.txt b/requirements_test.txt --- a/requirements_test.txt +++ b/requirements_test.txt @@ -1,16 +1,45 @@ # test related requirements -pytest==4.6.5 -py==1.8.0 -pytest-cov==2.7.1 -pytest-sugar==0.9.2 -pytest-runner==5.1.0 + +cov-core==1.15.0 + coverage==7.2.3 +mock==5.0.2 +py==1.11.0 +pytest-cov==4.0.0 + coverage==7.2.3 + pytest==7.3.1 + attrs==22.2.0 + iniconfig==2.0.0 + packaging==23.1 + pluggy==1.0.0 pytest-profiling==1.7.0 -pytest-timeout==1.3.3 -gprof2dot==2017.9.19 + gprof2dot==2022.7.29 + pytest==7.3.1 + attrs==22.2.0 + iniconfig==2.0.0 + packaging==23.1 + pluggy==1.0.0 + six==1.16.0 +pytest-runner==6.0.0 +pytest-sugar==0.9.7 + packaging==23.1 + pytest==7.3.1 + attrs==22.2.0 + iniconfig==2.0.0 + packaging==23.1 + pluggy==1.0.0 + termcolor==2.3.0 +pytest-timeout==2.1.0 + pytest==7.3.1 + attrs==22.2.0 + iniconfig==2.0.0 + packaging==23.1 + pluggy==1.0.0 +webtest==3.0.0 + beautifulsoup4==4.11.2 + soupsieve==2.4 + waitress==3.0.0 + webob==1.8.7 -mock==3.0.5 -cov-core==1.15.0 -coverage==4.5.4 - -webtest==2.0.34 -beautifulsoup4==4.6.3 +# RhodeCode test-data +rc_testdata @ https://code.rhodecode.com/upstream/rc-testdata-dist/raw/77378e9097f700b4c1b9391b56199fe63566b5c9/rc_testdata-0.11.0.tar.gz#egg=rc_testdata +rc_testdata==0.11.0 diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[aliases] -test = pytest diff --git a/setup.py b/setup.py deleted file mode 100644 --- a/setup.py +++ /dev/null @@ -1,136 +0,0 @@ -# -*- coding: utf-8 -*- -# RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2019 RodeCode GmbH -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -# Import early to make sure things are patched up properly -from setuptools import setup, find_packages - -import os -import sys -import pkgutil -import platform -import codecs - -try: # for pip >= 10 - from pip._internal.req import parse_requirements -except ImportError: # for pip <= 9.0.3 - from pip.req import parse_requirements - -try: # for pip >= 10 - from pip._internal.download import PipSession -except ImportError: # for pip <= 9.0.3 - from pip.download import PipSession - - - -if sys.version_info < (2, 7): - raise Exception('VCSServer requires Python 2.7 or later') - -here = os.path.abspath(os.path.dirname(__file__)) - -# defines current platform -__platform__ = platform.system() -__license__ = 'GPL V3' -__author__ = 'RhodeCode GmbH' -__url__ = 'https://code.rhodecode.com' -is_windows = __platform__ in ('Windows',) - - -def _get_requirements(req_filename, exclude=None, extras=None): - extras = extras or [] - exclude = exclude or [] - - try: - parsed = parse_requirements( - os.path.join(here, req_filename), session=PipSession()) - except TypeError: - # try pip < 6.0.0, that doesn't support session - parsed = parse_requirements(os.path.join(here, req_filename)) - - requirements = [] - for ir in parsed: - if ir.req and ir.name not in exclude: - requirements.append(str(ir.req)) - return requirements + extras - - -# requirements extract -setup_requirements = ['pytest-runner'] -install_requirements = _get_requirements( - 'requirements.txt', exclude=['setuptools']) -test_requirements = _get_requirements( - 'requirements_test.txt', extras=['configobj']) - - -def get_version(): - version = pkgutil.get_data('vcsserver', 'VERSION') - return version.strip() - - -# additional files that goes into package itself -package_data = { - '': ['*.txt', '*.rst'], - 'configs': ['*.ini'], - 'vcsserver': ['VERSION'], -} - -description = 'Version Control System Server' -keywords = ' '.join([ - 'CLI', 'RhodeCode', 'RhodeCode Enterprise', 'RhodeCode Tools']) - -# README/DESCRIPTION generation -readme_file = 'README.rst' -changelog_file = 'CHANGES.rst' -try: - long_description = codecs.open(readme_file).read() + '\n\n' + \ - codecs.open(changelog_file).read() -except IOError as err: - sys.stderr.write( - "[WARNING] Cannot find file specified as long_description (%s)\n " - "or changelog (%s) skipping that file" % (readme_file, changelog_file)) - long_description = description - - -setup( - name='rhodecode-vcsserver', - version=get_version(), - description=description, - long_description=long_description, - keywords=keywords, - license=__license__, - author=__author__, - author_email='admin@rhodecode.com', - url=__url__, - setup_requires=setup_requirements, - install_requires=install_requirements, - tests_require=test_requirements, - zip_safe=False, - packages=find_packages(exclude=["docs", "tests*"]), - package_data=package_data, - include_package_data=True, - classifiers=[ - 'Development Status :: 6 - Mature', - 'Intended Audience :: Developers', - 'Operating System :: OS Independent', - 'Topic :: Software Development :: Version Control', - 'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)', - 'Programming Language :: Python :: 2.7', - ], - entry_points={ - 'paste.app_factory': ['main=vcsserver.http_main:main'] - }, -) diff --git a/shell.nix b/shell.nix deleted file mode 
100644
--- a/shell.nix
+++ /dev/null
@@ -1,66 +0,0 @@
-# This file contains the adjustments which are desired for a development
-# environment.
-
-{ pkgs ? (import {})
-, pythonPackages ? "python27Packages"
-, doCheck ? false
-}:
-
-let
-
-  vcsserver = import ./default.nix {
-    inherit
-      doCheck;
-  };
-
-  vcs-pythonPackages = vcsserver.pythonPackages;
-
-in vcsserver.override (attrs: {
-  # Avoid that we dump any sources into the store when entering the shell and
-  # make development a little bit more convenient.
-  src = null;
-
-  # Add dependencies which are useful for the development environment.
-  buildInputs =
-    attrs.buildInputs ++
-    (with vcs-pythonPackages; [
-      ipdb
-    ]);
-
-  # place to inject some required libs from develop installs
-  propagatedBuildInputs =
-    attrs.propagatedBuildInputs ++
-    [];
-
-
-  # Make sure we execute both hooks
-  shellHook = ''
-    runHook preShellHook
-    runHook postShellHook
-  '';
-
-  preShellHook = ''
-    echo "Entering VCS-Shell"
-
-    # Custom prompt to distinguish from other dev envs.
-    export PS1="\n\[\033[1;32m\][VCS-shell:\w]$\[\033[0m\] "
-
-    # Set locale
-    export LC_ALL="en_US.UTF-8"
-
-    # Setup a temporary directory.
-    tmp_path=$(mktemp -d)
-    export PATH="$tmp_path/bin:$PATH"
-    export PYTHONPATH="$tmp_path/${vcs-pythonPackages.python.sitePackages}:$PYTHONPATH"
-    mkdir -p $tmp_path/${vcs-pythonPackages.python.sitePackages}
-
-    # Develop installation
-    echo "[BEGIN]: develop install of rhodecode-vcsserver"
-    python setup.py develop --prefix $tmp_path --allow-hosts ""
-  '';
-
-  postShellHook = ''
-
-  '';
-
-})
diff --git a/vcsserver/VERSION b/vcsserver/VERSION
--- a/vcsserver/VERSION
+++ b/vcsserver/VERSION
@@ -1,1 +1,1 @@
-4.27.1
\ No newline at end of file
+5.0.0
\ No newline at end of file
diff --git a/vcsserver/__init__.py b/vcsserver/__init__.py
--- a/vcsserver/__init__.py
+++ b/vcsserver/__init__.py
@@ -1,5 +1,5 @@
 # RhodeCode VCSServer provides access to different vcs backends via network.
-# Copyright (C) 2014-2020 RhodeCode GmbH
+# Copyright (C) 2014-2023 RhodeCode GmbH
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -15,10 +15,23 @@
 # along with this program; if not, write to the Free Software Foundation,
 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

-import pkgutil
+import os
+
+__version__ = ''

-__version__ = pkgutil.get_data('vcsserver', 'VERSION').strip()

+def get_version():
+    global __version__
+    if __version__:
+        return __version__
+
+    here = os.path.abspath(os.path.dirname(__file__))
+    ver_file = os.path.join(here, "VERSION")
+    with open(ver_file, "rt") as f:
+        version = f.read().strip()
+
+    __version__ = version
+    return version

 # link to config for pyramid
 CONFIG = {}
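(Aside on the vcsserver/__init__.py hunk above: the pkgutil.get_data() lookup is replaced by a plain file read whose result is cached in the module-level __version__. A minimal behavioral sketch, assuming an importable vcsserver package:)

    import vcsserver

    first = vcsserver.get_version()   # reads vcsserver/VERSION from disk once
    second = vcsserver.get_version()  # short-circuits on the cached __version__ global
    assert first == second == vcsserver.__version__  # e.g. '5.0.0' after this changeset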
diff --git a/vcsserver/base.py b/vcsserver/base.py
--- a/vcsserver/base.py
+++ b/vcsserver/base.py
@@ -1,5 +1,5 @@
 # RhodeCode VCSServer provides access to different vcs backends via network.
-# Copyright (C) 2014-2020 RhodeCode GmbH
+# Copyright (C) 2014-2023 RhodeCode GmbH
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -16,18 +16,21 @@
 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 import os
 import sys
-import traceback
+import tempfile
 import logging
-import urlparse
+import urllib.parse
+
+from vcsserver.lib.rc_cache.archive_cache import get_archival_cache_store

 from vcsserver import exceptions
 from vcsserver.exceptions import NoContentException
-from vcsserver.hgcompat import (archival)
-
+from vcsserver.hgcompat import archival
+from vcsserver.str_utils import safe_bytes
+from vcsserver.lib.exc_tracking import format_exc

 log = logging.getLogger(__name__)


-class RepoFactory(object):
+class RepoFactory:
     """
     Utility to create instances of repository
@@ -55,31 +58,33 @@ def obfuscate_qs(query_string):
         return None

     parsed = []
-    for k, v in urlparse.parse_qsl(query_string, keep_blank_values=True):
+    for k, v in urllib.parse.parse_qsl(query_string, keep_blank_values=True):
         if k in ['auth_token', 'api_key']:
             v = "*****"
         parsed.append((k, v))

     return '&'.join('{}{}'.format(
-        k, '={}'.format(v) if v else '') for k, v in parsed)
+        k, f'={v}' if v else '') for k, v in parsed)


-def raise_from_original(new_type):
+def raise_from_original(new_type, org_exc: Exception):
     """
     Raise a new exception type with original args and traceback.
     """
-    exc_type, exc_value, exc_traceback = sys.exc_info()
+    exc_info = sys.exc_info()
+    exc_type, exc_value, exc_traceback = exc_info
     new_exc = new_type(*exc_value.args)
+
     # store the original traceback into the new exc
-    new_exc._org_exc_tb = traceback.format_exc(exc_traceback)
+    new_exc._org_exc_tb = format_exc(exc_info)

     try:
-        raise new_exc, None, exc_traceback
+        raise new_exc.with_traceback(exc_traceback)
     finally:
         del exc_traceback


-class ArchiveNode(object):
+class ArchiveNode:
     def __init__(self, path, mode, is_link, raw_bytes):
         self.path = path
         self.mode = mode
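(The hunk below replaces archive_repo() with store_archive_in_cache(): the archive is built into a temp file, stored in a diskcache-backed store under archive_key, and the cached file's path is returned; repeat calls with the same key skip the rebuild. A hedged usage sketch follows; the key format and the empty cache_config are illustrative assumptions, real settings come from the vcsserver .ini:)

    from vcsserver.base import ArchiveNode, store_archive_in_cache

    def node_walker(commit_id, archive_at_path):
        # yield one ArchiveNode per file; raw_bytes must be a callable returning bytes
        yield ArchiveNode(b'README.rst', 0o644, False, lambda: b'hello world')

    cached_path = store_archive_in_cache(
        node_walker, archive_key='repo1-abc123.tgz', kind='tgz', mtime=0,
        archive_at_path='/', archive_dir_name='repo1', commit_id='abc123',
        cache_config={})  # assumed shape; normally taken from vcsserver configuration
    # a second call with the same archive_key returns the same cached path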
@@ -87,34 +92,59 @@ class ArchiveNode(object):
         self.raw_bytes = raw_bytes


-def archive_repo(walker, archive_dest_path, kind, mtime, archive_at_path,
-                 archive_dir_name, commit_id, write_metadata=True, extra_metadata=None):
+def store_archive_in_cache(node_walker, archive_key, kind, mtime, archive_at_path, archive_dir_name,
+                           commit_id, write_metadata=True, extra_metadata=None, cache_config=None):
     """
-    walker should be a file walker, for example:
-    def walker():
+    Function that generates an archive and stores it in a dedicated backend store.
+    In here we use diskcache.
+
+    :param node_walker: a generator returning nodes to add to archive
+    :param archive_key: key used to store the path
+    :param kind: archive kind
+    :param mtime: time of creation
+    :param archive_at_path: default '/'; the path at which the archive was started.
+        If this is not '/' it means it's a partial archive
+    :param archive_dir_name: inside dir name when creating an archive
+    :param commit_id: commit sha of revision archive was created at
+    :param write_metadata:
+    :param extra_metadata:
+    :param cache_config:
+
+    node_walker should be a file walker, for example:
+    def node_walker():
         for file_info in files:
             yield ArchiveNode(fn, mode, is_link, ctx[fn].data)
     """
     extra_metadata = extra_metadata or {}

+    d_cache = get_archival_cache_store(config=cache_config)
+
+    if archive_key in d_cache:
+        with d_cache as d_cache_reader:
+            reader, tag = d_cache_reader.get(archive_key, read=True, tag=True, retry=True)
+        return reader.name
+
+    archive_tmp_path = safe_bytes(tempfile.mkstemp()[1])
+    log.debug('Creating new temp archive in %s', archive_tmp_path)
+
     if kind == "tgz":
-        archiver = archival.tarit(archive_dest_path, mtime, "gz")
+        archiver = archival.tarit(archive_tmp_path, mtime, b"gz")
     elif kind == "tbz2":
-        archiver = archival.tarit(archive_dest_path, mtime, "bz2")
+        archiver = archival.tarit(archive_tmp_path, mtime, b"bz2")
     elif kind == 'zip':
-        archiver = archival.zipit(archive_dest_path, mtime)
+        archiver = archival.zipit(archive_tmp_path, mtime)
     else:
         raise exceptions.ArchiveException()(
-            'Remote does not support: "%s" archive type.' % kind)
+            f'Remote does not support: "{kind}" archive type.')

-    for f in walker(commit_id, archive_at_path):
-        f_path = os.path.join(archive_dir_name, f.path.lstrip('/'))
+    for f in node_walker(commit_id, archive_at_path):
+        f_path = os.path.join(safe_bytes(archive_dir_name), safe_bytes(f.path).lstrip(b'/'))
         try:
             archiver.addfile(f_path, f.mode, f.is_link, f.raw_bytes())
         except NoContentException:
             # NOTE(marcink): this is a special case for SVN so we can create "empty"
-            # directories which arent supported by archiver
-            archiver.addfile(os.path.join(f_path, '.dir'), f.mode, f.is_link, '')
+            # directories which are not supported by archiver
+            archiver.addfile(os.path.join(f_path, b'.dir'), f.mode, f.is_link, b'')

     if write_metadata:
         metadata = dict([
@@ -123,8 +153,41 @@ def archive_repo(walker, archive_dest_pa
         ])
         metadata.update(extra_metadata)

-        meta = ["%s:%s" % (f_name, value) for f_name, value in metadata.items()]
-        f_path = os.path.join(archive_dir_name, '.archival.txt')
-        archiver.addfile(f_path, 0o644, False, '\n'.join(meta))
+        meta = [safe_bytes(f"{f_name}:{value}") for f_name, value in metadata.items()]
+        f_path = os.path.join(safe_bytes(archive_dir_name), b'.archival.txt')
+        archiver.addfile(f_path, 0o644, False, b'\n'.join(meta))
+
+    archiver.done()
+
+    # ensure set & get are atomic
+    with d_cache.transact():
+
+        with open(archive_tmp_path, 'rb') as archive_file:
+            add_result = d_cache.set(archive_key, archive_file, read=True, tag='db-name', retry=True)
+        if not add_result:
+            log.error('Failed to store cache for key=%s', archive_key)
+
+        os.remove(archive_tmp_path)

-    return archiver.done()
+        reader, tag = d_cache.get(archive_key, read=True, tag=True, retry=True)
+        if not reader:
+            raise AssertionError(f'empty reader on key={archive_key} added={add_result}')
+
+        return reader.name
+
+
+class BinaryEnvelope:
+    def __init__(self, val):
+        self.val = val
+
+
+class BytesEnvelope(bytes):
+    def __new__(cls, content):
+        if isinstance(content, bytes):
+            return super().__new__(cls, content)
+        else:
+            raise TypeError('BytesEnvelope content= param must be bytes.
diff --git a/vcsserver/config/__init__.py b/vcsserver/config/__init__.py
new file mode 100644
--- /dev/null
+++ b/vcsserver/config/__init__.py
@@ -0,0 +1,1 @@
+# Copyright (C) 2014-2023 RhodeCode GmbH
diff --git a/vcsserver/config/hooks.py b/vcsserver/config/hooks.py
new file mode 100644
--- /dev/null
+++ b/vcsserver/config/hooks.py
@@ -0,0 +1,27 @@
+# Copyright (C) 2010-2023 RhodeCode GmbH
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3
+# (only), as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# This program is dual-licensed. If you wish to learn more about the
+# RhodeCode Enterprise Edition, including its added features, Support services,
+# and proprietary license terms, please see https://rhodecode.com/licenses/
+
+HOOK_REPO_SIZE = 'changegroup.repo_size'
+
+# HG
+HOOK_PRE_PULL = 'preoutgoing.pre_pull'
+HOOK_PULL = 'outgoing.pull_logger'
+HOOK_PRE_PUSH = 'prechangegroup.pre_push'
+HOOK_PRETX_PUSH = 'pretxnchangegroup.pre_push'
+HOOK_PUSH = 'changegroup.push_logger'
+HOOK_PUSH_KEY = 'pushkey.key_push'
diff --git a/vcsserver/config/settings_maker.py b/vcsserver/config/settings_maker.py
new file mode 100644
--- /dev/null
+++ b/vcsserver/config/settings_maker.py
@@ -0,0 +1,168 @@
+# Copyright (C) 2010-2023 RhodeCode GmbH
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3
+# (only), as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# This program is dual-licensed. If you wish to learn more about the
+# RhodeCode Enterprise Edition, including its added features, Support services,
+# and proprietary license terms, please see https://rhodecode.com/licenses/
+
+import os
+import textwrap
+import string
+import functools
+import logging
+import tempfile
+import logging.config
+
+from vcsserver.type_utils import str2bool, aslist
+
+log = logging.getLogger(__name__)
+
+# skip keys that are set here, so we don't process them twice
+set_keys = {
+    '__file__': ''
+}
+
+
+class SettingsMaker:
+
+    def __init__(self, app_settings):
+        self.settings = app_settings
+
+    @classmethod
+    def _bool_func(cls, input_val):
+        if isinstance(input_val, bytes):
+            # decode to str
+            input_val = input_val.decode('utf8')
+        return str2bool(input_val)
+
+    @classmethod
+    def _int_func(cls, input_val):
+        return int(input_val)
+
+    @classmethod
+    def _list_func(cls, input_val, sep=','):
+        return aslist(input_val, sep=sep)
+
+    @classmethod
+    def _string_func(cls, input_val, lower=True):
+        if lower:
+            input_val = input_val.lower()
+        return input_val
+
+    @classmethod
+    def _float_func(cls, input_val):
+        return float(input_val)
+
+    @classmethod
+    def _dir_func(cls, input_val, ensure_dir=False, mode=0o755):
+
+        # ensure we have our dir created
+        if not os.path.isdir(input_val) and ensure_dir:
+            os.makedirs(input_val, mode=mode, exist_ok=True)
+
+        if not os.path.isdir(input_val):
+            raise Exception(f'Dir at {input_val} does not exist')
+        return input_val
+
+    @classmethod
+    def _file_path_func(cls, input_val, ensure_dir=False, mode=0o755):
+        dirname = os.path.dirname(input_val)
+        cls._dir_func(dirname, ensure_dir=ensure_dir)
+        return input_val
+
+    @classmethod
+    def _key_transformator(cls, key):
+        return "{}_{}".format('RC'.upper(), key.upper().replace('.', '_').replace('-', '_'))
+
+    def maybe_env_key(self, key):
+        # this KEY may also be set in the env; if so, that value takes higher priority.
+        transformed_key = self._key_transformator(key)
+        envvar_value = os.environ.get(transformed_key)
+        if envvar_value:
+            log.debug('using `%s` key instead of `%s` key for config', transformed_key, key)
+
+        return envvar_value
+
+    def env_expand(self):
+        replaced = {}
+        for k, v in self.settings.items():
+            if k not in set_keys:
+                envvar_value = self.maybe_env_key(k)
+                if envvar_value:
+                    replaced[k] = envvar_value
+                    set_keys[k] = envvar_value
+
+        # replace ALL keys updated
+        self.settings.update(replaced)
+
+    def enable_logging(self, logging_conf=None, level='INFO', formatter='generic'):
+        """
+        Helper to enable logging configuration on a running instance
+        """
+
+        if not str2bool(self.settings.get('logging.autoconfigure')):
+            log.info('logging configuration based on main .ini file')
+            return
+
+        if logging_conf is None:
+            logging_conf = self.settings.get('logging.logging_conf_file') or ''
+
+        if not os.path.isfile(logging_conf):
+            log.error('Unable to set up logging based on %s, '
+                      'file does not exist. Specify a path using the '
+                      'logging.logging_conf_file config setting.', logging_conf)
+            return
+
+        with open(logging_conf, 'rt') as f:
+            ini_template = textwrap.dedent(f.read())
+            ini_template = string.Template(ini_template).safe_substitute(
+                RC_LOGGING_LEVEL=os.environ.get('RC_LOGGING_LEVEL', '') or level,
+                RC_LOGGING_FORMATTER=os.environ.get('RC_LOGGING_FORMATTER', '') or formatter
+            )
+
+        with tempfile.NamedTemporaryFile('w', prefix='rc_logging_', suffix='.ini', delete=False) as f:
+            log.info('Saved Temporary LOGGING config at %s', f.name)
+            f.write(ini_template)
+
+        logging.config.fileConfig(f.name)
+        os.remove(f.name)
+
+    def make_setting(self, key, default, lower=False, default_when_empty=False, parser=None):
+        input_val = self.settings.get(key, default)
+
+        if default_when_empty and not input_val:
+            # use default value when value is set in the config but it is empty
+            input_val = default
+
+        parser_func = {
+            'bool': self._bool_func,
+            'int': self._int_func,
+            'list': self._list_func,
+            'list:newline': functools.partial(self._list_func, sep='\n'),
+            'list:spacesep': functools.partial(self._list_func, sep=' '),
+            'string': functools.partial(self._string_func, lower=lower),
+            'dir': self._dir_func,
+            'dir:ensured': functools.partial(self._dir_func, ensure_dir=True),
+            'file': self._file_path_func,
+            'file:ensured': functools.partial(self._file_path_func, ensure_dir=True),
+            None: lambda i: i
+        }[parser]
+
+        envvar_value = self.maybe_env_key(key)
+        if envvar_value:
+            input_val = envvar_value
+            set_keys[key] = input_val
+
+        self.settings[key] = parser_func(input_val)
+        return self.settings[key]
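A short usage sketch for the SettingsMaker above; the keys and values are invented for illustration. make_setting() dispatches on the parser name, and maybe_env_key() lets an RC_-prefixed environment variable override the ini value:

    # Illustrative SettingsMaker usage; not part of the diff, keys are made up.
    import os

    from vcsserver.config.settings_maker import SettingsMaker

    settings = {
        'use_cache': 'true',
        'worker_count': '4',
        'allowed_origins': 'a.example.com,b.example.com',
    }
    os.environ['RC_WORKER_COUNT'] = '8'  # RC_<KEY> wins over the ini value

    maker = SettingsMaker(settings)
    maker.make_setting('use_cache', default=False, parser='bool')     # True
    maker.make_setting('worker_count', default=1, parser='int')       # 8, from the env var
    maker.make_setting('allowed_origins', default='', parser='list')  # split on ','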
diff --git a/vcsserver/echo_stub/__init__.py b/vcsserver/echo_stub/__init__.py
--- a/vcsserver/echo_stub/__init__.py
+++ b/vcsserver/echo_stub/__init__.py
@@ -1,3 +1,5 @@
+# Copyright (C) 2014-2023 RhodeCode GmbH
+
 """
 Provides a stub implementation for VCS operations.
diff --git a/vcsserver/echo_stub/echo_app.py b/vcsserver/echo_stub/echo_app.py
--- a/vcsserver/echo_stub/echo_app.py
+++ b/vcsserver/echo_stub/echo_app.py
@@ -1,3 +1,5 @@
+# Copyright (C) 2014-2023 RhodeCode GmbH
+
 """
 Implementation of :class:`EchoApp`.
@@ -10,7 +12,7 @@ import logging
 log = logging.getLogger(__name__)


-class EchoApp(object):
+class EchoApp:

     def __init__(self, repo_path, repo_name, config):
         self._repo_path = repo_path
@@ -23,10 +25,10 @@ class EchoApp(object):
         status = '200 OK'
         headers = [('Content-Type', 'text/plain')]
         start_response(status, headers)
-        return ["ECHO"]
+        return [b"ECHO"]


-class EchoAppStream(object):
+class EchoAppStream:

     def __init__(self, repo_path, repo_name, config):
         self._repo_path = repo_path
@@ -41,8 +43,8 @@ class EchoAppStream(object):
             start_response(status, headers)

         def generator():
-            for _ in xrange(1000000):
-                yield "ECHO"
+            for _ in range(1000000):
+                yield b"ECHO_STREAM"
         return generator()
diff --git a/vcsserver/echo_stub/remote_wsgi.py b/vcsserver/echo_stub/remote_wsgi.py
--- a/vcsserver/echo_stub/remote_wsgi.py
+++ b/vcsserver/echo_stub/remote_wsgi.py
@@ -1,3 +1,5 @@
+# Copyright (C) 2014-2023 RhodeCode GmbH
+
 """
 Provides the same API as :mod:`remote_wsgi`.
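The [b"ECHO"] and b"ECHO_STREAM" changes above follow PEP 3333: under Python 3 a WSGI response iterable must yield bytes, not str. A minimal sketch (hypothetical app, not part of this diff):

    # PEP 3333: the WSGI body iterable must yield bytes on Python 3.
    def echo_demo_app(environ, start_response):
        start_response('200 OK', [('Content-Type', 'text/plain')])
        return [b'ECHO']  # returning ['ECHO'] (str) would break WSGI servers

    if __name__ == '__main__':
        from wsgiref.simple_server import make_server
        with make_server('127.0.0.1', 8081, echo_demo_app) as srv:
            srv.handle_request()  # serve a single request for a quick check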
@@ -13,7 +15,7 @@ from vcsserver import wsgi_app_caller log = logging.getLogger(__name__) -class GitRemoteWsgi(object): +class GitRemoteWsgi: def handle(self, environ, input_data, *args, **kwargs): app = wsgi_app_caller.WSGIAppCaller( create_echo_wsgi_app(*args, **kwargs)) @@ -21,7 +23,7 @@ class GitRemoteWsgi(object): return app.handle(environ, input_data) -class HgRemoteWsgi(object): +class HgRemoteWsgi: def handle(self, environ, input_data, *args, **kwargs): app = wsgi_app_caller.WSGIAppCaller( create_echo_wsgi_app(*args, **kwargs)) diff --git a/vcsserver/exceptions.py b/vcsserver/exceptions.py --- a/vcsserver/exceptions.py +++ b/vcsserver/exceptions.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -109,7 +109,7 @@ class HTTPRepoLocked(HTTPLocked): def __init__(self, title, status_code=None, **kwargs): self.code = status_code or HTTPLocked.code self.title = title - super(HTTPRepoLocked, self).__init__(**kwargs) + super().__init__(**kwargs) class HTTPRepoBranchProtected(HTTPForbidden): diff --git a/vcsserver/git_lfs/__init__.py b/vcsserver/git_lfs/__init__.py --- a/vcsserver/git_lfs/__init__.py +++ b/vcsserver/git_lfs/__init__.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -16,4 +16,4 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -from app import create_app +from .app import create_app # noqa diff --git a/vcsserver/git_lfs/app.py b/vcsserver/git_lfs/app.py --- a/vcsserver/git_lfs/app.py +++ b/vcsserver/git_lfs/app.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,23 +17,22 @@ import re import logging -from wsgiref.util import FileWrapper -import simplejson as json from pyramid.config import Configurator from pyramid.response import Response, FileIter from pyramid.httpexceptions import ( HTTPBadRequest, HTTPNotImplemented, HTTPNotFound, HTTPForbidden, HTTPUnprocessableEntity) +from vcsserver.lib.rc_json import json from vcsserver.git_lfs.lib import OidHandler, LFSOidStore from vcsserver.git_lfs.utils import safe_result, get_cython_compat_decorator -from vcsserver.utils import safe_int +from vcsserver.str_utils import safe_int log = logging.getLogger(__name__) -GIT_LFS_CONTENT_TYPE = 'application/vnd.git-lfs' #+json ? +GIT_LFS_CONTENT_TYPE = 'application/vnd.git-lfs' # +json ? 
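For orientation, the batch endpoint below (lfs_objects_batch) implements the Git LFS batch API. A request/response pair has roughly this shape; the oid, size and href values are illustrative:

    # Shape of a Git LFS batch exchange; values are made up for illustration.
    batch_request = {
        'operation': 'download',  # or 'upload'
        'objects': [{'oid': 'abc123', 'size': '1024'}],
    }

    batch_response = {
        'transfer': 'basic',
        'objects': [{
            'oid': 'abc123',
            'size': '1024',
            'authenticated': True,
            'actions': {
                'download': {
                    'href': 'http://localhost/repo/info/lfs/objects/abc123',
                    'header': {'Authorization': 'Basic XXXXX'},
                },
            },
        }],
    }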
GIT_LFS_PROTO_PAT = re.compile(r'^/(.+)/(info/lfs/(.+))') @@ -48,7 +47,7 @@ def write_response_error(http_exception, return _exception -class AuthHeaderRequired(object): +class AuthHeaderRequired: """ Decorator to check if request has proper auth-header """ @@ -95,7 +94,7 @@ def lfs_objects_batch(request): if operation not in ('download', 'upload'): log.debug('LFS: unsupported operation:%s', operation) return write_response_error( - HTTPBadRequest, 'unsupported operation mode: `%s`' % operation) + HTTPBadRequest, f'unsupported operation mode: `{operation}`') if 'objects' not in data: log.debug('LFS: missing objects data') @@ -115,8 +114,13 @@ def lfs_objects_batch(request): HTTPBadRequest, 'unsupported data in objects') obj_data = {'oid': oid} + if http_scheme == 'http': + # Note(marcink): when using http, we might have a custom port + # so we skip setting it to http, url dispatch then wont generate a port in URL + # for development we need this + http_scheme = None - obj_href = request.route_url('lfs_objects_oid', repo=repo, oid=oid, + obj_href = request.route_url('lfs_objects_oid', repo=repo, oid=oid, _scheme=http_scheme) obj_verify_href = request.route_url('lfs_objects_verify', repo=repo, _scheme=http_scheme) @@ -179,7 +183,7 @@ def lfs_objects_oid_download(request): if not store.has_oid(): log.debug('LFS: oid %s does not exists in store', oid) return write_response_error( - HTTPNotFound, 'requested file with oid `%s` not found in store' % oid) + HTTPNotFound, f'requested file with oid `{oid}` not found in store') # TODO(marcink): support range header ? # Range: bytes=0-, `bytes=(\d+)\-.*` @@ -208,11 +212,11 @@ def lfs_objects_verify(request): if not store.has_oid(): log.debug('LFS: oid %s does not exists in store', oid) return write_response_error( - HTTPNotFound, 'oid `%s` does not exists in store' % oid) + HTTPNotFound, f'oid `{oid}` does not exists in store') store_size = store.size_oid() if store_size != size: - msg = 'requested file size mismatch store size:%s requested:%s' % ( + msg = 'requested file size mismatch store size:{} requested:{}'.format( store_size, size) return write_response_error( HTTPUnprocessableEntity, msg) diff --git a/vcsserver/git_lfs/lib.py b/vcsserver/git_lfs/lib.py --- a/vcsserver/git_lfs/lib.py +++ b/vcsserver/git_lfs/lib.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. 
-# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -23,7 +23,7 @@ from collections import OrderedDict log = logging.getLogger(__name__) -class OidHandler(object): +class OidHandler: def __init__(self, store, repo_name, auth, oid, obj_size, obj_data, obj_href, obj_verify_href=None): @@ -51,7 +51,7 @@ class OidHandler(object): if not store.has_oid(): # error reply back to client that something is wrong with dl - err_msg = 'object: {} does not exist in store'.format(store.oid) + err_msg = f'object: {store.oid} does not exist in store' has_errors = OrderedDict( error=OrderedDict( code=404, @@ -113,12 +113,14 @@ class OidHandler(object): return handler(*args, **kwargs) -class LFSOidStore(object): +class LFSOidStore: def __init__(self, oid, repo, store_location=None): self.oid = oid self.repo = repo - self.store_path = store_location or self.get_default_store() + defined_store_path = store_location or self.get_default_store() + self.store_suffix = f"/objects/{oid[:2]}/{oid[2:4]}" + self.store_path = f"{defined_store_path.rstrip('/')}{self.store_suffix}" self.tmp_oid_path = os.path.join(self.store_path, oid + '.tmp') self.oid_path = os.path.join(self.store_path, oid) self.fd = None @@ -130,7 +132,7 @@ class LFSOidStore(object): f.write('...') """ - class StoreEngine(object): + class StoreEngine: def __init__(self, mode, store_path, oid_path, tmp_oid_path): self.mode = mode self.store_path = store_path diff --git a/vcsserver/git_lfs/tests/__init__.py b/vcsserver/git_lfs/tests/__init__.py --- a/vcsserver/git_lfs/tests/__init__.py +++ b/vcsserver/git_lfs/tests/__init__.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/vcsserver/git_lfs/tests/test_lfs_app.py b/vcsserver/git_lfs/tests/test_lfs_app.py --- a/vcsserver/git_lfs/tests/test_lfs_app.py +++ b/vcsserver/git_lfs/tests/test_lfs_app.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. 
-# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -18,9 +18,11 @@ import os import pytest from webtest.app import TestApp as WebObTestApp -import simplejson as json +from vcsserver.lib.rc_json import json +from vcsserver.str_utils import safe_bytes from vcsserver.git_lfs.app import create_app +from vcsserver.git_lfs.lib import LFSOidStore @pytest.fixture(scope='function') @@ -46,7 +48,7 @@ def http_auth(): return {'HTTP_AUTHORIZATION': "Basic XXXXX"} -class TestLFSApplication(object): +class TestLFSApplication: def test_app_wrong_path(self, git_lfs_app): git_lfs_app.get('/repo/info/lfs/xxx', status=404) @@ -54,19 +56,19 @@ class TestLFSApplication(object): def test_app_deprecated_endpoint(self, git_lfs_app): response = git_lfs_app.post('/repo/info/lfs/objects', status=501) assert response.status_code == 501 - assert json.loads(response.text) == {u'message': u'LFS: v1 api not supported'} + assert json.loads(response.text) == {'message': 'LFS: v1 api not supported'} def test_app_lock_verify_api_not_available(self, git_lfs_app): response = git_lfs_app.post('/repo/info/lfs/locks/verify', status=501) assert response.status_code == 501 assert json.loads(response.text) == { - u'message': u'GIT LFS locking api not supported'} + 'message': 'GIT LFS locking api not supported'} def test_app_lock_api_not_available(self, git_lfs_app): response = git_lfs_app.post('/repo/info/lfs/locks', status=501) assert response.status_code == 501 assert json.loads(response.text) == { - u'message': u'GIT LFS locking api not supported'} + 'message': 'GIT LFS locking api not supported'} def test_app_batch_api_missing_auth(self, git_lfs_app): git_lfs_app.post_json( @@ -77,14 +79,14 @@ class TestLFSApplication(object): '/repo/info/lfs/objects/batch', params={}, status=400, extra_environ=http_auth) assert json.loads(response.text) == { - u'message': u'unsupported operation mode: `None`'} + 'message': 'unsupported operation mode: `None`'} def test_app_batch_api_missing_objects(self, git_lfs_app, http_auth): response = git_lfs_app.post_json( '/repo/info/lfs/objects/batch', params={'operation': 'download'}, status=400, extra_environ=http_auth) assert json.loads(response.text) == { - u'message': u'missing objects data'} + 'message': 'missing objects data'} def test_app_batch_api_unsupported_data_in_objects( self, git_lfs_app, http_auth): @@ -94,7 +96,7 @@ class TestLFSApplication(object): '/repo/info/lfs/objects/batch', params=params, status=400, extra_environ=http_auth) assert json.loads(response.text) == { - u'message': u'unsupported data in objects'} + 'message': 'unsupported data in objects'} def test_app_batch_api_download_missing_object( self, git_lfs_app, http_auth): @@ -105,23 +107,23 @@ class TestLFSApplication(object): extra_environ=http_auth) expected_objects = [ - {u'authenticated': True, - u'errors': {u'error': { - u'code': 404, - u'message': u'object: 123 does not exist in store'}}, - u'oid': u'123', - u'size': u'1024'} + {'authenticated': True, + 'errors': {'error': { + 'code': 404, + 'message': 'object: 123 does not exist in store'}}, + 'oid': '123', + 'size': '1024'} ] assert json.loads(response.text) == { 'objects': expected_objects, 'transfer': 'basic'} def test_app_batch_api_download(self, git_lfs_app, http_auth): oid = '456' - oid_path = os.path.join(git_lfs_app._store, oid) + oid_path = LFSOidStore(oid=oid, repo=None, 
store_location=git_lfs_app._store).oid_path if not os.path.isdir(os.path.dirname(oid_path)): os.makedirs(os.path.dirname(oid_path)) with open(oid_path, 'wb') as f: - f.write('OID_CONTENT') + f.write(safe_bytes('OID_CONTENT')) params = {'operation': 'download', 'objects': [{'oid': oid, 'size': '1024'}]} @@ -130,14 +132,14 @@ class TestLFSApplication(object): extra_environ=http_auth) expected_objects = [ - {u'authenticated': True, - u'actions': { - u'download': { - u'header': {u'Authorization': u'Basic XXXXX'}, - u'href': u'http://localhost/repo/info/lfs/objects/456'}, + {'authenticated': True, + 'actions': { + 'download': { + 'header': {'Authorization': 'Basic XXXXX'}, + 'href': 'http://localhost/repo/info/lfs/objects/456'}, }, - u'oid': u'456', - u'size': u'1024'} + 'oid': '456', + 'size': '1024'} ] assert json.loads(response.text) == { 'objects': expected_objects, 'transfer': 'basic'} @@ -149,18 +151,18 @@ class TestLFSApplication(object): '/repo/info/lfs/objects/batch', params=params, extra_environ=http_auth) expected_objects = [ - {u'authenticated': True, - u'actions': { - u'upload': { - u'header': {u'Authorization': u'Basic XXXXX', - u'Transfer-Encoding': u'chunked'}, - u'href': u'http://localhost/repo/info/lfs/objects/123'}, - u'verify': { - u'header': {u'Authorization': u'Basic XXXXX'}, - u'href': u'http://localhost/repo/info/lfs/verify'} + {'authenticated': True, + 'actions': { + 'upload': { + 'header': {'Authorization': 'Basic XXXXX', + 'Transfer-Encoding': 'chunked'}, + 'href': 'http://localhost/repo/info/lfs/objects/123'}, + 'verify': { + 'header': {'Authorization': 'Basic XXXXX'}, + 'href': 'http://localhost/repo/info/lfs/verify'} }, - u'oid': u'123', - u'size': u'1024'} + 'oid': '123', + 'size': '1024'} ] assert json.loads(response.text) == { 'objects': expected_objects, 'transfer': 'basic'} @@ -172,18 +174,18 @@ class TestLFSApplication(object): '/repo/info/lfs/objects/batch', params=params, extra_environ=http_auth) expected_objects = [ - {u'authenticated': True, - u'actions': { - u'upload': { - u'header': {u'Authorization': u'Basic XXXXX', - u'Transfer-Encoding': u'chunked'}, - u'href': u'https://localhost/repo/info/lfs/objects/123'}, - u'verify': { - u'header': {u'Authorization': u'Basic XXXXX'}, - u'href': u'https://localhost/repo/info/lfs/verify'} + {'authenticated': True, + 'actions': { + 'upload': { + 'header': {'Authorization': 'Basic XXXXX', + 'Transfer-Encoding': 'chunked'}, + 'href': 'https://localhost/repo/info/lfs/objects/123'}, + 'verify': { + 'header': {'Authorization': 'Basic XXXXX'}, + 'href': 'https://localhost/repo/info/lfs/verify'} }, - u'oid': u'123', - u'size': u'1024'} + 'oid': '123', + 'size': '1024'} ] assert json.loads(response.text) == { 'objects': expected_objects, 'transfer': 'basic'} @@ -195,7 +197,7 @@ class TestLFSApplication(object): status=400) assert json.loads(response.text) == { - u'message': u'missing oid and size in request data'} + 'message': 'missing oid and size in request data'} def test_app_verify_api_missing_obj(self, git_lfs_app): params = {'oid': 'missing', 'size': '1024'} @@ -204,38 +206,38 @@ class TestLFSApplication(object): status=404) assert json.loads(response.text) == { - u'message': u'oid `missing` does not exists in store'} + 'message': 'oid `missing` does not exists in store'} def test_app_verify_api_size_mismatch(self, git_lfs_app): oid = 'existing' - oid_path = os.path.join(git_lfs_app._store, oid) + oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path if not 
os.path.isdir(os.path.dirname(oid_path)): os.makedirs(os.path.dirname(oid_path)) with open(oid_path, 'wb') as f: - f.write('OID_CONTENT') + f.write(safe_bytes('OID_CONTENT')) params = {'oid': oid, 'size': '1024'} response = git_lfs_app.post_json( '/repo/info/lfs/verify', params=params, status=422) assert json.loads(response.text) == { - u'message': u'requested file size mismatch ' - u'store size:11 requested:1024'} + 'message': 'requested file size mismatch ' + 'store size:11 requested:1024'} def test_app_verify_api(self, git_lfs_app): oid = 'existing' - oid_path = os.path.join(git_lfs_app._store, oid) + oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path if not os.path.isdir(os.path.dirname(oid_path)): os.makedirs(os.path.dirname(oid_path)) with open(oid_path, 'wb') as f: - f.write('OID_CONTENT') + f.write(safe_bytes('OID_CONTENT')) params = {'oid': oid, 'size': 11} response = git_lfs_app.post_json( '/repo/info/lfs/verify', params=params) assert json.loads(response.text) == { - u'message': {u'size': u'ok', u'in_store': u'ok'}} + 'message': {'size': 'ok', 'in_store': 'ok'}} def test_app_download_api_oid_not_existing(self, git_lfs_app): oid = 'missing' @@ -244,15 +246,15 @@ class TestLFSApplication(object): '/repo/info/lfs/objects/{oid}'.format(oid=oid), status=404) assert json.loads(response.text) == { - u'message': u'requested file with oid `missing` not found in store'} + 'message': 'requested file with oid `missing` not found in store'} def test_app_download_api(self, git_lfs_app): oid = 'existing' - oid_path = os.path.join(git_lfs_app._store, oid) + oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path if not os.path.isdir(os.path.dirname(oid_path)): os.makedirs(os.path.dirname(oid_path)) with open(oid_path, 'wb') as f: - f.write('OID_CONTENT') + f.write(safe_bytes('OID_CONTENT')) response = git_lfs_app.get( '/repo/info/lfs/objects/{oid}'.format(oid=oid)) @@ -264,9 +266,9 @@ class TestLFSApplication(object): response = git_lfs_app.put( '/repo/info/lfs/objects/{oid}'.format(oid=oid), params='CONTENT') - assert json.loads(response.text) == {u'upload': u'ok'} + assert json.loads(response.text) == {'upload': 'ok'} # verify that we actually wrote that OID - oid_path = os.path.join(git_lfs_app._store, oid) + oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path assert os.path.isfile(oid_path) assert 'CONTENT' == open(oid_path).read() diff --git a/vcsserver/git_lfs/tests/test_lib.py b/vcsserver/git_lfs/tests/test_lib.py --- a/vcsserver/git_lfs/tests/test_lib.py +++ b/vcsserver/git_lfs/tests/test_lib.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. 
-# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,6 +17,7 @@ import os import pytest +from vcsserver.str_utils import safe_bytes from vcsserver.git_lfs.lib import OidHandler, LFSOidStore @@ -42,7 +43,7 @@ def oid_handler(lfs_store): return oid_handler -class TestOidHandler(object): +class TestOidHandler: @pytest.mark.parametrize('exec_action', [ 'download', @@ -70,7 +71,7 @@ class TestOidHandler(object): os.makedirs(os.path.dirname(store.oid_path)) with open(store.oid_path, 'wb') as f: - f.write('CONTENT') + f.write(safe_bytes('CONTENT')) response, has_errors = oid_handler.exec_operation('download') @@ -86,7 +87,7 @@ class TestOidHandler(object): os.makedirs(os.path.dirname(store.oid_path)) with open(store.oid_path, 'wb') as f: - f.write('CONTENT') + f.write(safe_bytes('CONTENT')) oid_handler.obj_size = 7 response, has_errors = oid_handler.exec_operation('upload') assert has_errors is None @@ -98,7 +99,7 @@ class TestOidHandler(object): os.makedirs(os.path.dirname(store.oid_path)) with open(store.oid_path, 'wb') as f: - f.write('CONTENT') + f.write(safe_bytes('CONTENT')) oid_handler.obj_size = 10240 response, has_errors = oid_handler.exec_operation('upload') @@ -119,7 +120,7 @@ class TestOidHandler(object): } -class TestLFSStore(object): +class TestLFSStore: def test_write_oid(self, lfs_store): oid_location = lfs_store.oid_path @@ -127,7 +128,7 @@ class TestLFSStore(object): engine = lfs_store.get_engine(mode='wb') with engine as f: - f.write('CONTENT') + f.write(safe_bytes('CONTENT')) assert os.path.isfile(oid_location) @@ -136,6 +137,6 @@ class TestLFSStore(object): assert lfs_store.has_oid() is False engine = lfs_store.get_engine(mode='wb') with engine as f: - f.write('CONTENT') + f.write(safe_bytes('CONTENT')) - assert lfs_store.has_oid() is True \ No newline at end of file + assert lfs_store.has_oid() is True diff --git a/vcsserver/git_lfs/utils.py b/vcsserver/git_lfs/utils.py --- a/vcsserver/git_lfs/utils.py +++ b/vcsserver/git_lfs/utils.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/vcsserver/hgcompat.py b/vcsserver/hgcompat.py --- a/vcsserver/hgcompat.py +++ b/vcsserver/hgcompat.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. 
-# Copyright (C) 2014-2020 RhodeCode GmbH
+# Copyright (C) 2014-2023 RhodeCode GmbH
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -21,8 +21,11 @@ Mercurial libs compatibility

 import mercurial
 from mercurial import demandimport
+
 # patch demandimport, due to bug in mercurial when it always triggers
 # demandimport.enable()
+from vcsserver.str_utils import safe_bytes
+
 demandimport.enable = lambda *args, **kwargs: 1

 from mercurial import ui
@@ -39,7 +42,8 @@ from mercurial import subrepo
 from mercurial import subrepoutil
 from mercurial import tags as hg_tag
 from mercurial import util as hgutil
-from mercurial.commands import clone, nullid, pull
+from mercurial.commands import clone, pull
+from mercurial.node import nullid
 from mercurial.context import memctx, memfilectx
 from mercurial.error import (
     LookupError, RepoError, RepoLookupError, Abort, InterventionRequired,
@@ -53,7 +57,7 @@ from mercurial.encoding import tolocal
 from mercurial.discovery import findcommonoutgoing
 from mercurial.hg import peer
 from mercurial.httppeer import makepeer
-from mercurial.util import url as hg_url
+from mercurial.utils.urlutil import url as hg_url
 from mercurial.scmutil import revrange, revsymbol
 from mercurial.node import nullrev
 from mercurial import exchange
@@ -63,17 +67,26 @@ from hgext import largefiles
 # infinite looping when given invalid resources
 from mercurial.url import httpbasicauthhandler, httpdigestauthhandler

+# hg strip is in core now
+from mercurial import strip as hgext_strip
+

 def get_ctx(repo, ref):
+    if not isinstance(ref, int):
+        ref = safe_bytes(ref)
+
     try:
         ctx = repo[ref]
+        return ctx
     except (ProgrammingError, TypeError):
         # we're unable to find the rev using a regular lookup, we fallback
         # to slower, but backward compat revsymbol usage
-        ctx = revsymbol(repo, ref)
+        pass
     except (LookupError, RepoLookupError):
         # Similar case as above but only for refs that are not numeric
-        if isinstance(ref, (int, long)):
+        if isinstance(ref, int):
             raise
-        ctx = revsymbol(repo, ref)
+
+    ctx = revsymbol(repo, ref)
+    return ctx
diff --git a/vcsserver/hgpatches.py b/vcsserver/hgpatches.py
--- a/vcsserver/hgpatches.py
+++ b/vcsserver/hgpatches.py
@@ -1,5 +1,5 @@
 # RhodeCode VCSServer provides access to different vcs backends via network.
-# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -25,9 +23,23 @@ import logging import pkg_resources import vcsserver +from vcsserver.str_utils import safe_bytes log = logging.getLogger(__name__) +HOOKS_DIR_MODE = 0o755 +HOOKS_FILE_MODE = 0o755 + + +def set_permissions_if_needed(path_to_check, perms: oct): + # Get current permissions + current_permissions = os.stat(path_to_check).st_mode & 0o777 # Extract permission bits + + # Check if current permissions are lower than required + if current_permissions < int(perms): + # Change the permissions if they are lower than required + os.chmod(path_to_check, perms) + def get_git_hooks_path(repo_path, bare): hooks_path = os.path.join(repo_path, 'hooks') @@ -42,14 +54,19 @@ def install_git_hooks(repo_path, bare, e Creates a RhodeCode hook inside a git repository :param repo_path: path to repository + :param bare: defines if repository is considered a bare git repo :param executable: binary executable to put in the hooks - :param force_create: Create even if same name hook exists + :param force_create: Creates even if the same name hook exists """ executable = executable or sys.executable hooks_path = get_git_hooks_path(repo_path, bare) - if not os.path.isdir(hooks_path): - os.makedirs(hooks_path, mode=0o777) + # we always call it to ensure dir exists and it has a proper mode + if not os.path.exists(hooks_path): + # If it doesn't exist, create a new directory with the specified mode + os.makedirs(hooks_path, mode=HOOKS_DIR_MODE, exist_ok=True) + # If it exists, change the directory's mode to the specified mode + set_permissions_if_needed(hooks_path, perms=HOOKS_DIR_MODE) tmpl_post = pkg_resources.resource_string( 'vcsserver', '/'.join( @@ -63,21 +80,20 @@ def install_git_hooks(repo_path, bare, e for h_type, template in [('pre', tmpl_pre), ('post', tmpl_post)]: log.debug('Installing git hook in repo %s', repo_path) - _hook_file = os.path.join(hooks_path, '%s-receive' % h_type) + _hook_file = os.path.join(hooks_path, f'{h_type}-receive') _rhodecode_hook = check_rhodecode_hook(_hook_file) if _rhodecode_hook or force_create: log.debug('writing git %s hook file at %s !', h_type, _hook_file) try: with open(_hook_file, 'wb') as f: - template = template.replace( - '_TMPL_', vcsserver.__version__) - template = template.replace('_DATE_', timestamp) - template = template.replace('_ENV_', executable) - template = template.replace('_PATH_', path) + template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version())) + template = template.replace(b'_DATE_', safe_bytes(timestamp)) + template = template.replace(b'_ENV_', safe_bytes(executable)) + template = template.replace(b'_PATH_', safe_bytes(path)) f.write(template) - os.chmod(_hook_file, 0o755) - except IOError: + set_permissions_if_needed(_hook_file, perms=HOOKS_FILE_MODE) + except OSError: log.exception('error writing hook file %s', _hook_file) else: log.debug('skipping writing hook file') @@ -102,7 +118,7 @@ def install_svn_hooks(repo_path, executa executable = executable or sys.executable hooks_path = get_svn_hooks_path(repo_path) if not os.path.isdir(hooks_path): - os.makedirs(hooks_path, mode=0o777) + os.makedirs(hooks_path, mode=0o777, exist_ok=True) tmpl_post = pkg_resources.resource_string( 'vcsserver', '/'.join( @@ -116,7 +132,7 @@ def install_svn_hooks(repo_path, executa for h_type, template in [('pre', tmpl_pre), 
('post', tmpl_post)]: log.debug('Installing svn hook in repo %s', repo_path) - _hook_file = os.path.join(hooks_path, '%s-commit' % h_type) + _hook_file = os.path.join(hooks_path, f'{h_type}-commit') _rhodecode_hook = check_rhodecode_hook(_hook_file) if _rhodecode_hook or force_create: @@ -124,15 +140,14 @@ def install_svn_hooks(repo_path, executa try: with open(_hook_file, 'wb') as f: - template = template.replace( - '_TMPL_', vcsserver.__version__) - template = template.replace('_DATE_', timestamp) - template = template.replace('_ENV_', executable) - template = template.replace('_PATH_', path) + template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version())) + template = template.replace(b'_DATE_', safe_bytes(timestamp)) + template = template.replace(b'_ENV_', safe_bytes(executable)) + template = template.replace(b'_PATH_', safe_bytes(path)) f.write(template) os.chmod(_hook_file, 0o755) - except IOError: + except OSError: log.exception('error writing hook file %s', _hook_file) else: log.debug('skipping writing hook file') @@ -141,16 +156,16 @@ def install_svn_hooks(repo_path, executa def get_version_from_hook(hook_path): - version = '' + version = b'' hook_content = read_hook_content(hook_path) - matches = re.search(r'(?:RC_HOOK_VER)\s*=\s*(.*)', hook_content) + matches = re.search(rb'RC_HOOK_VER\s*=\s*(.*)', hook_content) if matches: try: version = matches.groups()[0] log.debug('got version %s from hooks.', version) except Exception: log.exception("Exception while reading the hook version.") - return version.replace("'", "") + return version.replace(b"'", b"") def check_rhodecode_hook(hook_path): @@ -169,8 +184,8 @@ def check_rhodecode_hook(hook_path): return False -def read_hook_content(hook_path): - content = '' +def read_hook_content(hook_path) -> bytes: + content = b'' if os.path.isfile(hook_path): with open(hook_path, 'rb') as f: content = f.read() diff --git a/vcsserver/hook_utils/hook_templates/git_post_receive.py.tmpl b/vcsserver/hook_utils/hook_templates/git_post_receive.py.tmpl --- a/vcsserver/hook_utils/hook_templates/git_post_receive.py.tmpl +++ b/vcsserver/hook_utils/hook_templates/git_post_receive.py.tmpl @@ -11,7 +11,7 @@ try: except ImportError: if os.environ.get('RC_DEBUG_GIT_HOOK'): import traceback - print traceback.format_exc() + print(traceback.format_exc()) hooks = None @@ -42,7 +42,7 @@ def main(): # TODO: johbo: Improve handling of this special case if not getattr(error, '_vcs_kind', None) == 'repo_locked': raise - print 'ERROR:', error + print(f'ERROR: {error}') sys.exit(1) sys.exit(0) diff --git a/vcsserver/hook_utils/hook_templates/git_pre_receive.py.tmpl b/vcsserver/hook_utils/hook_templates/git_pre_receive.py.tmpl --- a/vcsserver/hook_utils/hook_templates/git_pre_receive.py.tmpl +++ b/vcsserver/hook_utils/hook_templates/git_pre_receive.py.tmpl @@ -11,7 +11,7 @@ try: except ImportError: if os.environ.get('RC_DEBUG_GIT_HOOK'): import traceback - print traceback.format_exc() + print(traceback.format_exc()) hooks = None @@ -42,7 +42,7 @@ def main(): # TODO: johbo: Improve handling of this special case if not getattr(error, '_vcs_kind', None) == 'repo_locked': raise - print 'ERROR:', error + print(f'ERROR: {error}') sys.exit(1) sys.exit(0) diff --git a/vcsserver/hook_utils/hook_templates/svn_post_commit_hook.py.tmpl b/vcsserver/hook_utils/hook_templates/svn_post_commit_hook.py.tmpl --- a/vcsserver/hook_utils/hook_templates/svn_post_commit_hook.py.tmpl +++ b/vcsserver/hook_utils/hook_templates/svn_post_commit_hook.py.tmpl @@ -12,7 +12,7 @@ try: except 
ImportError: if os.environ.get('RC_DEBUG_SVN_HOOK'): import traceback - print traceback.format_exc() + print(traceback.format_exc()) hooks = None @@ -40,7 +40,7 @@ def main(): # TODO: johbo: Improve handling of this special case if not getattr(error, '_vcs_kind', None) == 'repo_locked': raise - print 'ERROR:', error + print(f'ERROR: {error}') sys.exit(1) sys.exit(0) diff --git a/vcsserver/hook_utils/hook_templates/svn_pre_commit_hook.py.tmpl b/vcsserver/hook_utils/hook_templates/svn_pre_commit_hook.py.tmpl --- a/vcsserver/hook_utils/hook_templates/svn_pre_commit_hook.py.tmpl +++ b/vcsserver/hook_utils/hook_templates/svn_pre_commit_hook.py.tmpl @@ -12,7 +12,7 @@ try: except ImportError: if os.environ.get('RC_DEBUG_SVN_HOOK'): import traceback - print traceback.format_exc() + print(traceback.format_exc()) hooks = None @@ -43,7 +43,7 @@ def main(): # TODO: johbo: Improve handling of this special case if not getattr(error, '_vcs_kind', None) == 'repo_locked': raise - print 'ERROR:', error + print(f'ERROR: {error}') sys.exit(1) sys.exit(0) diff --git a/vcsserver/hooks.py b/vcsserver/hooks.py --- a/vcsserver/hooks.py +++ b/vcsserver/hooks.py @@ -1,7 +1,5 @@ -# -*- coding: utf-8 -*- - # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,71 +20,102 @@ import os import sys import logging import collections -import importlib import base64 +import msgpack +import dataclasses +import pygit2 -from httplib import HTTPConnection - +import http.client +from celery import Celery import mercurial.scmutil import mercurial.node -import simplejson as json +from vcsserver.lib.rc_json import json from vcsserver import exceptions, subprocessio, settings +from vcsserver.str_utils import ascii_str, safe_str +from vcsserver.remote.git_remote import Repository +celery_app = Celery('__vcsserver__') log = logging.getLogger(__name__) -class HooksHttpClient(object): +class HooksHttpClient: + proto = 'msgpack.v1' connection = None def __init__(self, hooks_uri): self.hooks_uri = hooks_uri + def __repr__(self): + return f'{self.__class__}(hook_uri={self.hooks_uri}, proto={self.proto})' + def __call__(self, method, extras): - connection = HTTPConnection(self.hooks_uri) - body = self._serialize(method, extras) - try: - connection.request('POST', '/', body) - except Exception: - log.error('Hooks calling Connection failed on %s', connection.__dict__) - raise - response = connection.getresponse() - - response_data = response.read() + connection = http.client.HTTPConnection(self.hooks_uri) + # binary msgpack body + headers, body = self._serialize(method, extras) + log.debug('Doing a new hooks call using HTTPConnection to %s', self.hooks_uri) try: - return json.loads(response_data) - except Exception: - log.exception('Failed to decode hook response json data. ' - 'response_code:%s, raw_data:%s', - response.status, response_data) - raise + try: + connection.request('POST', '/', body, headers) + except Exception as error: + log.error('Hooks calling Connection failed on %s, org error: %s', connection.__dict__, error) + raise - def _serialize(self, hook_name, extras): + response = connection.getresponse() + try: + return msgpack.load(response) + except Exception: + response_data = response.read() + log.exception('Failed to decode hook response json data. 
' + 'response_code:%s, raw_data:%s', + response.status, response_data) + raise + finally: + connection.close() + + @classmethod + def _serialize(cls, hook_name, extras): data = { 'method': hook_name, 'extras': extras } - return json.dumps(data) + headers = { + "rc-hooks-protocol": cls.proto, + "Connection": "keep-alive" + } + return headers, msgpack.packb(data) -class HooksDummyClient(object): - def __init__(self, hooks_module): - self._hooks_module = importlib.import_module(hooks_module) +class HooksCeleryClient: + TASK_TIMEOUT = 60 # time in seconds - def __call__(self, hook_name, extras): - with self._hooks_module.Hooks() as hooks: - return getattr(hooks, hook_name)(extras) + def __init__(self, queue, backend): + celery_app.config_from_object({ + 'broker_url': queue, 'result_backend': backend, + 'broker_connection_retry_on_startup': True, + 'task_serializer': 'msgpack', + 'accept_content': ['json', 'msgpack'], + 'result_serializer': 'msgpack', + 'result_accept_content': ['json', 'msgpack'] + }) + self.celery_app = celery_app + + def __call__(self, method, extras): + inquired_task = self.celery_app.signature( + f'rhodecode.lib.celerylib.tasks.{method}' + ) + return inquired_task.delay(extras).get(timeout=self.TASK_TIMEOUT) -class HooksShadowRepoClient(object): +class HooksShadowRepoClient: def __call__(self, hook_name, extras): return {'output': '', 'status': 0} -class RemoteMessageWriter(object): +class RemoteMessageWriter: """Writer base class.""" def write(self, message): raise NotImplementedError() @@ -98,7 +127,7 @@ class HgMessageWriter(RemoteMessageWrite def __init__(self, ui): self.ui = ui - def write(self, message): + def write(self, message: str): # TODO: Check why the quiet flag is set by default. old = self.ui.quiet self.ui.quiet = False @@ -112,8 +141,8 @@ class GitMessageWriter(RemoteMessageWrit def __init__(self, stdout=None): self.stdout = stdout or sys.stdout - def write(self, message): - self.stdout.write(message.encode('utf-8')) + def write(self, message: str): + self.stdout.write(message) class SvnMessageWriter(RemoteMessageWriter): @@ -130,6 +159,7 @@ class SvnMessageWriter(RemoteMessageWrit def _handle_exception(result): exception_class = result.get('exception') exception_traceback = result.get('exception_traceback') + log.debug('Handling hook-call exception: %s', exception_class) if exception_traceback: log.error('Got traceback from remote call:%s', exception_traceback) @@ -141,19 +171,25 @@ def _handle_exception(result): elif exception_class == 'RepositoryError': raise exceptions.VcsException()(*result['exception_args']) elif exception_class: - raise Exception('Got remote exception "%s" with args "%s"' % - (exception_class, result['exception_args'])) + raise Exception( + f"""Got remote exception "{exception_class}" with args "{result['exception_args']}" """ + ) def _get_hooks_client(extras): hooks_uri = extras.get('hooks_uri') + task_queue = extras.get('task_queue') + task_backend = extras.get('task_backend') is_shadow_repo = extras.get('is_shadow_repo') + if hooks_uri: - return HooksHttpClient(extras['hooks_uri']) + return HooksHttpClient(hooks_uri) + elif task_queue and task_backend: + return HooksCeleryClient(task_queue, task_backend) elif is_shadow_repo: return HooksShadowRepoClient() else: - return HooksDummyClient(extras['hooks_module']) + raise Exception("Hooks client not found!") def _call_hook(hook_name, extras, writer): @@ -161,7 +197,6 @@ def _call_hook(hook_name, extras, writer log.debug('Hooks, using client:%s', hooks_client) result = 
hooks_client(hook_name, extras) log.debug('Hooks got result: %s', result) - _handle_exception(result) writer.write(result['output']) @@ -169,7 +204,7 @@ def _call_hook(hook_name, extras, writer def _extras_from_ui(ui): - hook_data = ui.config('rhodecode', 'RC_SCM_DATA') + hook_data = ui.config(b'rhodecode', b'RC_SCM_DATA') if not hook_data: # maybe it's inside environ ? env_hook_data = os.environ.get('RC_SCM_DATA') @@ -192,8 +227,8 @@ def _rev_range_hash(repo, node, check_he for rev in range(start, end): revs.append(rev) ctx = get_ctx(repo, rev) - commit_id = mercurial.node.hex(ctx.node()) - branch = ctx.branch() + commit_id = ascii_str(mercurial.node.hex(ctx.node())) + branch = safe_str(ctx.branch()) commits.append((commit_id, branch)) parent_heads = [] @@ -217,9 +252,9 @@ def _check_heads(repo, start, end, commi for p in parents: branch = get_ctx(repo, p).branch() # The heads descending from that parent, on the same branch - parent_heads = set([p]) - reachable = set([p]) - for x in xrange(p + 1, end): + parent_heads = {p} + reachable = {p} + for x in range(p + 1, end): if get_ctx(repo, x).branch() != branch: continue for pp in changelog.parentrevs(x): @@ -295,14 +330,16 @@ def pre_push(ui, repo, node=None, **kwar detect_force_push = extras.get('detect_force_push') rev_data = [] - if node and kwargs.get('hooktype') == 'pretxnchangegroup': + hook_type: str = safe_str(kwargs.get('hooktype')) + + if node and hook_type == 'pretxnchangegroup': branches = collections.defaultdict(list) commits, _heads = _rev_range_hash(repo, node, check_heads=detect_force_push) for commit_id, branch in commits: branches[branch].append(commit_id) for branch, commits in branches.items(): - old_rev = kwargs.get('node_last') or commits[0] + old_rev = ascii_str(kwargs.get('node_last')) or commits[0] rev_data.append({ 'total_commits': len(commits), 'old_rev': old_rev, @@ -319,10 +356,10 @@ def pre_push(ui, repo, node=None, **kwar extras.get('repo_store', ''), extras.get('repository', '')) push_ref['hg_env'] = _get_hg_env( old_rev=push_ref['old_rev'], - new_rev=push_ref['new_rev'], txnid=kwargs.get('txnid'), + new_rev=push_ref['new_rev'], txnid=ascii_str(kwargs.get('txnid')), repo_path=repo_path) - extras['hook_type'] = kwargs.get('hooktype', 'pre_push') + extras['hook_type'] = hook_type or 'pre_push' extras['commit_ids'] = rev_data return _call_hook('pre_push', extras, HgMessageWriter(ui)) @@ -363,6 +400,7 @@ def post_push(ui, repo, node, **kwargs): branches = [] bookmarks = [] tags = [] + hook_type: str = safe_str(kwargs.get('hooktype')) commits, _heads = _rev_range_hash(repo, node) for commit_id, branch in commits: @@ -370,11 +408,12 @@ def post_push(ui, repo, node, **kwargs): if branch not in branches: branches.append(branch) - if hasattr(ui, '_rc_pushkey_branches'): - bookmarks = ui._rc_pushkey_branches + if hasattr(ui, '_rc_pushkey_bookmarks'): + bookmarks = ui._rc_pushkey_bookmarks - extras['hook_type'] = kwargs.get('hooktype', 'post_push') + extras['hook_type'] = hook_type or 'post_push' extras['commit_ids'] = commit_ids + extras['new_refs'] = { 'branches': branches, 'bookmarks': bookmarks, @@ -395,9 +434,10 @@ def post_push_ssh(ui, repo, node, **kwar def key_push(ui, repo, **kwargs): from vcsserver.hgcompat import get_ctx - if kwargs['new'] != '0' and kwargs['namespace'] == 'bookmarks': + + if kwargs['new'] != b'0' and kwargs['namespace'] == b'bookmarks': # store new bookmarks in our UI object propagated later to post_push - ui._rc_pushkey_branches = get_ctx(repo, kwargs['key']).bookmarks() + 
ui._rc_pushkey_bookmarks = get_ctx(repo, kwargs['key']).bookmarks() return @@ -426,10 +466,13 @@ def handle_git_post_receive(unused_repo_ pass -HookResponse = collections.namedtuple('HookResponse', ('status', 'output')) +@dataclasses.dataclass +class HookResponse: + status: int + output: str -def git_pre_pull(extras): +def git_pre_pull(extras) -> HookResponse: """ Pre pull hook. @@ -439,20 +482,23 @@ def git_pre_pull(extras): :return: status code of the hook. 0 for success. :rtype: int """ + if 'pull' not in extras['hooks']: return HookResponse(0, '') - stdout = io.BytesIO() + stdout = io.StringIO() try: - status = _call_hook('pre_pull', extras, GitMessageWriter(stdout)) + status_code = _call_hook('pre_pull', extras, GitMessageWriter(stdout)) + except Exception as error: - status = 128 - stdout.write('ERROR: %s\n' % str(error)) + log.exception('Failed to call pre_pull hook') + status_code = 128 + stdout.write(f'ERROR: {error}\n') - return HookResponse(status, stdout.getvalue()) + return HookResponse(status_code, stdout.getvalue()) -def git_post_pull(extras): +def git_post_pull(extras) -> HookResponse: """ Post pull hook. @@ -465,12 +511,12 @@ def git_post_pull(extras): if 'pull' not in extras['hooks']: return HookResponse(0, '') - stdout = io.BytesIO() + stdout = io.StringIO() try: status = _call_hook('post_pull', extras, GitMessageWriter(stdout)) except Exception as error: status = 128 - stdout.write('ERROR: %s\n' % error) + stdout.write(f'ERROR: {error}\n') return HookResponse(status, stdout.getvalue()) @@ -495,15 +541,11 @@ def _parse_git_ref_lines(revision_lines) return rev_data -def git_pre_receive(unused_repo_path, revision_lines, env): +def git_pre_receive(unused_repo_path, revision_lines, env) -> int: """ Pre push hook. - :param extras: dictionary containing the keys defined in simplevcs - :type extras: dict - :return: status code of the hook. 0 for success. - :rtype: int """ extras = json.loads(env['RC_SCM_DATA']) rev_data = _parse_git_ref_lines(revision_lines) @@ -527,7 +569,7 @@ def git_pre_receive(unused_repo_path, re if type_ == 'heads' and not (new_branch or delete_branch): old_rev = push_ref['old_rev'] new_rev = push_ref['new_rev'] - cmd = [settings.GIT_EXECUTABLE, 'rev-list', old_rev, '^{}'.format(new_rev)] + cmd = [settings.GIT_EXECUTABLE, 'rev-list', old_rev, f'^{new_rev}'] stdout, stderr = subprocessio.run_command( cmd, env=os.environ.copy()) # means we're having some non-reachable objects, this forced push was used @@ -536,18 +578,18 @@ def git_pre_receive(unused_repo_path, re extras['hook_type'] = 'pre_receive' extras['commit_ids'] = rev_data - return _call_hook('pre_push', extras, GitMessageWriter()) + + stdout = sys.stdout + status_code = _call_hook('pre_push', extras, GitMessageWriter(stdout)) + + return status_code -def git_post_receive(unused_repo_path, revision_lines, env): +def git_post_receive(unused_repo_path, revision_lines, env) -> int: """ Post push hook. - :param extras: dictionary containing the keys defined in simplevcs - :type extras: dict - :return: status code of the hook. 0 for success. 
- :rtype: int """ extras = json.loads(env['RC_SCM_DATA']) if 'push' not in extras['hooks']: @@ -567,26 +609,28 @@ def git_post_receive(unused_repo_path, r type_ = push_ref['type'] if type_ == 'heads': + # starting new branch case if push_ref['old_rev'] == empty_commit_id: - # starting new branch case - if push_ref['name'] not in branches: - branches.append(push_ref['name']) + push_ref_name = push_ref['name'] + + if push_ref_name not in branches: + branches.append(push_ref_name) - # Fix up head revision if needed - cmd = [settings.GIT_EXECUTABLE, 'show', 'HEAD'] - try: - subprocessio.run_command(cmd, env=os.environ.copy()) - except Exception: - cmd = [settings.GIT_EXECUTABLE, 'symbolic-ref', 'HEAD', - 'refs/heads/%s' % push_ref['name']] - print("Setting default branch to %s" % push_ref['name']) - subprocessio.run_command(cmd, env=os.environ.copy()) + need_head_set = '' + with Repository(os.getcwd()) as repo: + try: + repo.head + except pygit2.GitError: + need_head_set = f'refs/heads/{push_ref_name}' - cmd = [settings.GIT_EXECUTABLE, 'for-each-ref', - '--format=%(refname)', 'refs/heads/*'] + if need_head_set: + repo.set_head(need_head_set) + print(f"Setting default branch to {push_ref_name}") + + cmd = [settings.GIT_EXECUTABLE, 'for-each-ref', '--format=%(refname)', 'refs/heads/*'] stdout, stderr = subprocessio.run_command( cmd, env=os.environ.copy()) - heads = stdout + heads = safe_str(stdout) heads = heads.replace(push_ref['ref'], '') heads = ' '.join(head for head in heads.splitlines() if head) or '.' @@ -595,24 +639,43 @@ def git_post_receive(unused_repo_path, r '--not', heads] stdout, stderr = subprocessio.run_command( cmd, env=os.environ.copy()) - git_revs.extend(stdout.splitlines()) + git_revs.extend(list(map(ascii_str, stdout.splitlines()))) + + # delete branch case elif push_ref['new_rev'] == empty_commit_id: - # delete branch case - git_revs.append('delete_branch=>%s' % push_ref['name']) + git_revs.append(f'delete_branch=>{push_ref["name"]}') else: if push_ref['name'] not in branches: branches.append(push_ref['name']) cmd = [settings.GIT_EXECUTABLE, 'log', - '{old_rev}..{new_rev}'.format(**push_ref), + f'{push_ref["old_rev"]}..{push_ref["new_rev"]}', '--reverse', '--pretty=format:%H'] stdout, stderr = subprocessio.run_command( cmd, env=os.environ.copy()) - git_revs.extend(stdout.splitlines()) + # we get bytes from stdout, we need str to be consistent + log_revs = list(map(ascii_str, stdout.splitlines())) + git_revs.extend(log_revs) + + # Pure pygit2 impl. 
but still 2-3x slower :/ + # results = [] + # + # with Repository(os.getcwd()) as repo: + # repo_new_rev = repo[push_ref['new_rev']] + # repo_old_rev = repo[push_ref['old_rev']] + # walker = repo.walk(repo_new_rev.id, pygit2.GIT_SORT_TOPOLOGICAL) + # + # for commit in walker: + # if commit.id == repo_old_rev.id: + # break + # results.append(commit.id.hex) + # # reverse the order, can't use GIT_SORT_REVERSE + # log_revs = results[::-1] + elif type_ == 'tags': if push_ref['name'] not in tags: tags.append(push_ref['name']) - git_revs.append('tag=>%s' % push_ref['name']) + git_revs.append(f'tag=>{push_ref["name"]}') extras['hook_type'] = 'post_receive' extras['commit_ids'] = git_revs @@ -622,13 +685,16 @@ def git_post_receive(unused_repo_path, r 'tags': tags, } + stdout = sys.stdout + if 'repo_size' in extras['hooks']: try: - _call_hook('repo_size', extras, GitMessageWriter()) - except: + _call_hook('repo_size', extras, GitMessageWriter(stdout)) + except Exception: pass - return _call_hook('post_push', extras, GitMessageWriter()) + status_code = _call_hook('post_push', extras, GitMessageWriter(stdout)) + return status_code def _get_extras_from_txn_id(path, txn_id): diff --git a/vcsserver/http_main.py b/vcsserver/http_main.py --- a/vcsserver/http_main.py +++ b/vcsserver/http_main.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,28 +15,76 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +import io import os +import platform import sys -import base64 import locale import logging import uuid +import time import wsgiref.util -import traceback import tempfile import psutil + from itertools import chain -from cStringIO import StringIO -import simplejson as json import msgpack +import configparser + from pyramid.config import Configurator -from pyramid.settings import asbool, aslist from pyramid.wsgi import wsgiapp -from pyramid.compat import configparser from pyramid.response import Response -from vcsserver.utils import safe_int +from vcsserver.base import BytesEnvelope, BinaryEnvelope +from vcsserver.lib.rc_json import json +from vcsserver.config.settings_maker import SettingsMaker +from vcsserver.str_utils import safe_int +from vcsserver.lib.statsd_client import StatsdClient +from vcsserver.tweens.request_wrapper import get_headers_call_context + +import vcsserver +from vcsserver import remote_wsgi, scm_app, settings, hgpatches +from vcsserver.git_lfs.app import GIT_LFS_CONTENT_TYPE, GIT_LFS_PROTO_PAT +from vcsserver.echo_stub import remote_wsgi as remote_wsgi_stub +from vcsserver.echo_stub.echo_app import EchoApp +from vcsserver.exceptions import HTTPRepoLocked, HTTPRepoBranchProtected +from vcsserver.lib.exc_tracking import store_exception, format_exc +from vcsserver.server import VcsServer + +strict_vcs = True + +git_import_err = None +try: + from vcsserver.remote.git_remote import GitFactory, GitRemote +except ImportError as e: + GitFactory = None + GitRemote = None + git_import_err = e + if strict_vcs: + raise + + +hg_import_err = None +try: + from vcsserver.remote.hg_remote import MercurialFactory, HgRemote +except ImportError as e: + MercurialFactory = None + HgRemote = None + hg_import_err = e + if strict_vcs: + raise + 
+ +svn_import_err = None +try: + from vcsserver.remote.svn_remote import SubversionFactory, SvnRemote +except ImportError as e: + SubversionFactory = None + SvnRemote = None + svn_import_err = e + if strict_vcs: + raise log = logging.getLogger(__name__) @@ -50,73 +98,12 @@ except locale.Error as e: 'LOCALE ERROR: failed to set LC_ALL, fallback to LC_ALL=C, org error: %s', e) os.environ['LC_ALL'] = 'C' -import vcsserver -from vcsserver import remote_wsgi, scm_app, settings, hgpatches -from vcsserver.git_lfs.app import GIT_LFS_CONTENT_TYPE, GIT_LFS_PROTO_PAT -from vcsserver.echo_stub import remote_wsgi as remote_wsgi_stub -from vcsserver.echo_stub.echo_app import EchoApp -from vcsserver.exceptions import HTTPRepoLocked, HTTPRepoBranchProtected -from vcsserver.lib.exc_tracking import store_exception -from vcsserver.server import VcsServer - -try: - from vcsserver.git import GitFactory, GitRemote -except ImportError: - GitFactory = None - GitRemote = None - -try: - from vcsserver.hg import MercurialFactory, HgRemote -except ImportError: - MercurialFactory = None - HgRemote = None - -try: - from vcsserver.svn import SubversionFactory, SvnRemote -except ImportError: - SubversionFactory = None - SvnRemote = None - def _is_request_chunked(environ): stream = environ.get('HTTP_TRANSFER_ENCODING', '') == 'chunked' return stream -def _int_setting(settings, name, default): - settings[name] = int(settings.get(name, default)) - return settings[name] - - -def _bool_setting(settings, name, default): - input_val = settings.get(name, default) - if isinstance(input_val, unicode): - input_val = input_val.encode('utf8') - settings[name] = asbool(input_val) - return settings[name] - - -def _list_setting(settings, name, default): - raw_value = settings.get(name, default) - - # Otherwise we assume it uses pyramids space/newline separation. 
- settings[name] = aslist(raw_value) - return settings[name] - - -def _string_setting(settings, name, default, lower=True, default_when_empty=False): - value = settings.get(name, default) - - if default_when_empty and not value: - # use default value when value is empty - value = default - - if lower: - value = value.lower() - settings[name] = value - return settings[name] - - def log_max_fd(): try: maxfd = psutil.Process().rlimit(psutil.RLIMIT_NOFILE)[1] @@ -125,7 +112,7 @@ def log_max_fd(): pass -class VCS(object): +class VCS: def __init__(self, locale_conf=None, cache_config=None): self.locale = locale_conf self.cache_config = cache_config @@ -137,13 +124,13 @@ class VCS(object): git_factory = GitFactory() self._git_remote = GitRemote(git_factory) else: - log.info("Git client import failed") + log.error("Git client import failed: %s", git_import_err) if MercurialFactory and HgRemote: hg_factory = MercurialFactory() self._hg_remote = HgRemote(hg_factory) else: - log.info("Mercurial client import failed") + log.error("Mercurial client import failed: %s", hg_import_err) if SubversionFactory and SvnRemote: svn_factory = SubversionFactory() @@ -152,7 +139,7 @@ class VCS(object): hg_factory = MercurialFactory() self._svn_remote = SvnRemote(svn_factory, hg_factory=hg_factory) else: - log.info("Subversion client import failed") + log.error("Subversion client import failed: %s", svn_import_err) self._vcsserver = VcsServer() @@ -160,8 +147,7 @@ class VCS(object): if self.locale: log.info('Settings locale: `LC_ALL` to %s', self.locale) else: - log.info( - 'Configuring locale subsystem based on environment variables') + log.info('Configuring locale subsystem based on environment variables') try: # If self.locale is the empty string, then the locale # module will use the environment variables. 
See the @@ -173,11 +159,10 @@ class VCS(object): 'Locale set to language code "%s" with encoding "%s".', language_code, encoding) except locale.Error: - log.exception( - 'Cannot set locale, not configuring the locale system') + log.exception('Cannot set locale, not configuring the locale system') -class WsgiProxy(object): +class WsgiProxy: def __init__(self, wsgi): self.wsgi = wsgi @@ -215,12 +200,12 @@ def not_found(request): return {'status': '404 NOT FOUND'} -class VCSViewPredicate(object): +class VCSViewPredicate: def __init__(self, val, config): self.remotes = val def text(self): - return 'vcs view method = %s' % (self.remotes.keys(),) + return f'vcs view method = {list(self.remotes.keys())}' phash = text @@ -233,18 +218,22 @@ class VCSViewPredicate(object): return backend in self.remotes -class HTTPApplication(object): +class HTTPApplication: ALLOWED_EXCEPTIONS = ('KeyError', 'URLError') remote_wsgi = remote_wsgi _use_echo_app = False def __init__(self, settings=None, global_config=None): - self._sanitize_settings_and_apply_defaults(settings) self.config = Configurator(settings=settings) + # Init our statsd at very start + self.config.registry.statsd = StatsdClient.statsd + self.config.registry.vcs_call_context = {} + self.global_config = global_config self.config.include('vcsserver.lib.rc_cache') + self.config.include('vcsserver.lib.rc_cache.archive_cache') settings_locale = settings.get('locale', '') or 'en_US.UTF-8' vcs = VCS(locale_conf=settings_locale, cache_config=settings) @@ -281,40 +270,6 @@ class HTTPApplication(object): vcsserver.PYRAMID_SETTINGS = settings_merged vcsserver.CONFIG = settings_merged - def _sanitize_settings_and_apply_defaults(self, settings): - temp_store = tempfile.gettempdir() - default_cache_dir = os.path.join(temp_store, 'rc_cache') - - # save default, cache dir, and use it for all backends later. 
- default_cache_dir = _string_setting( - settings, - 'cache_dir', - default_cache_dir, lower=False, default_when_empty=True) - - # ensure we have our dir created - if not os.path.isdir(default_cache_dir): - os.makedirs(default_cache_dir, mode=0o755) - - # exception store cache - _string_setting( - settings, - 'exception_tracker.store_path', - temp_store, lower=False, default_when_empty=True) - - # repo_object cache - _string_setting( - settings, - 'rc_cache.repo_object.backend', - 'dogpile.cache.rc.file_namespace', lower=False) - _int_setting( - settings, - 'rc_cache.repo_object.expiration_time', - 30 * 24 * 60 * 60) - _string_setting( - settings, - 'rc_cache.repo_object.arguments.filename', - os.path.join(default_cache_dir, 'vcsserver_cache_1'), lower=False) - def _configure(self): self.config.add_renderer(name='msgpack', factory=self._msgpack_renderer_factory) @@ -359,16 +314,13 @@ class HTTPApplication(object): 'vcsserver.lib.request_counter.get_request_counter', 'request_count') - self.config.add_request_method( - 'vcsserver.lib._vendor.statsd.get_statsd_client', - 'statsd', reify=True) - def wsgi_app(self): return self.config.make_wsgi_app() def _vcs_view_params(self, request): remote = self._remotes[request.matchdict['backend']] payload = msgpack.unpackb(request.body, use_list=True) + method = payload.get('method') params = payload['params'] wire = params.get('wire') @@ -376,6 +328,11 @@ class HTTPApplication(object): kwargs = params.get('kwargs') context_uid = None + request.registry.vcs_call_context = { + 'method': method, + 'repo_name': payload.get('_repo_name'), + } + if wire: try: wire['context'] = context_uid = uuid.UUID(wire['context']) @@ -386,17 +343,34 @@ class HTTPApplication(object): # NOTE(marcink): trading complexity for slight performance if log.isEnabledFor(logging.DEBUG): - no_args_methods = [ - - ] - if method in no_args_methods: + # also we SKIP printing out any of those methods args since they maybe excessive + just_args_methods = { + 'commitctx': ('content', 'removed', 'updated'), + 'commit': ('content', 'removed', 'updated') + } + if method in just_args_methods: + skip_args = just_args_methods[method] call_args = '' + call_kwargs = {} + for k in kwargs: + if k in skip_args: + # replace our skip key with dummy + call_kwargs[k] = f'RemovedParam({k})' + else: + call_kwargs[k] = kwargs[k] else: call_args = args[1:] + call_kwargs = kwargs log.debug('Method requested:`%s` with args:%s kwargs:%s context_uid: %s, repo_state_uid:%s', - method, call_args, kwargs, context_uid, repo_state_uid) + method, call_args, call_kwargs, context_uid, repo_state_uid) + statsd = request.registry.statsd + if statsd: + statsd.incr( + 'vcsserver_method_total', tags=[ + f"method:{method}", + ]) return payload, remote, method, args, kwargs def vcs_view(self, request): @@ -435,8 +409,7 @@ class HTTPApplication(object): if should_store_exc: store_exception(id(exc_info), exc_info, request_path=request.path) - tb_info = ''.join( - traceback.format_exception(exc_type, exc_value, exc_traceback)) + tb_info = format_exc(exc_info) type_ = e.__class__.__name__ if type_ not in self.ALLOWED_EXCEPTIONS: @@ -445,7 +418,7 @@ class HTTPApplication(object): resp = { 'id': payload_id, 'error': { - 'message': e.message, + 'message': str(e), 'traceback': tb_info, 'org_exc': org_exc_name, 'org_exc_tb': org_exc_tb, @@ -462,7 +435,7 @@ class HTTPApplication(object): 'id': payload_id, 'result': resp } - + log.debug('Serving data for method %s', method) return resp def vcs_stream_view(self, request): @@ -471,13 
+444,10 @@ class HTTPApplication(object): method = method.split('stream:')[-1] chunk_size = safe_int(payload.get('chunk_size')) or 4096 - try: - resp = getattr(remote, method)(*args, **kwargs) - except Exception as e: - raise + resp = getattr(remote, method)(*args, **kwargs) def get_chunked_data(method_resp): - stream = StringIO(method_resp) + stream = io.BytesIO(method_resp) while 1: chunk = stream.read(chunk_size) if not chunk: @@ -491,8 +461,14 @@ class HTTPApplication(object): def status_view(self, request): import vcsserver - return {'status': 'OK', 'vcsserver_version': vcsserver.__version__, - 'pid': os.getpid()} + _platform_id = platform.uname()[1] or 'instance' + + return { + "status": "OK", + "vcsserver_version": vcsserver.get_version(), + "platform": _platform_id, + "pid": os.getpid(), + } def service_view(self, request): import vcsserver @@ -514,12 +490,12 @@ class HTTPApplication(object): except Exception: log.exception('Failed to read .ini file for display') - environ = os.environ.items() + environ = list(os.environ.items()) resp = { 'id': payload.get('id'), 'result': dict( - version=vcsserver.__version__, + version=vcsserver.get_version(), config=server_config, app_config=app_config, environ=environ, @@ -529,14 +505,28 @@ class HTTPApplication(object): return resp def _msgpack_renderer_factory(self, info): + def _render(value, system): + bin_type = False + res = value.get('result') + if isinstance(res, BytesEnvelope): + log.debug('Result is wrapped in BytesEnvelope type') + bin_type = True + elif isinstance(res, BinaryEnvelope): + log.debug('Result is wrapped in BinaryEnvelope type') + value['result'] = res.val + bin_type = True + request = system.get('request') if request is not None: response = request.response ct = response.content_type if ct == response.default_content_type: response.content_type = 'application/x-msgpack' - return msgpack.packb(value) + if bin_type: + response.content_type = 'application/x-msgpack-bin' + + return msgpack.packb(value, use_bin_type=bin_type) return _render def set_env_from_config(self, environ, config): @@ -589,16 +579,17 @@ class HTTPApplication(object): @wsgiapp def _hg_stream(environ, start_response): log.debug('http-app: handling hg stream') - repo_path = environ['HTTP_X_RC_REPO_PATH'] - repo_name = environ['HTTP_X_RC_REPO_NAME'] - packed_config = base64.b64decode( - environ['HTTP_X_RC_REPO_CONFIG']) - config = msgpack.unpackb(packed_config) + call_context = get_headers_call_context(environ) + + repo_path = call_context['repo_path'] + repo_name = call_context['repo_name'] + config = call_context['repo_config'] + app = scm_app.create_hg_wsgi_app( repo_path, repo_name, config) # Consistent path information for hgweb - environ['PATH_INFO'] = environ['HTTP_X_RC_PATH_INFO'] + environ['PATH_INFO'] = call_context['path_info'] environ['REPO_NAME'] = repo_name self.set_env_from_config(environ, config) @@ -618,13 +609,14 @@ class HTTPApplication(object): @wsgiapp def _git_stream(environ, start_response): log.debug('http-app: handling git stream') - repo_path = environ['HTTP_X_RC_REPO_PATH'] - repo_name = environ['HTTP_X_RC_REPO_NAME'] - packed_config = base64.b64decode( - environ['HTTP_X_RC_REPO_CONFIG']) - config = msgpack.unpackb(packed_config) + + call_context = get_headers_call_context(environ) - environ['PATH_INFO'] = environ['HTTP_X_RC_PATH_INFO'] + repo_path = call_context['repo_path'] + repo_name = call_context['repo_name'] + config = call_context['repo_config'] + + environ['PATH_INFO'] = call_context['path_info'] 
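
The vcs_stream_view change above swaps cStringIO for io.BytesIO, since remote method responses are now bytes; the generator it builds can be sketched stand-alone like this (the function name is illustrative):

import io

def iter_chunks(method_resp: bytes, chunk_size: int = 4096):
    # same pattern as get_chunked_data: wrap the bytes response and
    # yield fixed-size chunks until the stream is exhausted
    stream = io.BytesIO(method_resp)
    while 1:
        chunk = stream.read(chunk_size)
        if not chunk:
            break
        yield chunk

assert b"".join(iter_chunks(b"x" * 10_000)) == b"x" * 10_000
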
self.set_env_from_config(environ, config) content_type = environ.get('CONTENT_TYPE', '') @@ -660,31 +652,39 @@ class HTTPApplication(object): def handle_vcs_exception(self, exception, request): _vcs_kind = getattr(exception, '_vcs_kind', '') + if _vcs_kind == 'repo_locked': - # Get custom repo-locked status code if present. - status_code = request.headers.get('X-RC-Locked-Status-Code') + headers_call_context = get_headers_call_context(request.environ) + status_code = safe_int(headers_call_context['locked_status_code']) + return HTTPRepoLocked( - title=exception.message, status_code=status_code) + title=str(exception), status_code=status_code, headers=[('X-Rc-Locked', '1')]) elif _vcs_kind == 'repo_branch_protected': # Get custom repo-branch-protected status code if present. - return HTTPRepoBranchProtected(title=exception.message) + return HTTPRepoBranchProtected( + title=str(exception), headers=[('X-Rc-Branch-Protection', '1')]) exc_info = request.exc_info store_exception(id(exc_info), exc_info) traceback_info = 'unavailable' if request.exc_info: - exc_type, exc_value, exc_tb = request.exc_info - traceback_info = ''.join(traceback.format_exception(exc_type, exc_value, exc_tb)) + traceback_info = format_exc(request.exc_info) log.error( - 'error occurred handling this request for path: %s, \n tb: %s', + 'error occurred handling this request for path: %s, \n%s', request.path, traceback_info) + + statsd = request.registry.statsd + if statsd: + exc_type = f"{exception.__class__.__module__}.{exception.__class__.__name__}" + statsd.incr('vcsserver_exception_total', + tags=[f"type:{exc_type}"]) raise exception -class ResponseFilter(object): +class ResponseFilter: def __init__(self, start_response): self._start_response = start_response @@ -696,10 +696,80 @@ class ResponseFilter(object): return self._start_response(status, headers, exc_info) +def sanitize_settings_and_apply_defaults(global_config, settings): + _global_settings_maker = SettingsMaker(global_config) + settings_maker = SettingsMaker(settings) + + settings_maker.make_setting('logging.autoconfigure', False, parser='bool') + + logging_conf = os.path.join(os.path.dirname(global_config.get('__file__')), 'logging.ini') + settings_maker.enable_logging(logging_conf) + + # Default includes, possible to change as a user + pyramid_includes = settings_maker.make_setting('pyramid.includes', [], parser='list:newline') + log.debug("Using the following pyramid.includes: %s", pyramid_includes) + + settings_maker.make_setting('__file__', global_config.get('__file__')) + + settings_maker.make_setting('pyramid.default_locale_name', 'en') + settings_maker.make_setting('locale', 'en_US.UTF-8') + + settings_maker.make_setting('core.binary_dir', '') + + temp_store = tempfile.gettempdir() + default_cache_dir = os.path.join(temp_store, 'rc_cache') + # save default, cache dir, and use it for all backends later. 
+ default_cache_dir = settings_maker.make_setting( + 'cache_dir', + default=default_cache_dir, default_when_empty=True, + parser='dir:ensured') + + # exception store cache + settings_maker.make_setting( + 'exception_tracker.store_path', + default=os.path.join(default_cache_dir, 'exc_store'), default_when_empty=True, + parser='dir:ensured' + ) + + # repo_object cache defaults + settings_maker.make_setting( + 'rc_cache.repo_object.backend', + default='dogpile.cache.rc.file_namespace', + parser='string') + settings_maker.make_setting( + 'rc_cache.repo_object.expiration_time', + default=30 * 24 * 60 * 60, # 30days + parser='int') + settings_maker.make_setting( + 'rc_cache.repo_object.arguments.filename', + default=os.path.join(default_cache_dir, 'vcsserver_cache_repo_object.db'), + parser='string') + + # statsd + settings_maker.make_setting('statsd.enabled', False, parser='bool') + settings_maker.make_setting('statsd.statsd_host', 'statsd-exporter', parser='string') + settings_maker.make_setting('statsd.statsd_port', 9125, parser='int') + settings_maker.make_setting('statsd.statsd_prefix', '') + settings_maker.make_setting('statsd.statsd_ipv6', False, parser='bool') + + settings_maker.env_expand() + + def main(global_config, **settings): + start_time = time.time() + log.info('Pyramid app config starting') + if MercurialFactory: hgpatches.patch_largefiles_capabilities() hgpatches.patch_subrepo_type_mapping() - app = HTTPApplication(settings=settings, global_config=global_config) - return app.wsgi_app() + # Fill in and sanitize the defaults & do ENV expansion + sanitize_settings_and_apply_defaults(global_config, settings) + + # init and bootstrap StatsdClient + StatsdClient.setup(settings) + + pyramid_app = HTTPApplication(settings=settings, global_config=global_config).wsgi_app() + total_time = time.time() - start_time + log.info('Pyramid app created and configured in %.2fs', total_time) + return pyramid_app diff --git a/vcsserver/lib/__init__.py b/vcsserver/lib/__init__.py --- a/vcsserver/lib/__init__.py +++ b/vcsserver/lib/__init__.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/vcsserver/lib/_vendor/__init__.py b/vcsserver/lib/_vendor/__init__.py --- a/vcsserver/lib/_vendor/__init__.py +++ b/vcsserver/lib/_vendor/__init__.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. 
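
The statsd defaults and the main() bootstrap above lean on SettingsMaker, whose implementation is not part of this diff; as a rough sketch of the make_setting contract assumed here (parser names taken from the calls above, the stand-in function itself is hypothetical):

def make_setting(settings: dict, key, default, parser=None):
    # hypothetical minimal stand-in for SettingsMaker.make_setting:
    # read-with-default, coerce via parser, write back, return the value
    value = settings.get(key, default)
    if parser == 'bool' and isinstance(value, str):
        value = value.lower() in ('true', 'yes', 'on', '1')
    elif parser == 'int':
        value = int(value)
    settings[key] = value
    return value

conf = {'statsd.enabled': 'true'}
assert make_setting(conf, 'statsd.enabled', False, parser='bool') is True
assert make_setting(conf, 'statsd.statsd_port', 9125, parser='int') == 9125
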
-# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/vcsserver/lib/_vendor/jsonlogger/__init__.py b/vcsserver/lib/_vendor/jsonlogger/__init__.py new file mode 100644 --- /dev/null +++ b/vcsserver/lib/_vendor/jsonlogger/__init__.py @@ -0,0 +1,243 @@ +''' +This library is provided to allow standard python logging +to output log data as JSON formatted strings +''' +import logging +import json +import re +from datetime import date, datetime, time, tzinfo, timedelta +import traceback +import importlib + +from inspect import istraceback + +from collections import OrderedDict + + +def _inject_req_id(record, *args, **kwargs): + return record + + +ExceptionAwareFormatter = logging.Formatter + + +ZERO = timedelta(0) +HOUR = timedelta(hours=1) + + +class UTC(tzinfo): + """UTC""" + + def utcoffset(self, dt): + return ZERO + + def tzname(self, dt): + return "UTC" + + def dst(self, dt): + return ZERO + +utc = UTC() + + +# skip natural LogRecord attributes +# http://docs.python.org/library/logging.html#logrecord-attributes +RESERVED_ATTRS = ( + 'args', 'asctime', 'created', 'exc_info', 'exc_text', 'filename', + 'funcName', 'levelname', 'levelno', 'lineno', 'module', + 'msecs', 'message', 'msg', 'name', 'pathname', 'process', + 'processName', 'relativeCreated', 'stack_info', 'thread', 'threadName') + + +def merge_record_extra(record, target, reserved): + """ + Merges extra attributes from LogRecord object into target dictionary + + :param record: logging.LogRecord + :param target: dict to update + :param reserved: dict or list with reserved keys to skip + """ + for key, value in record.__dict__.items(): + # this allows to have numeric keys + if (key not in reserved + and not (hasattr(key, "startswith") + and key.startswith('_'))): + target[key] = value + return target + + +class JsonEncoder(json.JSONEncoder): + """ + A custom encoder extending the default JSONEncoder + """ + + def default(self, obj): + if isinstance(obj, (date, datetime, time)): + return self.format_datetime_obj(obj) + + elif istraceback(obj): + return ''.join(traceback.format_tb(obj)).strip() + + elif type(obj) == Exception \ + or isinstance(obj, Exception) \ + or type(obj) == type: + return str(obj) + + try: + return super().default(obj) + + except TypeError: + try: + return str(obj) + + except Exception: + return None + + def format_datetime_obj(self, obj): + return obj.isoformat() + + +class JsonFormatter(ExceptionAwareFormatter): + """ + A custom formatter to format logging records as json strings. + Extra values will be formatted as str() if not supported by + json default encoder + """ + + def __init__(self, *args, **kwargs): + """ + :param json_default: a function for encoding non-standard objects + as outlined in http://docs.python.org/2/library/json.html + :param json_encoder: optional custom encoder + :param json_serializer: a :meth:`json.dumps`-compatible callable + that will be used to serialize the log record. + :param json_indent: an optional :meth:`json.dumps`-compatible numeric value + that will be used to customize the indent of the output json. 
+ :param prefix: an optional string prefix added at the beginning of + the formatted string + :param json_indent: indent parameter for json.dumps + :param json_ensure_ascii: ensure_ascii parameter for json.dumps + :param reserved_attrs: an optional list of fields that will be skipped when + outputting json log record. Defaults to all log record attributes: + http://docs.python.org/library/logging.html#logrecord-attributes + :param timestamp: an optional string/boolean field to add a timestamp when + outputting the json log record. If string is passed, timestamp will be added + to log record using string as key. If True boolean is passed, timestamp key + will be "timestamp". Defaults to False/off. + """ + self.json_default = self._str_to_fn(kwargs.pop("json_default", None)) + self.json_encoder = self._str_to_fn(kwargs.pop("json_encoder", None)) + self.json_serializer = self._str_to_fn(kwargs.pop("json_serializer", json.dumps)) + self.json_indent = kwargs.pop("json_indent", None) + self.json_ensure_ascii = kwargs.pop("json_ensure_ascii", True) + self.prefix = kwargs.pop("prefix", "") + reserved_attrs = kwargs.pop("reserved_attrs", RESERVED_ATTRS) + self.reserved_attrs = dict(list(zip(reserved_attrs, reserved_attrs))) + self.timestamp = kwargs.pop("timestamp", True) + + # super(JsonFormatter, self).__init__(*args, **kwargs) + logging.Formatter.__init__(self, *args, **kwargs) + if not self.json_encoder and not self.json_default: + self.json_encoder = JsonEncoder + + self._required_fields = self.parse() + self._skip_fields = dict(list(zip(self._required_fields, + self._required_fields))) + self._skip_fields.update(self.reserved_attrs) + + def _str_to_fn(self, fn_as_str): + """ + If the argument is not a string, return whatever was passed in. + Parses a string such as package.module.function, imports the module + and returns the function. + + :param fn_as_str: The string to parse. If not a string, return it. + """ + if not isinstance(fn_as_str, str): + return fn_as_str + + path, _, function = fn_as_str.rpartition('.') + module = importlib.import_module(path) + return getattr(module, function) + + def parse(self): + """ + Parses format string looking for substitutions + + This method is responsible for returning a list of fields (as strings) + to include in all log messages. + """ + standard_formatters = re.compile(r'\((.+?)\)', re.IGNORECASE) + return standard_formatters.findall(self._fmt) + + def add_fields(self, log_record, record, message_dict): + """ + Override this method to implement custom logic for adding fields. + """ + for field in self._required_fields: + log_record[field] = record.__dict__.get(field) + log_record.update(message_dict) + merge_record_extra(record, log_record, reserved=self._skip_fields) + + if self.timestamp: + key = self.timestamp if type(self.timestamp) == str else 'timestamp' + log_record[key] = datetime.fromtimestamp(record.created, tz=utc) + + def process_log_record(self, log_record): + """ + Override this method to implement custom logic + on the possibly ordered dictionary. 
+ """ + return log_record + + def jsonify_log_record(self, log_record): + """Returns a json string of the log record.""" + return self.json_serializer(log_record, + default=self.json_default, + cls=self.json_encoder, + indent=self.json_indent, + ensure_ascii=self.json_ensure_ascii) + + def serialize_log_record(self, log_record): + """Returns the final representation of the log record.""" + return "{}{}".format(self.prefix, self.jsonify_log_record(log_record)) + + def format(self, record): + """Formats a log record and serializes to json""" + message_dict = {} + # FIXME: logging.LogRecord.msg and logging.LogRecord.message in typeshed + # are always type of str. We shouldn't need to override that. + if isinstance(record.msg, dict): + message_dict = record.msg + record.message = None + else: + record.message = record.getMessage() + # only format time if needed + if "asctime" in self._required_fields: + record.asctime = self.formatTime(record, self.datefmt) + + # Display formatted exception, but allow overriding it in the + # user-supplied dict. + if record.exc_info and not message_dict.get('exc_info'): + message_dict['exc_info'] = self.formatException(record.exc_info) + if not message_dict.get('exc_info') and record.exc_text: + message_dict['exc_info'] = record.exc_text + # Display formatted record of stack frames + # default format is a string returned from :func:`traceback.print_stack` + try: + if record.stack_info and not message_dict.get('stack_info'): + message_dict['stack_info'] = self.formatStack(record.stack_info) + except AttributeError: + # Python2.7 doesn't have stack_info. + pass + + try: + log_record = OrderedDict() + except NameError: + log_record = {} + + _inject_req_id(record, with_prefix=False) + self.add_fields(log_record, record, message_dict) + log_record = self.process_log_record(log_record) + + return self.serialize_log_record(log_record) diff --git a/vcsserver/lib/_vendor/redis_lock/__init__.py b/vcsserver/lib/_vendor/redis_lock/__init__.py --- a/vcsserver/lib/_vendor/redis_lock/__init__.py +++ b/vcsserver/lib/_vendor/redis_lock/__init__.py @@ -1,13 +1,14 @@ -import sys + import threading import weakref from base64 import b64encode from logging import getLogger from os import urandom +from typing import Union from redis import StrictRedis -__version__ = '3.7.0' +__version__ = '4.0.0' loggers = { k: getLogger("vcsserver." + ".".join((__name__, k))) @@ -23,14 +24,8 @@ loggers = { ] } -PY3 = sys.version_info[0] == 3 - -if PY3: - text_type = str - binary_type = bytes -else: - text_type = unicode # noqa - binary_type = str +text_type = str +binary_type = bytes # Check if the id match. If not, return an error code. @@ -107,15 +102,19 @@ class NotExpirable(RuntimeError): pass -class Lock(object): +class Lock: """ A Lock context manager implemented via redis SETNX/BLPOP. """ + unlock_script = None extend_script = None reset_script = None reset_all_script = None + _lock_renewal_interval: float + _lock_renewal_thread: Union[threading.Thread, None] + def __init__(self, redis_client, name, expire=None, id=None, auto_renewal=False, strict=True, signal_expire=1000): """ :param redis_client: @@ -172,7 +171,7 @@ class Lock(object): elif isinstance(id, text_type): self._id = id else: - raise TypeError("Incorrect type for `id`. Must be bytes/str not %s." % type(id)) + raise TypeError(f"Incorrect type for `id`. 
Must be bytes/str not {type(id)}.") self._name = 'lock:' + name self._signal = 'lock-signal:' + name self._lock_renewal_interval = (float(expire) * 2 / 3 @@ -186,11 +185,11 @@ class Lock(object): def register_scripts(cls, redis_client): global reset_all_script if reset_all_script is None: - reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT) cls.unlock_script = redis_client.register_script(UNLOCK_SCRIPT) cls.extend_script = redis_client.register_script(EXTEND_SCRIPT) cls.reset_script = redis_client.register_script(RESET_SCRIPT) cls.reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT) + reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT) @property def _held(self): @@ -221,7 +220,7 @@ class Lock(object): """ logger = loggers["acquire"] - logger.debug("Getting acquire on %r ...", self._name) + logger.debug("Getting blocking: %s acquire on %r ...", blocking, self._name) if self._held: owner_id = self.get_owner_id() @@ -233,10 +232,10 @@ class Lock(object): if timeout: timeout = int(timeout) if timeout < 0: - raise InvalidTimeout("Timeout (%d) cannot be less than or equal to 0" % timeout) + raise InvalidTimeout(f"Timeout ({timeout}) cannot be less than or equal to 0") if self._expire and not self._lock_renewal_interval and timeout > self._expire: - raise TimeoutTooLarge("Timeout (%d) cannot be greater than expire (%d)" % (timeout, self._expire)) + raise TimeoutTooLarge(f"Timeout ({timeout}) cannot be greater than expire ({self._expire})") busy = True blpop_timeout = timeout or self._expire or 0 @@ -249,16 +248,17 @@ class Lock(object): elif blocking: timed_out = not self._client.blpop(self._signal, blpop_timeout) and timeout else: - logger.warning("Failed to get %r.", self._name) + logger.warning("Failed to acquire Lock(%r).", self._name) return False - logger.info("Got lock for %r.", self._name) + logger.debug("Acquired Lock(%r).", self._name) if self._lock_renewal_interval is not None: self._start_lock_renewer() return True def extend(self, expire=None): - """Extends expiration time of the lock. + """ + Extends expiration time of the lock. :param expire: New expiration time. If ``None`` - `expire` provided during @@ -278,29 +278,29 @@ class Lock(object): error = self.extend_script(client=self._client, keys=(self._name, self._signal), args=(self._id, expire)) if error == 1: - raise NotAcquired("Lock %s is not acquired or it already expired." % self._name) + raise NotAcquired(f"Lock {self._name} is not acquired or it already expired.") elif error == 2: - raise NotExpirable("Lock %s has no assigned expiration time" % self._name) + raise NotExpirable(f"Lock {self._name} has no assigned expiration time") elif error: - raise RuntimeError("Unsupported error code %s from EXTEND script" % error) + raise RuntimeError(f"Unsupported error code {error} from EXTEND script") @staticmethod - def _lock_renewer(lockref, interval, stop): + def _lock_renewer(name, lockref, interval, stop): """ Renew the lock key in redis every `interval` seconds for as long as `self._lock_renewal_thread.should_exit` is False. 
""" while not stop.wait(timeout=interval): - loggers["refresh.thread.start"].debug("Refreshing lock") - lock = lockref() + loggers["refresh.thread.start"].debug("Refreshing Lock(%r).", name) + lock: "Lock" = lockref() if lock is None: loggers["refresh.thread.stop"].debug( - "The lock no longer exists, stopping lock refreshing" + "Stopping loop because Lock(%r) was garbage collected.", name ) break lock.extend(expire=lock._expire) del lock - loggers["refresh.thread.exit"].debug("Exit requested, stopping lock refreshing") + loggers["refresh.thread.exit"].debug("Exiting renewal thread for Lock(%r).", name) def _start_lock_renewer(self): """ @@ -310,18 +310,21 @@ class Lock(object): raise AlreadyStarted("Lock refresh thread already started") loggers["refresh.start"].debug( - "Starting thread to refresh lock every %s seconds", - self._lock_renewal_interval + "Starting renewal thread for Lock(%r). Refresh interval: %s seconds.", + self._name, self._lock_renewal_interval ) self._lock_renewal_stop = threading.Event() self._lock_renewal_thread = threading.Thread( group=None, target=self._lock_renewer, - kwargs={'lockref': weakref.ref(self), - 'interval': self._lock_renewal_interval, - 'stop': self._lock_renewal_stop} + kwargs={ + 'name': self._name, + 'lockref': weakref.ref(self), + 'interval': self._lock_renewal_interval, + 'stop': self._lock_renewal_stop, + }, ) - self._lock_renewal_thread.setDaemon(True) + self._lock_renewal_thread.daemon = True self._lock_renewal_thread.start() def _stop_lock_renewer(self): @@ -332,15 +335,16 @@ class Lock(object): """ if self._lock_renewal_thread is None or not self._lock_renewal_thread.is_alive(): return - loggers["refresh.shutdown"].debug("Signalling the lock refresher to stop") + loggers["refresh.shutdown"].debug("Signaling renewal thread for Lock(%r) to exit.", self._name) self._lock_renewal_stop.set() self._lock_renewal_thread.join() self._lock_renewal_thread = None - loggers["refresh.exit"].debug("Lock refresher has stopped") + loggers["refresh.exit"].debug("Renewal thread for Lock(%r) exited.", self._name) def __enter__(self): acquired = self.acquire(blocking=True) - assert acquired, "Lock wasn't acquired, but blocking=True" + if not acquired: + raise AssertionError(f"Lock({self._name}) wasn't acquired, but blocking=True was used!") return self def __exit__(self, exc_type=None, exc_value=None, traceback=None): @@ -358,12 +362,12 @@ class Lock(object): """ if self._lock_renewal_thread is not None: self._stop_lock_renewer() - loggers["release"].debug("Releasing %r.", self._name) + loggers["release"].debug("Releasing Lock(%r).", self._name) error = self.unlock_script(client=self._client, keys=(self._name, self._signal), args=(self._id, self._signal_expire)) if error == 1: - raise NotAcquired("Lock %s is not acquired or it already expired." % self._name) + raise NotAcquired(f"Lock({self._name}) is not acquired or it already expired.") elif error: - raise RuntimeError("Unsupported error code %s from EXTEND script." 
% error) + raise RuntimeError(f"Unsupported error code {error} from EXTEND script.") def locked(self): """ diff --git a/vcsserver/lib/_vendor/statsd/__init__.py b/vcsserver/lib/_vendor/statsd/__init__.py --- a/vcsserver/lib/_vendor/statsd/__init__.py +++ b/vcsserver/lib/_vendor/statsd/__init__.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import logging from .stream import TCPStatsClient, UnixSocketStatsClient # noqa @@ -38,8 +36,14 @@ def client_from_config(configuration, pr ipv6 = asbool(_config.pop('statsd_ipv6', IPV6)) log.debug('configured statsd client %s:%s', host, port) - return StatsClient( - host=host, port=port, prefix=prefix, maxudpsize=maxudpsize, ipv6=ipv6) + try: + client = StatsClient( + host=host, port=port, prefix=prefix, maxudpsize=maxudpsize, ipv6=ipv6) + except Exception: + log.exception('StatsD is enabled, but failed to connect to statsd server, fallback: disable statsd') + client = None + + return client def get_statsd_client(request): diff --git a/vcsserver/lib/_vendor/statsd/base.py b/vcsserver/lib/_vendor/statsd/base.py --- a/vcsserver/lib/_vendor/statsd/base.py +++ b/vcsserver/lib/_vendor/statsd/base.py @@ -1,13 +1,35 @@ -from __future__ import absolute_import, division, unicode_literals - +import re import random from collections import deque from datetime import timedelta +from repoze.lru import lru_cache from .timer import Timer +TAG_INVALID_CHARS_RE = re.compile( + r"[^\w\d_\-:/\.]", + #re.UNICODE +) +TAG_INVALID_CHARS_SUBS = "_" -class StatsClientBase(object): +# we save and expose methods called by statsd for discovery +buckets_dict = { + +} + + +@lru_cache(maxsize=500) +def _normalize_tags_with_cache(tag_list): + return [TAG_INVALID_CHARS_RE.sub(TAG_INVALID_CHARS_SUBS, tag) for tag in tag_list] + + +def normalize_tags(tag_list): + # We have to turn our input tag list into a non-mutable tuple for it to + # be hashable (and thus usable) by the @lru_cache decorator. + return _normalize_tags_with_cache(tuple(tag_list)) + + +class StatsClientBase: """A Base class for various statsd clients.""" def close(self): @@ -20,10 +42,19 @@ class StatsClientBase(object): def pipeline(self): raise NotImplementedError() - def timer(self, stat, rate=1): - return Timer(self, stat, rate) + def timer(self, stat, rate=1, tags=None, auto_send=True): + """ + statsd = StatsdClient.statsd + with statsd.timer('bucket_name', auto_send=True) as tmr: + # This block will be timed. + for i in range(0, 100000): + i ** 2 + # you can access time here... + elapsed_ms = tmr.ms + """ + return Timer(self, stat, rate, tags, auto_send=auto_send) - def timing(self, stat, delta, rate=1): + def timing(self, stat, delta, rate=1, tags=None, use_decimals=True): """ Send new timing information. @@ -32,17 +63,21 @@ class StatsClientBase(object): if isinstance(delta, timedelta): # Convert timedelta to number of milliseconds. delta = delta.total_seconds() * 1000. 
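
The tag normalization added at the top of this statsd module can be exercised on its own; a distilled version without the repoze.lru memoization, with the regex copied verbatim from the change above:

import re

TAG_INVALID_CHARS_RE = re.compile(r"[^\w\d_\-:/\.]")

def normalize_tags(tag_list):
    # replace characters a statsd tag can't carry with '_'
    return [TAG_INVALID_CHARS_RE.sub("_", tag) for tag in tag_list]

assert normalize_tags(["method:pull", "repo name:my repo"]) == \
    ["method:pull", "repo_name:my_repo"]
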
- self._send_stat(stat, '%0.6f|ms' % delta, rate) - - def incr(self, stat, count=1, rate=1): - """Increment a stat by `count`.""" - self._send_stat(stat, '%s|c' % count, rate) + if use_decimals: + fmt = '%0.6f|ms' + else: + fmt = '%s|ms' + self._send_stat(stat, fmt % delta, rate, tags) - def decr(self, stat, count=1, rate=1): + def incr(self, stat, count=1, rate=1, tags=None): + """Increment a stat by `count`.""" + self._send_stat(stat, f'{count}|c', rate, tags) + + def decr(self, stat, count=1, rate=1, tags=None): """Decrement a stat by `count`.""" - self.incr(stat, -count, rate) + self.incr(stat, -count, rate, tags) - def gauge(self, stat, value, rate=1, delta=False): + def gauge(self, stat, value, rate=1, delta=False, tags=None): """Set a gauge value.""" if value < 0 and not delta: if rate < 1: @@ -50,28 +85,40 @@ class StatsClientBase(object): return with self.pipeline() as pipe: pipe._send_stat(stat, '0|g', 1) - pipe._send_stat(stat, '%s|g' % value, 1) + pipe._send_stat(stat, f'{value}|g', 1) else: prefix = '+' if delta and value >= 0 else '' - self._send_stat(stat, '%s%s|g' % (prefix, value), rate) + self._send_stat(stat, f'{prefix}{value}|g', rate, tags) def set(self, stat, value, rate=1): """Set a set value.""" - self._send_stat(stat, '%s|s' % value, rate) + self._send_stat(stat, f'{value}|s', rate) + + def histogram(self, stat, value, rate=1, tags=None): + """Set a histogram""" + self._send_stat(stat, f'{value}|h', rate, tags) - def _send_stat(self, stat, value, rate): - self._after(self._prepare(stat, value, rate)) + def _send_stat(self, stat, value, rate, tags=None): + self._after(self._prepare(stat, value, rate, tags)) - def _prepare(self, stat, value, rate): + def _prepare(self, stat, value, rate, tags=None): + global buckets_dict + buckets_dict[stat] = 1 + if rate < 1: if random.random() > rate: return - value = '%s|@%s' % (value, rate) + value = f'{value}|@{rate}' if self._prefix: - stat = '%s.%s' % (self._prefix, stat) + stat = f'{self._prefix}.{stat}' - return '%s:%s' % (stat, value) + res = '%s:%s%s' % ( + stat, + value, + ("|#" + ",".join(normalize_tags(tags))) if tags else "", + ) + return res def _after(self, data): if data: diff --git a/vcsserver/lib/_vendor/statsd/stream.py b/vcsserver/lib/_vendor/statsd/stream.py --- a/vcsserver/lib/_vendor/statsd/stream.py +++ b/vcsserver/lib/_vendor/statsd/stream.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import socket from .base import StatsClientBase, PipelineBase diff --git a/vcsserver/lib/_vendor/statsd/timer.py b/vcsserver/lib/_vendor/statsd/timer.py --- a/vcsserver/lib/_vendor/statsd/timer.py +++ b/vcsserver/lib/_vendor/statsd/timer.py @@ -1,14 +1,5 @@ -from __future__ import absolute_import, division, unicode_literals - import functools - -# Use timer that's not susceptible to time of day adjustments. 
-try: - # perf_counter is only present on Py3.3+ - from time import perf_counter as time_now -except ImportError: - # fall back to using time - from time import time as time_now +from time import perf_counter as time_now def safe_wraps(wrapper, *args, **kwargs): @@ -18,16 +9,19 @@ def safe_wraps(wrapper, *args, **kwargs) return functools.wraps(wrapper, *args, **kwargs) -class Timer(object): +class Timer: """A context manager/decorator for statsd.timing().""" - def __init__(self, client, stat, rate=1): + def __init__(self, client, stat, rate=1, tags=None, use_decimals=True, auto_send=True): self.client = client self.stat = stat self.rate = rate + self.tags = tags self.ms = None self._sent = False self._start_time = None + self.use_decimals = use_decimals + self.auto_send = auto_send def __call__(self, f): """Thread-safe timing function decorator.""" @@ -38,14 +32,15 @@ class Timer(object): return f(*args, **kwargs) finally: elapsed_time_ms = 1000.0 * (time_now() - start_time) - self.client.timing(self.stat, elapsed_time_ms, self.rate) + self.client.timing(self.stat, elapsed_time_ms, self.rate, self.tags, self.use_decimals) + self._sent = True return _wrapped def __enter__(self): return self.start() def __exit__(self, typ, value, tb): - self.stop() + self.stop(send=self.auto_send) def start(self): self.ms = None @@ -68,4 +63,4 @@ class Timer(object): if self._sent: raise RuntimeError('Already sent data.') self._sent = True - self.client.timing(self.stat, self.ms, self.rate) + self.client.timing(self.stat, self.ms, self.rate, self.tags, self.use_decimals) diff --git a/vcsserver/lib/_vendor/statsd/udp.py b/vcsserver/lib/_vendor/statsd/udp.py --- a/vcsserver/lib/_vendor/statsd/udp.py +++ b/vcsserver/lib/_vendor/statsd/udp.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import socket from .base import StatsClientBase, PipelineBase @@ -8,7 +6,7 @@ from .base import StatsClientBase, Pipel class Pipeline(PipelineBase): def __init__(self, client): - super(Pipeline, self).__init__(client) + super().__init__(client) self._maxudpsize = client._maxudpsize def _send(self): diff --git a/vcsserver/lib/exc_tracking.py b/vcsserver/lib/exc_tracking.py --- a/vcsserver/lib/exc_tracking.py +++ b/vcsserver/lib/exc_tracking.py @@ -1,7 +1,5 @@ -# -*- coding: utf-8 -*- - # RhodeCode VCSServer provides access to different vcs backends via network. 
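
The Timer changes above (tags, auto_send, perf_counter) keep it usable both as a decorator and as a context manager; a sketch with a stub client standing in for a real StatsClient (the stub and the bucket name are assumptions for illustration):

from vcsserver.lib._vendor.statsd.timer import Timer

class StubClient:
    def timing(self, stat, delta_ms, rate=1, tags=None, use_decimals=True):
        # a real client would send this over UDP; we just print it
        print(f'{stat}:{delta_ms:.3f}|ms tags={tags}')

with Timer(StubClient(), 'vcsserver_call', tags=['method:pull']):
    sum(range(100_000))  # the timed block; sent on __exit__ (auto_send=True)
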
-# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,16 +15,16 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - +import io import os import time +import sys import datetime import msgpack import logging import traceback import tempfile - -from pyramid import compat +import glob log = logging.getLogger(__name__) @@ -35,16 +33,17 @@ global_prefix = 'vcsserver' exc_store_dir_name = 'rc_exception_store_v1' -def exc_serialize(exc_id, tb, exc_type): - +def exc_serialize(exc_id, tb, exc_type, extra_data=None): data = { - 'version': 'v1', - 'exc_id': exc_id, - 'exc_utc_date': datetime.datetime.utcnow().isoformat(), - 'exc_timestamp': repr(time.time()), - 'exc_message': tb, - 'exc_type': exc_type, + "version": "v1", + "exc_id": exc_id, + "exc_utc_date": datetime.datetime.utcnow().isoformat(), + "exc_timestamp": repr(time.time()), + "exc_message": tb, + "exc_type": exc_type, } + if extra_data: + data.update(extra_data) return msgpack.packb(data), data @@ -52,59 +51,153 @@ def exc_unserialize(tb): return msgpack.unpackb(tb) +_exc_store = None + + def get_exc_store(): """ Get and create exception store if it's not existing """ + global _exc_store + + if _exc_store is not None: + # quick global cache + return _exc_store + import vcsserver as app - exc_store_dir = app.CONFIG.get('exception_tracker.store_path', '') or tempfile.gettempdir() + exc_store_dir = ( + app.CONFIG.get("exception_tracker.store_path", "") or tempfile.gettempdir() + ) _exc_store_path = os.path.join(exc_store_dir, exc_store_dir_name) _exc_store_path = os.path.abspath(_exc_store_path) if not os.path.isdir(_exc_store_path): os.makedirs(_exc_store_path) - log.debug('Initializing exceptions store at %s', _exc_store_path) + log.debug("Initializing exceptions store at %s", _exc_store_path) + _exc_store = _exc_store_path + return _exc_store_path -def _store_exception(exc_id, exc_info, prefix, request_path=''): - exc_type, exc_value, exc_traceback = exc_info +def get_detailed_tb(exc_info): + try: + from pip._vendor.rich import ( + traceback as rich_tb, + scope as rich_scope, + console as rich_console, + ) + except ImportError: + try: + from rich import ( + traceback as rich_tb, + scope as rich_scope, + console as rich_console, + ) + except ImportError: + return None + + console = rich_console.Console(width=160, file=io.StringIO()) + + exc = rich_tb.Traceback.extract(*exc_info, show_locals=True) + + tb_rich = rich_tb.Traceback( + trace=exc, + width=160, + extra_lines=3, + theme=None, + word_wrap=False, + show_locals=False, + max_frames=100, + ) - tb = ''.join(traceback.format_exception( - exc_type, exc_value, exc_traceback, None)) + # last_stack = exc.stacks[-1] + # last_frame = last_stack.frames[-1] + # if last_frame and last_frame.locals: + # console.print( + # rich_scope.render_scope( + # last_frame.locals, + # title=f'{last_frame.filename}:{last_frame.lineno}')) + + console.print(tb_rich) + formatted_locals = console.file.getvalue() + + return formatted_locals + - detailed_tb = getattr(exc_value, '_org_exc_tb', None) +def get_request_metadata(request=None) -> dict: + request_metadata = {} + if not request: + from pyramid.threadlocal import get_current_request + + request = get_current_request() + + # NOTE(marcink): store request information into exc_data + if 
request: + request_metadata["client_address"] = getattr(request, "client_addr", "") + request_metadata["user_agent"] = getattr(request, "user_agent", "") + request_metadata["method"] = getattr(request, "method", "") + request_metadata["url"] = getattr(request, "url", "") + return request_metadata + + +def format_exc(exc_info, use_detailed_tb=True): + exc_type, exc_value, exc_traceback = exc_info + tb = "++ TRACEBACK ++\n\n" + tb += "".join(traceback.format_exception(exc_type, exc_value, exc_traceback, None)) + + detailed_tb = getattr(exc_value, "_org_exc_tb", None) if detailed_tb: - if isinstance(detailed_tb, compat.string_types): + remote_tb = detailed_tb + if isinstance(detailed_tb, str): remote_tb = [detailed_tb] tb += ( - '\n+++ BEG SOURCE EXCEPTION +++\n\n' - '{}\n' - '+++ END SOURCE EXCEPTION +++\n' - ''.format('\n'.join(remote_tb)) + "\n+++ BEG SOURCE EXCEPTION +++\n\n" + "{}\n" + "+++ END SOURCE EXCEPTION +++\n" + "".format("\n".join(remote_tb)) ) # Avoid that remote_tb also appears in the frame del remote_tb + if use_detailed_tb: + locals_tb = get_detailed_tb(exc_info) + if locals_tb: + tb += f"\n+++ DETAILS +++\n\n{locals_tb}\n" "" + return tb + + +def _store_exception(exc_id, exc_info, prefix, request_path=''): + """ + Low level function to store exception in the exception tracker + """ + + extra_data = {} + extra_data.update(get_request_metadata()) + + exc_type, exc_value, exc_traceback = exc_info + tb = format_exc(exc_info) + exc_type_name = exc_type.__name__ + exc_data, org_data = exc_serialize(exc_id, tb, exc_type_name, extra_data=extra_data) + + exc_pref_id = f"{exc_id}_{prefix}_{org_data['exc_timestamp']}" exc_store_path = get_exc_store() - exc_data, org_data = exc_serialize(exc_id, tb, exc_type_name) - exc_pref_id = '{}_{}_{}'.format(exc_id, prefix, org_data['exc_timestamp']) if not os.path.isdir(exc_store_path): os.makedirs(exc_store_path) stored_exc_path = os.path.join(exc_store_path, exc_pref_id) - with open(stored_exc_path, 'wb') as f: + with open(stored_exc_path, "wb") as f: f.write(exc_data) - log.debug('Stored generated exception %s as: %s', exc_id, stored_exc_path) + log.debug("Stored generated exception %s as: %s", exc_id, stored_exc_path) - log.error( - 'error occurred handling this request.\n' - 'Path: `%s`, tb: %s', - request_path, tb) + if request_path: + log.error( + 'error occurred handling this request.\n' + 'Path: `%s`, %s', + request_path, tb) def store_exception(exc_id, exc_info, prefix=global_prefix, request_path=''): @@ -116,10 +209,15 @@ def store_exception(exc_id, exc_info, pr """ try: - _store_exception(exc_id=exc_id, exc_info=exc_info, prefix=prefix, - request_path=request_path) + exc_type = exc_info[0] + exc_type_name = exc_type.__name__ + + _store_exception( + exc_id=exc_id, exc_info=exc_info, prefix=prefix, request_path=request_path, + ) + return exc_id, exc_type_name except Exception: - log.exception('Failed to store exception `%s` information', exc_id) + log.exception("Failed to store exception `%s` information", exc_id) # there's no way this can fail, it will crash server badly if it does. 
pass @@ -127,30 +225,26 @@ def store_exception(exc_id, exc_info, pr def _find_exc_file(exc_id, prefix=global_prefix): exc_store_path = get_exc_store() if prefix: - exc_id = '{}_{}'.format(exc_id, prefix) + exc_id = f"{exc_id}_{prefix}" else: # search without a prefix - exc_id = '{}'.format(exc_id) + exc_id = f"{exc_id}" - # we need to search the store for such start pattern as above - for fname in os.listdir(exc_store_path): - if fname.startswith(exc_id): - exc_id = os.path.join(exc_store_path, fname) - break - continue - else: - exc_id = None + found_exc_id = None + matches = glob.glob(os.path.join(exc_store_path, exc_id) + "*") + if matches: + found_exc_id = matches[0] - return exc_id + return found_exc_id def _read_exception(exc_id, prefix): exc_id_file_path = _find_exc_file(exc_id=exc_id, prefix=prefix) if exc_id_file_path: - with open(exc_id_file_path, 'rb') as f: + with open(exc_id_file_path, "rb") as f: return exc_unserialize(f.read()) else: - log.debug('Exception File `%s` not found', exc_id_file_path) + log.debug("Exception File `%s` not found", exc_id_file_path) return None @@ -158,7 +252,7 @@ def read_exception(exc_id, prefix=global try: return _read_exception(exc_id=exc_id, prefix=prefix) except Exception: - log.exception('Failed to read exception `%s` information', exc_id) + log.exception("Failed to read exception `%s` information", exc_id) # there's no way this can fail, it will crash server badly if it does. return None @@ -170,6 +264,10 @@ def delete_exception(exc_id, prefix=glob os.remove(exc_id_file_path) except Exception: - log.exception('Failed to remove exception `%s` information', exc_id) + log.exception("Failed to remove exception `%s` information", exc_id) # there's no way this can fail, it will crash server badly if it does. pass + + +def generate_id(): + return id(object()) diff --git a/vcsserver/lib/logging_formatter.py b/vcsserver/lib/logging_formatter.py new file mode 100644 --- /dev/null +++ b/vcsserver/lib/logging_formatter.py @@ -0,0 +1,53 @@ +# Copyright (C) 2010-2023 RhodeCode GmbH +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License, version 3 +# (only), as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +# This program is dual-licensed. 
If you wish to learn more about the +# RhodeCode Enterprise Edition, including its added features, Support services, +# and proprietary license terms, please see https://rhodecode.com/licenses/ + +import sys +import logging + + +BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = list(range(30, 38)) + +# Sequences +RESET_SEQ = "\033[0m" +COLOR_SEQ = "\033[0;%dm" +BOLD_SEQ = "\033[1m" + +COLORS = { + 'CRITICAL': MAGENTA, + 'ERROR': RED, + 'WARNING': CYAN, + 'INFO': GREEN, + 'DEBUG': BLUE, + 'SQL': YELLOW +} + + +class ColorFormatter(logging.Formatter): + + def format(self, record): + """ + Change record's levelname to use with COLORS enum + """ + def_record = super().format(record) + + levelname = record.levelname + start = COLOR_SEQ % (COLORS[levelname]) + end = RESET_SEQ + + colored_record = ''.join([start, def_record, end]) + return colored_record diff --git a/vcsserver/lib/memory_lru_dict.py b/vcsserver/lib/memory_lru_dict.py --- a/vcsserver/lib/memory_lru_dict.py +++ b/vcsserver/lib/memory_lru_dict.py @@ -1,7 +1,5 @@ -# -*- coding: utf-8 -*- - # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,7 +20,7 @@ import logging from repoze.lru import LRUCache -from vcsserver.utils import safe_str +from vcsserver.str_utils import safe_str log = logging.getLogger(__name__) @@ -45,7 +43,7 @@ class LRUDict(LRUCache): del self.data[key] def keys(self): - return self.data.keys() + return list(self.data.keys()) class LRUDictDebug(LRUDict): @@ -53,11 +51,11 @@ class LRUDictDebug(LRUDict): Wrapper to provide some debug options """ def _report_keys(self): - elems_cnt = '%s/%s' % (len(self.keys()), self.size) + elems_cnt = f'{len(list(self.keys()))}/{self.size}' # trick for pformat print it more nicely fmt = '\n' for cnt, elem in enumerate(self.keys()): - fmt += '%s - %s\n' % (cnt+1, safe_str(elem)) + fmt += f'{cnt+1} - {safe_str(elem)}\n' log.debug('current LRU keys (%s):%s', elems_cnt, fmt) def __getitem__(self, key): diff --git a/vcsserver/lib/rc_cache/__init__.py b/vcsserver/lib/rc_cache/__init__.py --- a/vcsserver/lib/rc_cache/__init__.py +++ b/vcsserver/lib/rc_cache/__init__.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -16,31 +16,59 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA import logging +import threading + from dogpile.cache import register_backend +from . 
import region_meta +from .utils import ( + backend_key_generator, + clear_cache_namespace, + get_default_cache_settings, + get_or_create_region, + make_region, + str2bool, +) + +module_name = 'vcsserver' + register_backend( - "dogpile.cache.rc.memory_lru", "vcsserver.lib.rc_cache.backends", + "dogpile.cache.rc.memory_lru", f"{module_name}.lib.rc_cache.backends", "LRUMemoryBackend") register_backend( - "dogpile.cache.rc.file_namespace", "vcsserver.lib.rc_cache.backends", + "dogpile.cache.rc.file_namespace", f"{module_name}.lib.rc_cache.backends", "FileNamespaceBackend") register_backend( - "dogpile.cache.rc.redis", "vcsserver.lib.rc_cache.backends", + "dogpile.cache.rc.redis", f"{module_name}.lib.rc_cache.backends", "RedisPickleBackend") register_backend( - "dogpile.cache.rc.redis_msgpack", "vcsserver.lib.rc_cache.backends", + "dogpile.cache.rc.redis_msgpack", f"{module_name}.lib.rc_cache.backends", "RedisMsgPackBackend") log = logging.getLogger(__name__) -from . import region_meta -from .utils import ( - get_default_cache_settings, backend_key_generator, get_or_create_region, - clear_cache_namespace, make_region) + +CACHE_OBJ_CACHE_VER = 'v2' + +CLEAR_DELETE = 'delete' +CLEAR_INVALIDATE = 'invalidate' + + +def async_creation_runner(cache, cache_key, creator, mutex): + + def runner(): + try: + value = creator() + cache.set(cache_key, value) + finally: + mutex.release() + + thread = threading.Thread(target=runner) + thread.start() def configure_dogpile_cache(settings): @@ -62,15 +90,22 @@ def configure_dogpile_cache(settings): new_region = make_region( name=namespace_name, - function_key_generator=None + function_key_generator=None, + async_creation_runner=None ) - new_region.configure_from_config(settings, 'rc_cache.{}.'.format(namespace_name)) + new_region.configure_from_config(settings, f'rc_cache.{namespace_name}.') new_region.function_key_generator = backend_key_generator(new_region.actual_backend) + + async_creator = str2bool(settings.pop(f'rc_cache.{namespace_name}.async_creator', 'false')) + if async_creator: + log.debug('configuring region %s with async creator', new_region) + new_region.async_creation_runner = async_creation_runner + if log.isEnabledFor(logging.DEBUG): - region_args = dict(backend=new_region.actual_backend.__class__, + region_args = dict(backend=new_region.actual_backend, region_invalidator=new_region.region_invalidator.__class__) - log.debug('dogpile: registering a new region `%s` %s', namespace_name, region_args) + log.debug('dogpile: registering a new region key=`%s` args=%s', namespace_name, region_args) region_meta.dogpile_cache_regions[namespace_name] = new_region diff --git a/vcsserver/lib/rc_cache/archive_cache.py b/vcsserver/lib/rc_cache/archive_cache.py new file mode 100644 --- /dev/null +++ b/vcsserver/lib/rc_cache/archive_cache.py @@ -0,0 +1,87 @@ +# RhodeCode VCSServer provides access to different vcs backends via network. +# Copyright (C) 2014-2023 RhodeCode GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
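
The async_creation_runner registered above follows dogpile.cache's documented hook: on a stale hit dogpile serves the old value and hands (cache, key, creator, mutex) to the runner, which refreshes the value in a background thread and must release the mutex. A minimal, self-contained usage sketch (the region backend and function are illustrative):

import threading
from dogpile.cache import make_region

def async_creation_runner(cache, cache_key, creator, mutex):
    # same shape as the runner above: refresh in a thread, release the lock
    def runner():
        try:
            cache.set(cache_key, creator())
        finally:
            mutex.release()
    threading.Thread(target=runner).start()

region = make_region(async_creation_runner=async_creation_runner).configure(
    'dogpile.cache.memory', expiration_time=30)

@region.cache_on_arguments()
def heavy_lookup(repo_id):
    return f'computed-for-{repo_id}'

heavy_lookup('repo-1')  # stale hits return the old value while a thread refreshes
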
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +import logging +import os +import diskcache +from diskcache import RLock + +log = logging.getLogger(__name__) + +cache_meta = None + + +class ReentrantLock(RLock): + def __enter__(self): + reentrant_lock_key = self._key + + log.debug('Acquire ReentrantLock(key=%s) for archive cache generation...', reentrant_lock_key) + #self.acquire() + log.debug('Lock for key=%s acquired', reentrant_lock_key) + + def __exit__(self, *exc_info): + #self.release() + pass + + +def get_archival_config(config): + + final_config = { + 'archive_cache.eviction_policy': 'least-frequently-used' + } + + for k, v in config.items(): + if k.startswith('archive_cache'): + final_config[k] = v + + return final_config + + +def get_archival_cache_store(config): + + global cache_meta + if cache_meta is not None: + return cache_meta + + config = get_archival_config(config) + + archive_cache_dir = config['archive_cache.store_dir'] + archive_cache_size_gb = config['archive_cache.cache_size_gb'] + archive_cache_shards = config['archive_cache.cache_shards'] + archive_cache_eviction_policy = config['archive_cache.eviction_policy'] + + log.debug('Initializing archival cache instance under %s', archive_cache_dir) + + # check if it's ok to write, and re-create the archive cache + if not os.path.isdir(archive_cache_dir): + os.makedirs(archive_cache_dir, exist_ok=True) + + d_cache = diskcache.FanoutCache( + archive_cache_dir, shards=archive_cache_shards, + cull_limit=0, # manual eviction required + size_limit=archive_cache_size_gb * 1024 * 1024 * 1024, + eviction_policy=archive_cache_eviction_policy, + timeout=30 + ) + cache_meta = d_cache + return cache_meta + + +def includeme(config): + # init our cache at start, for vcsserver we don't init at runtime + # because our cache config is sent via wire on make archive call, this call just lazy-enables the client + return diff --git a/vcsserver/lib/rc_cache/backends.py b/vcsserver/lib/rc_cache/backends.py --- a/vcsserver/lib/rc_cache/backends.py +++ b/vcsserver/lib/rc_cache/backends.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. 
-# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,25 +15,31 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -import time -import errno +#import errno +import fcntl +import functools import logging +import os +import pickle +#import time +#import gevent import msgpack import redis -from dogpile.cache.api import CachedValue -from dogpile.cache.backends import memory as memory_backend +flock_org = fcntl.flock +from typing import Union + +from dogpile.cache.api import Deserializer, Serializer from dogpile.cache.backends import file as file_backend +from dogpile.cache.backends import memory as memory_backend from dogpile.cache.backends import redis as redis_backend -from dogpile.cache.backends.file import NO_VALUE, compat, FileLock +from dogpile.cache.backends.file import FileLock from dogpile.cache.util import memoized_property -from pyramid.settings import asbool - from vcsserver.lib.memory_lru_dict import LRUDict, LRUDictDebug -from vcsserver.utils import safe_str - +from vcsserver.str_utils import safe_bytes, safe_str +from vcsserver.type_utils import str2bool _default_max_size = 1024 @@ -45,14 +51,20 @@ class LRUMemoryBackend(memory_backend.Me pickle_values = False def __init__(self, arguments): - max_size = arguments.pop('max_size', _default_max_size) + self.max_size = arguments.pop('max_size', _default_max_size) LRUDictClass = LRUDict if arguments.pop('log_key_count', None): LRUDictClass = LRUDictDebug - arguments['cache_dict'] = LRUDictClass(max_size) - super(LRUMemoryBackend, self).__init__(arguments) + arguments['cache_dict'] = LRUDictClass(self.max_size) + super().__init__(arguments) + + def __repr__(self): + return f'{self.__class__}(maxsize=`{self.max_size}`)' + + def __str__(self): + return self.__repr__() def delete(self, key): try: @@ -61,60 +73,37 @@ class LRUMemoryBackend(memory_backend.Me # we don't care if key isn't there at deletion pass + def list_keys(self, prefix): + return list(self._cache.keys()) + def delete_multi(self, keys): for key in keys: self.delete(key) - -class PickleSerializer(object): - - def _dumps(self, value, safe=False): - try: - return compat.pickle.dumps(value) - except Exception: - if safe: - return NO_VALUE - else: - raise - - def _loads(self, value, safe=True): - try: - return compat.pickle.loads(value) - except Exception: - if safe: - return NO_VALUE - else: - raise + def delete_multi_by_prefix(self, prefix): + cache_keys = self.list_keys(prefix=prefix) + num_affected_keys = len(cache_keys) + if num_affected_keys: + self.delete_multi(cache_keys) + return num_affected_keys -class MsgPackSerializer(object): - - def _dumps(self, value, safe=False): - try: - return msgpack.packb(value) - except Exception: - if safe: - return NO_VALUE - else: - raise - - def _loads(self, value, safe=True): - """ - pickle maintained the `CachedValue` wrapper of the tuple - msgpack does not, so it must be added back in. 
- """ - try: - value = msgpack.unpackb(value, use_list=False) - return CachedValue(*value) - except Exception: - if safe: - return NO_VALUE - else: - raise +class PickleSerializer: + serializer: None | Serializer = staticmethod( # type: ignore + functools.partial(pickle.dumps, protocol=pickle.HIGHEST_PROTOCOL) + ) + deserializer: None | Deserializer = staticmethod( # type: ignore + functools.partial(pickle.loads) + ) -import fcntl -flock_org = fcntl.flock +class MsgPackSerializer: + serializer: None | Serializer = staticmethod( # type: ignore + msgpack.packb + ) + deserializer: None | Deserializer = staticmethod( # type: ignore + functools.partial(msgpack.unpackb, use_list=False) + ) class CustomLockFactory(FileLock): @@ -129,80 +118,75 @@ class FileNamespaceBackend(PickleSeriali arguments['lock_factory'] = CustomLockFactory db_file = arguments.get('filename') - log.debug('initialing %s DB in %s', self.__class__.__name__, db_file) + log.debug('initialing cache-backend=%s db in %s', self.__class__.__name__, db_file) + db_file_dir = os.path.dirname(db_file) + if not os.path.isdir(db_file_dir): + os.makedirs(db_file_dir) + try: - super(FileNamespaceBackend, self).__init__(arguments) + super().__init__(arguments) except Exception: - log.error('Failed to initialize db at: %s', db_file) + log.exception('Failed to initialize db at: %s', db_file) raise def __repr__(self): - return '{} `{}`'.format(self.__class__, self.filename) + return f'{self.__class__}(file=`{self.filename}`)' + + def __str__(self): + return self.__repr__() - def list_keys(self, prefix=''): - prefix = '{}:{}'.format(self.key_prefix, prefix) + def _get_keys_pattern(self, prefix: bytes = b''): + return b'%b:%b' % (safe_bytes(self.key_prefix), safe_bytes(prefix)) - def cond(v): + def list_keys(self, prefix: bytes = b''): + prefix = self._get_keys_pattern(prefix) + + def cond(dbm_key: bytes): if not prefix: return True - if v.startswith(prefix): + if dbm_key.startswith(prefix): return True return False with self._dbm_file(True) as dbm: try: - return filter(cond, dbm.keys()) + return list(filter(cond, dbm.keys())) except Exception: log.error('Failed to fetch DBM keys from DB: %s', self.get_store()) raise + def delete_multi_by_prefix(self, prefix): + cache_keys = self.list_keys(prefix=prefix) + num_affected_keys = len(cache_keys) + if num_affected_keys: + self.delete_multi(cache_keys) + return num_affected_keys + def get_store(self): return self.filename - def _dbm_get(self, key): - with self._dbm_file(False) as dbm: - if hasattr(dbm, 'get'): - value = dbm.get(key, NO_VALUE) - else: - # gdbm objects lack a .get method - try: - value = dbm[key] - except KeyError: - value = NO_VALUE - if value is not NO_VALUE: - value = self._loads(value) - return value - - def get(self, key): - try: - return self._dbm_get(key) - except Exception: - log.error('Failed to fetch DBM key %s from DB: %s', key, self.get_store()) - raise - - def set(self, key, value): - with self._dbm_file(True) as dbm: - dbm[key] = self._dumps(value) - - def set_multi(self, mapping): - with self._dbm_file(True) as dbm: - for key, value in mapping.items(): - dbm[key] = self._dumps(value) - class BaseRedisBackend(redis_backend.RedisBackend): key_prefix = '' def __init__(self, arguments): - super(BaseRedisBackend, self).__init__(arguments) + self.db_conn = arguments.get('host', '') or arguments.get('url', '') or 'redis-host' + super().__init__(arguments) + self._lock_timeout = self.lock_timeout - self._lock_auto_renewal = asbool(arguments.pop("lock_auto_renewal", True)) + 
self._lock_auto_renewal = str2bool(arguments.pop("lock_auto_renewal", True)) if self._lock_auto_renewal and not self._lock_timeout: # set default timeout for auto_renewal self._lock_timeout = 30 + def __repr__(self): + return f'{self.__class__}(conn=`{self.db_conn}`)' + + def __str__(self): + return self.__repr__() + def _create_client(self): args = {} @@ -216,58 +200,48 @@ class BaseRedisBackend(redis_backend.Red ) connection_pool = redis.ConnectionPool(**args) + self.writer_client = redis.StrictRedis( + connection_pool=connection_pool + ) + self.reader_client = self.writer_client - return redis.StrictRedis(connection_pool=connection_pool) + def _get_keys_pattern(self, prefix: bytes = b''): + return b'%b:%b*' % (safe_bytes(self.key_prefix), safe_bytes(prefix)) + + def list_keys(self, prefix: bytes = b''): + prefix = self._get_keys_pattern(prefix) + return self.reader_client.keys(prefix) - def list_keys(self, prefix=''): - prefix = '{}:{}*'.format(self.key_prefix, prefix) - return self.client.keys(prefix) + def delete_multi_by_prefix(self, prefix, use_lua=False): + if use_lua: + # high efficient LUA script to delete ALL keys by prefix... + lua = """local keys = redis.call('keys', ARGV[1]) + for i=1,#keys,5000 do + redis.call('del', unpack(keys, i, math.min(i+(5000-1), #keys))) + end + return #keys""" + num_affected_keys = self.writer_client.eval( + lua, + 0, + f"{prefix}*") + else: + cache_keys = self.list_keys(prefix=prefix) + num_affected_keys = len(cache_keys) + if num_affected_keys: + self.delete_multi(cache_keys) + return num_affected_keys def get_store(self): - return self.client.connection_pool - - def get(self, key): - value = self.client.get(key) - if value is None: - return NO_VALUE - return self._loads(value) - - def get_multi(self, keys): - if not keys: - return [] - values = self.client.mget(keys) - loads = self._loads - return [ - loads(v) if v is not None else NO_VALUE - for v in values] - - def set(self, key, value): - if self.redis_expiration_time: - self.client.setex(key, self.redis_expiration_time, - self._dumps(value)) - else: - self.client.set(key, self._dumps(value)) - - def set_multi(self, mapping): - dumps = self._dumps - mapping = dict( - (k, dumps(v)) - for k, v in mapping.items() - ) - - if not self.redis_expiration_time: - self.client.mset(mapping) - else: - pipe = self.client.pipeline() - for key, value in mapping.items(): - pipe.setex(key, self.redis_expiration_time, value) - pipe.execute() + return self.reader_client.connection_pool def get_mutex(self, key): if self.distributed_lock: - lock_key = redis_backend.u('_lock_{0}').format(safe_str(key)) - return get_mutex_lock(self.client, lock_key, self._lock_timeout, - auto_renewal=self._lock_auto_renewal) + lock_key = f'_lock_{safe_str(key)}' + return get_mutex_lock( + self.writer_client, lock_key, + self._lock_timeout, + auto_renewal=self._lock_auto_renewal + ) else: return None @@ -283,9 +257,9 @@ class RedisMsgPackBackend(MsgPackSeriali def get_mutex_lock(client, lock_key, lock_timeout, auto_renewal=False): - import redis_lock + from vcsserver.lib._vendor import redis_lock - class _RedisLockWrapper(object): + class _RedisLockWrapper: """LockWrapper for redis_lock""" @classmethod @@ -299,10 +273,10 @@ def get_mutex_lock(client, lock_key, loc ) def __repr__(self): - return "{}:{}".format(self.__class__.__name__, lock_key) + return f"{self.__class__.__name__}:{lock_key}" def __str__(self): - return "{}:{}".format(self.__class__.__name__, lock_key) + return f"{self.__class__.__name__}:{lock_key}" def 
__init__(self): self.lock = self.get_lock() diff --git a/vcsserver/lib/rc_cache/region_meta.py b/vcsserver/lib/rc_cache/region_meta.py --- a/vcsserver/lib/rc_cache/region_meta.py +++ b/vcsserver/lib/rc_cache/region_meta.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/vcsserver/lib/rc_cache/utils.py b/vcsserver/lib/rc_cache/utils.py --- a/vcsserver/lib/rc_cache/utils.py +++ b/vcsserver/lib/rc_cache/utils.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,115 +15,69 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -import os -import time +import functools import logging -import functools +import os +import threading +import time +import decorator from dogpile.cache import CacheRegion -from dogpile.cache.util import compat + -from vcsserver.utils import safe_str, sha1 +from vcsserver.utils import sha1 +from vcsserver.str_utils import safe_bytes +from vcsserver.type_utils import str2bool # noqa :required by imports from .utils -from vcsserver.lib.rc_cache import region_meta +from . import region_meta log = logging.getLogger(__name__) class RhodeCodeCacheRegion(CacheRegion): + def __repr__(self): + return f'`{self.__class__.__name__}(name={self.name}, backend={self.backend.__class__})`' + def conditional_cache_on_arguments( self, namespace=None, expiration_time=None, should_cache_fn=None, - to_str=compat.string_type, + to_str=str, function_key_generator=None, condition=True): """ Custom conditional decorator, that will not touch any dogpile internals if - condition isn't meet. This works a bit different than should_cache_fn + condition isn't meet. 
This works a bit different from should_cache_fn And it's faster in cases we don't ever want to compute cached values """ - expiration_time_is_callable = compat.callable(expiration_time) + expiration_time_is_callable = callable(expiration_time) + if not namespace: + namespace = getattr(self, '_default_namespace', None) if function_key_generator is None: function_key_generator = self.function_key_generator - # workaround for py2 and cython problems, this block should be removed - # once we've migrated to py3 - if 'cython' == 'cython': - def decorator(fn): - if to_str is compat.string_type: - # backwards compatible - key_generator = function_key_generator(namespace, fn) - else: - key_generator = function_key_generator(namespace, fn, to_str=to_str) - - @functools.wraps(fn) - def decorate(*arg, **kw): - key = key_generator(*arg, **kw) - - @functools.wraps(fn) - def creator(): - return fn(*arg, **kw) - - if not condition: - return creator() - - timeout = expiration_time() if expiration_time_is_callable \ - else expiration_time - - return self.get_or_create(key, creator, timeout, should_cache_fn) - - def invalidate(*arg, **kw): - key = key_generator(*arg, **kw) - self.delete(key) - - def set_(value, *arg, **kw): - key = key_generator(*arg, **kw) - self.set(key, value) - - def get(*arg, **kw): - key = key_generator(*arg, **kw) - return self.get(key) - - def refresh(*arg, **kw): - key = key_generator(*arg, **kw) - value = fn(*arg, **kw) - self.set(key, value) - return value - - decorate.set = set_ - decorate.invalidate = invalidate - decorate.refresh = refresh - decorate.get = get - decorate.original = fn - decorate.key_generator = key_generator - decorate.__wrapped__ = fn - - return decorate - return decorator - - def get_or_create_for_user_func(key_generator, user_func, *arg, **kw): + def get_or_create_for_user_func(func_key_generator, user_func, *arg, **kw): if not condition: - log.debug('Calling un-cached method:%s', user_func.func_name) + log.debug('Calling un-cached method:%s', user_func.__name__) start = time.time() result = user_func(*arg, **kw) total = time.time() - start - log.debug('un-cached method:%s took %.4fs', user_func.func_name, total) + log.debug('un-cached method:%s took %.4fs', user_func.__name__, total) return result - key = key_generator(*arg, **kw) + key = func_key_generator(*arg, **kw) timeout = expiration_time() if expiration_time_is_callable \ else expiration_time - log.debug('Calling cached method:`%s`', user_func.func_name) + log.debug('Calling cached method:`%s`', user_func.__name__) return self.get_or_create(key, user_func, timeout, should_cache_fn, (arg, kw)) def cache_decorator(user_func): - if to_str is compat.string_type: + if to_str is str: # backwards compatible key_generator = function_key_generator(namespace, user_func) else: @@ -176,7 +130,7 @@ def get_default_cache_settings(settings, if key.startswith(prefix): name = key.split(prefix)[1].strip() val = settings[key] - if isinstance(val, compat.string_types): + if isinstance(val, str): val = val.strip() cache_settings[name] = val return cache_settings @@ -186,7 +140,21 @@ def compute_key_from_params(*args): """ Helper to compute key from given params to be used in cache manager """ - return sha1("_".join(map(safe_str, args))) + return sha1(safe_bytes("_".join(map(str, args)))) + + +def custom_key_generator(backend, namespace, fn): + func_name = fn.__name__ + + def generate_key(*args): + backend_pref = getattr(backend, 'key_prefix', None) or 'backend_prefix' + namespace_pref = namespace or 'default_namespace' + 
arg_key = compute_key_from_params(*args) + final_key = f"{backend_pref}:{namespace_pref}:{func_name}_{arg_key}" + + return final_key + + return generate_key def backend_key_generator(backend): @@ -194,49 +162,51 @@ def backend_key_generator(backend): Special wrapper that also sends over the backend to the key generator """ def wrapper(namespace, fn): - return key_generator(backend, namespace, fn) + return custom_key_generator(backend, namespace, fn) return wrapper -def key_generator(backend, namespace, fn): - fname = fn.__name__ +def get_or_create_region(region_name, region_namespace: str = None, use_async_runner=False): + from .backends import FileNamespaceBackend + from . import async_creation_runner - def generate_key(*args): - backend_prefix = getattr(backend, 'key_prefix', None) or 'backend_prefix' - namespace_pref = namespace or 'default_namespace' - arg_key = compute_key_from_params(*args) - final_key = "{}:{}:{}_{}".format(backend_prefix, namespace_pref, fname, arg_key) - - return final_key - - return generate_key - - -def get_or_create_region(region_name, region_namespace=None): - from vcsserver.lib.rc_cache.backends import FileNamespaceBackend region_obj = region_meta.dogpile_cache_regions.get(region_name) if not region_obj: - raise EnvironmentError( - 'Region `{}` not in configured: {}.'.format( - region_name, region_meta.dogpile_cache_regions.keys())) + reg_keys = list(region_meta.dogpile_cache_regions.keys()) + raise OSError(f'Region `{region_name}` not in configured: {reg_keys}.') + + region_uid_name = f'{region_name}:{region_namespace}' - region_uid_name = '{}:{}'.format(region_name, region_namespace) + # Special case for ONLY the FileNamespaceBackend backend. We register one-file-per-region if isinstance(region_obj.actual_backend, FileNamespaceBackend): + if not region_namespace: + raise ValueError(f'{FileNamespaceBackend} used requires to specify region_namespace param') + region_exist = region_meta.dogpile_cache_regions.get(region_namespace) if region_exist: log.debug('Using already configured region: %s', region_namespace) return region_exist - cache_dir = region_meta.dogpile_config_defaults['cache_dir'] + expiration_time = region_obj.expiration_time - if not os.path.isdir(cache_dir): - os.makedirs(cache_dir) + cache_dir = region_meta.dogpile_config_defaults['cache_dir'] + namespace_cache_dir = cache_dir + + # we default the namespace_cache_dir to our default cache dir. 
+ # however, if this backend is configured with filename= param, we prioritize that + # so all caches within that particular region, even those namespaced end up in the same path + if region_obj.actual_backend.filename: + namespace_cache_dir = os.path.dirname(region_obj.actual_backend.filename) + + if not os.path.isdir(namespace_cache_dir): + os.makedirs(namespace_cache_dir) new_region = make_region( name=region_uid_name, function_key_generator=backend_key_generator(region_obj.actual_backend) ) + namespace_filename = os.path.join( - cache_dir, "{}.cache.dbm".format(region_namespace)) + namespace_cache_dir, f"{region_name}_{region_namespace}.cache_db") # special type that allows 1db per namespace new_region.configure( backend='dogpile.cache.rc.file_namespace', @@ -248,16 +218,28 @@ def get_or_create_region(region_name, re log.debug('configuring new region: %s', region_uid_name) region_obj = region_meta.dogpile_cache_regions[region_namespace] = new_region + region_obj._default_namespace = region_namespace + if use_async_runner: + region_obj.async_creation_runner = async_creation_runner return region_obj -def clear_cache_namespace(cache_region, cache_namespace_uid, invalidate=False): - region = get_or_create_region(cache_region, cache_namespace_uid) - cache_keys = region.backend.list_keys(prefix=cache_namespace_uid) - num_delete_keys = len(cache_keys) - if invalidate: - region.invalidate(hard=False) - else: - if num_delete_keys: - region.delete_multi(cache_keys) - return num_delete_keys +def clear_cache_namespace(cache_region: str | RhodeCodeCacheRegion, cache_namespace_uid: str, method: str) -> int: + from . import CLEAR_DELETE, CLEAR_INVALIDATE + + if not isinstance(cache_region, RhodeCodeCacheRegion): + cache_region = get_or_create_region(cache_region, cache_namespace_uid) + log.debug('clearing cache region: %s [prefix:%s] with method=%s', + cache_region, cache_namespace_uid, method) + + num_affected_keys = 0 + + if method == CLEAR_INVALIDATE: + # NOTE: The CacheRegion.invalidate() method’s default mode of + # operation is to set a timestamp local to this CacheRegion in this Python process only. + # It does not impact other Python processes or regions as the timestamp is only stored locally in memory. + cache_region.invalidate(hard=True) + + if method == CLEAR_DELETE: + num_affected_keys = cache_region.backend.delete_multi_by_prefix(prefix=cache_namespace_uid) + return num_affected_keys diff --git a/vcsserver/lib/rc_json.py b/vcsserver/lib/rc_json.py new file mode 100644 --- /dev/null +++ b/vcsserver/lib/rc_json.py @@ -0,0 +1,2 @@ +# use orjson by default +import orjson as json diff --git a/vcsserver/lib/request_counter.py b/vcsserver/lib/request_counter.py --- a/vcsserver/lib/request_counter.py +++ b/vcsserver/lib/request_counter.py @@ -1,7 +1,5 @@ -# -*- coding: utf-8 -*- - # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/vcsserver/lib/statsd_client.py b/vcsserver/lib/statsd_client.py new file mode 100644 --- /dev/null +++ b/vcsserver/lib/statsd_client.py @@ -0,0 +1,70 @@ +# RhodeCode VCSServer provides access to different vcs backends via network. 
+# Copyright (C) 2014-2023 RhodeCode GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +from vcsserver.lib._vendor.statsd import client_from_config + + +class StatsdClientNotInitialised(Exception): + pass + + +class _Singleton(type): + """A metaclass that creates a Singleton base class when called.""" + + _instances = {} + + def __call__(cls, *args, **kwargs): + if cls not in cls._instances: + cls._instances[cls] = super().__call__(*args, **kwargs) + return cls._instances[cls] + + +class Singleton(_Singleton("SingletonMeta", (object,), {})): + pass + + +class StatsdClientClass(Singleton): + setup_run = False + statsd_client = None + statsd = None + strict_mode_init = False + + def __getattribute__(self, name): + + if name.startswith("statsd"): + if self.setup_run: + return super().__getattribute__(name) + else: + if self.strict_mode_init: + raise StatsdClientNotInitialised(f"requested key was {name}") + return None + + return super().__getattribute__(name) + + def setup(self, settings): + """ + Initialize the client + """ + strict_init_mode = settings.pop('statsd_strict_init', False) + + statsd = client_from_config(settings) + self.statsd = statsd + self.statsd_client = statsd + self.setup_run = True + + +StatsdClient = StatsdClientClass() diff --git a/vcsserver/lib/svnremoterepo.py b/vcsserver/lib/svnremoterepo.py new file mode 100644 --- /dev/null +++ b/vcsserver/lib/svnremoterepo.py @@ -0,0 +1,160 @@ +# RhodeCode VCSServer provides access to different vcs backends via network. +# Copyright (C) 2014-2023 RhodeCode GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +import os +import tempfile + +from svn import client +from svn import core +from svn import ra + +from mercurial import error + +from vcsserver.str_utils import safe_bytes + +core.svn_config_ensure(None) +svn_config = core.svn_config_get_config(None) + + +class RaCallbacks(ra.Callbacks): + @staticmethod + def open_tmp_file(pool): # pragma: no cover + (fd, fn) = tempfile.mkstemp() + os.close(fd) + return fn + + @staticmethod + def get_client_string(pool): + return b'RhodeCode-subversion-url-checker' + + +class SubversionException(Exception): + pass + + +class SubversionConnectionException(SubversionException): + """Exception raised when a generic error occurs when connecting to a repository.""" + + +def normalize_url(url): + if not url: + return url + if url.startswith(b'svn+http://') or url.startswith(b'svn+https://'): + url = url[4:] + url = url.rstrip(b'/') + return url + + +def _create_auth_baton(pool): + """Create a Subversion authentication baton. """ + # Give the client context baton a suite of authentication + # providers.h + platform_specific = [ + 'svn_auth_get_gnome_keyring_simple_provider', + 'svn_auth_get_gnome_keyring_ssl_client_cert_pw_provider', + 'svn_auth_get_keychain_simple_provider', + 'svn_auth_get_keychain_ssl_client_cert_pw_provider', + 'svn_auth_get_kwallet_simple_provider', + 'svn_auth_get_kwallet_ssl_client_cert_pw_provider', + 'svn_auth_get_ssl_client_cert_file_provider', + 'svn_auth_get_windows_simple_provider', + 'svn_auth_get_windows_ssl_server_trust_provider', + ] + + providers = [] + + for p in platform_specific: + if getattr(core, p, None) is not None: + try: + providers.append(getattr(core, p)()) + except RuntimeError: + pass + + providers += [ + client.get_simple_provider(), + client.get_username_provider(), + client.get_ssl_client_cert_file_provider(), + client.get_ssl_client_cert_pw_file_provider(), + client.get_ssl_server_trust_file_provider(), + ] + + return core.svn_auth_open(providers, pool) + + +class SubversionRepo: + """Wrapper for a Subversion repository. + + It uses the SWIG Python bindings, see above for requirements. + """ + def __init__(self, svn_url: bytes = b'', username: bytes = b'', password: bytes = b''): + + self.username = username + self.password = password + self.svn_url = core.svn_path_canonicalize(svn_url) + + self.auth_baton_pool = core.Pool() + self.auth_baton = _create_auth_baton(self.auth_baton_pool) + # self.init_ra_and_client() assumes that a pool already exists + self.pool = core.Pool() + + self.ra = self.init_ra_and_client() + self.uuid = ra.get_uuid(self.ra, self.pool) + + def init_ra_and_client(self): + """Initializes the RA and client layers, because sometimes getting + unified diffs runs the remote server out of open files. 
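        Re-running it yields a fresh RA session and client context on top of
        the same auth baton. A rough usage sketch (credentials and URL are
        placeholders; see the svnremoterepo wrapper below):

            remote = svnremoterepo(b'user', b'secret', b'svn+https://svn.example.com/repo')
            repo = remote.svn()  # SubversionRepo with .ra and .uuid populated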
+ """ + + if self.username: + core.svn_auth_set_parameter(self.auth_baton, + core.SVN_AUTH_PARAM_DEFAULT_USERNAME, + self.username) + if self.password: + core.svn_auth_set_parameter(self.auth_baton, + core.SVN_AUTH_PARAM_DEFAULT_PASSWORD, + self.password) + + callbacks = RaCallbacks() + callbacks.auth_baton = self.auth_baton + + try: + return ra.open2(self.svn_url, callbacks, svn_config, self.pool) + except SubversionException as e: + # e.child contains a detailed error messages + msglist = [] + svn_exc = e + while svn_exc: + if svn_exc.args[0]: + msglist.append(svn_exc.args[0]) + svn_exc = svn_exc.child + msg = '\n'.join(msglist) + raise SubversionConnectionException(msg) + + +class svnremoterepo: + """ the dumb wrapper for actual Subversion repositories """ + + def __init__(self, username: bytes = b'', password: bytes = b'', svn_url: bytes = b''): + self.username = username or b'' + self.password = password or b'' + self.path = normalize_url(svn_url) + + def svn(self): + try: + return SubversionRepo(self.path, self.username, self.password) + except SubversionConnectionException as e: + raise error.Abort(safe_bytes(e)) diff --git a/vcsserver/pygrack.py b/vcsserver/pygrack.py --- a/vcsserver/pygrack.py +++ b/vcsserver/pygrack.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,17 +21,19 @@ import os import socket import logging -import simplejson as json import dulwich.protocol +from dulwich.protocol import CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K from webob import Request, Response, exc +from vcsserver.lib.rc_json import json from vcsserver import hooks, subprocessio +from vcsserver.str_utils import ascii_bytes log = logging.getLogger(__name__) -class FileWrapper(object): +class FileWrapper: """File wrapper that ensures how much data is read from it.""" def __init__(self, fd, content_length): @@ -54,34 +56,33 @@ class FileWrapper(object): return data def __repr__(self): - return '' % ( + return ''.format( self.fd, self.content_length, self.content_length - self.remain ) -class GitRepository(object): +class GitRepository: """WSGI app for handling Git smart protocol endpoints.""" - git_folder_signature = frozenset( - ('config', 'head', 'info', 'objects', 'refs')) + git_folder_signature = frozenset(('config', 'head', 'info', 'objects', 'refs')) commands = frozenset(('git-upload-pack', 'git-receive-pack')) - valid_accepts = frozenset(('application/x-%s-result' % - c for c in commands)) + valid_accepts = frozenset(f'application/x-{c}-result' for c in commands) # The last bytes are the SHA1 of the first 12 bytes. 
EMPTY_PACK = ( - 'PACK\x00\x00\x00\x02\x00\x00\x00\x00' + - '\x02\x9d\x08\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e' + b'PACK\x00\x00\x00\x02\x00\x00\x00\x00\x02\x9d\x08' + + b'\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e' ) - SIDE_BAND_CAPS = frozenset(('side-band', 'side-band-64k')) + FLUSH_PACKET = b"0000" - def __init__(self, repo_name, content_path, git_path, update_server_info, - extras): + SIDE_BAND_CAPS = frozenset((CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K)) + + def __init__(self, repo_name, content_path, git_path, update_server_info, extras): files = frozenset(f.lower() for f in os.listdir(content_path)) valid_dir_signature = self.git_folder_signature.issubset(files) if not valid_dir_signature: - raise OSError('%s missing git signature' % content_path) + raise OSError(f'{content_path} missing git signature') self.content_path = content_path self.repo_name = repo_name @@ -122,8 +123,8 @@ class GitRepository(object): # blows up if you sprinkle "flush" (0000) as "0001\n". # It reads binary, per number of bytes specified. # if you do add '\n' as part of data, count it. - server_advert = '# service=%s\n' % git_command - packet_len = str(hex(len(server_advert) + 4)[2:].rjust(4, '0')).lower() + server_advert = f'# service={git_command}\n' + packet_len = hex(len(server_advert) + 4)[2:].rjust(4, '0').lower() try: gitenv = dict(os.environ) # forget all configs @@ -133,15 +134,15 @@ class GitRepository(object): out = subprocessio.SubprocessIOChunker( command, env=gitenv, - starting_values=[packet_len + server_advert + '0000'], + starting_values=[ascii_bytes(packet_len + server_advert) + self.FLUSH_PACKET], shell=False ) - except EnvironmentError: + except OSError: log.exception('Error processing command') raise exc.HTTPExpectationFailed() resp = Response() - resp.content_type = 'application/x-%s-advertisement' % str(git_command) + resp.content_type = f'application/x-{git_command}-advertisement' resp.charset = None resp.app_iter = out @@ -166,34 +167,103 @@ class GitRepository(object): We also print in the error output a message explaining why the command was aborted. - If aditionally, the user is accepting messages we send them the output + If additionally, the user is accepting messages we send them the output of the pre-pull hook. Note that for clients not supporting side-band we just send them the emtpy PACK file. 
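        Concretely, the failure response assembled below is: a NAK pkt-line,
        the hook output followed by an abort notice on side-band channel 2
        (progress), the empty PACK on channel 1 (data), and a terminating
        flush packet.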
""" + if self.SIDE_BAND_CAPS.intersection(capabilities): response = [] proto = dulwich.protocol.Protocol(None, response.append) - proto.write_pkt_line('NAK\n') - self._write_sideband_to_proto(pre_pull_messages, proto, - capabilities) + proto.write_pkt_line(dulwich.protocol.NAK_LINE) + + self._write_sideband_to_proto(proto, ascii_bytes(pre_pull_messages, allow_bytes=True), capabilities) # N.B.(skreft): Do not change the sideband channel to 3, as that # produces a fatal error in the client: # fatal: error in sideband demultiplexer - proto.write_sideband(2, 'Pre pull hook failed: aborting\n') - proto.write_sideband(1, self.EMPTY_PACK) + proto.write_sideband( + dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS, + ascii_bytes('Pre pull hook failed: aborting\n', allow_bytes=True)) + proto.write_sideband( + dulwich.protocol.SIDE_BAND_CHANNEL_DATA, + ascii_bytes(self.EMPTY_PACK, allow_bytes=True)) - # writes 0000 + # writes b"0000" as default proto.write_pkt_line(None) return response else: - return [self.EMPTY_PACK] + return [ascii_bytes(self.EMPTY_PACK, allow_bytes=True)] + + def _build_post_pull_response(self, response, capabilities, start_message, end_message): + """ + Given a list response we inject the post-pull messages. + + We only inject the messages if the client supports sideband, and the + response has the format: + 0008NAK\n...0000 + + Note that we do not check the no-progress capability as by default, git + sends it, which effectively would block all messages. + """ + + if not self.SIDE_BAND_CAPS.intersection(capabilities): + return response + + if not start_message and not end_message: + return response + + try: + iter(response) + # iterator probably will work, we continue + except TypeError: + raise TypeError(f'response must be an iterator: got {type(response)}') + if isinstance(response, (list, tuple)): + raise TypeError(f'response must be an iterator: got {type(response)}') + + def injected_response(): - def _write_sideband_to_proto(self, data, proto, capabilities): + do_loop = 1 + header_injected = 0 + next_item = None + has_item = False + item = b'' + + while do_loop: + + try: + next_item = next(response) + except StopIteration: + do_loop = 0 + + if has_item: + # last item ! alter it now + if do_loop == 0 and item.endswith(self.FLUSH_PACKET): + new_response = [item[:-4]] + new_response.extend(self._get_messages(end_message, capabilities)) + new_response.append(self.FLUSH_PACKET) + item = b''.join(new_response) + + yield item + + has_item = True + item = next_item + + # alter item if it's the initial chunk + if not header_injected and item.startswith(b'0008NAK\n'): + new_response = [b'0008NAK\n'] + new_response.extend(self._get_messages(start_message, capabilities)) + new_response.append(item[8:]) + item = b''.join(new_response) + header_injected = 1 + + return injected_response() + + def _write_sideband_to_proto(self, proto, data, capabilities): """ - Write the data to the proto's sideband number 2. + Write the data to the proto's sideband number 2 == SIDE_BAND_CHANNEL_PROGRESS We do not use dulwich's write_sideband directly as it only supports side-band-64k. @@ -204,68 +274,27 @@ class GitRepository(object): # N.B.(skreft): The values below are explained in the pack protocol # documentation, section Packfile Data. 
# https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt - if 'side-band-64k' in capabilities: + if CAPABILITY_SIDE_BAND_64K in capabilities: chunk_size = 65515 - elif 'side-band' in capabilities: + elif CAPABILITY_SIDE_BAND in capabilities: chunk_size = 995 else: return - chunker = ( - data[i:i + chunk_size] for i in xrange(0, len(data), chunk_size)) + chunker = (data[i:i + chunk_size] for i in range(0, len(data), chunk_size)) for chunk in chunker: - proto.write_sideband(2, chunk) + proto.write_sideband(dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS, ascii_bytes(chunk, allow_bytes=True)) def _get_messages(self, data, capabilities): """Return a list with packets for sending data in sideband number 2.""" response = [] proto = dulwich.protocol.Protocol(None, response.append) - self._write_sideband_to_proto(data, proto, capabilities) + self._write_sideband_to_proto(proto, data, capabilities) return response - def _inject_messages_to_response(self, response, capabilities, - start_messages, end_messages): - """ - Given a list response we inject the pre/post-pull messages. - - We only inject the messages if the client supports sideband, and the - response has the format: - 0008NAK\n...0000 - - Note that we do not check the no-progress capability as by default, git - sends it, which effectively would block all messages. - """ - if not self.SIDE_BAND_CAPS.intersection(capabilities): - return response - - if not start_messages and not end_messages: - return response - - # make a list out of response if it's an iterator - # so we can investigate it for message injection. - if hasattr(response, '__iter__'): - response = list(response) - - if (not response[0].startswith('0008NAK\n') or - not response[-1].endswith('0000')): - return response - - new_response = ['0008NAK\n'] - new_response.extend(self._get_messages(start_messages, capabilities)) - if len(response) == 1: - new_response.append(response[0][8:-4]) - else: - new_response.append(response[0][8:]) - new_response.extend(response[1:-1]) - new_response.append(response[-1][:-4]) - new_response.extend(self._get_messages(end_messages, capabilities)) - new_response.append('0000') - - return new_response - def backend(self, request, environ): """ WSGI Response producer for HTTP POST Git Smart HTTP requests. 
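The side-band framing used by _write_sideband_to_proto is easiest to see in isolation. Below is a minimal standalone sketch; it assumes only that dulwich is installed, and the function name and sample bytes are illustrative, not part of this change. Each side-band packet is a pkt-line whose payload starts with a single channel byte, so the usable chunk is the 65520-byte pkt-line maximum minus 4 bytes of length header and 1 channel byte: 65515 for side-band-64k, and 1000 - 4 - 1 = 995 for plain side-band, matching the constants above.

import dulwich.protocol
from dulwich.protocol import CAPABILITY_SIDE_BAND_64K


def sideband_progress_packets(data: bytes, capabilities) -> list:
    # pick the chunk size the same way _write_sideband_to_proto does
    chunk_size = 65515 if CAPABILITY_SIDE_BAND_64K in capabilities else 995
    packets = []
    # Protocol(read, write): we only write, collecting pkt-lines in a list
    proto = dulwich.protocol.Protocol(None, packets.append)
    for i in range(0, len(data), chunk_size):
        proto.write_sideband(
            dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS, data[i:i + chunk_size])
    return packets

# sideband_progress_packets(b'hello\n', {CAPABILITY_SIDE_BAND_64K}) returns
# [b'000b\x02hello\n']: '000b' is the total packet length (11) in hex and
# '\x02' is the progress channel byte.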
@@ -304,14 +333,15 @@ class GitRepository(object): inputstream = request.body_file_seekable resp = Response() - resp.content_type = ('application/x-%s-result' % - git_command.encode('utf8')) + resp.content_type = f'application/x-{git_command}-result' resp.charset = None pre_pull_messages = '' + # Upload-pack == clone if git_command == 'git-upload-pack': - status, pre_pull_messages = hooks.git_pre_pull(self.extras) - if status != 0: + hook_response = hooks.git_pre_pull(self.extras) + if hook_response.status != 0: + pre_pull_messages = hook_response.output resp.app_iter = self._build_failed_pre_pull_response( capabilities, pre_pull_messages) return resp @@ -326,7 +356,7 @@ class GitRepository(object): out = subprocessio.SubprocessIOChunker( cmd, - inputstream=inputstream, + input_stream=inputstream, env=gitenv, cwd=self.content_path, shell=False, @@ -346,7 +376,7 @@ class GitRepository(object): log.debug('handling cmd %s', cmd) output = subprocessio.SubprocessIOChunker( cmd, - inputstream=inputstream, + input_stream=inputstream, env=gitenv, cwd=self.content_path, shell=False, @@ -357,10 +387,11 @@ class GitRepository(object): for _ in output: pass + # Upload-pack == clone if git_command == 'git-upload-pack': - unused_status, post_pull_messages = hooks.git_post_pull(self.extras) - resp.app_iter = self._inject_messages_to_response( - out, capabilities, pre_pull_messages, post_pull_messages) + hook_response = hooks.git_post_pull(self.extras) + post_pull_messages = hook_response.output + resp.app_iter = self._build_post_pull_response(out, capabilities, pre_pull_messages, post_pull_messages) else: resp.app_iter = out diff --git a/vcsserver/remote/__init__.py b/vcsserver/remote/__init__.py new file mode 100644 --- /dev/null +++ b/vcsserver/remote/__init__.py @@ -0,0 +1,17 @@ +# RhodeCode VCSServer provides access to different vcs backends via network. +# Copyright (C) 2014-2023 RhodeCode GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + diff --git a/vcsserver/git.py b/vcsserver/remote/git_remote.py rename from vcsserver/git.py rename to vcsserver/remote/git_remote.py --- a/vcsserver/git.py +++ b/vcsserver/remote/git_remote.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. 
-# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -18,12 +18,12 @@ import collections import logging import os -import posixpath as vcspath import re import stat import traceback -import urllib -import urllib2 +import urllib.request +import urllib.parse +import urllib.error from functools import wraps import more_itertools @@ -31,17 +31,17 @@ import pygit2 from pygit2 import Repository as LibGit2Repo from pygit2 import index as LibGit2Index from dulwich import index, objects -from dulwich.client import HttpGitClient, LocalGitClient +from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult from dulwich.errors import ( NotGitRepository, ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing, HangupException, UnexpectedCommandError) from dulwich.repo import Repo as DulwichRepo -from dulwich.server import update_server_info +import rhodecode from vcsserver import exceptions, settings, subprocessio -from vcsserver.utils import safe_str, safe_int, safe_unicode -from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo +from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str +from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope from vcsserver.hgcompat import ( hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler) from vcsserver.git_lfs.lib import LFSOidStore @@ -50,19 +50,12 @@ from vcsserver.vcs_base import RemoteBas DIR_STAT = stat.S_IFDIR FILE_MODE = stat.S_IFMT GIT_LINK = objects.S_IFGITLINK -PEELED_REF_MARKER = '^{}' - +PEELED_REF_MARKER = b'^{}' +HEAD_MARKER = b'HEAD' log = logging.getLogger(__name__) -def str_to_dulwich(value): - """ - Dulwich 0.10.1a requires `unicode` objects to be passed in. - """ - return value.decode(settings.WIRE_ENCODING) - - def reraise_safe_exceptions(func): """Converts Dulwich exceptions to something neutral.""" @@ -76,8 +69,8 @@ def reraise_safe_exceptions(func): except (HangupException, UnexpectedCommandError) as e: exc = exceptions.VcsException(org_exc=e) raise exc(safe_str(e)) - except Exception as e: - # NOTE(marcink): becuase of how dulwich handles some exceptions + except Exception: + # NOTE(marcink): because of how dulwich handles some exceptions # (KeyError on empty repos), we cannot track this and catch all # exceptions, it's an exceptions from other handlers #if not hasattr(e, '_vcs_kind'): @@ -114,10 +107,14 @@ class GitFactory(RepoFactory): def _create_repo(self, wire, create, use_libgit2=False): if use_libgit2: - return Repository(wire['path']) + repo = Repository(safe_bytes(wire['path'])) else: - repo_path = str_to_dulwich(wire['path']) - return Repo(repo_path) + # dulwich mode + repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING) + repo = Repo(repo_path) + + log.debug('repository created: got GIT object: %s', repo) + return repo def repo(self, wire, create=False, use_libgit2=False): """ @@ -129,6 +126,28 @@ class GitFactory(RepoFactory): return self.repo(wire, use_libgit2=True) +def create_signature_from_string(author_str, **kwargs): + """ + Creates a pygit2.Signature object from a string of the format 'Name '. 
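    For example (name and address purely illustrative): 'Jane Doe <jane@example.com>'
    yields pygit2.Signature('Jane Doe', 'jane@example.com').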
+ + :param author_str: String of the format 'Name ' + :return: pygit2.Signature object + """ + match = re.match(r'^(.+) <(.+)>$', author_str) + if match is None: + raise ValueError(f"Invalid format: {author_str}") + + name, email = match.groups() + return pygit2.Signature(name, email, **kwargs) + + +def get_obfuscated_url(url_obj): + url_obj.passwd = b'*****' if url_obj.passwd else url_obj.passwd + url_obj.query = obfuscate_qs(url_obj.query) + obfuscated_uri = str(url_obj) + return obfuscated_uri + + class GitRemote(RemoteBase): def __init__(self, factory): @@ -141,35 +160,45 @@ class GitRemote(RemoteBase): "parents": self.parents, "_commit": self.revision, } + self._bulk_file_methods = { + "size": self.get_node_size, + "data": self.get_node_data, + "flags": self.get_node_flags, + "is_binary": self.get_node_is_binary, + "md5": self.md5_hash + } def _wire_to_config(self, wire): if 'config' in wire: - return dict([(x[0] + '_' + x[1], x[2]) for x in wire['config']]) + return {x[0] + '_' + x[1]: x[2] for x in wire['config']} return {} def _remote_conf(self, config): params = [ '-c', 'core.askpass=""', ] - ssl_cert_dir = config.get('vcs_ssl_dir') - if ssl_cert_dir: - params.extend(['-c', 'http.sslCAinfo={}'.format(ssl_cert_dir)]) + config_attrs = { + 'vcs_ssl_dir': 'http.sslCAinfo={}', + 'vcs_git_lfs_store_location': 'lfs.storage={}' + } + for key, param in config_attrs.items(): + if value := config.get(key): + params.extend(['-c', param.format(value)]) return params @reraise_safe_exceptions def discover_git_version(self): stdout, _ = self.run_git_command( {}, ['--version'], _bare=True, _safe=True) - prefix = 'git version' + prefix = b'git version' if stdout.startswith(prefix): stdout = stdout[len(prefix):] - return stdout.strip() + return safe_str(stdout.strip()) @reraise_safe_exceptions def is_empty(self, wire): repo_init = self._factory.repo_libgit2(wire) with repo_init as repo: - try: has_head = repo.head.name if has_head: @@ -186,20 +215,27 @@ class GitRemote(RemoteBase): def assert_correct_path(self, wire): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) - def _assert_correct_path(_context_uid, _repo_id): - try: - repo_init = self._factory.repo_libgit2(wire) - with repo_init as repo: - pass - except pygit2.GitError: - path = wire.get('path') - tb = traceback.format_exc() - log.debug("Invalid Git path `%s`, tb: %s", path, tb) + def _assert_correct_path(_context_uid, _repo_id, fast_check): + if fast_check: + path = safe_str(wire['path']) + if pygit2.discover_repository(path): + return True return False + else: + try: + repo_init = self._factory.repo_libgit2(wire) + with repo_init: + pass + except pygit2.GitError: + path = wire.get('path') + tb = traceback.format_exc() + log.debug("Invalid Git path `%s`, tb: %s", path, tb) + return False + return True - return True - return _assert_correct_path(context_uid, repo_id) + return _assert_correct_path(context_uid, repo_id, True) @reraise_safe_exceptions def bare(self, wire): @@ -208,17 +244,69 @@ class GitRemote(RemoteBase): return repo.is_bare @reraise_safe_exceptions + def get_node_data(self, wire, commit_id, path): + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + commit = repo[commit_id] + blob_obj = commit.tree[path] + + if blob_obj.type != pygit2.GIT_OBJ_BLOB: + raise exceptions.LookupException()( + f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}') + + return BytesEnvelope(blob_obj.data) + + 
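    # get_node_size/get_node_flags/get_node_is_binary below follow the same
    # shape as get_node_data above: resolve the commit, look up the tree entry
    # at `path`, refuse anything that is not a blob, then return one attribute.
    # bulk_file_request (further down) fans a list of attribute names out
    # through self._bulk_file_methods, so a single wire call can pre-load
    # several of them, e.g. (illustrative values):
    #
    #   remote.bulk_file_request(wire, commit_id='deadbeef', path='README.rst',
    #                            pre_load=['size', 'is_binary'])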
@reraise_safe_exceptions + def get_node_size(self, wire, commit_id, path): + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + commit = repo[commit_id] + blob_obj = commit.tree[path] + + if blob_obj.type != pygit2.GIT_OBJ_BLOB: + raise exceptions.LookupException()( + f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}') + + return blob_obj.size + + @reraise_safe_exceptions + def get_node_flags(self, wire, commit_id, path): + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + commit = repo[commit_id] + blob_obj = commit.tree[path] + + if blob_obj.type != pygit2.GIT_OBJ_BLOB: + raise exceptions.LookupException()( + f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}') + + return blob_obj.filemode + + @reraise_safe_exceptions + def get_node_is_binary(self, wire, commit_id, path): + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + commit = repo[commit_id] + blob_obj = commit.tree[path] + + if blob_obj.type != pygit2.GIT_OBJ_BLOB: + raise exceptions.LookupException()( + f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}') + + return blob_obj.is_binary + + @reraise_safe_exceptions def blob_as_pretty_string(self, wire, sha): repo_init = self._factory.repo_libgit2(wire) with repo_init as repo: blob_obj = repo[sha] - blob = blob_obj.data - return blob + return BytesEnvelope(blob_obj.data) @reraise_safe_exceptions def blob_raw_length(self, wire, sha): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _blob_raw_length(_repo_id, _sha): @@ -230,10 +318,10 @@ class GitRemote(RemoteBase): return _blob_raw_length(repo_id, sha) def _parse_lfs_pointer(self, raw_content): + spec_string = b'version https://git-lfs.github.com/spec' + if raw_content and raw_content.startswith(spec_string): - spec_string = 'version https://git-lfs.github.com/spec' - if raw_content and raw_content.startswith(spec_string): - pattern = re.compile(r""" + pattern = re.compile(rb""" (?:\n)? 
^version[ ]https://git-lfs\.github\.com/spec/(?P<version>v\d+)\n ^oid[ ]sha256:(?P<oid>[0-9a-f]{64})\n @@ -249,8 +337,8 @@ class GitRemote(RemoteBase): @reraise_safe_exceptions def is_large_file(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) - region = self._region(wire) @region.conditional_cache_on_arguments(condition=cache_on) def _is_large_file(_repo_id, _sha): repo_init = self._factory.repo_libgit2(wire) @@ -266,8 +354,8 @@ class GitRemote(RemoteBase): @reraise_safe_exceptions def is_binary(self, wire, tree_id): cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) - region = self._region(wire) @region.conditional_cache_on_arguments(condition=cache_on) def _is_binary(_repo_id, _tree_id): repo_init = self._factory.repo_libgit2(wire) @@ -278,6 +366,26 @@ class GitRemote(RemoteBase): return _is_binary(repo_id, tree_id) @reraise_safe_exceptions + def md5_hash(self, wire, commit_id, path): + cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) + + @region.conditional_cache_on_arguments(condition=cache_on) + def _md5_hash(_repo_id, _commit_id, _path): + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + commit = repo[_commit_id] + blob_obj = commit.tree[_path] + + if blob_obj.type != pygit2.GIT_OBJ_BLOB: + raise exceptions.LookupException()( + f'Tree for commit_id:{_commit_id} is not a blob: {blob_obj.type_str}') + + return '' + + return _md5_hash(repo_id, commit_id, path) + + @reraise_safe_exceptions def in_largefiles_store(self, wire, oid): conf = self._wire_to_config(wire) repo_init = self._factory.repo_libgit2(wire) @@ -305,90 +413,104 @@ class GitRemote(RemoteBase): store = LFSOidStore( oid=oid, repo=repo_name, store_location=store_location) return store.oid_path - raise ValueError('Unable to fetch oid with path {}'.format(oid)) + raise ValueError(f'Unable to fetch oid with path {oid}') @reraise_safe_exceptions def bulk_request(self, wire, rev, pre_load): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _bulk_request(_repo_id, _rev, _pre_load): result = {} for attr in pre_load: try: method = self._bulk_methods[attr] + wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache args = [wire, rev] result[attr] = method(*args) except KeyError as e: - raise exceptions.VcsException(e)( - "Unknown bulk attribute: %s" % attr) + raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}") return result return _bulk_request(repo_id, rev, sorted(pre_load)) - def _build_opener(self, url): + @reraise_safe_exceptions + def bulk_file_request(self, wire, commit_id, path, pre_load): + cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) + + @region.conditional_cache_on_arguments(condition=cache_on) + def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load): + result = {} + for attr in pre_load: + try: + method = self._bulk_file_methods[attr] + wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache + result[attr] = method(wire, _commit_id, _path) + except KeyError as e: + raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"') + return result + + return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load))) + + def _build_opener(self, url: str): handlers = [] - url_obj = 
url_parser(safe_bytes(url)) + authinfo = url_obj.authinfo()[1] if authinfo: # create a password manager - passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm() - passmgr.add_password(*authinfo) + passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm() + passmgr.add_password(*convert_to_str(authinfo)) handlers.extend((httpbasicauthhandler(passmgr), httpdigestauthhandler(passmgr))) - return urllib2.build_opener(*handlers) - - def _type_id_to_name(self, type_id): - return { - 1: b'commit', - 2: b'tree', - 3: b'blob', - 4: b'tag' - }[type_id] + return urllib.request.build_opener(*handlers) @reraise_safe_exceptions def check_url(self, url, config): - url_obj = url_parser(url) - test_uri, _ = url_obj.authinfo() - url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd - url_obj.query = obfuscate_qs(url_obj.query) - cleaned_uri = str(url_obj) - log.info("Checking URL for remote cloning/import: %s", cleaned_uri) + url_obj = url_parser(safe_bytes(url)) + + test_uri = safe_str(url_obj.authinfo()[0]) + obfuscated_uri = get_obfuscated_url(url_obj) + + log.info("Checking URL for remote cloning/import: %s", obfuscated_uri) if not test_uri.endswith('info/refs'): test_uri = test_uri.rstrip('/') + '/info/refs' - o = self._build_opener(url) + o = self._build_opener(url=url) o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git q = {"service": 'git-upload-pack'} - qs = '?%s' % urllib.urlencode(q) - cu = "%s%s" % (test_uri, qs) - req = urllib2.Request(cu, None, {}) + qs = f'?{urllib.parse.urlencode(q)}' + cu = f"{test_uri}{qs}" try: - log.debug("Trying to open URL %s", cleaned_uri) + req = urllib.request.Request(cu, None, {}) + log.debug("Trying to open URL %s", obfuscated_uri) resp = o.open(req) if resp.code != 200: raise exceptions.URLError()('Return Code is not 200') except Exception as e: - log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True) + log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True) # means it cannot be cloned - raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e)) + raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}") # now detect if it's proper git repo - gitdata = resp.read() - if 'service=git-upload-pack' in gitdata: + gitdata: bytes = resp.read() + + if b'service=git-upload-pack' in gitdata: pass - elif re.findall(r'[0-9a-fA-F]{40}\s+refs', gitdata): - # old style git can return some other format ! + elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata): + # old style git can return some other format! 
            pass
         else:
-            raise exceptions.URLError()(
-                "url [%s] does not look like an git" % (cleaned_uri,))
+            e = None
+            raise exceptions.URLError(e)(
+                f"url [{obfuscated_uri}] does not look like a git repo. org_exc: {e}")
 
         return True
 
@@ -415,6 +537,7 @@ class GitRemote(RemoteBase):
     def branch(self, wire, commit_id):
         cache_on, context_uid, repo_id = self._cache_on(wire)
         region = self._region(wire)
+
         @region.conditional_cache_on_arguments(condition=cache_on)
         def _branch(_context_uid, _repo_id, _commit_id):
             regex = re.compile('^refs/heads')
@@ -422,7 +545,7 @@ class GitRemote(RemoteBase):
             def filter_with(ref):
                 return regex.match(ref[0]) and ref[1] == _commit_id
 
-            branches = filter(filter_with, self.get_refs(wire).items())
+            branches = list(filter(filter_with, list(self.get_refs(wire).items())))
             return [x[0].split('refs/heads/')[-1] for x in branches]
 
         return _branch(context_uid, repo_id, commit_id)
@@ -431,6 +554,7 @@ class GitRemote(RemoteBase):
     def commit_branches(self, wire, commit_id):
         cache_on, context_uid, repo_id = self._cache_on(wire)
         region = self._region(wire)
+
         @region.conditional_cache_on_arguments(condition=cache_on)
         def _commit_branches(_context_uid, _repo_id, _commit_id):
             repo_init = self._factory.repo_libgit2(wire)
@@ -449,152 +573,150 @@ class GitRemote(RemoteBase):
             repo.object_store.add_object(blob)
             return blob.id
 
-    # TODO: this is quite complex, check if that can be simplified
+    @reraise_safe_exceptions
+    def create_commit(self, wire, author, committer, message, branch, new_tree_id,
+                      date_args: list | None = None,
+                      parents: list | None = None):
+
+        repo_init = self._factory.repo_libgit2(wire)
+        with repo_init as repo:
+
+            if date_args:
+                current_time, offset = date_args
+
+                kw = {
+                    'time': current_time,
+                    'offset': offset
+                }
+                author = create_signature_from_string(author, **kw)
+                committer = create_signature_from_string(committer, **kw)
+
+            tree = new_tree_id
+            if isinstance(tree, (bytes, str)):
+                # validate this tree is in the repo...
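+                # (a failed lookup in pygit2 raises KeyError, so the
+                #  repo[...] access below doubles as an existence check)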
+ tree = repo[safe_str(tree)].id + + if parents: + # run via sha's and validate them in repo + parents = [repo[c].id for c in parents] + else: + parents = [] + # ensure we COMMIT on top of given branch head + # check if this repo has ANY branches, otherwise it's a new branch case we need to make + if branch in repo.branches.local: + parents += [repo.branches[branch].target] + elif [x for x in repo.branches.local]: + parents += [repo.head.target] + #else: + # in case we want to commit on new branch we create it on top of HEAD + #repo.branches.local.create(branch, repo.revparse_single('HEAD')) + + # # Create a new commit + commit_oid = repo.create_commit( + f'refs/heads/{branch}', # the name of the reference to update + author, # the author of the commit + committer, # the committer of the commit + message, # the commit message + tree, # the tree produced by the index + parents # list of parents for the new commit, usually just one, + ) + + new_commit_id = safe_str(commit_oid) + + return new_commit_id + @reraise_safe_exceptions def commit(self, wire, commit_data, branch, commit_tree, updated, removed): - # Defines the root tree - class _Root(object): - def __repr__(self): - return 'ROOT TREE' - ROOT = _Root() - repo = self._factory.repo(wire) - object_store = repo.object_store - - # Create tree and populates it with blobs - - if commit_tree and repo[commit_tree]: - git_commit = repo[commit_data['parents'][0]] - commit_tree = repo[git_commit.tree] # root tree - else: - commit_tree = objects.Tree() - - for node in updated: - # Compute subdirs if needed - dirpath, nodename = vcspath.split(node['path']) - dirnames = map(safe_str, dirpath and dirpath.split('/') or []) - parent = commit_tree - ancestors = [('', parent)] + def mode2pygit(mode): + """ + git only supports two filemode 644 and 755 - # Tries to dig for the deepest existing tree - while dirnames: - curdir = dirnames.pop(0) - try: - dir_id = parent[curdir][1] - except KeyError: - # put curdir back into dirnames and stops - dirnames.insert(0, curdir) - break - else: - # If found, updates parent - parent = repo[dir_id] - ancestors.append((curdir, parent)) - # Now parent is deepest existing tree and we need to create - # subtrees for dirnames (in reverse order) - # [this only applies for nodes from added] - new_trees = [] + 0o100755 -> 33261 + 0o100644 -> 33188 + """ + return { + 0o100644: pygit2.GIT_FILEMODE_BLOB, + 0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE, + 0o120000: pygit2.GIT_FILEMODE_LINK + }.get(mode) or pygit2.GIT_FILEMODE_BLOB - blob = objects.Blob.from_string(node['content']) + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + repo_index = repo.index - if dirnames: - # If there are trees which should be created we need to build - # them now (in reverse order) - reversed_dirnames = list(reversed(dirnames)) - curtree = objects.Tree() - curtree[node['node_path']] = node['mode'], blob.id - new_trees.append(curtree) - for dirname in reversed_dirnames[:-1]: - newtree = objects.Tree() - newtree[dirname] = (DIR_STAT, curtree.id) - new_trees.append(newtree) - curtree = newtree - parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id) - else: - parent.add(name=node['node_path'], mode=node['mode'], hexsha=blob.id) + commit_parents = None + if commit_tree and commit_data['parents']: + commit_parents = commit_data['parents'] + parent_commit = repo[commit_parents[0]] + repo_index.read_tree(parent_commit.tree) - new_trees.append(parent) - # Update ancestors - reversed_ancestors = reversed( - [(a[1], b[1], b[0]) for a, 
b in zip(ancestors, ancestors[1:])]) - for parent, tree, path in reversed_ancestors: - parent[path] = (DIR_STAT, tree.id) - object_store.add_object(tree) + for pathspec in updated: + blob_id = repo.create_blob(pathspec['content']) + ie = pygit2.IndexEntry(pathspec['path'], blob_id, mode2pygit(pathspec['mode'])) + repo_index.add(ie) + + for pathspec in removed: + repo_index.remove(pathspec) - object_store.add_object(blob) - for tree in new_trees: - object_store.add_object(tree) + # Write changes to the index + repo_index.write() + + # Create a tree from the updated index + written_commit_tree = repo_index.write_tree() + + new_tree_id = written_commit_tree - for node_path in removed: - paths = node_path.split('/') - tree = commit_tree # start with top-level - trees = [{'tree': tree, 'path': ROOT}] - # Traverse deep into the forest... - # resolve final tree by iterating the path. - # e.g a/b/c.txt will get - # - root as tree then - # - 'a' as tree, - # - 'b' as tree, - # - stop at c as blob. - for path in paths: - try: - obj = repo[tree[path][1]] - if isinstance(obj, objects.Tree): - trees.append({'tree': obj, 'path': path}) - tree = obj - except KeyError: - break - #PROBLEM: - """ - We're not editing same reference tree object - """ - # Cut down the blob and all rotten trees on the way back... - for path, tree_data in reversed(zip(paths, trees)): - tree = tree_data['tree'] - tree.__delitem__(path) - # This operation edits the tree, we need to mark new commit back + author = commit_data['author'] + committer = commit_data['committer'] + message = commit_data['message'] + + date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])] - if len(tree) > 0: - # This tree still has elements - don't remove it or any - # of it's parents - break - - object_store.add_object(commit_tree) + new_commit_id = self.create_commit(wire, author, committer, message, branch, + new_tree_id, date_args=date_args, parents=commit_parents) - # Create commit - commit = objects.Commit() - commit.tree = commit_tree.id - for k, v in commit_data.items(): - setattr(commit, k, v) - object_store.add_object(commit) + # libgit2, ensure the branch is there and exists + self.create_branch(wire, branch, new_commit_id) - self.create_branch(wire, branch, commit.id) + # libgit2, set new ref to this created commit + self.set_refs(wire, f'refs/heads/{branch}', new_commit_id) - # dulwich set-ref - ref = 'refs/heads/%s' % branch - repo.refs[ref] = commit.id - - return commit.id + return new_commit_id @reraise_safe_exceptions def pull(self, wire, url, apply_refs=True, refs=None, update_after=False): if url != 'default' and '://' not in url: client = LocalGitClient(url) else: - url_obj = url_parser(url) + url_obj = url_parser(safe_bytes(url)) o = self._build_opener(url) - url, _ = url_obj.authinfo() + url = url_obj.authinfo()[0] client = HttpGitClient(base_url=url, opener=o) repo = self._factory.repo(wire) determine_wants = repo.object_store.determine_wants_all + if refs: - def determine_wants_requested(references): - return [references[r] for r in references if r in refs] + refs: list[bytes] = [ascii_bytes(x) for x in refs] + + def determine_wants_requested(_remote_refs): + determined = [] + for ref_name, ref_hash in _remote_refs.items(): + bytes_ref_name = safe_bytes(ref_name) + + if bytes_ref_name in refs: + bytes_ref_hash = safe_bytes(ref_hash) + determined.append(bytes_ref_hash) + return determined + + # swap with our custom requested wants determine_wants = determine_wants_requested try: remote_refs = 
client.fetch( path=url, target=repo, determine_wants=determine_wants) + except NotGitRepository as e: log.warning( 'Trying to fetch from "%s" failed, not a Git repository.', url) @@ -618,19 +740,27 @@ class GitRemote(RemoteBase): repo[k] = remote_refs[k] if refs and not update_after: + # update to ref # mikhail: explicitly set the head to the last ref. - repo["HEAD"] = remote_refs[refs[-1]] + update_to_ref = refs[-1] + if isinstance(update_after, str): + update_to_ref = update_after + + repo[HEAD_MARKER] = remote_refs[update_to_ref] if update_after: - # we want to checkout HEAD - repo["HEAD"] = remote_refs["HEAD"] + # we want to check out HEAD + repo[HEAD_MARKER] = remote_refs[HEAD_MARKER] index.build_index_from_tree(repo.path, repo.index_path(), - repo.object_store, repo["HEAD"].tree) + repo.object_store, repo[HEAD_MARKER].tree) + + if isinstance(remote_refs, FetchPackResult): + return remote_refs.refs return remote_refs @reraise_safe_exceptions - def sync_fetch(self, wire, url, refs=None, all_refs=False): - repo = self._factory.repo(wire) + def sync_fetch(self, wire, url, refs=None, all_refs=False, **kwargs): + self._factory.repo(wire) if refs and not isinstance(refs, (list, tuple)): refs = [refs] @@ -649,7 +779,7 @@ class GitRemote(RemoteBase): fetch_refs = [] for ref_line in output.splitlines(): - sha, ref = ref_line.split('\t') + sha, ref = ref_line.split(b'\t') sha = sha.strip() if ref in remote_refs: # duplicate, skip @@ -658,32 +788,38 @@ class GitRemote(RemoteBase): log.debug("Skipping peeled reference %s", ref) continue # don't sync HEAD - if ref in ['HEAD']: + if ref in [HEAD_MARKER]: continue remote_refs[ref] = sha if refs and sha in refs: # we filter fetch using our specified refs - fetch_refs.append('{}:{}'.format(ref, ref)) + fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}') elif not refs: - fetch_refs.append('{}:{}'.format(ref, ref)) + fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}') log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs)) if fetch_refs: - for chunk in more_itertools.chunked(fetch_refs, 1024 * 4): + for chunk in more_itertools.chunked(fetch_refs, 128): fetch_refs_chunks = list(chunk) log.debug('Fetching %s refs from import url', len(fetch_refs_chunks)) - _out, _err = self.run_git_command( + self.run_git_command( wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks, fail_on_stderr=False, _copts=self._remote_conf(config), extra_env={'GIT_TERMINAL_PROMPT': '0'}) + if kwargs.get('sync_large_objects'): + self.run_git_command( + wire, ['lfs', 'fetch', url, '--all'], + fail_on_stderr=False, + _copts=self._remote_conf(config), + ) return remote_refs @reraise_safe_exceptions - def sync_push(self, wire, url, refs=None): + def sync_push(self, wire, url, refs=None, **kwargs): if not self.check_url(url, wire): return config = self._wire_to_config(wire) @@ -692,6 +828,12 @@ class GitRemote(RemoteBase): wire, ['push', url, '--mirror'], fail_on_stderr=False, _copts=self._remote_conf(config), extra_env={'GIT_TERMINAL_PROMPT': '0'}) + if kwargs.get('sync_large_objects'): + self.run_git_command( + wire, ['lfs', 'push', url, '--all'], + fail_on_stderr=False, + _copts=self._remote_conf(config), + ) @reraise_safe_exceptions def get_remote_refs(self, wire, url): @@ -704,24 +846,29 @@ class GitRemote(RemoteBase): return repo.get_description() @reraise_safe_exceptions - def get_missing_revs(self, wire, rev1, rev2, path2): + def get_missing_revs(self, wire, rev1, rev2, other_repo_path): + origin_repo_path = wire['path'] repo = 
self._factory.repo(wire) - LocalGitClient(thin_packs=False).fetch(path2, repo) + # fetch from other_repo_path to our origin repo + LocalGitClient(thin_packs=False).fetch(other_repo_path, repo) wire_remote = wire.copy() - wire_remote['path'] = path2 + wire_remote['path'] = other_repo_path repo_remote = self._factory.repo(wire_remote) - LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote) + + # fetch from origin_repo_path to our remote repo + LocalGitClient(thin_packs=False).fetch(origin_repo_path, repo_remote) revs = [ x.commit.id - for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])] + for x in repo_remote.get_walker(include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])] return revs @reraise_safe_exceptions def get_object(self, wire, sha, maybe_unreachable=False): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _get_object(_context_uid, _repo_id, _sha): repo_init = self._factory.repo_libgit2(wire) @@ -766,11 +913,11 @@ class GitRemote(RemoteBase): raise exceptions.LookupException(e)(missing_commit_err) commit_id = commit.hex - type_id = commit.type + type_str = commit.type_str return { 'id': commit_id, - 'type': self._type_id_to_name(type_id), + 'type': type_str, 'commit_id': commit_id, 'idx': 0 } @@ -781,6 +928,7 @@ class GitRemote(RemoteBase): def get_refs(self, wire): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _get_refs(_context_uid, _repo_id): @@ -788,7 +936,7 @@ class GitRemote(RemoteBase): with repo_init as repo: regex = re.compile('^refs/(heads|tags)/') return {x.name: x.target.hex for x in - filter(lambda ref: regex.match(ref.name) ,repo.listall_reference_objects())} + [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]} return _get_refs(context_uid, repo_id) @@ -796,13 +944,14 @@ class GitRemote(RemoteBase): def get_branch_pointers(self, wire): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _get_branch_pointers(_context_uid, _repo_id): repo_init = self._factory.repo_libgit2(wire) regex = re.compile('^refs/heads') with repo_init as repo: - branches = filter(lambda ref: regex.match(ref.name), repo.listall_reference_objects()) + branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)] return {x.target.hex: x.shorthand for x in branches} return _get_branch_pointers(context_uid, repo_id) @@ -811,6 +960,7 @@ class GitRemote(RemoteBase): def head(self, wire, show_exc=True): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _head(_context_uid, _repo_id, _show_exc): repo_init = self._factory.repo_libgit2(wire) @@ -824,19 +974,22 @@ class GitRemote(RemoteBase): @reraise_safe_exceptions def init(self, wire): - repo_path = str_to_dulwich(wire['path']) - self.repo = Repo.init(repo_path) + repo_path = safe_str(wire['path']) + os.makedirs(repo_path, mode=0o755) + pygit2.init_repository(repo_path, bare=False) @reraise_safe_exceptions def init_bare(self, wire): - repo_path = str_to_dulwich(wire['path']) - self.repo = Repo.init_bare(repo_path) + repo_path = safe_str(wire['path']) + os.makedirs(repo_path, mode=0o755) + pygit2.init_repository(repo_path, bare=True) @reraise_safe_exceptions def revision(self, wire, rev): cache_on, 
context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _revision(_context_uid, _repo_id, _rev): repo_init = self._factory.repo_libgit2(wire) @@ -856,6 +1009,7 @@ class GitRemote(RemoteBase): def date(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _date(_repo_id, _commit_id): repo_init = self._factory.repo_libgit2(wire) @@ -876,6 +1030,7 @@ class GitRemote(RemoteBase): def author(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _author(_repo_id, _commit_id): repo_init = self._factory.repo_libgit2(wire) @@ -888,12 +1043,12 @@ class GitRemote(RemoteBase): author = commit.get_object().author if author.email: - return u"{} <{}>".format(author.name, author.email) + return f"{author.name} <{author.email}>" try: - return u"{}".format(author.name) + return f"{author.name}" except Exception: - return u"{}".format(safe_unicode(author.raw_name)) + return f"{safe_str(author.raw_name)}" return _author(repo_id, commit_id) @@ -901,6 +1056,7 @@ class GitRemote(RemoteBase): def message(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _message(_repo_id, _commit_id): repo_init = self._factory.repo_libgit2(wire) @@ -913,6 +1069,7 @@ class GitRemote(RemoteBase): def parents(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _parents(_repo_id, _commit_id): repo_init = self._factory.repo_libgit2(wire) @@ -930,17 +1087,23 @@ class GitRemote(RemoteBase): def children(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + + head = self.head(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _children(_repo_id, _commit_id): + output, __ = self.run_git_command( - wire, ['rev-list', '--all', '--children']) + wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}']) child_ids = [] - pat = re.compile(r'^%s' % commit_id) - for l in output.splitlines(): - if pat.match(l): - found_ids = l.split(' ')[1:] + pat = re.compile(fr'^{commit_id}') + for line in output.splitlines(): + line = safe_str(line) + if pat.match(line): + found_ids = line.split(' ')[1:] child_ids.extend(found_ids) + break return child_ids return _children(repo_id, commit_id) @@ -952,10 +1115,22 @@ class GitRemote(RemoteBase): repo.references.create(key, value, force=True) @reraise_safe_exceptions + def update_refs(self, wire, key, value): + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + if key not in repo.references: + raise ValueError(f'Reference {key} not found in the repository') + repo.references.create(key, value, force=True) + + @reraise_safe_exceptions def create_branch(self, wire, branch_name, commit_id, force=False): repo_init = self._factory.repo_libgit2(wire) with repo_init as repo: - commit = repo[commit_id] + if commit_id: + commit = repo[commit_id] + else: + # if commit is not given just use the HEAD + commit = repo.head() if force: repo.branches.local.create(branch_name, commit, force=force) @@ -973,23 +1148,39 @@ class GitRemote(RemoteBase): def tag_remove(self, wire, 
tag_name): repo_init = self._factory.repo_libgit2(wire) with repo_init as repo: - key = 'refs/tags/{}'.format(tag_name) + key = f'refs/tags/{tag_name}' repo.references.delete(key) @reraise_safe_exceptions def tree_changes(self, wire, source_id, target_id): - # TODO(marcink): remove this seems it's only used by tests repo = self._factory.repo(wire) + # source can be empty + source_id = safe_bytes(source_id if source_id else b'') + target_id = safe_bytes(target_id) + source = repo[source_id].tree if source_id else None target = repo[target_id].tree result = repo.object_store.tree_changes(source, target) - return list(result) + + added = set() + modified = set() + deleted = set() + for (old_path, new_path), (_, _), (_, _) in list(result): + if new_path and old_path: + modified.add(new_path) + elif new_path and not old_path: + added.add(new_path) + elif not new_path and old_path: + deleted.add(old_path) + + return list(added), list(modified), list(deleted) @reraise_safe_exceptions def tree_and_type_for_path(self, wire, commit_id, path): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path): repo_init = self._factory.repo_libgit2(wire) @@ -1001,13 +1192,14 @@ class GitRemote(RemoteBase): except KeyError: return None, None, None - return tree.id.hex, tree.type, tree.filemode + return tree.id.hex, tree.type_str, tree.filemode return _tree_and_type_for_path(context_uid, repo_id, commit_id, path) @reraise_safe_exceptions def tree_items(self, wire, tree_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _tree_items(_repo_id, _tree_id): @@ -1016,13 +1208,13 @@ class GitRemote(RemoteBase): try: tree = repo[tree_id] except KeyError: - raise ObjectMissing('No tree with id: {}'.format(tree_id)) + raise ObjectMissing(f'No tree with id: {tree_id}') result = [] for item in tree: item_sha = item.hex item_mode = item.filemode - item_type = item.type + item_type = item.type_str if item_type == 'commit': # NOTE(marcink): submodules we translate to 'link' for backward compat @@ -1039,7 +1231,7 @@ class GitRemote(RemoteBase): """ flags = [ - '-U%s' % context, '--patch', + f'-U{context}', '--patch', '--binary', '--find-renames', '--no-indent-heuristic', @@ -1066,7 +1258,7 @@ class GitRemote(RemoteBase): lines = diff.splitlines() x = 0 for line in lines: - if line.startswith('diff'): + if line.startswith(b'diff'): break x += 1 # Append new line just like 'diff' command do @@ -1076,6 +1268,7 @@ class GitRemote(RemoteBase): @reraise_safe_exceptions def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context): repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: swap = True flags = 0 @@ -1101,15 +1294,17 @@ class GitRemote(RemoteBase): if file_filter: for p in diff_obj: if p.delta.old_file.path == file_filter: - return p.patch or '' + return BytesEnvelope(p.data) or BytesEnvelope(b'') # fo matching path == no diff - return '' - return diff_obj.patch or '' + return BytesEnvelope(b'') + + return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'') @reraise_safe_exceptions def node_history(self, wire, commit_id, path, limit): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _node_history(_context_uid, _repo_id, 
_commit_id, _path, _limit): # optimize for n==1, rev-list is much faster for that use-case @@ -1122,14 +1317,14 @@ class GitRemote(RemoteBase): cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path]) output, __ = self.run_git_command(wire, cmd) - commit_ids = re.findall(r'[0-9a-fA-F]{40}', output) + commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output) return [x for x in commit_ids] return _node_history(context_uid, repo_id, commit_id, path, limit) @reraise_safe_exceptions - def node_annotate(self, wire, commit_id, path): - + def node_annotate_legacy(self, wire, commit_id, path): + # note: replaced by pygit2 implementation cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path] # -l ==> outputs long shas (and we need all 40 characters) # --root ==> doesn't put '^' character for boundaries @@ -1137,22 +1332,44 @@ class GitRemote(RemoteBase): output, __ = self.run_git_command(wire, cmd) result = [] - for i, blame_line in enumerate(output.split('\n')[:-1]): + for i, blame_line in enumerate(output.splitlines()[:-1]): line_no = i + 1 - commit_id, line = re.split(r' ', blame_line, 1) - result.append((line_no, commit_id, line)) + blame_commit_id, line = re.split(rb' ', blame_line, 1) + result.append((line_no, blame_commit_id, line)) + return result @reraise_safe_exceptions - def update_server_info(self, wire): - repo = self._factory.repo(wire) - update_server_info(repo) + def node_annotate(self, wire, commit_id, path): + + result_libgit = [] + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + commit = repo[commit_id] + blame_obj = repo.blame(path, newest_commit=commit_id) + for i, line in enumerate(commit.tree[path].data.splitlines()): + line_no = i + 1 + hunk = blame_obj.for_line(line_no) + blame_commit_id = hunk.final_commit_id.hex + + result_libgit.append((line_no, blame_commit_id, line)) + + return BinaryEnvelope(result_libgit) + + @reraise_safe_exceptions + def update_server_info(self, wire, force=False): + cmd = ['update-server-info'] + if force: + cmd += ['--force'] + output, __ = self.run_git_command(wire, cmd) + return output.splitlines() @reraise_safe_exceptions def get_all_commit_ids(self, wire): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _get_all_commit_ids(_context_uid, _repo_id): @@ -1163,11 +1380,22 @@ class GitRemote(RemoteBase): except Exception: # Can be raised for empty repositories return [] + + @region.conditional_cache_on_arguments(condition=cache_on) + def _get_all_commit_ids_pygit2(_context_uid, _repo_id): + repo_init = self._factory.repo_libgit2(wire) + from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL + results = [] + with repo_init as repo: + for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE): + results.append(commit.id.hex) + return _get_all_commit_ids(context_uid, repo_id) @reraise_safe_exceptions def run_git_command(self, wire, cmd, **opts): path = wire.get('path', None) + debug_mode = rhodecode.ConfigGet().get_bool('debug') if path and os.path.isdir(path): opts['cwd'] = path @@ -1176,7 +1404,7 @@ class GitRemote(RemoteBase): _copts = [] del opts['_bare'] else: - _copts = ['-c', 'core.quotepath=false', ] + _copts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false'] safe_call = False if '_safe' in opts: # no exc on failure @@ -1203,13 +1431,17 @@ class GitRemote(RemoteBase): _opts.update(opts) proc = subprocessio.SubprocessIOChunker(cmd, **_opts) - return 
''.join(proc), ''.join(proc.error) - except (EnvironmentError, OSError) as err: - cmd = ' '.join(cmd) # human friendly CMD - tb_err = ("Couldn't run git command (%s).\n" - "Original error was:%s\n" - "Call options:%s\n" - % (cmd, err, _opts)) + return b''.join(proc), b''.join(proc.stderr) + except OSError as err: + cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD + call_opts = {} + if debug_mode: + call_opts = _opts + + tb_err = ("Couldn't run git command ({}).\n" + "Original error was:{}\n" + "Call options:{}\n" + .format(cmd, err, call_opts)) log.exception(tb_err) if safe_call: return '', err @@ -1224,6 +1456,9 @@ class GitRemote(RemoteBase): from vcsserver.hook_utils import install_git_hooks bare = self.bare(wire) path = wire['path'] + binary_dir = settings.BINARY_DIR + if binary_dir: + os.path.join(binary_dir, 'python3') return install_git_hooks(path, bare, force_create=force) @reraise_safe_exceptions @@ -1240,13 +1475,15 @@ class GitRemote(RemoteBase): @reraise_safe_exceptions def set_head_ref(self, wire, head_name): log.debug('Setting refs/head to `%s`', head_name) - cmd = ['symbolic-ref', 'HEAD', 'refs/heads/%s' % head_name] - output, __ = self.run_git_command(wire, cmd) - return [head_name] + output.splitlines() + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + repo.set_head(f'refs/heads/{head_name}') + + return [head_name] + [f'set HEAD to refs/heads/{head_name}'] @reraise_safe_exceptions - def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path, - archive_dir_name, commit_id): + def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path, + archive_dir_name, commit_id, cache_config): def file_walker(_commit_id, path): repo_init = self._factory.repo_libgit2(wire) @@ -1262,20 +1499,20 @@ class GitRemote(RemoteBase): try: tree = repo[tree_id] except KeyError: - raise ObjectMissing('No tree with id: {}'.format(tree_id)) + raise ObjectMissing(f'No tree with id: {tree_id}') index = LibGit2Index.Index() index.read_tree(tree) file_iter = index - for fn in file_iter: - file_path = fn.path - mode = fn.mode + for file_node in file_iter: + file_path = file_node.path + mode = file_node.mode is_link = stat.S_ISLNK(mode) if mode == pygit2.GIT_FILEMODE_COMMIT: log.debug('Skipping path %s as a commit node', file_path) continue - yield ArchiveNode(file_path, mode, is_link, repo[fn.hex].read_raw) + yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw) - return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path, - archive_dir_name, commit_id) + return store_archive_in_cache( + file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config) diff --git a/vcsserver/hg.py b/vcsserver/remote/hg_remote.py rename from vcsserver/hg.py rename to vcsserver/remote/hg_remote.py --- a/vcsserver/hg.py +++ b/vcsserver/remote/hg_remote.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. 
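The diff now crosses from the Git remote into the renamed Mercurial remote. Both classes lean on the same memoization idiom seen throughout the hunks above: compute (cache_on, context_uid, repo_id) once per call, then guard the inner worker with region.conditional_cache_on_arguments(condition=cache_on). A minimal self-contained sketch of that shape, using plain functools; the real region object is a dogpile-backed cache with invalidation, so this toy version is illustrative only:

    import functools

    def conditional_cache_on_arguments(condition: bool, cache: dict):
        # sketch only: memoize on positional arguments when `condition`
        # is true, otherwise call straight through
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args):
                if not condition:
                    return func(*args)
                key = (func.__name__, *args)
                if key not in cache:
                    cache[key] = func(*args)
                return cache[key]
            return wrapper
        return decorator

    _cache: dict = {}

    @conditional_cache_on_arguments(condition=True, cache=_cache)
    def _is_binary(_repo_id, _tree_id):
        print('computed once per (repo, tree) pair')
        return _tree_id.endswith('-bin')

    _is_binary('repo-1', 'tree-a')   # computes
    _is_binary('repo-1', 'tree-a')   # served from cache

Keying on the plain identifiers (_repo_id, _tree_id) rather than the wire dict is what lets the cached entries survive across requests that carry different wire payloads for the same repository.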
-# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,33 +14,73 @@ # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -import functools + +import binascii import io import logging -import os import stat -import urllib -import urllib2 -import traceback +import sys +import urllib.request +import urllib.parse +import hashlib -from hgext import largefiles, rebase, purge -from hgext.strip import strip as hgext_strip +from hgext import largefiles, rebase + from mercurial import commands from mercurial import unionrepo from mercurial import verify from mercurial import repair +from mercurial.error import AmbiguousPrefixLookupError import vcsserver from vcsserver import exceptions -from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original, archive_repo, ArchiveNode +from vcsserver.base import ( + RepoFactory, + obfuscate_qs, + raise_from_original, + store_archive_in_cache, + ArchiveNode, + BytesEnvelope, + BinaryEnvelope, +) from vcsserver.hgcompat import ( - archival, bin, clone, config as hgconfig, diffopts, hex, get_ctx, - hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler, - makepeer, instance, match, memctx, exchange, memfilectx, nullrev, hg_merge, - patch, peer, revrange, ui, hg_tag, Abort, LookupError, RepoError, - RepoLookupError, InterventionRequired, RequirementError, - alwaysmatcher, patternmatcher, hgutil) + archival, + bin, + clone, + config as hgconfig, + diffopts, + hex, + get_ctx, + hg_url as url_parser, + httpbasicauthhandler, + httpdigestauthhandler, + makepeer, + instance, + match, + memctx, + exchange, + memfilectx, + nullrev, + hg_merge, + patch, + peer, + revrange, + ui, + hg_tag, + Abort, + LookupError, + RepoError, + RepoLookupError, + InterventionRequired, + RequirementError, + alwaysmatcher, + patternmatcher, + hgext_strip, +) +from vcsserver.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes, convert_to_str from vcsserver.vcs_base import RemoteBase +from vcsserver.config import hooks as hooks_config +from vcsserver.lib.exc_tracking import format_exc log = logging.getLogger(__name__) @@ -48,25 +88,31 @@ log = logging.getLogger(__name__) def make_ui_from_config(repo_config): class LoggingUI(ui.ui): + def status(self, *msg, **opts): - log.info(' '.join(msg).rstrip('\n')) - super(LoggingUI, self).status(*msg, **opts) + str_msg = map(safe_str, msg) + log.info(' '.join(str_msg).rstrip('\n')) + #super(LoggingUI, self).status(*msg, **opts) def warn(self, *msg, **opts): - log.warn(' '.join(msg).rstrip('\n')) - super(LoggingUI, self).warn(*msg, **opts) + str_msg = map(safe_str, msg) + log.warning('ui_logger:'+' '.join(str_msg).rstrip('\n')) + #super(LoggingUI, self).warn(*msg, **opts) def error(self, *msg, **opts): - log.error(' '.join(msg).rstrip('\n')) - super(LoggingUI, self).error(*msg, **opts) + str_msg = map(safe_str, msg) + log.error('ui_logger:'+' '.join(str_msg).rstrip('\n')) + #super(LoggingUI, self).error(*msg, **opts) def note(self, *msg, **opts): - log.info(' '.join(msg).rstrip('\n')) - super(LoggingUI, self).note(*msg, **opts) + str_msg = map(safe_str, msg) + log.info('ui_logger:'+' '.join(str_msg).rstrip('\n')) + #super(LoggingUI, self).note(*msg, **opts) def debug(self, *msg, **opts): - 
log.debug(' '.join(msg).rstrip('\n')) - super(LoggingUI, self).debug(*msg, **opts) + str_msg = map(safe_str, msg) + log.debug('ui_logger:'+' '.join(str_msg).rstrip('\n')) + #super(LoggingUI, self).debug(*msg, **opts) baseui = LoggingUI() @@ -76,26 +122,26 @@ def make_ui_from_config(repo_config): baseui._tcfg = hgconfig.config() for section, option, value in repo_config: - baseui.setconfig(section, option, value) + baseui.setconfig(ascii_bytes(section), ascii_bytes(option), ascii_bytes(value)) # make our hgweb quiet so it doesn't print output - baseui.setconfig('ui', 'quiet', 'true') + baseui.setconfig(b'ui', b'quiet', b'true') - baseui.setconfig('ui', 'paginate', 'never') + baseui.setconfig(b'ui', b'paginate', b'never') # for better Error reporting of Mercurial - baseui.setconfig('ui', 'message-output', 'stderr') + baseui.setconfig(b'ui', b'message-output', b'stderr') # force mercurial to only use 1 thread, otherwise it may try to set a # signal in a non-main thread, thus generating a ValueError. - baseui.setconfig('worker', 'numcpus', 1) + baseui.setconfig(b'worker', b'numcpus', 1) # If there is no config for the largefiles extension, we explicitly disable # it here. This overrides settings from repositories hgrc file. Recent # mercurial versions enable largefiles in hgrc on clone from largefile # repo. - if not baseui.hasconfig('extensions', 'largefiles'): + if not baseui.hasconfig(b'extensions', b'largefiles'): log.debug('Explicitly disable largefiles extension for repo.') - baseui.setconfig('extensions', 'largefiles', '!') + baseui.setconfig(b'extensions', b'largefiles', b'!') return baseui @@ -107,19 +153,19 @@ def reraise_safe_exceptions(func): try: return func(*args, **kwargs) except (Abort, InterventionRequired) as e: - raise_from_original(exceptions.AbortException(e)) + raise_from_original(exceptions.AbortException(e), e) except RepoLookupError as e: - raise_from_original(exceptions.LookupException(e)) + raise_from_original(exceptions.LookupException(e), e) except RequirementError as e: - raise_from_original(exceptions.RequirementException(e)) + raise_from_original(exceptions.RequirementException(e), e) except RepoError as e: - raise_from_original(exceptions.VcsException(e)) + raise_from_original(exceptions.VcsException(e), e) except LookupError as e: - raise_from_original(exceptions.LookupException(e)) + raise_from_original(exceptions.LookupException(e), e) except Exception as e: if not hasattr(e, '_vcs_kind'): log.exception("Unhandled exception in hg remote call") - raise_from_original(exceptions.UnhandledException(e)) + raise_from_original(exceptions.UnhandledException(e), e) raise return wrapper @@ -130,9 +176,18 @@ class MercurialFactory(RepoFactory): def _create_config(self, config, hooks=True): if not hooks: - hooks_to_clean = frozenset(( - 'changegroup.repo_size', 'preoutgoing.pre_pull', - 'outgoing.pull_logger', 'prechangegroup.pre_push')) + + hooks_to_clean = { + + hooks_config.HOOK_REPO_SIZE, + hooks_config.HOOK_PRE_PULL, + hooks_config.HOOK_PULL, + + hooks_config.HOOK_PRE_PUSH, + # TODO: what about PRETXT, this was disabled in pre 5.0.0 + hooks_config.HOOK_PRETX_PUSH, + + } new_config = [] for section, option, value in config: if section == 'hooks' and option in hooks_to_clean: @@ -145,7 +200,9 @@ class MercurialFactory(RepoFactory): def _create_repo(self, wire, create): baseui = self._create_config(wire["config"]) - return instance(baseui, wire["path"], create) + repo = instance(baseui, safe_bytes(wire["path"]), create) + log.debug('repository created: got HG object: 
%s', repo) + return repo def repo(self, wire, create=False): """ @@ -155,7 +212,7 @@ class MercurialFactory(RepoFactory): def patch_ui_message_output(baseui): - baseui.setconfig('ui', 'quiet', 'false') + baseui.setconfig(b'ui', b'quiet', b'false') output = io.BytesIO() def write(data, **unused_kwargs): @@ -169,6 +226,22 @@ def patch_ui_message_output(baseui): return baseui, output +def get_obfuscated_url(url_obj): + url_obj.passwd = b'*****' if url_obj.passwd else url_obj.passwd + url_obj.query = obfuscate_qs(url_obj.query) + obfuscated_uri = str(url_obj) + return obfuscated_uri + + +def normalize_url_for_hg(url: str): + _proto = None + + if '+' in url[:url.find('://')]: + _proto = url[0:url.find('+')] + url = url[url.find('+') + 1:] + return url, _proto + + class HgRemote(RemoteBase): def __init__(self, factory): @@ -187,6 +260,13 @@ class HgRemote(RemoteBase): "hidden": self.ctx_hidden, "_file_paths": self.ctx_list, } + self._bulk_file_methods = { + "size": self.fctx_size, + "data": self.fctx_node_data, + "flags": self.fctx_flags, + "is_binary": self.is_binary, + "md5": self.md5_hash, + } def _get_ctx(self, repo, ref): return get_ctx(repo, ref) @@ -194,7 +274,7 @@ class HgRemote(RemoteBase): @reraise_safe_exceptions def discover_hg_version(self): from mercurial import util - return util.version() + return safe_str(util.version()) @reraise_safe_exceptions def is_empty(self, wire): @@ -210,10 +290,11 @@ class HgRemote(RemoteBase): def bookmarks(self, wire): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _bookmarks(_context_uid, _repo_id): repo = self._factory.repo(wire) - return dict(repo._bookmarks) + return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo._bookmarks.items()} return _bookmarks(context_uid, repo_id) @@ -221,16 +302,17 @@ class HgRemote(RemoteBase): def branches(self, wire, normal, closed): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _branches(_context_uid, _repo_id, _normal, _closed): repo = self._factory.repo(wire) iter_branches = repo.branchmap().iterbranches() bt = {} - for branch_name, _heads, tip, is_closed in iter_branches: + for branch_name, _heads, tip_node, is_closed in iter_branches: if normal and not is_closed: - bt[branch_name] = tip + bt[safe_str(branch_name)] = ascii_str(hex(tip_node)) if closed and is_closed: - bt[branch_name] = tip + bt[safe_str(branch_name)] = ascii_str(hex(tip_node)) return bt @@ -240,16 +322,18 @@ class HgRemote(RemoteBase): def bulk_request(self, wire, commit_id, pre_load): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _bulk_request(_repo_id, _commit_id, _pre_load): result = {} for attr in pre_load: try: method = self._bulk_methods[attr] + wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache result[attr] = method(wire, commit_id) except KeyError as e: raise exceptions.VcsException(e)( - 'Unknown bulk attribute: "%s"' % attr) + f'Unknown bulk attribute: "{attr}"') return result return _bulk_request(repo_id, commit_id, sorted(pre_load)) @@ -258,6 +342,7 @@ class HgRemote(RemoteBase): def ctx_branch(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _ctx_branch(_repo_id, 
_commit_id): repo = self._factory.repo(wire) @@ -269,6 +354,7 @@ class HgRemote(RemoteBase): def ctx_date(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _ctx_date(_repo_id, _commit_id): repo = self._factory.repo(wire) @@ -286,6 +372,7 @@ class HgRemote(RemoteBase): def ctx_files(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _ctx_files(_repo_id, _commit_id): repo = self._factory.repo(wire) @@ -304,6 +391,7 @@ class HgRemote(RemoteBase): def ctx_parents(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _ctx_parents(_repo_id, _commit_id): repo = self._factory.repo(wire) @@ -317,6 +405,7 @@ class HgRemote(RemoteBase): def ctx_children(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _ctx_children(_repo_id, _commit_id): repo = self._factory.repo(wire) @@ -330,6 +419,7 @@ class HgRemote(RemoteBase): def ctx_phase(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _ctx_phase(_context_uid, _repo_id, _commit_id): repo = self._factory.repo(wire) @@ -342,6 +432,7 @@ class HgRemote(RemoteBase): def ctx_obsolete(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _ctx_obsolete(_context_uid, _repo_id, _commit_id): repo = self._factory.repo(wire) @@ -353,6 +444,7 @@ class HgRemote(RemoteBase): def ctx_hidden(self, wire, commit_id): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _ctx_hidden(_context_uid, _repo_id, _commit_id): repo = self._factory.repo(wire) @@ -384,46 +476,42 @@ class HgRemote(RemoteBase): @reraise_safe_exceptions def check_url(self, url, config): - _proto = None - if '+' in url[:url.find('://')]: - _proto = url[0:url.find('+')] - url = url[url.find('+') + 1:] + url, _proto = normalize_url_for_hg(url) + url_obj = url_parser(safe_bytes(url)) + + test_uri = safe_str(url_obj.authinfo()[0]) + authinfo = url_obj.authinfo()[1] + obfuscated_uri = get_obfuscated_url(url_obj) + log.info("Checking URL for remote cloning/import: %s", obfuscated_uri) + handlers = [] - url_obj = url_parser(url) - test_uri, authinfo = url_obj.authinfo() - url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd - url_obj.query = obfuscate_qs(url_obj.query) - - cleaned_uri = str(url_obj) - log.info("Checking URL for remote cloning/import: %s", cleaned_uri) - if authinfo: # create a password manager - passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm() - passmgr.add_password(*authinfo) + passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm() + passmgr.add_password(*convert_to_str(authinfo)) handlers.extend((httpbasicauthhandler(passmgr), httpdigestauthhandler(passmgr))) - o = urllib2.build_opener(*handlers) + o = urllib.request.build_opener(*handlers) o.addheaders = [('Content-Type', 'application/mercurial-0.1'), ('Accept', 'application/mercurial-0.1')] q = {"cmd": 'between'} - 
q.update({'pairs': "%s-%s" % ('0' * 40, '0' * 40)}) - qs = '?%s' % urllib.urlencode(q) - cu = "%s%s" % (test_uri, qs) - req = urllib2.Request(cu, None, {}) + q.update({'pairs': "{}-{}".format('0' * 40, '0' * 40)}) + qs = f'?{urllib.parse.urlencode(q)}' + cu = f"{test_uri}{qs}" try: - log.debug("Trying to open URL %s", cleaned_uri) + req = urllib.request.Request(cu, None, {}) + log.debug("Trying to open URL %s", obfuscated_uri) resp = o.open(req) if resp.code != 200: raise exceptions.URLError()('Return Code is not 200') except Exception as e: - log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True) + log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True) # means it cannot be cloned - raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e)) + raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}") # now check if it's a proper hg repo, but don't do it for svn try: @@ -432,19 +520,17 @@ class HgRemote(RemoteBase): else: # check for pure hg repos log.debug( - "Verifying if URL is a Mercurial repository: %s", - cleaned_uri) + "Verifying if URL is a Mercurial repository: %s", obfuscated_uri) ui = make_ui_from_config(config) - peer_checker = makepeer(ui, url) - peer_checker.lookup('tip') + peer_checker = makepeer(ui, safe_bytes(url)) + peer_checker.lookup(b'tip') except Exception as e: log.warning("URL is not a valid Mercurial repository: %s", - cleaned_uri) + obfuscated_uri) raise exceptions.URLError(e)( - "url [%s] does not look like an hg repo org_exc: %s" - % (cleaned_uri, e)) + f"url [{obfuscated_uri}] does not look like an hg repo org_exc: {e}") - log.info("URL is a valid Mercurial repository: %s", cleaned_uri) + log.info("URL is a valid Mercurial repository: %s", obfuscated_uri) return True @reraise_safe_exceptions @@ -452,14 +538,17 @@ class HgRemote(RemoteBase): repo = self._factory.repo(wire) if file_filter: - match_filter = match(file_filter[0], '', [file_filter[1]]) + # unpack the file-filter + repo_path, node_path = file_filter + match_filter = match(safe_bytes(repo_path), b'', [safe_bytes(node_path)]) else: match_filter = file_filter opts = diffopts(git=opt_git, ignorews=opt_ignorews, context=context, showfunc=1) try: - return "".join(patch.diff( - repo, node1=commit_id_1, node2=commit_id_2, match=match_filter, opts=opts)) + diff_iter = patch.diff( + repo, node1=commit_id_1, node2=commit_id_2, match=match_filter, opts=opts) + return BytesEnvelope(b"".join(diff_iter)) except RepoLookupError as e: raise exceptions.LookupException(e)() @@ -467,23 +556,27 @@ class HgRemote(RemoteBase): def node_history(self, wire, revision, path, limit): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _node_history(_context_uid, _repo_id, _revision, _path, _limit): repo = self._factory.repo(wire) ctx = self._get_ctx(repo, revision) - fctx = ctx.filectx(path) + fctx = ctx.filectx(safe_bytes(path)) def history_iter(): limit_rev = fctx.rev() - for obj in reversed(list(fctx.filelog())): - obj = fctx.filectx(obj) - ctx = obj.changectx() - if ctx.hidden() or ctx.obsolete(): + + for fctx_candidate in reversed(list(fctx.filelog())): + f_obj = fctx.filectx(fctx_candidate) + + # NOTE: This can be problematic...we can hide ONLY history node resulting in empty history + _ctx = f_obj.changectx() + if _ctx.hidden() or _ctx.obsolete(): continue - if limit_rev >= obj.rev(): - yield obj + if limit_rev >= f_obj.rev(): + yield f_obj history = [] for cnt, obj in 
enumerate(history_iter()): @@ -495,14 +588,15 @@ class HgRemote(RemoteBase): return _node_history(context_uid, repo_id, revision, path, limit) @reraise_safe_exceptions - def node_history_untill(self, wire, revision, path, limit): + def node_history_until(self, wire, revision, path, limit): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _node_history_until(_context_uid, _repo_id): repo = self._factory.repo(wire) ctx = self._get_ctx(repo, revision) - fctx = ctx.filectx(path) + fctx = ctx.filectx(safe_bytes(path)) file_log = list(fctx.filelog()) if limit: @@ -513,35 +607,55 @@ class HgRemote(RemoteBase): return _node_history_until(context_uid, repo_id, revision, path, limit) @reraise_safe_exceptions + def bulk_file_request(self, wire, commit_id, path, pre_load): + cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) + + @region.conditional_cache_on_arguments(condition=cache_on) + def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load): + result = {} + for attr in pre_load: + try: + method = self._bulk_file_methods[attr] + wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache + result[attr] = method(wire, _commit_id, _path) + except KeyError as e: + raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"') + return result + + return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load))) + + @reraise_safe_exceptions def fctx_annotate(self, wire, revision, path): repo = self._factory.repo(wire) ctx = self._get_ctx(repo, revision) - fctx = ctx.filectx(path) + fctx = ctx.filectx(safe_bytes(path)) result = [] for i, annotate_obj in enumerate(fctx.annotate(), 1): ln_no = i sha = hex(annotate_obj.fctx.node()) content = annotate_obj.text - result.append((ln_no, sha, content)) - return result + result.append((ln_no, ascii_str(sha), content)) + return BinaryEnvelope(result) @reraise_safe_exceptions def fctx_node_data(self, wire, revision, path): repo = self._factory.repo(wire) ctx = self._get_ctx(repo, revision) - fctx = ctx.filectx(path) - return fctx.data() + fctx = ctx.filectx(safe_bytes(path)) + return BytesEnvelope(fctx.data()) @reraise_safe_exceptions def fctx_flags(self, wire, commit_id, path): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _fctx_flags(_repo_id, _commit_id, _path): repo = self._factory.repo(wire) ctx = self._get_ctx(repo, commit_id) - fctx = ctx.filectx(path) + fctx = ctx.filectx(safe_bytes(path)) return fctx.flags() return _fctx_flags(repo_id, commit_id, path) @@ -550,11 +664,12 @@ class HgRemote(RemoteBase): def fctx_size(self, wire, commit_id, path): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _fctx_size(_repo_id, _revision, _path): repo = self._factory.repo(wire) ctx = self._get_ctx(repo, commit_id) - fctx = ctx.filectx(path) + fctx = ctx.filectx(safe_bytes(path)) return fctx.size() return _fctx_size(repo_id, commit_id, path) @@ -562,44 +677,59 @@ class HgRemote(RemoteBase): def get_all_commit_ids(self, wire, name): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _get_all_commit_ids(_context_uid, _repo_id, _name): repo = self._factory.repo(wire) - repo = repo.filtered(name) - 
revs = map(lambda x: hex(x[7]), repo.changelog.index) + revs = [ascii_str(repo[x].hex()) for x in repo.filtered(b'visible').changelog.revs()] return revs return _get_all_commit_ids(context_uid, repo_id, name) @reraise_safe_exceptions def get_config_value(self, wire, section, name, untrusted=False): repo = self._factory.repo(wire) - return repo.ui.config(section, name, untrusted=untrusted) + return repo.ui.config(ascii_bytes(section), ascii_bytes(name), untrusted=untrusted) @reraise_safe_exceptions def is_large_file(self, wire, commit_id, path): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _is_large_file(_context_uid, _repo_id, _commit_id, _path): - return largefiles.lfutil.isstandin(path) + return largefiles.lfutil.isstandin(safe_bytes(path)) return _is_large_file(context_uid, repo_id, commit_id, path) @reraise_safe_exceptions def is_binary(self, wire, revision, path): cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) - region = self._region(wire) @region.conditional_cache_on_arguments(condition=cache_on) def _is_binary(_repo_id, _sha, _path): repo = self._factory.repo(wire) ctx = self._get_ctx(repo, revision) - fctx = ctx.filectx(path) + fctx = ctx.filectx(safe_bytes(path)) return fctx.isbinary() return _is_binary(repo_id, revision, path) @reraise_safe_exceptions + def md5_hash(self, wire, revision, path): + cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) + + @region.conditional_cache_on_arguments(condition=cache_on) + def _md5_hash(_repo_id, _sha, _path): + repo = self._factory.repo(wire) + ctx = self._get_ctx(repo, revision) + fctx = ctx.filectx(safe_bytes(path)) + return hashlib.md5(fctx.data()).hexdigest() + + return _md5_hash(repo_id, revision, path) + + @reraise_safe_exceptions def in_largefiles_store(self, wire, sha): repo = self._factory.repo(wire) return largefiles.lfutil.instore(repo, sha) @@ -627,11 +757,10 @@ class HgRemote(RemoteBase): @reraise_safe_exceptions def lookup(self, wire, revision, both): cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) - region = self._region(wire) @region.conditional_cache_on_arguments(condition=cache_on) def _lookup(_context_uid, _repo_id, _revision, _both): - repo = self._factory.repo(wire) rev = _revision if isinstance(rev, int): @@ -644,11 +773,15 @@ class HgRemote(RemoteBase): rev = rev + -1 try: ctx = self._get_ctx(repo, rev) - except (TypeError, RepoLookupError) as e: - e._org_exc_tb = traceback.format_exc() + except AmbiguousPrefixLookupError: + e = RepoLookupError(rev) + e._org_exc_tb = format_exc(sys.exc_info()) + raise exceptions.LookupException(e)(rev) + except (TypeError, RepoLookupError, binascii.Error) as e: + e._org_exc_tb = format_exc(sys.exc_info()) raise exceptions.LookupException(e)(rev) except LookupError as e: - e._org_exc_tb = traceback.format_exc() + e._org_exc_tb = format_exc(sys.exc_info()) raise exceptions.LookupException(e)(e.name) if not both: @@ -667,12 +800,12 @@ class HgRemote(RemoteBase): repo = self._factory.repo(wire) # Disable any prompts for this repo - repo.ui.setconfig('ui', 'interactive', 'off', '-y') + repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y') - bookmarks = dict(repo._bookmarks).keys() - remote = peer(repo, {}, url) + bookmarks = list(dict(repo._bookmarks).keys()) + remote = peer(repo, {}, safe_bytes(url)) # Disable any prompts for this remote - remote.ui.setconfig('ui', 'interactive', 
'off', '-y') + remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y') return exchange.push( repo, remote, newbranch=True, bookmarks=bookmarks).cgresult @@ -686,12 +819,15 @@ class HgRemote(RemoteBase): @reraise_safe_exceptions def rev_range(self, wire, commit_filter): cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) - region = self._region(wire) @region.conditional_cache_on_arguments(condition=cache_on) def _rev_range(_context_uid, _repo_id, _filter): repo = self._factory.repo(wire) - revisions = [rev for rev in revrange(repo, commit_filter)] + revisions = [ + ascii_str(repo[rev].hex()) + for rev in revrange(repo, list(map(ascii_bytes, commit_filter))) + ] return revisions return _rev_range(context_uid, repo_id, sorted(commit_filter)) @@ -710,17 +846,18 @@ class HgRemote(RemoteBase): return len(repo) - 1, 0 stop, start = get_revs(repo, [node + ':']) - revs = [hex(repo[r].node()) for r in xrange(start, stop + 1)] + revs = [ascii_str(repo[r].hex()) for r in range(start, stop + 1)] return revs @reraise_safe_exceptions def revs_from_revspec(self, wire, rev_spec, *args, **kwargs): - other_path = kwargs.pop('other_path', None) + org_path = safe_bytes(wire["path"]) + other_path = safe_bytes(kwargs.pop('other_path', '')) # case when we want to compare two independent repositories if other_path and other_path != wire["path"]: baseui = self._factory._create_config(wire["config"]) - repo = unionrepo.makeunionrepository(baseui, other_path, wire["path"]) + repo = unionrepo.makeunionrepository(baseui, other_path, org_path) else: repo = self._factory.repo(wire) return list(repo.revs(rev_spec, *args)) @@ -764,17 +901,20 @@ class HgRemote(RemoteBase): def tags(self, wire): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _tags(_context_uid, _repo_id): repo = self._factory.repo(wire) - return repo.tags() + return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo.tags().items()} return _tags(context_uid, repo_id) @reraise_safe_exceptions - def update(self, wire, node=None, clean=False): + def update(self, wire, node='', clean=False): repo = self._factory.repo(wire) baseui = self._factory._create_config(wire['config']) + node = safe_bytes(node) + commands.update(baseui, repo, node=node, clean=clean) @reraise_safe_exceptions @@ -800,10 +940,10 @@ class HgRemote(RemoteBase): baseui.write = write if branch: - args = [branch] + args = [safe_bytes(branch)] else: args = [] - commands.heads(baseui, repo, template='{node} ', *args) + commands.heads(baseui, repo, template=b'{node} ', *args) return output.getvalue() @@ -812,63 +952,61 @@ class HgRemote(RemoteBase): repo = self._factory.repo(wire) changelog = repo.changelog lookup = repo.lookup - a = changelog.ancestor(lookup(revision1), lookup(revision2)) + a = changelog.ancestor(lookup(safe_bytes(revision1)), lookup(safe_bytes(revision2))) return hex(a) @reraise_safe_exceptions def clone(self, wire, source, dest, update_after_clone=False, hooks=True): baseui = self._factory._create_config(wire["config"], hooks=hooks) - clone(baseui, source, dest, noupdate=not update_after_clone) + clone(baseui, safe_bytes(source), safe_bytes(dest), noupdate=not update_after_clone) @reraise_safe_exceptions def commitctx(self, wire, message, parents, commit_time, commit_timezone, user, files, extra, removed, updated): repo = self._factory.repo(wire) baseui = self._factory._create_config(wire['config']) - publishing = baseui.configbool('phases', 
'publish') - if publishing: - new_commit = 'public' - else: - new_commit = 'draft' + publishing = baseui.configbool(b'phases', b'publish') - def _filectxfn(_repo, ctx, path): + def _filectxfn(_repo, ctx, path: bytes): """ Marks given path as added/changed/removed in a given _repo. This is for internal mercurial commit function. """ # check if this path is removed - if path in removed: + if safe_str(path) in removed: # returning None is a way to mark node for removal return None # check if this path is added for node in updated: - if node['path'] == path: + if safe_bytes(node['path']) == path: return memfilectx( _repo, changectx=ctx, - path=node['path'], - data=node['content'], + path=safe_bytes(node['path']), + data=safe_bytes(node['content']), islink=False, isexec=bool(node['mode'] & stat.S_IXUSR), copysource=False) + abort_exc = exceptions.AbortException() + raise abort_exc(f"Given path haven't been marked as added, changed or removed ({path})") - raise exceptions.AbortException()( - "Given path haven't been marked as added, " - "changed or removed (%s)" % path) - - with repo.ui.configoverride({('phases', 'new-commit'): new_commit}): - + if publishing: + new_commit_phase = b'public' + else: + new_commit_phase = b'draft' + with repo.ui.configoverride({(b'phases', b'new-commit'): new_commit_phase}): + kwargs = {safe_bytes(k): safe_bytes(v) for k, v in extra.items()} commit_ctx = memctx( repo=repo, parents=parents, - text=message, - files=files, + text=safe_bytes(message), + files=[safe_bytes(x) for x in files], filectxfn=_filectxfn, - user=user, + user=safe_bytes(user), date=(commit_time, commit_timezone), - extra=extra) + extra=kwargs) n = repo.commitctx(commit_ctx) new_id = hex(n) @@ -879,11 +1017,11 @@ class HgRemote(RemoteBase): def pull(self, wire, url, commit_ids=None): repo = self._factory.repo(wire) # Disable any prompts for this repo - repo.ui.setconfig('ui', 'interactive', 'off', '-y') + repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y') - remote = peer(repo, {}, url) + remote = peer(repo, {}, safe_bytes(url)) # Disable any prompts for this remote - remote.ui.setconfig('ui', 'interactive', 'off', '-y') + remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y') if commit_ids: commit_ids = [bin(commit_id) for commit_id in commit_ids] @@ -892,34 +1030,47 @@ class HgRemote(RemoteBase): repo, remote, heads=commit_ids, force=None).cgresult @reraise_safe_exceptions - def pull_cmd(self, wire, source, bookmark=None, branch=None, revision=None, hooks=True): + def pull_cmd(self, wire, source, bookmark='', branch='', revision='', hooks=True): repo = self._factory.repo(wire) baseui = self._factory._create_config(wire['config'], hooks=hooks) + source = safe_bytes(source) + # Mercurial internally has a lot of logic that checks ONLY if # option is defined, we just pass those if they are defined then opts = {} + if bookmark: - opts['bookmark'] = bookmark + opts['bookmark'] = [safe_bytes(x) for x in bookmark] \ + if isinstance(bookmark, list) else safe_bytes(bookmark) + if branch: - opts['branch'] = branch + opts['branch'] = [safe_bytes(x) for x in branch] \ + if isinstance(branch, list) else safe_bytes(branch) + if revision: - opts['rev'] = revision + opts['rev'] = [safe_bytes(x) for x in revision] \ + if isinstance(revision, list) else safe_bytes(revision) commands.pull(baseui, repo, source, **opts) @reraise_safe_exceptions - def push(self, wire, revisions, dest_path, hooks=True, push_branches=False): + def push(self, wire, revisions, dest_path, hooks: bool = True, push_branches: bool = 
False): repo = self._factory.repo(wire) baseui = self._factory._create_config(wire['config'], hooks=hooks) - commands.push(baseui, repo, dest=dest_path, rev=revisions, + + revisions = [safe_bytes(x) for x in revisions] \ + if isinstance(revisions, list) else safe_bytes(revisions) + + commands.push(baseui, repo, safe_bytes(dest_path), + rev=revisions, new_branch=push_branches) @reraise_safe_exceptions def strip(self, wire, revision, update, backup): repo = self._factory.repo(wire) ctx = self._get_ctx(repo, revision) - hgext_strip( + hgext_strip.strip( repo.baseui, repo, ctx.node(), update=update, backup=backup) @reraise_safe_exceptions @@ -943,25 +1094,25 @@ class HgRemote(RemoteBase): def merge(self, wire, revision): repo = self._factory.repo(wire) baseui = self._factory._create_config(wire['config']) - repo.ui.setconfig('ui', 'merge', 'internal:dump') + repo.ui.setconfig(b'ui', b'merge', b'internal:dump') # In case of sub repositories are used mercurial prompts the user in # case of merge conflicts or different sub repository sources. By # setting the interactive flag to `False` mercurial doesn't prompt the # used but instead uses a default value. - repo.ui.setconfig('ui', 'interactive', False) - commands.merge(baseui, repo, rev=revision) + repo.ui.setconfig(b'ui', b'interactive', False) + commands.merge(baseui, repo, rev=safe_bytes(revision)) @reraise_safe_exceptions def merge_state(self, wire): repo = self._factory.repo(wire) - repo.ui.setconfig('ui', 'merge', 'internal:dump') + repo.ui.setconfig(b'ui', b'merge', b'internal:dump') # In case of sub repositories are used mercurial prompts the user in # case of merge conflicts or different sub repository sources. By # setting the interactive flag to `False` mercurial doesn't prompt the # used but instead uses a default value. - repo.ui.setconfig('ui', 'interactive', False) + repo.ui.setconfig(b'ui', b'interactive', False) ms = hg_merge.mergestate(repo) return [x for x in ms.unresolved()] @@ -969,20 +1120,34 @@ class HgRemote(RemoteBase): def commit(self, wire, message, username, close_branch=False): repo = self._factory.repo(wire) baseui = self._factory._create_config(wire['config']) - repo.ui.setconfig('ui', 'username', username) - commands.commit(baseui, repo, message=message, close_branch=close_branch) + repo.ui.setconfig(b'ui', b'username', safe_bytes(username)) + commands.commit(baseui, repo, message=safe_bytes(message), close_branch=close_branch) @reraise_safe_exceptions - def rebase(self, wire, source=None, dest=None, abort=False): + def rebase(self, wire, source='', dest='', abort=False): + repo = self._factory.repo(wire) baseui = self._factory._create_config(wire['config']) - repo.ui.setconfig('ui', 'merge', 'internal:dump') + repo.ui.setconfig(b'ui', b'merge', b'internal:dump') # In case of sub repositories are used mercurial prompts the user in # case of merge conflicts or different sub repository sources. By # setting the interactive flag to `False` mercurial doesn't prompt the # used but instead uses a default value. 
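The pull_cmd and push hunks above repeat the same list-or-scalar coercion before handing values to Mercurial's command layer. A minimal sketch of that idiom, assuming the project's safe_bytes helper (added later in this diff as vcsserver/str_utils.py); the helper name to_hg_opt is hypothetical, not part of the patch:

from vcsserver.str_utils import safe_bytes

def to_hg_opt(value):
    # Mercurial internals expect bytes; RPC callers may send a str or a
    # list of str (revisions, branches, bookmarks), so handle both shapes.
    if isinstance(value, list):
        return [safe_bytes(x) for x in value]
    return safe_bytes(value)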
- repo.ui.setconfig('ui', 'interactive', False) - rebase.rebase(baseui, repo, base=source, dest=dest, abort=abort, keep=not abort) + repo.ui.setconfig(b'ui', b'interactive', False) + + rebase_kws = dict( + keep=not abort, + abort=abort + ) + + if source: + source = repo[source] + rebase_kws['base'] = [source.hex()] + if dest: + dest = repo[dest] + rebase_kws['dest'] = dest.hex() + + rebase.rebase(baseui, repo, **rebase_kws) @reraise_safe_exceptions def tag(self, wire, name, revision, message, local, user, tag_time, tag_timezone): @@ -992,17 +1157,18 @@ class HgRemote(RemoteBase): date = (tag_time, tag_timezone) try: - hg_tag.tag(repo, name, node, message, local, user, date) + hg_tag.tag(repo, safe_bytes(name), node, safe_bytes(message), local, safe_bytes(user), date) except Abort as e: log.exception("Tag operation aborted") # Exception can contain unicode which we convert raise exceptions.AbortException(e)(repr(e)) @reraise_safe_exceptions - def bookmark(self, wire, bookmark, revision=None): + def bookmark(self, wire, bookmark, revision=''): repo = self._factory.repo(wire) baseui = self._factory._create_config(wire['config']) - commands.bookmark(baseui, repo, bookmark, rev=revision, force=True) + revision = revision or '' + commands.bookmark(baseui, repo, safe_bytes(bookmark), rev=safe_bytes(revision), force=True) @reraise_safe_exceptions def install_hooks(self, wire, force=False): @@ -1012,8 +1178,8 @@ class HgRemote(RemoteBase): @reraise_safe_exceptions def get_hooks_info(self, wire): return { - 'pre_version': vcsserver.__version__, - 'post_version': vcsserver.__version__, + 'pre_version': vcsserver.get_version(), + 'post_version': vcsserver.get_version(), } @reraise_safe_exceptions @@ -1021,8 +1187,8 @@ class HgRemote(RemoteBase): pass @reraise_safe_exceptions - def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path, - archive_dir_name, commit_id): + def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path, + archive_dir_name, commit_id, cache_config): def file_walker(_commit_id, path): repo = self._factory.repo(wire) @@ -1031,7 +1197,7 @@ class HgRemote(RemoteBase): if is_root: matcher = alwaysmatcher(badfn=None) else: - matcher = patternmatcher('', [(b'glob', path+'/**', b'')], badfn=None) + matcher = patternmatcher('', [(b'glob', safe_bytes(path)+b'/**', b'')], badfn=None) file_iter = ctx.manifest().walk(matcher) for fn in file_iter: @@ -1042,6 +1208,6 @@ class HgRemote(RemoteBase): yield ArchiveNode(file_path, mode, is_link, ctx[fn].data) - return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path, - archive_dir_name, commit_id) + return store_archive_in_cache( + file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config) diff --git a/vcsserver/svn.py b/vcsserver/remote/svn_remote.py rename from vcsserver/svn.py rename to vcsserver/remote/svn_remote.py --- a/vcsserver/svn.py +++ b/vcsserver/remote/svn_remote.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. 
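The rename of vcsserver/svn.py to vcsserver/remote/svn_remote.py that starts here is largely a Python 3 port; the import hunk just below swaps urllib2, urlparse, and StringIO for their stdlib successors. A quick reference sketch of that mapping (the URL and path are illustrative values only, not from the patch):

# py2 -> py3 equivalents used in the svn_remote.py import block:
#   urllib2.URLError     -> urllib.error.URLError
#   urlparse.urlparse    -> urllib.parse.urlparse
#   urllib.pathname2url  -> urllib.request.pathname2url
#   StringIO.StringIO    -> io (BytesIO for byte buffers)
import io
import urllib.error
import urllib.parse
import urllib.request

parts = urllib.parse.urlparse('svn+http://example.com/repo')
path_url = urllib.request.pathname2url('/srv/repos/project')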
-# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,31 +15,42 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -from __future__ import absolute_import import os import subprocess -import time -from urllib2 import URLError -import urlparse +from urllib.error import URLError +import urllib.parse import logging import posixpath as vcspath -import StringIO -import urllib +import io +import urllib.request +import urllib.parse +import urllib.error import traceback -import svn.client -import svn.core -import svn.delta -import svn.diff -import svn.fs -import svn.repos + +import svn.client # noqa +import svn.core # noqa +import svn.delta # noqa +import svn.diff # noqa +import svn.fs # noqa +import svn.repos # noqa +import rhodecode from vcsserver import svn_diff, exceptions, subprocessio, settings -from vcsserver.base import RepoFactory, raise_from_original, ArchiveNode, archive_repo +from vcsserver.base import ( + RepoFactory, + raise_from_original, + ArchiveNode, + store_archive_in_cache, + BytesEnvelope, + BinaryEnvelope, +) from vcsserver.exceptions import NoContentException -from vcsserver.utils import safe_str +from vcsserver.str_utils import safe_str, safe_bytes +from vcsserver.type_utils import assert_bytes from vcsserver.vcs_base import RemoteBase +from vcsserver.lib.svnremoterepo import svnremoterepo log = logging.getLogger(__name__) @@ -52,7 +63,7 @@ svn_compatible_versions_map = { 'pre-1.9-compatible': '1.8', } -current_compatible_version = '1.12' +current_compatible_version = '1.14' def reraise_safe_exceptions(func): @@ -63,7 +74,7 @@ def reraise_safe_exceptions(func): except Exception as e: if not hasattr(e, '_vcs_kind'): log.exception("Unhandled exception in svn remote call") - raise_from_original(exceptions.UnhandledException(e)) + raise_from_original(exceptions.UnhandledException(e), e) raise return wrapper @@ -82,12 +93,12 @@ class SubversionFactory(RepoFactory): or compatible_version fs_config['compatible-version'] = compatible_version_string - log.debug('Create SVN repo with config "%s"', fs_config) + log.debug('Create SVN repo with config `%s`', fs_config) repo = svn.repos.create(path, "", "", None, fs_config) else: repo = svn.repos.open(path) - log.debug('Got SVN object: %s', repo) + log.debug('repository created: got SVN object: %s', repo) return repo def repo(self, wire, create=False, compatible_version=None): @@ -107,9 +118,39 @@ class SvnRemote(RemoteBase): def __init__(self, factory, hg_factory=None): self._factory = factory - # TODO: Remove once we do not use internal Mercurial objects anymore - # for subversion - self._hg_factory = hg_factory + + self._bulk_methods = { + # NOT supported in SVN ATM... 
+ } + self._bulk_file_methods = { + "size": self.get_file_size, + "data": self.get_file_content, + "flags": self.get_node_type, + "is_binary": self.is_binary, + "md5": self.md5_hash + } + + @reraise_safe_exceptions + def bulk_file_request(self, wire, commit_id, path, pre_load): + cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) + + # since we use unified API, we need to cast from str to in for SVN + commit_id = int(commit_id) + + @region.conditional_cache_on_arguments(condition=cache_on) + def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load): + result = {} + for attr in pre_load: + try: + method = self._bulk_file_methods[attr] + wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache + result[attr] = method(wire, _commit_id, _path) + except KeyError as e: + raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"') + return result + + return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load))) @reraise_safe_exceptions def discover_svn_version(self): @@ -118,61 +159,64 @@ class SvnRemote(RemoteBase): svn_ver = svn.core.SVN_VERSION except ImportError: svn_ver = None - return svn_ver + return safe_str(svn_ver) @reraise_safe_exceptions def is_empty(self, wire): - try: return self.lookup(wire, -1) == 0 except Exception: log.exception("failed to read object_store") return False - def check_url(self, url, config_items): - # this can throw exception if not installed, but we detect this - from hgsubversion import svnrepo + def check_url(self, url, config): - baseui = self._hg_factory._create_config(config_items) - # uuid function get's only valid UUID from proper repo, else + # uuid function gets only valid UUID from proper repo, else # throws exception + username, password, src_url = self.get_url_and_credentials(url) try: - svnrepo.svnremoterepo(baseui, url).svn.uuid + svnremoterepo(safe_bytes(username), safe_bytes(password), safe_bytes(src_url)).svn().uuid except Exception: tb = traceback.format_exc() log.debug("Invalid Subversion url: `%s`, tb: %s", url, tb) - raise URLError( - '"%s" is not a valid Subversion source url.' % (url, )) + raise URLError(f'"{url}" is not a valid Subversion source url.') return True def is_path_valid_repository(self, wire, path): - # NOTE(marcink): short circuit the check for SVN repo # the repos.open might be expensive to check, but we have one cheap - # pre condition that we can use, to check for 'format' file - + # pre-condition that we can use, to check for 'format' file if not os.path.isfile(os.path.join(path, 'format')): return False - try: - svn.repos.open(path) - except svn.core.SubversionException: - tb = traceback.format_exc() - log.debug("Invalid Subversion path `%s`, tb: %s", path, tb) - return False - return True + cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) + + @region.conditional_cache_on_arguments(condition=cache_on) + def _assert_correct_path(_context_uid, _repo_id, fast_check): + + try: + svn.repos.open(path) + except svn.core.SubversionException: + tb = traceback.format_exc() + log.debug("Invalid Subversion path `%s`, tb: %s", path, tb) + return False + return True + + return _assert_correct_path(context_uid, repo_id, True) @reraise_safe_exceptions def verify(self, wire,): repo_path = wire['path'] if not self.is_path_valid_repository(wire, repo_path): raise Exception( - "Path %s is not a valid Subversion repository." 
% repo_path) + f"Path {repo_path} is not a valid Subversion repository.") cmd = ['svnadmin', 'info', repo_path] stdout, stderr = subprocessio.run_command(cmd) return stdout + @reraise_safe_exceptions def lookup(self, wire, revision): if revision not in [-1, None, 'HEAD']: raise NotImplementedError @@ -181,6 +225,7 @@ class SvnRemote(RemoteBase): head = svn.fs.youngest_rev(fs_ptr) return head + @reraise_safe_exceptions def lookup_interval(self, wire, start_ts, end_ts): repo = self._factory.repo(wire) fsobj = svn.repos.fs(repo) @@ -198,10 +243,12 @@ class SvnRemote(RemoteBase): end_rev = svn.fs.youngest_rev(fsobj) return start_rev, end_rev + @reraise_safe_exceptions def revision_properties(self, wire, revision): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _revision_properties(_repo_id, _revision): repo = self._factory.repo(wire) @@ -228,7 +275,7 @@ class SvnRemote(RemoteBase): removed = [] # TODO: CHANGE_ACTION_REPLACE: Figure out where it belongs - for path, change in editor.changes.iteritems(): + for path, change in editor.changes.items(): # TODO: Decide what to do with directory nodes. Subversion can add # empty directories. @@ -243,7 +290,7 @@ class SvnRemote(RemoteBase): removed.append(path) else: raise NotImplementedError( - "Action %s not supported on path %s" % ( + "Action {} not supported on path {}".format( change.action, path)) changes = { @@ -257,6 +304,7 @@ class SvnRemote(RemoteBase): def node_history(self, wire, path, revision, limit): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _assert_correct_path(_context_uid, _repo_id, _path, _revision, _limit): cross_copies = False @@ -276,9 +324,11 @@ class SvnRemote(RemoteBase): return history_revisions return _assert_correct_path(context_uid, repo_id, path, revision, limit) + @reraise_safe_exceptions def node_properties(self, wire, path, revision): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _node_properties(_repo_id, _path, _revision): repo = self._factory.repo(wire) @@ -288,7 +338,7 @@ class SvnRemote(RemoteBase): return _node_properties(repo_id, path, revision) def file_annotate(self, wire, path, revision): - abs_path = 'file://' + urllib.pathname2url( + abs_path = 'file://' + urllib.request.pathname2url( vcspath.join(wire['path'], path)) file_uri = svn.core.svn_path_canonicalize(abs_path) @@ -309,17 +359,19 @@ class SvnRemote(RemoteBase): except svn.core.SubversionException as exc: log.exception("Error during blame operation.") raise Exception( - "Blame not supported or file does not exist at path %s. " - "Error %s." % (path, exc)) + f"Blame not supported or file does not exist at path {path}. 
" + f"Error {exc}.") - return annotations + return BinaryEnvelope(annotations) - def get_node_type(self, wire, path, revision=None): + @reraise_safe_exceptions + def get_node_type(self, wire, revision=None, path=''): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) - def _get_node_type(_repo_id, _path, _revision): + def _get_node_type(_repo_id, _revision, _path): repo = self._factory.repo(wire) fs_ptr = svn.repos.fs(repo) if _revision is None: @@ -327,12 +379,14 @@ class SvnRemote(RemoteBase): root = svn.fs.revision_root(fs_ptr, _revision) node = svn.fs.check_path(root, path) return NODE_TYPE_MAPPING.get(node, None) - return _get_node_type(repo_id, path, revision) + return _get_node_type(repo_id, revision, path) - def get_nodes(self, wire, path, revision=None): + @reraise_safe_exceptions + def get_nodes(self, wire, revision=None, path=''): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _get_nodes(_repo_id, _path, _revision): repo = self._factory.repo(wire) @@ -342,27 +396,32 @@ class SvnRemote(RemoteBase): root = svn.fs.revision_root(fsobj, _revision) entries = svn.fs.dir_entries(root, path) result = [] - for entry_path, entry_info in entries.iteritems(): + for entry_path, entry_info in entries.items(): result.append( (entry_path, NODE_TYPE_MAPPING.get(entry_info.kind, None))) return result return _get_nodes(repo_id, path, revision) - def get_file_content(self, wire, path, rev=None): + @reraise_safe_exceptions + def get_file_content(self, wire, rev=None, path=''): repo = self._factory.repo(wire) fsobj = svn.repos.fs(repo) + if rev is None: - rev = svn.fs.youngest_revision(fsobj) + rev = svn.fs.youngest_rev(fsobj) + root = svn.fs.revision_root(fsobj, rev) content = svn.core.Stream(svn.fs.file_contents(root, path)) - return content.read() + return BytesEnvelope(content.read()) - def get_file_size(self, wire, path, revision=None): + @reraise_safe_exceptions + def get_file_size(self, wire, revision=None, path=''): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) - def _get_file_size(_repo_id, _path, _revision): + def _get_file_size(_repo_id, _revision, _path): repo = self._factory.repo(wire) fsobj = svn.repos.fs(repo) if _revision is None: @@ -370,24 +429,24 @@ class SvnRemote(RemoteBase): root = svn.fs.revision_root(fsobj, _revision) size = svn.fs.file_length(root, path) return size - return _get_file_size(repo_id, path, revision) + return _get_file_size(repo_id, revision, path) def create_repository(self, wire, compatible_version=None): log.info('Creating Subversion repository in path "%s"', wire['path']) self._factory.repo(wire, create=True, compatible_version=compatible_version) - def get_url_and_credentials(self, src_url): - obj = urlparse.urlparse(src_url) - username = obj.username or None - password = obj.password or None + def get_url_and_credentials(self, src_url) -> tuple[str, str, str]: + obj = urllib.parse.urlparse(src_url) + username = obj.username or '' + password = obj.password or '' return username, password, src_url def import_remote_repository(self, wire, src_url): repo_path = wire['path'] if not self.is_path_valid_repository(wire, repo_path): raise Exception( - "Path %s is not a valid Subversion repository." 
% repo_path) + f"Path {repo_path} is not a valid Subversion repository.") username, password, src_url = self.get_url_and_credentials(src_url) rdump_cmd = ['svnrdump', 'dump', '--non-interactive', @@ -411,25 +470,26 @@ class SvnRemote(RemoteBase): log.debug('Return process ended with code: %s', rdump.returncode) if rdump.returncode != 0: errors = rdump.stderr.read() - log.error('svnrdump dump failed: statuscode %s: message: %s', - rdump.returncode, errors) + log.error('svnrdump dump failed: statuscode %s: message: %s', rdump.returncode, errors) + reason = 'UNKNOWN' - if 'svnrdump: E230001:' in errors: + if b'svnrdump: E230001:' in errors: reason = 'INVALID_CERTIFICATE' if reason == 'UNKNOWN': - reason = 'UNKNOWN:{}'.format(errors) + reason = f'UNKNOWN:{safe_str(errors)}' + raise Exception( - 'Failed to dump the remote repository from %s. Reason:%s' % ( + 'Failed to dump the remote repository from {}. Reason:{}'.format( src_url, reason)) if load.returncode != 0: raise Exception( - 'Failed to load the dump of remote repository from %s.' % - (src_url, )) + f'Failed to load the dump of remote repository from {src_url}.') def commit(self, wire, message, author, timestamp, updated, removed): - assert isinstance(message, str) - assert isinstance(author, str) + + message = safe_bytes(message) + author = safe_bytes(author) repo = self._factory.repo(wire) fsobj = svn.repos.fs(repo) @@ -453,6 +513,7 @@ class SvnRemote(RemoteBase): log.debug('Committed revision "%s" to "%s".', commit_id, wire['path']) return commit_id + @reraise_safe_exceptions def diff(self, wire, rev1, rev2, path1=None, path2=None, ignore_whitespace=False, context=3): @@ -461,12 +522,12 @@ class SvnRemote(RemoteBase): diff_creator = SvnDiffer( repo, rev1, path1, rev2, path2, ignore_whitespace, context) try: - return diff_creator.generate_diff() + return BytesEnvelope(diff_creator.generate_diff()) except svn.core.SubversionException as e: log.exception( "Error during diff operation operation. 
" - "Path might not exist %s, %s" % (path1, path2)) - return "" + "Path might not exist %s, %s", path1, path2) + return BytesEnvelope(b'') @reraise_safe_exceptions def is_large_file(self, wire, path): @@ -475,18 +536,32 @@ class SvnRemote(RemoteBase): @reraise_safe_exceptions def is_binary(self, wire, rev, path): cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) - region = self._region(wire) @region.conditional_cache_on_arguments(condition=cache_on) def _is_binary(_repo_id, _rev, _path): - raw_bytes = self.get_file_content(wire, path, rev) - return raw_bytes and '\0' in raw_bytes + raw_bytes = self.get_file_content(wire, rev, path) + if not raw_bytes: + return False + return b'\0' in raw_bytes return _is_binary(repo_id, rev, path) @reraise_safe_exceptions + def md5_hash(self, wire, rev, path): + cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) + + @region.conditional_cache_on_arguments(condition=cache_on) + def _md5_hash(_repo_id, _rev, _path): + return '' + + return _md5_hash(repo_id, rev, path) + + @reraise_safe_exceptions def run_svn_command(self, wire, cmd, **opts): path = wire.get('path', None) + debug_mode = rhodecode.ConfigGet().get_bool('debug') if path and os.path.isdir(path): opts['cwd'] = path @@ -500,18 +575,22 @@ class SvnRemote(RemoteBase): try: _opts.update(opts) - p = subprocessio.SubprocessIOChunker(cmd, **_opts) + proc = subprocessio.SubprocessIOChunker(cmd, **_opts) - return ''.join(p), ''.join(p.error) - except (EnvironmentError, OSError) as err: + return b''.join(proc), b''.join(proc.stderr) + except OSError as err: if safe_call: return '', safe_str(err).strip() else: - cmd = ' '.join(cmd) # human friendly CMD - tb_err = ("Couldn't run svn command (%s).\n" - "Original error was:%s\n" - "Call options:%s\n" - % (cmd, err, _opts)) + cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD + call_opts = {} + if debug_mode: + call_opts = _opts + + tb_err = ("Couldn't run svn command ({}).\n" + "Original error was:{}\n" + "Call options:{}\n" + .format(cmd, err, call_opts)) log.exception(tb_err) raise exceptions.VcsException()(tb_err) @@ -522,9 +601,8 @@ class SvnRemote(RemoteBase): binary_dir = settings.BINARY_DIR executable = None if binary_dir: - executable = os.path.join(binary_dir, 'python') - return install_svn_hooks( - repo_path, executable=executable, force_create=force) + executable = os.path.join(binary_dir, 'python3') + return install_svn_hooks(repo_path, force_create=force) @reraise_safe_exceptions def get_hooks_info(self, wire): @@ -541,13 +619,14 @@ class SvnRemote(RemoteBase): pass @reraise_safe_exceptions - def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path, - archive_dir_name, commit_id): + def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path, + archive_dir_name, commit_id, cache_config): def walk_tree(root, root_dir, _commit_id): """ Special recursive svn repo walker """ + root_dir = safe_bytes(root_dir) filemode_default = 0o100644 filemode_executable = 0o100755 @@ -560,10 +639,10 @@ class SvnRemote(RemoteBase): # return only DIR, and then all entries in that dir yield os.path.join(root_dir, f_name), {'mode': filemode_default}, f_type new_root = os.path.join(root_dir, f_name) - for _f_name, _f_data, _f_type in walk_tree(root, new_root, _commit_id): - yield _f_name, _f_data, _f_type + yield from walk_tree(root, new_root, _commit_id) else: - f_path = os.path.join(root_dir, f_name).rstrip('/') + + f_path = os.path.join(root_dir, 
f_name).rstrip(b'/') prop_list = svn.fs.node_proplist(root, f_path) f_mode = filemode_default @@ -601,11 +680,11 @@ class SvnRemote(RemoteBase): data_stream = f_data['content_stream'] yield ArchiveNode(file_path, mode, is_link, data_stream) - return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path, - archive_dir_name, commit_id) + return store_archive_in_cache( + file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config) -class SvnDiffer(object): +class SvnDiffer: """ Utility to create diffs based on difflib and the Subversion api """ @@ -643,15 +722,15 @@ class SvnDiffer(object): "Source type: %s, target type: %s" % (self.src_kind, self.tgt_kind)) - def generate_diff(self): - buf = StringIO.StringIO() + def generate_diff(self) -> bytes: + buf = io.BytesIO() if self.tgt_kind == svn.core.svn_node_dir: self._generate_dir_diff(buf) else: self._generate_file_diff(buf) return buf.getvalue() - def _generate_dir_diff(self, buf): + def _generate_dir_diff(self, buf: io.BytesIO): editor = DiffChangeEditor() editor_ptr, editor_baton = svn.delta.make_editor(editor) svn.repos.dir_delta2( @@ -672,7 +751,7 @@ class SvnDiffer(object): self._generate_node_diff( buf, change, path, self.tgt_path, path, self.src_path) - def _generate_file_diff(self, buf): + def _generate_file_diff(self, buf: io.BytesIO): change = None if self.src_kind == svn.core.svn_node_none: change = "add" @@ -684,7 +763,13 @@ class SvnDiffer(object): buf, change, tgt_path, tgt_base, src_path, src_base) def _generate_node_diff( - self, buf, change, tgt_path, tgt_base, src_path, src_base): + self, buf: io.BytesIO, change, tgt_path, tgt_base, src_path, src_base): + + tgt_path_bytes = safe_bytes(tgt_path) + tgt_path = safe_str(tgt_path) + + src_path_bytes = safe_bytes(src_path) + src_path = safe_str(src_path) if self.src_rev == self.tgt_rev and tgt_base == src_base: # makes consistent behaviour with git/hg to return empty diff if @@ -697,55 +782,55 @@ class SvnDiffer(object): self.binary_content = False mime_type = self._get_mime_type(tgt_full_path) - if mime_type and not mime_type.startswith('text'): + if mime_type and not mime_type.startswith(b'text'): self.binary_content = True - buf.write("=" * 67 + '\n') - buf.write("Cannot display: file marked as a binary type.\n") - buf.write("svn:mime-type = %s\n" % mime_type) - buf.write("Index: %s\n" % (tgt_path, )) - buf.write("=" * 67 + '\n') - buf.write("diff --git a/%(tgt_path)s b/%(tgt_path)s\n" % { - 'tgt_path': tgt_path}) + buf.write(b"=" * 67 + b'\n') + buf.write(b"Cannot display: file marked as a binary type.\n") + buf.write(b"svn:mime-type = %s\n" % mime_type) + buf.write(b"Index: %b\n" % tgt_path_bytes) + buf.write(b"=" * 67 + b'\n') + buf.write(b"diff --git a/%b b/%b\n" % (tgt_path_bytes, tgt_path_bytes)) if change == 'add': # TODO: johbo: SVN is missing a zero here compared to git - buf.write("new file mode 10644\n") + buf.write(b"new file mode 10644\n") + + # TODO(marcink): intro to binary detection of svn patches + # if self.binary_content: + # buf.write(b'GIT binary patch\n') - #TODO(marcink): intro to binary detection of svn patches + buf.write(b"--- /dev/null\t(revision 0)\n") + src_lines = [] + else: + if change == 'delete': + buf.write(b"deleted file mode 10644\n") + + # TODO(marcink): intro to binary detection of svn patches # if self.binary_content: # buf.write('GIT binary patch\n') - buf.write("--- /dev/null\t(revision 0)\n") - src_lines = [] - else: - if change == 'delete': - buf.write("deleted 
file mode 10644\n") - - #TODO(marcink): intro to binary detection of svn patches - # if self.binary_content: - # buf.write('GIT binary patch\n') - - buf.write("--- a/%s\t(revision %s)\n" % ( - src_path, self.src_rev)) + buf.write(b"--- a/%b\t(revision %d)\n" % (src_path_bytes, self.src_rev)) src_lines = self._svn_readlines(self.src_root, src_full_path) if change == 'delete': - buf.write("+++ /dev/null\t(revision %s)\n" % (self.tgt_rev, )) + buf.write(b"+++ /dev/null\t(revision %d)\n" % self.tgt_rev) tgt_lines = [] else: - buf.write("+++ b/%s\t(revision %s)\n" % ( - tgt_path, self.tgt_rev)) + buf.write(b"+++ b/%b\t(revision %d)\n" % (tgt_path_bytes, self.tgt_rev)) tgt_lines = self._svn_readlines(self.tgt_root, tgt_full_path) + # we made our diff header, time to generate the diff content into our buffer + if not self.binary_content: udiff = svn_diff.unified_diff( src_lines, tgt_lines, context=self.context, ignore_blank_lines=self.ignore_whitespace, ignore_case=False, ignore_space_changes=self.ignore_whitespace) + buf.writelines(udiff) - def _get_mime_type(self, path): + def _get_mime_type(self, path) -> bytes: try: mime_type = svn.fs.node_prop( self.tgt_root, path, svn.core.SVN_PROP_MIME_TYPE) @@ -761,7 +846,9 @@ class SvnDiffer(object): if node_kind not in ( svn.core.svn_node_file, svn.core.svn_node_symlink): return [] - content = svn.core.Stream(svn.fs.file_contents(fs_root, node_path)).read() + content = svn.core.Stream( + svn.fs.file_contents(fs_root, node_path)).read() + return content.splitlines(True) @@ -789,7 +876,7 @@ def authorization_callback_allow_all(roo return True -class TxnNodeProcessor(object): +class TxnNodeProcessor: """ Utility to process the change of one node within a transaction root. @@ -799,7 +886,7 @@ class TxnNodeProcessor(object): """ def __init__(self, node, txn_root): - assert isinstance(node['path'], str) + assert_bytes(node['path']) self.node = node self.txn_root = txn_root @@ -835,23 +922,24 @@ class TxnNodeProcessor(object): svn.fs.make_file(self.txn_root, self.node['path']) def _update_file_content(self): - assert isinstance(self.node['content'], str) + assert_bytes(self.node['content']) + handler, baton = svn.fs.apply_textdelta( self.txn_root, self.node['path'], None, None) svn.delta.svn_txdelta_send_string(self.node['content'], handler, baton) def _update_file_properties(self): properties = self.node.get('properties', {}) - for key, value in properties.iteritems(): + for key, value in properties.items(): svn.fs.change_node_prop( - self.txn_root, self.node['path'], key, value) + self.txn_root, self.node['path'], safe_bytes(key), safe_bytes(value)) def apr_time_t(timestamp): """ Convert a Python timestamp into APR timestamp type apr_time_t """ - return timestamp * 1E6 + return int(timestamp * 1E6) def svn_opt_revision_value_t(num): diff --git a/vcsserver/remote_wsgi.py b/vcsserver/remote_wsgi.py --- a/vcsserver/remote_wsgi.py +++ b/vcsserver/remote_wsgi.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. 
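Before moving on to remote_wsgi.py: the SvnDiffer hunks above rebuild diff-header assembly on top of an io.BytesIO buffer with bytes interpolation (%b for paths, %d for revisions). A condensed standalone sketch of that pattern, with made-up example paths and revisions:

import io

buf = io.BytesIO()
src_path = tgt_path = b'docs/readme.rst'   # example values only
src_rev, tgt_rev = 7, 8
buf.write(b"Index: %b\n" % tgt_path)
buf.write(b"=" * 67 + b"\n")
buf.write(b"diff --git a/%b b/%b\n" % (tgt_path, tgt_path))
buf.write(b"--- a/%b\t(revision %d)\n" % (src_path, src_rev))
buf.write(b"+++ b/%b\t(revision %d)\n" % (tgt_path, tgt_rev))
print(buf.getvalue().decode())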
-# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ from vcsserver import scm_app, wsgi_app_caller -class GitRemoteWsgi(object): +class GitRemoteWsgi: def handle(self, environ, input_data, *args, **kwargs): app = wsgi_app_caller.WSGIAppCaller( scm_app.create_git_wsgi_app(*args, **kwargs)) @@ -26,7 +26,7 @@ class GitRemoteWsgi(object): return app.handle(environ, input_data) -class HgRemoteWsgi(object): +class HgRemoteWsgi: def handle(self, environ, input_data, *args, **kwargs): app = wsgi_app_caller.WSGIAppCaller( scm_app.create_hg_wsgi_app(*args, **kwargs)) diff --git a/vcsserver/scm_app.py b/vcsserver/scm_app.py --- a/vcsserver/scm_app.py +++ b/vcsserver/scm_app.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -27,7 +27,7 @@ import mercurial.hgweb.hgweb_mod import webob.exc from vcsserver import pygrack, exceptions, settings, git_lfs - +from vcsserver.str_utils import ascii_bytes, safe_bytes log = logging.getLogger(__name__) @@ -81,7 +81,7 @@ class HgWeb(mercurial.hgweb.hgweb_mod.hg first_chunk = None try: - data = gen.next() + data = next(gen) def first_chunk(): yield data @@ -94,16 +94,29 @@ class HgWeb(mercurial.hgweb.hgweb_mod.hg def _runwsgi(self, req, res, repo): - cmd = req.qsparams.get('cmd', '') + cmd = req.qsparams.get(b'cmd', '') if not mercurial.wireprotoserver.iscmd(cmd): # NOTE(marcink): for unsupported commands, we return bad request # internally from HG + log.warning('cmd: `%s` is not supported by the mercurial wireprotocol v1', cmd) from mercurial.hgweb.common import statusmessage res.status = statusmessage(mercurial.hgweb.common.HTTP_BAD_REQUEST) - res.setbodybytes('') + res.setbodybytes(b'') return res.sendresponse() - return super(HgWeb, self)._runwsgi(req, res, repo) + return super()._runwsgi(req, res, repo) + + +def sanitize_hg_ui(baseui): + # NOTE(marcink): since python3 hgsubversion is deprecated. 
+ # From old installations we might still have this set enabled + # we explicitly remove this now here to make sure it wont propagate further + + if baseui.config(b'extensions', b'hgsubversion') is not None: + for cfg in (baseui._ocfg, baseui._tcfg, baseui._ucfg): + if b'extensions' in cfg: + if b'hgsubversion' in cfg[b'extensions']: + del cfg[b'extensions'][b'hgsubversion'] def make_hg_ui_from_config(repo_config): @@ -115,10 +128,13 @@ def make_hg_ui_from_config(repo_config): baseui._tcfg = mercurial.config.config() for section, option, value in repo_config: - baseui.setconfig(section, option, value) + baseui.setconfig( + ascii_bytes(section, allow_bytes=True), + ascii_bytes(option, allow_bytes=True), + ascii_bytes(value, allow_bytes=True)) # make our hgweb quiet so it doesn't print output - baseui.setconfig('ui', 'quiet', 'true') + baseui.setconfig(b'ui', b'quiet', b'true') return baseui @@ -131,11 +147,14 @@ def update_hg_ui_from_hgrc(baseui, repo_ return log.debug('reading hgrc from %s', path) cfg = mercurial.config.config() - cfg.read(path) + cfg.read(ascii_bytes(path)) for section in HG_UI_SECTIONS: for k, v in cfg.items(section): log.debug('settings ui from file: [%s] %s=%s', section, k, v) - baseui.setconfig(section, k, v) + baseui.setconfig( + ascii_bytes(section, allow_bytes=True), + ascii_bytes(k, allow_bytes=True), + ascii_bytes(v, allow_bytes=True)) def create_hg_wsgi_app(repo_path, repo_name, config): @@ -149,14 +168,15 @@ def create_hg_wsgi_app(repo_path, repo_n baseui = make_hg_ui_from_config(config) update_hg_ui_from_hgrc(baseui, repo_path) + sanitize_hg_ui(baseui) try: - return HgWeb(repo_path, name=repo_name, baseui=baseui) + return HgWeb(safe_bytes(repo_path), name=safe_bytes(repo_name), baseui=baseui) except mercurial.error.RequirementError as e: raise exceptions.RequirementException(e)(e) -class GitHandler(object): +class GitHandler: """ Handler for Git operations like push/pull etc """ @@ -202,7 +222,7 @@ def create_git_wsgi_app(repo_path, repo_ return app -class GitLFSHandler(object): +class GitLFSHandler: """ Handler for Git LFS operations """ diff --git a/vcsserver/server.py b/vcsserver/server.py --- a/vcsserver/server.py +++ b/vcsserver/server.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -24,7 +24,7 @@ import time log = logging.getLogger(__name__) -class VcsServer(object): +class VcsServer: """ Exposed remote interface of the vcsserver itself. diff --git a/vcsserver/settings.py b/vcsserver/settings.py --- a/vcsserver/settings.py +++ b/vcsserver/settings.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/vcsserver/str_utils.py b/vcsserver/str_utils.py new file mode 100644 --- /dev/null +++ b/vcsserver/str_utils.py @@ -0,0 +1,144 @@ +# RhodeCode VCSServer provides access to different vcs backends via network. 
+# Copyright (C) 2014-2023 RhodeCode GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +import typing +import base64 +import logging + + +log = logging.getLogger(__name__) + + +def safe_int(val, default=None) -> int: + """ + Returns int() of val if val is not convertable to int use default + instead + + :param val: + :param default: + """ + + try: + val = int(val) + except (ValueError, TypeError): + val = default + + return val + + +def base64_to_str(text) -> str: + return safe_str(base64.encodebytes(safe_bytes(text))).strip() + + +def get_default_encodings() -> list[str]: + return ['utf8'] + + +def safe_str(str_, to_encoding=None) -> str: + """ + safe str function. Does few trick to turn unicode_ into string + + :param str_: str to encode + :param to_encoding: encode to this type UTF8 default + """ + if isinstance(str_, str): + return str_ + + # if it's bytes cast to str + if not isinstance(str_, bytes): + return str(str_) + + to_encoding = to_encoding or get_default_encodings() + if not isinstance(to_encoding, (list, tuple)): + to_encoding = [to_encoding] + + for enc in to_encoding: + try: + return str(str_, enc) + except UnicodeDecodeError: + pass + + return str(str_, to_encoding[0], 'replace') + + +def safe_bytes(str_, from_encoding=None) -> bytes: + """ + safe bytes function. Does few trick to turn str_ into bytes string: + + :param str_: string to decode + :param from_encoding: encode from this type UTF8 default + """ + if isinstance(str_, bytes): + return str_ + + if not isinstance(str_, str): + raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}') + + from_encoding = from_encoding or get_default_encodings() + if not isinstance(from_encoding, (list, tuple)): + from_encoding = [from_encoding] + + for enc in from_encoding: + try: + return str_.encode(enc) + except UnicodeDecodeError: + pass + + return str_.encode(from_encoding[0], 'replace') + + +def ascii_bytes(str_, allow_bytes=False) -> bytes: + """ + Simple conversion from str to bytes, with assumption that str_ is pure ASCII. + Fails with UnicodeError on invalid input. + This should be used where encoding and "safe" ambiguity should be avoided. + Where strings already have been encoded in other ways but still are unicode + string - for example to hex, base64, json, urlencoding, or are known to be + identifiers. + """ + if allow_bytes and isinstance(str_, bytes): + return str_ + + if not isinstance(str_, str): + raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}') + return str_.encode('ascii') + + +def ascii_str(str_) -> str: + """ + Simple conversion from bytes to str, with assumption that str_ is pure ASCII. + Fails with UnicodeError on invalid input. + This should be used where encoding and "safe" ambiguity should be avoided. 
+ Where strings are encoded but also in other ways are known to be ASCII, and + where a unicode string is wanted without caring about encoding. For example + to hex, base64, urlencoding, or are known to be identifiers. + """ + + if not isinstance(str_, bytes): + raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}') + return str_.decode('ascii') + + +def convert_to_str(data): + if isinstance(data, bytes): + return safe_str(data) + elif isinstance(data, tuple): + return tuple(convert_to_str(item) for item in data) + elif isinstance(data, list): + return list(convert_to_str(item) for item in data) + else: + return data diff --git a/vcsserver/subprocessio.py b/vcsserver/subprocessio.py --- a/vcsserver/subprocessio.py +++ b/vcsserver/subprocessio.py @@ -23,15 +23,17 @@ along with git_http_backend.py Project. If not, see . """ import os +import collections import logging -import subprocess32 as subprocess -from collections import deque -from threading import Event, Thread +import subprocess +import threading + +from vcsserver.str_utils import safe_str log = logging.getLogger(__name__) -class StreamFeeder(Thread): +class StreamFeeder(threading.Thread): """ Normal writing into pipe-like is blocking once the buffer is filled. This thread allows a thread to seep data from a file-like into a pipe @@ -40,24 +42,18 @@ class StreamFeeder(Thread): """ def __init__(self, source): - super(StreamFeeder, self).__init__() + super().__init__() self.daemon = True filelike = False - self.bytes = bytes() - if type(source) in (type(''), bytes, bytearray): # string-like + self.bytes = b'' + if type(source) in (str, bytes, bytearray): # string-like self.bytes = bytes(source) else: # can be either file pointer or file-like - if type(source) in (int, long): # file pointer it is + if isinstance(source, int): # file pointer it is # converting file descriptor (int) stdin into file-like - try: - source = os.fdopen(source, 'rb', 16384) - except Exception: - pass + source = os.fdopen(source, 'rb', 16384) # let's see if source is file-like by now - try: - filelike = source.read - except Exception: - pass + filelike = hasattr(source, 'read') if not filelike and not self.bytes: raise TypeError("StreamFeeder's source object must be a readable " "file-like, a file descriptor, or a string-like.") @@ -65,28 +61,31 @@ class StreamFeeder(Thread): self.readiface, self.writeiface = os.pipe() def run(self): - t = self.writeiface + writer = self.writeiface try: if self.bytes: - os.write(t, self.bytes) + os.write(writer, self.bytes) else: s = self.source - b = s.read(4096) - while b: - os.write(t, b) - b = s.read(4096) + + while 1: + _bytes = s.read(4096) + if not _bytes: + break + os.write(writer, _bytes) + finally: - os.close(t) + os.close(writer) @property def output(self): return self.readiface -class InputStreamChunker(Thread): +class InputStreamChunker(threading.Thread): def __init__(self, source, target, buffer_size, chunk_size): - super(InputStreamChunker, self).__init__() + super().__init__() self.daemon = True # die die die. 
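Stepping back briefly to the str_utils helpers added above, a small usage sketch exercising their documented behaviour (not part of the patch itself):

from vcsserver.str_utils import safe_str, safe_bytes, ascii_bytes, ascii_str

assert safe_str(b'commit message') == 'commit message'    # bytes -> str, utf8 tried first
assert safe_bytes('commit message') == b'commit message'  # str -> bytes
# strict ASCII round-trip, e.g. for hex commit ids and other identifiers:
assert ascii_str(ascii_bytes('deadbeef')) == 'deadbeef'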
@@ -95,16 +94,16 @@ class InputStreamChunker(Thread): self.chunk_count_max = int(buffer_size / chunk_size) + 1 self.chunk_size = chunk_size - self.data_added = Event() + self.data_added = threading.Event() self.data_added.clear() - self.keep_reading = Event() + self.keep_reading = threading.Event() self.keep_reading.set() - self.EOF = Event() + self.EOF = threading.Event() self.EOF.clear() - self.go = Event() + self.go = threading.Event() self.go.set() def stop(self): @@ -115,7 +114,7 @@ class InputStreamChunker(Thread): # go of the input because, if successful, .close() will send EOF # down the pipe. self.source.close() - except: + except Exception: pass def run(self): @@ -146,14 +145,14 @@ class InputStreamChunker(Thread): try: b = s.read(cs) - except ValueError: + except ValueError: # probably "I/O operation on closed file" b = '' self.EOF.set() da.set() # for cases when done but there was no input. -class BufferedGenerator(object): +class BufferedGenerator: """ Class behaves as a non-blocking, buffered pipe reader. Reads chunks of data (through a thread) @@ -166,18 +165,20 @@ class BufferedGenerator(object): StopIteration after the last chunk of data is yielded. """ - def __init__(self, source, buffer_size=65536, chunk_size=4096, + def __init__(self, name, source, buffer_size=65536, chunk_size=4096, starting_values=None, bottomless=False): starting_values = starting_values or [] + self.name = name + self.buffer_size = buffer_size + self.chunk_size = chunk_size if bottomless: maxlen = int(buffer_size / chunk_size) else: maxlen = None - self.data = deque(starting_values, maxlen) - self.worker = InputStreamChunker(source, self.data, buffer_size, - chunk_size) + self.data_queue = collections.deque(starting_values, maxlen) + self.worker = InputStreamChunker(source, self.data_queue, buffer_size, chunk_size) if starting_values: self.worker.data_added.set() self.worker.start() @@ -185,17 +186,21 @@ class BufferedGenerator(object): #################### # Generator's methods #################### + def __str__(self): + return f'BufferedGenerator(name={self.name} chunk: {self.chunk_size} on buffer: {self.buffer_size})' def __iter__(self): return self - def next(self): - while not len(self.data) and not self.worker.EOF.is_set(): + def __next__(self): + + while not self.length and not self.worker.EOF.is_set(): self.worker.data_added.clear() self.worker.data_added.wait(0.2) - if len(self.data): + + if self.length: self.worker.keep_reading.set() - return bytes(self.data.popleft()) + return bytes(self.data_queue.popleft()) elif self.worker.EOF.is_set(): raise StopIteration @@ -249,7 +254,7 @@ class BufferedGenerator(object): @property def done_reading(self): """ - Done_reding does not mean that the iterator's buffer is empty. + Done_reading does not mean that the iterator's buffer is empty. Iterator might have done reading from underlying source, but the read chunks might still be available for serving through .next() method. @@ -262,34 +267,34 @@ class BufferedGenerator(object): """ returns int. - This is the lenght of the que of chunks, not the length of + This is the length of the queue of chunks, not the length of the combined contents in those chunks. __len__() cannot be meaningfully implemented because this - reader is just flying throuh a bottomless pit content and - can only know the lenght of what it already saw. + reader is just flying through a bottomless pit content and + can only know the length of what it already saw. 
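The bottomless mode set up above relies on a bounded collections.deque: with maxlen set, the oldest chunks are silently dropped as new ones arrive, which is what keeps the stderr buffer from growing without bound. A tiny demonstration, assuming the same buffer_size/chunk_size arithmetic as BufferedGenerator:

import collections

buffer_size, chunk_size = 65536, 4096
data_queue = collections.deque(maxlen=int(buffer_size / chunk_size))  # 16 slots
for i in range(32):
    data_queue.append(b'x' * chunk_size)
assert len(data_queue) == 16  # the 16 oldest chunks were discarded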
If __len__() on WSGI server per PEP 3333 returns a value, - the responce's length will be set to that. In order not to + the response's length will be set to that. In order not to confuse WSGI PEP3333 servers, we will not implement __len__ at all. """ - return len(self.data) + return len(self.data_queue) def prepend(self, x): - self.data.appendleft(x) + self.data_queue.appendleft(x) def append(self, x): - self.data.append(x) + self.data_queue.append(x) def extend(self, o): - self.data.extend(o) + self.data_queue.extend(o) def __getitem__(self, i): - return self.data[i] + return self.data_queue[i] -class SubprocessIOChunker(object): +class SubprocessIOChunker: """ Processor class wrapping handling of subprocess IO. @@ -314,7 +319,7 @@ class SubprocessIOChunker(object): - We are multithreaded. Writing in and reading out, err are all sep threads. - We support concurrent (in and out) stream processing. - - The output is not a stream. It's a queue of read string (bytes, not unicode) + - The output is not a stream. It's a queue of read string (bytes, not str) chunks. The object behaves as an iterable. You can "for chunk in obj:" us. - We are non-blocking in more respects than communicate() (reading from subprocess out pauses when internal buffer is full, but @@ -323,16 +328,16 @@ class SubprocessIOChunker(object): does not block the parallel inpipe reading occurring parallel thread.) The purpose of the object is to allow us to wrap subprocess interactions into - and interable that can be passed to a WSGI server as the application's return + an iterable that can be passed to a WSGI server as the application's return value. Because of stream-processing-ability, WSGI does not have to read ALL of the subprocess's output and buffer it, before handing it to WSGI server for HTTP response. Instead, the class initializer reads just a bit of the stream - to figure out if error ocurred or likely to occur and if not, just hands the + to figure out if error occurred or likely to occur and if not, just hands the further iteration over subprocess output to the server for completion of HTTP response. The real or perceived subprocess error is trapped and raised as one of - EnvironmentError family of exceptions + OSError family of exceptions Example usage: # try: @@ -342,7 +347,7 @@ class SubprocessIOChunker(object): # buffer_size = 65536, # chunk_size = 4096 # ) - # except (EnvironmentError) as e: + # except (OSError) as e: # print str(e) # raise e # @@ -358,15 +363,17 @@ class SubprocessIOChunker(object): _close_input_fd = None _closed = False + _stdout = None + _stderr = None - def __init__(self, cmd, inputstream=None, buffer_size=65536, + def __init__(self, cmd, input_stream=None, buffer_size=65536, chunk_size=4096, starting_values=None, fail_on_stderr=True, fail_on_return_code=True, **kwargs): """ Initializes SubprocessIOChunker :param cmd: A Subprocess.Popen style "cmd". Can be string or array of strings - :param inputstream: (Default: None) A file-like, string, or file pointer. + :param input_stream: (Default: None) A file-like, string, or file pointer. :param buffer_size: (Default: 65536) A size of total buffer per stream in bytes. :param chunk_size: (Default: 4096) A max size of a chunk. Actual chunk may be smaller. :param starting_values: (Default: []) An array of strings to put in front of output que. @@ -376,71 +383,86 @@ class SubprocessIOChunker(object): exception if the return code is not 0. 
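A minimal usage sketch of the renamed byte-oriented API, mirroring what run_command() does later in this file; the echo command is only an example:

from vcsserver.subprocessio import SubprocessIOChunker

proc = SubprocessIOChunker('echo hello', shell=True, fail_on_stderr=False)
stdout = b''.join(proc)          # iterating yields bytes chunks from stdout
stderr = b''.join(proc.stderr)   # .error was renamed to .stderr in this diff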
""" + kwargs['shell'] = kwargs.get('shell', True) + starting_values = starting_values or [] - if inputstream: - input_streamer = StreamFeeder(inputstream) + if input_stream: + input_streamer = StreamFeeder(input_stream) input_streamer.start() - inputstream = input_streamer.output - self._close_input_fd = inputstream + input_stream = input_streamer.output + self._close_input_fd = input_stream self._fail_on_stderr = fail_on_stderr self._fail_on_return_code = fail_on_return_code - - _shell = kwargs.get('shell', True) - kwargs['shell'] = _shell + self.cmd = cmd - _p = subprocess.Popen(cmd, bufsize=-1, - stdin=inputstream, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + _p = subprocess.Popen(cmd, bufsize=-1, stdin=input_stream, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs) + self.process = _p - bg_out = BufferedGenerator(_p.stdout, buffer_size, chunk_size, - starting_values) - bg_err = BufferedGenerator(_p.stderr, 16000, 1, bottomless=True) + bg_out = BufferedGenerator('stdout', _p.stdout, buffer_size, chunk_size, starting_values) + bg_err = BufferedGenerator('stderr', _p.stderr, 10240, 1, bottomless=True) while not bg_out.done_reading and not bg_out.reading_paused and not bg_err.length: # doing this until we reach either end of file, or end of buffer. - bg_out.data_added_event.wait(1) + bg_out.data_added_event.wait(0.2) bg_out.data_added_event.clear() # at this point it's still ambiguous if we are done reading or just full buffer. # Either way, if error (returned by ended process, or implied based on # presence of stuff in stderr output) we error out. # Else, we are happy. - _returncode = _p.poll() + return_code = _p.poll() + ret_code_ok = return_code in [None, 0] + ret_code_fail = return_code is not None and return_code != 0 + if ( + (ret_code_fail and fail_on_return_code) or + (ret_code_ok and fail_on_stderr and bg_err.length) + ): - if ((_returncode and fail_on_return_code) or - (fail_on_stderr and _returncode is None and bg_err.length)): try: _p.terminate() except Exception: pass + bg_out.stop() + out = b''.join(bg_out) + self._stdout = out + bg_err.stop() - if fail_on_stderr: - err = ''.join(bg_err) - raise EnvironmentError( - "Subprocess exited due to an error:\n" + err) - if _returncode and fail_on_return_code: - err = ''.join(bg_err) + err = b''.join(bg_err) + self._stderr = err + + # code from https://github.com/schacon/grack/pull/7 + if err.strip() == b'fatal: The remote end hung up unexpectedly' and out.startswith(b'0034shallow '): + bg_out = iter([out]) + _p = None + elif err and fail_on_stderr: + text_err = err.decode() + raise OSError( + f"Subprocess exited due to an error:\n{text_err}") + + if ret_code_fail and fail_on_return_code: + text_err = err.decode() if not err: # maybe get empty stderr, try stdout instead # in many cases git reports the errors on stdout too - err = ''.join(bg_out) - raise EnvironmentError( - "Subprocess exited with non 0 ret code:%s: stderr:%s" % ( - _returncode, err)) + text_err = out.decode() + raise OSError( + f"Subprocess exited with non 0 ret code:{return_code}: stderr:{text_err}") - self.process = _p - self.output = bg_out - self.error = bg_err - self.inputstream = inputstream + self.stdout = bg_out + self.stderr = bg_err + self.inputstream = input_stream + + def __str__(self): + proc = getattr(self, 'process', 'NO_PROCESS') + return f'SubprocessIOChunker: {proc}' def __iter__(self): return self - def next(self): + def __next__(self): # Note: mikhail: We need to be sure that we are checking the return # code after the stdout 
stream is closed. Some processes, e.g. git # are doing some magic in between closing stdout and terminating the @@ -449,27 +471,31 @@ class SubprocessIOChunker(object): result = None stop_iteration = None try: - result = self.output.next() + result = next(self.stdout) except StopIteration as e: stop_iteration = e - if self.process.poll() and self._fail_on_return_code: - err = '%s' % ''.join(self.error) - raise EnvironmentError( - "Subprocess exited due to an error:\n" + err) + if self.process: + return_code = self.process.poll() + ret_code_fail = return_code is not None and return_code != 0 + if ret_code_fail and self._fail_on_return_code: + self.stop_streams() + err = self.get_stderr() + raise OSError( + f"Subprocess exited (exit_code:{return_code}) due to an error during iteration:\n{err}") if stop_iteration: raise stop_iteration return result - def throw(self, type, value=None, traceback=None): - if self.output.length or not self.output.done_reading: - raise type(value) + def throw(self, exc_type, value=None, traceback=None): + if self.stdout.length or not self.stdout.done_reading: + raise exc_type(value) def close(self): if self._closed: return - self._closed = True + try: self.process.terminate() except Exception: @@ -477,11 +503,11 @@ class SubprocessIOChunker(object): if self._close_input_fd: os.close(self._close_input_fd) try: - self.output.close() + self.stdout.close() except Exception: pass try: - self.error.close() + self.stderr.close() except Exception: pass try: @@ -489,6 +515,24 @@ class SubprocessIOChunker(object): except Exception: pass + self._closed = True + + def stop_streams(self): + getattr(self.stdout, 'stop', lambda: None)() + getattr(self.stderr, 'stop', lambda: None)() + + def get_stdout(self): + if self._stdout: + return self._stdout + else: + return b''.join(self.stdout) + + def get_stderr(self): + if self._stderr: + return self._stderr + else: + return b''.join(self.stderr) + def run_command(arguments, env=None): """ @@ -506,9 +550,9 @@ def run_command(arguments, env=None): if env: _opts.update({'env': env}) proc = SubprocessIOChunker(cmd, **_opts) - return ''.join(proc), ''.join(proc.error) - except (EnvironmentError, OSError) as err: - cmd = ' '.join(cmd) # human friendly CMD + return b''.join(proc), b''.join(proc.stderr) + except OSError as err: + cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD tb_err = ("Couldn't run subprocessio command (%s).\n" "Original error was:%s\n" % (cmd, err)) log.exception(tb_err) diff --git a/vcsserver/svn_diff.py b/vcsserver/svn_diff.py --- a/vcsserver/svn_diff.py +++ b/vcsserver/svn_diff.py @@ -1,7 +1,7 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2004-2009 Edgewall Software # Copyright (C) 2004-2006 Christopher Lenz +# Copyright (C) 2014-2023 RhodeCode GmbH # All rights reserved. # # This software is licensed as described in the file COPYING, which @@ -17,15 +17,15 @@ import difflib -def get_filtered_hunks(fromlines, tolines, context=None, - ignore_blank_lines=False, ignore_case=False, - ignore_space_changes=False): +def get_filtered_hunks(from_lines, to_lines, context=None, + ignore_blank_lines: bool = False, ignore_case: bool = False, + ignore_space_changes: bool = False): """Retrieve differences in the form of `difflib.SequenceMatcher` opcodes, grouped according to the ``context`` and ``ignore_*`` parameters. 
- :param fromlines: list of lines corresponding to the old content - :param tolines: list of lines corresponding to the new content + :param from_lines: list of lines corresponding to the old content + :param to_lines: list of lines corresponding to the new content :param ignore_blank_lines: differences about empty lines only are ignored :param ignore_case: upper case / lower case only differences are ignored :param ignore_space_changes: differences in amount of spaces are ignored @@ -37,27 +37,27 @@ def get_filtered_hunks(fromlines, toline to filter out the results will come straight from the SequenceMatcher. """ - hunks = get_hunks(fromlines, tolines, context) + hunks = get_hunks(from_lines, to_lines, context) if ignore_space_changes or ignore_case or ignore_blank_lines: - hunks = filter_ignorable_lines(hunks, fromlines, tolines, context, + hunks = filter_ignorable_lines(hunks, from_lines, to_lines, context, ignore_blank_lines, ignore_case, ignore_space_changes) return hunks -def get_hunks(fromlines, tolines, context=None): +def get_hunks(from_lines, to_lines, context=None): """Generator yielding grouped opcodes describing differences . See `get_filtered_hunks` for the parameter descriptions. """ - matcher = difflib.SequenceMatcher(None, fromlines, tolines) + matcher = difflib.SequenceMatcher(None, from_lines, to_lines) if context is None: return (hunk for hunk in [matcher.get_opcodes()]) else: return matcher.get_grouped_opcodes(context) -def filter_ignorable_lines(hunks, fromlines, tolines, context, +def filter_ignorable_lines(hunks, from_lines, to_lines, context, ignore_blank_lines, ignore_case, ignore_space_changes): """Detect line changes that should be ignored and emits them as @@ -67,11 +67,12 @@ def filter_ignorable_lines(hunks, fromli See `get_filtered_hunks` for the parameter descriptions. 
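Both helpers above are thin wrappers over `difflib`; a self-contained sketch of what `get_hunks` returns for a non-None ``context`` (the sample byte lines are invented for illustration)::

    import difflib

    old = [b'one\n', b'two\n', b'three\n']
    new = [b'one\n', b'2\n', b'three\n']

    matcher = difflib.SequenceMatcher(None, old, new)
    # context=None would yield a single opcode list; a numeric context
    # groups opcodes into hunk-sized clusters, as get_hunks() does.
    for group in matcher.get_grouped_opcodes(3):
        for tag, i1, i2, j1, j2 in group:
            print(tag, old[i1:i2], new[j1:j2])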
""" def is_ignorable(tag, fromlines, tolines): + if tag == 'delete' and ignore_blank_lines: - if ''.join(fromlines) == '': + if b''.join(fromlines) == b'': return True elif tag == 'insert' and ignore_blank_lines: - if ''.join(tolines) == '': + if b''.join(tolines) == b'': return True elif tag == 'replace' and (ignore_case or ignore_space_changes): if len(fromlines) != len(tolines): @@ -81,7 +82,7 @@ def filter_ignorable_lines(hunks, fromli if ignore_case: input_str = input_str.lower() if ignore_space_changes: - input_str = ' '.join(input_str.split()) + input_str = b' '.join(input_str.split()) return input_str for i in range(len(fromlines)): @@ -101,7 +102,7 @@ def filter_ignorable_lines(hunks, fromli else: prev = (tag, i1, i2, j1, j2) else: - if is_ignorable(tag, fromlines[i1:i2], tolines[j1:j2]): + if is_ignorable(tag, from_lines[i1:i2], to_lines[j1:j2]): ignored_lines = True if prev: prev = 'equal', prev[1], i2, prev[3], j2 @@ -125,10 +126,11 @@ def filter_ignorable_lines(hunks, fromli nn = n + n group = [] + def all_equal(): all(op[0] == 'equal' for op in group) for idx, (tag, i1, i2, j1, j2) in enumerate(opcodes): - if idx == 0 and tag == 'equal': # Fixup leading unchanged block + if idx == 0 and tag == 'equal': # Fixup leading unchanged block i1, j1 = max(i1, i2 - n), max(j1, j2 - n) elif tag == 'equal' and i2 - i1 > nn: group.append((tag, i1, min(i2, i1 + n), j1, @@ -140,7 +142,7 @@ def filter_ignorable_lines(hunks, fromli group.append((tag, i1, i2, j1, j2)) if group and not (len(group) == 1 and group[0][0] == 'equal'): - if group[-1][0] == 'equal': # Fixup trailing unchanged block + if group[-1][0] == 'equal': # Fixup trailing unchanged block tag, i1, i2, j1, j2 = group[-1] group[-1] = tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n) if not all_equal(): @@ -150,22 +152,30 @@ def filter_ignorable_lines(hunks, fromli yield hunk -NO_NEWLINE_AT_END = '\\ No newline at end of file' +NO_NEWLINE_AT_END = b'\\ No newline at end of file' +LINE_TERM = b'\n' -def unified_diff(fromlines, tolines, context=None, ignore_blank_lines=0, - ignore_case=0, ignore_space_changes=0, lineterm='\n'): +def unified_diff(from_lines, to_lines, context=None, ignore_blank_lines: bool = False, + ignore_case: bool = False, ignore_space_changes: bool = False, lineterm=LINE_TERM) -> bytes: """ Generator producing lines corresponding to a textual diff. See `get_filtered_hunks` for the parameter descriptions. 
""" # TODO: johbo: Check if this can be nicely integrated into the matching + if ignore_space_changes: - fromlines = [l.strip() for l in fromlines] - tolines = [l.strip() for l in tolines] + from_lines = [l.strip() for l in from_lines] + to_lines = [l.strip() for l in to_lines] - for group in get_filtered_hunks(fromlines, tolines, context, + def _hunk_range(start, length) -> bytes: + if length != 1: + return b'%d,%d' % (start, length) + else: + return b'%d' % (start,) + + for group in get_filtered_hunks(from_lines, to_lines, context, ignore_blank_lines, ignore_case, ignore_space_changes): i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4] @@ -173,37 +183,30 @@ def unified_diff(fromlines, tolines, con i1, i2 = -1, -1 # support for Add changes if j1 == 0 and j2 == 0: j1, j2 = -1, -1 # support for Delete changes - yield '@@ -%s +%s @@%s' % ( + yield b'@@ -%b +%b @@%b' % ( _hunk_range(i1 + 1, i2 - i1), _hunk_range(j1 + 1, j2 - j1), lineterm) for tag, i1, i2, j1, j2 in group: if tag == 'equal': - for line in fromlines[i1:i2]: + for line in from_lines[i1:i2]: if not line.endswith(lineterm): - yield ' ' + line + lineterm + yield b' ' + line + lineterm yield NO_NEWLINE_AT_END + lineterm else: - yield ' ' + line + yield b' ' + line else: if tag in ('replace', 'delete'): - for line in fromlines[i1:i2]: + for line in from_lines[i1:i2]: if not line.endswith(lineterm): - yield '-' + line + lineterm + yield b'-' + line + lineterm yield NO_NEWLINE_AT_END + lineterm else: - yield '-' + line + yield b'-' + line if tag in ('replace', 'insert'): - for line in tolines[j1:j2]: + for line in to_lines[j1:j2]: if not line.endswith(lineterm): - yield '+' + line + lineterm + yield b'+' + line + lineterm yield NO_NEWLINE_AT_END + lineterm else: - yield '+' + line - - -def _hunk_range(start, length): - if length != 1: - return '%d,%d' % (start, length) - else: - return '%d' % (start, ) + yield b'+' + line diff --git a/vcsserver/tests/__init__.py b/vcsserver/tests/__init__.py --- a/vcsserver/tests/__init__.py +++ b/vcsserver/tests/__init__.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/vcsserver/tests/fixture.py b/vcsserver/tests/fixture.py --- a/vcsserver/tests/fixture.py +++ b/vcsserver/tests/fixture.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -18,11 +18,10 @@ import os import shutil import tempfile - -import configobj +import configparser -class ContextINI(object): +class ContextINI: """ Allows to create a new test.ini file as a copy of existing one with edited data. If existing file is not present, it creates a new one. 
Example usage:: @@ -53,17 +52,17 @@ class ContextINI(object): with open(self.new_path, 'wb'): pass - config = configobj.ConfigObj( - self.new_path, file_error=True, write_empty_values=True) + parser = configparser.ConfigParser() + parser.read(self.ini_file_path) for data in self.ini_params: - section, ini_params = data.items()[0] - key, val = ini_params.items()[0] - if section not in config: - config[section] = {} - config[section][key] = val - - config.write() + section, ini_params = list(data.items())[0] + key, val = list(ini_params.items())[0] + if section not in parser: + parser[section] = {} + parser[section][key] = val + with open(self.ini_file_path, 'w') as f: + parser.write(f) return self.new_path def __exit__(self, exc_type, exc_val, exc_tb): diff --git a/vcsserver/tests/test_git.py b/vcsserver/tests/test_git.py --- a/vcsserver/tests/test_git.py +++ b/vcsserver/tests/test_git.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,8 +21,7 @@ import pytest import dulwich.errors from mock import Mock, patch -from vcsserver import git - +from vcsserver.remote import git_remote SAMPLE_REFS = { 'HEAD': 'fd627b9e0dd80b47be81af07c4a98518244ed2f7', @@ -34,26 +33,26 @@ SAMPLE_REFS = { @pytest.fixture -def git_remote(): +def git_remote_fix(): """ A GitRemote instance with a mock factory. """ factory = Mock() - remote = git.GitRemote(factory) + remote = git_remote.GitRemote(factory) return remote -def test_discover_git_version(git_remote): - version = git_remote.discover_git_version() +def test_discover_git_version(git_remote_fix): + version = git_remote_fix.discover_git_version() assert version -class TestGitFetch(object): - def setup(self): +class TestGitFetch: + def setup_method(self): self.mock_repo = Mock() factory = Mock() factory.repo = Mock(return_value=self.mock_repo) - self.remote_git = git.GitRemote(factory) + self.remote_git = git_remote.GitRemote(factory) def test_fetches_all_when_no_commit_ids_specified(self): def side_effect(determine_wants, *args, **kwargs): @@ -67,8 +66,8 @@ class TestGitFetch(object): def test_fetches_specified_commits(self): selected_refs = { - 'refs/tags/v0.1.8': '74ebce002c088b8a5ecf40073db09375515ecd68', - 'refs/tags/v0.1.3': '5a3a8fb005554692b16e21dee62bf02667d8dc3e', + 'refs/tags/v0.1.8': b'74ebce002c088b8a5ecf40073db09375515ecd68', + 'refs/tags/v0.1.3': b'5a3a8fb005554692b16e21dee62bf02667d8dc3e', } def side_effect(determine_wants, *args, **kwargs): @@ -80,41 +79,41 @@ class TestGitFetch(object): mock_fetch.side_effect = side_effect self.remote_git.pull( wire={}, url='/tmp/', apply_refs=False, - refs=selected_refs.keys()) + refs=list(selected_refs.keys())) determine_wants = self.mock_repo.object_store.determine_wants_all assert determine_wants.call_count == 0 def test_get_remote_refs(self): factory = Mock() - remote_git = git.GitRemote(factory) - url = 'http://example.com/test/test.git' + remote_git = git_remote.GitRemote(factory) + url = 'https://example.com/test/test.git' sample_refs = { 'refs/tags/v0.1.8': '74ebce002c088b8a5ecf40073db09375515ecd68', 'refs/tags/v0.1.3': '5a3a8fb005554692b16e21dee62bf02667d8dc3e', } - with patch('vcsserver.git.Repo', create=False) as mock_repo: + with patch('vcsserver.remote.git_remote.Repo', create=False) as mock_repo: 
mock_repo().get_refs.return_value = sample_refs remote_refs = remote_git.get_remote_refs(wire={}, url=url) mock_repo().get_refs.assert_called_once_with() assert remote_refs == sample_refs -class TestReraiseSafeExceptions(object): +class TestReraiseSafeExceptions: def test_method_decorated_with_reraise_safe_exceptions(self): factory = Mock() - git_remote = git.GitRemote(factory) + git_remote_instance = git_remote.GitRemote(factory) def fake_function(): return None - decorator = git.reraise_safe_exceptions(fake_function) + decorator = git_remote.reraise_safe_exceptions(fake_function) - methods = inspect.getmembers(git_remote, predicate=inspect.ismethod) + methods = inspect.getmembers(git_remote_instance, predicate=inspect.ismethod) for method_name, method in methods: if not method_name.startswith('_') and method_name not in ['vcsserver_invalidate_cache']: - assert method.im_func.__code__ == decorator.__code__ + assert method.__func__.__code__ == decorator.__code__ @pytest.mark.parametrize('side_effect, expected_type', [ (dulwich.errors.ChecksumMismatch('0000000', 'deadbeef'), 'lookup'), @@ -125,7 +124,7 @@ class TestReraiseSafeExceptions(object): (dulwich.errors.UnexpectedCommandError('test-cmd'), 'error'), ]) def test_safe_exceptions_reraised(self, side_effect, expected_type): - @git.reraise_safe_exceptions + @git_remote.reraise_safe_exceptions def fake_method(): raise side_effect @@ -135,26 +134,29 @@ class TestReraiseSafeExceptions(object): assert exc_info.value._vcs_kind == expected_type -class TestDulwichRepoWrapper(object): +class TestDulwichRepoWrapper: def test_calls_close_on_delete(self): isdir_patcher = patch('dulwich.repo.os.path.isdir', return_value=True) - with isdir_patcher: - repo = git.Repo('/tmp/abcde') - with patch.object(git.DulwichRepo, 'close') as close_mock: - del repo - close_mock.assert_called_once_with() + with patch.object(git_remote.Repo, 'close') as close_mock: + with isdir_patcher: + repo = git_remote.Repo('/tmp/abcde') + assert repo is not None + repo.__del__() + # can't use del repo as in python3 this isn't always calling .__del__() + + close_mock.assert_called_once_with() -class TestGitFactory(object): +class TestGitFactory: def test_create_repo_returns_dulwich_wrapper(self): with patch('vcsserver.lib.rc_cache.region_meta.dogpile_cache_regions') as mock: mock.side_effect = {'repo_objects': ''} - factory = git.GitFactory() + factory = git_remote.GitFactory() wire = { 'path': '/tmp/abcde' } isdir_patcher = patch('dulwich.repo.os.path.isdir', return_value=True) with isdir_patcher: result = factory._create_repo(wire, True) - assert isinstance(result, git.Repo) + assert isinstance(result, git_remote.Repo) diff --git a/vcsserver/tests/test_hg.py b/vcsserver/tests/test_hg.py --- a/vcsserver/tests/test_hg.py +++ b/vcsserver/tests/test_hg.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. 
-# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,21 +21,22 @@ import traceback import pytest from mercurial.error import LookupError -from mock import Mock, MagicMock, patch +from mock import Mock, patch -from vcsserver import exceptions, hg, hgcompat +from vcsserver import exceptions, hgcompat +from vcsserver.remote import hg_remote -class TestDiff(object): +class TestDiff: def test_raising_safe_exception_when_lookup_failed(self): factory = Mock() - hg_remote = hg.HgRemote(factory) + hg_remote_instance = hg_remote.HgRemote(factory) with patch('mercurial.patch.diff') as diff_mock: - diff_mock.side_effect = LookupError( - 'deadbeef', 'index', 'message') + diff_mock.side_effect = LookupError(b'deadbeef', b'index', b'message') + with pytest.raises(Exception) as exc_info: - hg_remote.diff( + hg_remote_instance.diff( wire={}, commit_id_1='deadbeef', commit_id_2='deadbee1', file_filter=None, opt_git=True, opt_ignorews=True, context=3) @@ -43,26 +44,28 @@ class TestDiff(object): assert exc_info.value._vcs_kind == 'lookup' -class TestReraiseSafeExceptions(object): +class TestReraiseSafeExceptions: + original_traceback = None + def test_method_decorated_with_reraise_safe_exceptions(self): factory = Mock() - hg_remote = hg.HgRemote(factory) - methods = inspect.getmembers(hg_remote, predicate=inspect.ismethod) - decorator = hg.reraise_safe_exceptions(None) + hg_remote_instance = hg_remote.HgRemote(factory) + methods = inspect.getmembers(hg_remote_instance, predicate=inspect.ismethod) + decorator = hg_remote.reraise_safe_exceptions(None) for method_name, method in methods: if not method_name.startswith('_') and method_name not in ['vcsserver_invalidate_cache']: - assert method.im_func.__code__ == decorator.__code__ + assert method.__func__.__code__ == decorator.__code__ @pytest.mark.parametrize('side_effect, expected_type', [ - (hgcompat.Abort(), 'abort'), - (hgcompat.InterventionRequired(), 'abort'), + (hgcompat.Abort(b'failed-abort'), 'abort'), + (hgcompat.InterventionRequired(b'intervention-required'), 'abort'), (hgcompat.RepoLookupError(), 'lookup'), - (hgcompat.LookupError('deadbeef', 'index', 'message'), 'lookup'), + (hgcompat.LookupError(b'deadbeef', b'index', b'message'), 'lookup'), (hgcompat.RepoError(), 'error'), (hgcompat.RequirementError(), 'requirement'), ]) def test_safe_exceptions_reraised(self, side_effect, expected_type): - @hg.reraise_safe_exceptions + @hg_remote.reraise_safe_exceptions def fake_method(): raise side_effect @@ -72,15 +75,16 @@ class TestReraiseSafeExceptions(object): assert exc_info.value._vcs_kind == expected_type def test_keeps_original_traceback(self): - @hg.reraise_safe_exceptions + + @hg_remote.reraise_safe_exceptions def fake_method(): try: - raise hgcompat.Abort() + raise hgcompat.Abort(b'test-abort') except: - self.original_traceback = traceback.format_tb( - sys.exc_info()[2]) + self.original_traceback = traceback.format_tb(sys.exc_info()[2]) raise + new_traceback = None try: fake_method() except Exception: @@ -89,8 +93,8 @@ class TestReraiseSafeExceptions(object): new_traceback_tail = new_traceback[-len(self.original_traceback):] assert new_traceback_tail == self.original_traceback - def test_maps_unknow_exceptions_to_unhandled(self): - @hg.reraise_safe_exceptions + def test_maps_unknown_exceptions_to_unhandled(self): + @hg_remote.reraise_safe_exceptions def stub_method(): 
raise ValueError('stub') @@ -99,7 +103,7 @@ class TestReraiseSafeExceptions(object): assert exc_info.value._vcs_kind == 'unhandled' def test_does_not_map_known_exceptions(self): - @hg.reraise_safe_exceptions + @hg_remote.reraise_safe_exceptions def stub_method(): raise exceptions.LookupException()('stub') diff --git a/vcsserver/tests/test_hgpatches.py b/vcsserver/tests/test_hgpatches.py --- a/vcsserver/tests/test_hgpatches.py +++ b/vcsserver/tests/test_hgpatches.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,14 +21,14 @@ import pytest from vcsserver import hgcompat, hgpatches -LARGEFILES_CAPABILITY = 'largefiles=serve' +LARGEFILES_CAPABILITY = b'largefiles=serve' def test_patch_largefiles_capabilities_applies_patch( patched_capabilities): lfproto = hgcompat.largefiles.proto hgpatches.patch_largefiles_capabilities() - assert lfproto._capabilities.func_name == '_dynamic_capabilities' + assert lfproto._capabilities.__name__ == '_dynamic_capabilities' def test_dynamic_capabilities_uses_original_function_if_not_enabled( @@ -72,11 +72,6 @@ def test_dynamic_capabilities_uses_large assert LARGEFILES_CAPABILITY in caps -def test_hgsubversion_import(): - from hgsubversion import svnrepo - assert svnrepo - - @pytest.fixture def patched_capabilities(request): """ diff --git a/vcsserver/tests/test_hooks.py b/vcsserver/tests/test_hooks.py --- a/vcsserver/tests/test_hooks.py +++ b/vcsserver/tests/test_hooks.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. 
-# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,17 +15,18 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -import contextlib -import io import threading -from BaseHTTPServer import BaseHTTPRequestHandler -from SocketServer import TCPServer +import msgpack + +from http.server import BaseHTTPRequestHandler +from socketserver import TCPServer import mercurial.ui import mock import pytest -import simplejson as json +from vcsserver.hooks import HooksHttpClient +from vcsserver.lib.rc_json import json from vcsserver import hooks @@ -44,7 +45,7 @@ def get_hg_ui(extras=None): } required_extras.update(extras) hg_ui = mercurial.ui.ui() - hg_ui.setconfig('rhodecode', 'RC_SCM_DATA', json.dumps(required_extras)) + hg_ui.setconfig(b'rhodecode', b'RC_SCM_DATA', json.dumps(required_extras)) return hg_ui @@ -67,6 +68,7 @@ def test_git_post_receive_is_disabled(): def test_git_post_receive_calls_repo_size(): extras = {'hooks': ['push', 'repo_size']} + with mock.patch.object(hooks, '_call_hook') as call_hook_mock: hooks.git_post_receive( None, '', {'RC_SCM_DATA': json.dumps(extras)}) @@ -81,6 +83,7 @@ def test_git_post_receive_calls_repo_siz def test_git_post_receive_does_not_call_disabled_repo_size(): extras = {'hooks': ['push']} + with mock.patch.object(hooks, '_call_hook') as call_hook_mock: hooks.git_post_receive( None, '', {'RC_SCM_DATA': json.dumps(extras)}) @@ -118,7 +121,7 @@ def test_git_post_pull_is_disabled(): hooks.git_post_pull({'hooks': ['push']}) == hooks.HookResponse(0, '')) -class TestGetHooksClient(object): +class TestGetHooksClient: def test_returns_http_client_when_protocol_matches(self): hooks_uri = 'localhost:8000' @@ -129,38 +132,35 @@ class TestGetHooksClient(object): assert isinstance(result, hooks.HooksHttpClient) assert result.hooks_uri == hooks_uri - def test_returns_dummy_client_when_hooks_uri_not_specified(self): - fake_module = mock.Mock() - import_patcher = mock.patch.object( - hooks.importlib, 'import_module', return_value=fake_module) - fake_module_name = 'fake.module' - with import_patcher as import_mock: - result = hooks._get_hooks_client( - {'hooks_module': fake_module_name}) - - import_mock.assert_called_once_with(fake_module_name) - assert isinstance(result, hooks.HooksDummyClient) - assert result._hooks_module == fake_module + def test_return_celery_client_when_queue_and_backend_provided(self): + task_queue = 'redis://task_queue:0' + task_backend = task_queue + result = hooks._get_hooks_client({ + 'task_queue': task_queue, + 'task_backend': task_backend + }) + assert isinstance(result, hooks.HooksCeleryClient) -class TestHooksHttpClient(object): +class TestHooksHttpClient: def test_init_sets_hooks_uri(self): uri = 'localhost:3000' client = hooks.HooksHttpClient(uri) assert client.hooks_uri == uri - def test_serialize_returns_json_string(self): + def test_serialize_returns_serialized_string(self): client = hooks.HooksHttpClient('localhost:3000') hook_name = 'test' extras = { 'first': 1, 'second': 'two' } - result = client._serialize(hook_name, extras) - expected_result = json.dumps({ + hooks_proto, result = client._serialize(hook_name, extras) + expected_result = msgpack.packb({ 'method': hook_name, - 'extras': extras + 'extras': extras, }) + assert hooks_proto == {'rc-hooks-protocol': 
'msgpack.v1', 'Connection': 'keep-alive'} assert result == expected_result def test_call_queries_http_server(self, http_mirror): @@ -171,38 +171,13 @@ class TestHooksHttpClient(object): 'second': 'two' } result = client(hook_name, extras) - expected_result = { + expected_result = msgpack.unpackb(msgpack.packb({ 'method': hook_name, 'extras': extras - } + }), raw=False) assert result == expected_result -class TestHooksDummyClient(object): - def test_init_imports_hooks_module(self): - hooks_module_name = 'rhodecode.fake.module' - hooks_module = mock.MagicMock() - - import_patcher = mock.patch.object( - hooks.importlib, 'import_module', return_value=hooks_module) - with import_patcher as import_mock: - client = hooks.HooksDummyClient(hooks_module_name) - import_mock.assert_called_once_with(hooks_module_name) - assert client._hooks_module == hooks_module - - def test_call_returns_hook_result(self): - hooks_module_name = 'rhodecode.fake.module' - hooks_module = mock.MagicMock() - import_patcher = mock.patch.object( - hooks.importlib, 'import_module', return_value=hooks_module) - with import_patcher: - client = hooks.HooksDummyClient(hooks_module_name) - - result = client('post_push', {}) - hooks_module.Hooks.assert_called_once_with() - assert result == hooks_module.Hooks().__enter__().post_push() - - @pytest.fixture def http_mirror(request): server = MirrorHttpServer() @@ -211,15 +186,16 @@ def http_mirror(request): class MirrorHttpHandler(BaseHTTPRequestHandler): + def do_POST(self): length = int(self.headers['Content-Length']) - body = self.rfile.read(length).decode('utf-8') + body = self.rfile.read(length) self.send_response(200) self.end_headers() self.wfile.write(body) -class MirrorHttpServer(object): +class MirrorHttpServer: ip_address = '127.0.0.1' port = 0 @@ -239,3 +215,43 @@ class MirrorHttpServer(object): @property def uri(self): return '{}:{}'.format(self.ip_address, self.port) + + +def test_hooks_http_client_init(): + hooks_uri = 'http://localhost:8000' + client = HooksHttpClient(hooks_uri) + assert client.hooks_uri == hooks_uri + + +def test_hooks_http_client_call(): + hooks_uri = 'http://localhost:8000' + + method = 'test_method' + extras = {'key': 'value'} + + with \ + mock.patch('http.client.HTTPConnection') as mock_connection,\ + mock.patch('msgpack.load') as mock_load: + + client = HooksHttpClient(hooks_uri) + + mock_load.return_value = {'result': 'success'} + response = mock.MagicMock() + response.status = 200 + mock_connection.request.side_effect = None + mock_connection.getresponse.return_value = response + + result = client(method, extras) + + mock_connection.assert_called_with(hooks_uri) + mock_connection.return_value.request.assert_called_once() + assert result == {'result': 'success'} + + +def test_hooks_http_client_serialize(): + method = 'test_method' + extras = {'key': 'value'} + headers, body = HooksHttpClient._serialize(method, extras) + + assert headers == {'rc-hooks-protocol': HooksHttpClient.proto, 'Connection': 'keep-alive'} + assert msgpack.unpackb(body) == {'method': method, 'extras': extras} diff --git a/vcsserver/tests/test_http_performance.py b/vcsserver/tests/test_http_performance.py --- a/vcsserver/tests/test_http_performance.py +++ b/vcsserver/tests/test_http_performance.py @@ -14,7 +14,10 @@ def vcs_app(): 'dev.use_echo_app': 'true', 'locale': 'en_US.UTF-8', } - vcs_app = main({}, **stub_settings) + stub_global_conf = { + '__file__': '' + } + vcs_app = main(stub_global_conf, **stub_settings) app = webtest.TestApp(vcs_app) return app @@ -27,13 +30,13 
@@ def data(): def test_http_app_streaming_with_data(data, repeat, vcs_app): app = vcs_app - for x in xrange(repeat / 10): + for x in range(repeat // 10): response = app.post('/stream/git/', params=data) assert response.status_code == 200 def test_http_app_streaming_no_data(repeat, vcs_app): app = vcs_app - for x in xrange(repeat / 10): + for x in range(repeat // 10): response = app.post('/stream/git/') assert response.status_code == 200 diff --git a/vcsserver/tests/test_install_hooks.py b/vcsserver/tests/test_install_hooks.py --- a/vcsserver/tests/test_install_hooks.py +++ b/vcsserver/tests/test_install_hooks.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,16 +22,18 @@ import pytest import vcsserver import tempfile from vcsserver import hook_utils +from vcsserver.hook_utils import set_permissions_if_needed, HOOKS_DIR_MODE, HOOKS_FILE_MODE from vcsserver.tests.fixture import no_newline_id_generator +from vcsserver.str_utils import safe_bytes from vcsserver.utils import AttributeDict -class TestCheckRhodecodeHook(object): +class TestCheckRhodecodeHook: def test_returns_false_when_hook_file_is_wrong_found(self, tmpdir): hook = os.path.join(str(tmpdir), 'fake_hook_file.py') with open(hook, 'wb') as f: - f.write('dummy test') + f.write(b'dummy test') result = hook_utils.check_rhodecode_hook(hook) assert result is False @@ -47,23 +49,32 @@ class TestCheckRhodecodeHook(object): def test_signatures(self, file_content, expected_result, tmpdir): hook = os.path.join(str(tmpdir), 'fake_hook_file_1.py') with open(hook, 'wb') as f: - f.write(file_content) + f.write(safe_bytes(file_content)) result = hook_utils.check_rhodecode_hook(hook) assert result is expected_result -class BaseInstallHooks(object): +class BaseInstallHooks: HOOK_FILES = () + def _check_hook_file_dir_mode(self, file_path): + dir_path = os.path.dirname(file_path) + assert os.path.exists(dir_path), f'dir {file_path} missing' + stat_info = os.stat(dir_path) + + file_mode = stat.S_IMODE(stat_info.st_mode) + expected_mode = int(HOOKS_DIR_MODE) + assert expected_mode == file_mode, f'expected mode: {oct(expected_mode)} got: {oct(file_mode)} for {dir_path}' + def _check_hook_file_mode(self, file_path): - assert os.path.exists(file_path), 'path %s missing' % file_path + assert os.path.exists(file_path), f'path {file_path} missing' stat_info = os.stat(file_path) file_mode = stat.S_IMODE(stat_info.st_mode) - expected_mode = int('755', 8) - assert expected_mode == file_mode + expected_mode = int(HOOKS_FILE_MODE) + assert expected_mode == file_mode, f'expected mode: {oct(expected_mode)} got: {oct(file_mode)} for {file_path}' def _check_hook_file_content(self, file_path, executable): executable = executable or sys.executable @@ -71,8 +82,7 @@ class BaseInstallHooks(object): content = hook_file.read() expected_env = '#!{}'.format(executable) - expected_rc_version = "\nRC_HOOK_VER = '{}'\n".format( - vcsserver.__version__) + expected_rc_version = "\nRC_HOOK_VER = '{}'\n".format(vcsserver.get_version()) assert content.strip().startswith(expected_env) assert expected_rc_version in content @@ -102,6 +112,8 @@ class BaseInstallHooks(object): file_path = os.path.join(repo_path, 'hooks', file_name) else: file_path = os.path.join(repo_path, '.git', 'hooks', file_name) + + 
self._check_hook_file_dir_mode(file_path) self._check_hook_file_mode(file_path) self._check_hook_file_content(file_path, sys.executable) @@ -204,3 +216,74 @@ class TestInstallSvnHooks(BaseInstallHoo repo.path, force_create=True) assert result self.check_hooks(repo.path, ) + + +def create_test_file(filename): + """Utility function to create a test file.""" + with open(filename, 'w') as f: + f.write("Test file") + + +def remove_test_file(filename): + """Utility function to remove a test file.""" + if os.path.exists(filename): + os.remove(filename) + + +@pytest.fixture +def test_file(): + filename = 'test_file.txt' + create_test_file(filename) + yield filename + remove_test_file(filename) + + +def test_increase_permissions(test_file): + # Set initial lower permissions + initial_perms = 0o644 + os.chmod(test_file, initial_perms) + + # Set higher permissions + new_perms = 0o666 + set_permissions_if_needed(test_file, new_perms) + + # Check if permissions were updated + assert (os.stat(test_file).st_mode & 0o777) == new_perms + + +def test_no_permission_change_needed(test_file): + # Set initial permissions + initial_perms = 0o666 + os.chmod(test_file, initial_perms) + + # Attempt to set the same permissions + set_permissions_if_needed(test_file, initial_perms) + + # Check if permissions were unchanged + assert (os.stat(test_file).st_mode & 0o777) == initial_perms + + +def test_no_permission_reduction(test_file): + # Set initial higher permissions + initial_perms = 0o666 + os.chmod(test_file, initial_perms) + + # Attempt to set lower permissions + lower_perms = 0o644 + set_permissions_if_needed(test_file, lower_perms) + + # Check if permissions were not reduced + assert (os.stat(test_file).st_mode & 0o777) == initial_perms + + +def test_no_permission_reduction_when_on_777(test_file): + # Set initial higher permissions + initial_perms = 0o777 + os.chmod(test_file, initial_perms) + + # Attempt to set lower permissions + lower_perms = 0o755 + set_permissions_if_needed(test_file, lower_perms) + + # Check if permissions were not reduced + assert (os.stat(test_file).st_mode & 0o777) == initial_perms diff --git a/vcsserver/tests/test_main_http.py b/vcsserver/tests/test_main_http.py --- a/vcsserver/tests/test_main_http.py +++ b/vcsserver/tests/test_main_http.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. 
-# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -25,7 +25,7 @@ from vcsserver.base import obfuscate_qs @mock.patch('vcsserver.http_main.VCS', mock.Mock()) @mock.patch('vcsserver.hgpatches.patch_largefiles_capabilities') def test_applies_largefiles_patch(patch_largefiles_capabilities): - http_main.main({}) + http_main.main({'__file__': ''}) patch_largefiles_capabilities.assert_called_once_with() @@ -35,15 +35,14 @@ def test_applies_largefiles_patch(patch_ 'vcsserver.hgpatches.patch_largefiles_capabilities', mock.Mock(side_effect=Exception("Must not be called"))) def test_applies_largefiles_patch_only_if_mercurial_is_available(): - http_main.main({}) + http_main.main({'__file__': ''}) @pytest.mark.parametrize('given, expected', [ ('bad', 'bad'), ('query&foo=bar', 'query&foo=bar'), ('equery&auth_token=bar', 'equery&auth_token=*****'), - ('a;b;c;query&foo=bar&auth_token=secret', - 'a&b&c&query&foo=bar&auth_token=*****'), + ('a;b;c;query&foo=bar&auth_token=secret', 'a;b;c;query&foo=bar&auth_token=*****'), ('', ''), (None, None), ('foo=bar', 'foo=bar'), diff --git a/vcsserver/tests/test_pygrack.py b/vcsserver/tests/test_pygrack.py --- a/vcsserver/tests/test_pygrack.py +++ b/vcsserver/tests/test_pygrack.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -16,6 +16,7 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA import io +import more_itertools import dulwich.protocol import mock @@ -25,7 +26,7 @@ import webtest from vcsserver import hooks, pygrack -# pylint: disable=redefined-outer-name,protected-access +from vcsserver.str_utils import ascii_bytes @pytest.fixture() @@ -71,12 +72,11 @@ def test_invalid_endpoint_returns_403(py def test_pre_pull_hook_fails_with_sideband(pygrack_app, sideband): request = ''.join([ '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ', - 'multi_ack %s ofs-delta\n' % sideband, + f'multi_ack {sideband} ofs-delta\n', '0000', '0009done\n', ]) - with mock.patch('vcsserver.hooks.git_pre_pull', - return_value=hooks.HookResponse(1, 'foo')): + with mock.patch('vcsserver.hooks.git_pre_pull', return_value=hooks.HookResponse(1, 'foo')): response = pygrack_app.post( '/git-upload-pack', params=request, content_type='application/x-git-upload-pack') @@ -86,8 +86,8 @@ def test_pre_pull_hook_fails_with_sideba packets = list(proto.read_pkt_seq()) expected_packets = [ - 'NAK\n', '\x02foo', '\x02Pre pull hook failed: aborting\n', - '\x01' + pygrack.GitRepository.EMPTY_PACK, + b'NAK\n', b'\x02foo', b'\x02Pre pull hook failed: aborting\n', + b'\x01' + pygrack.GitRepository.EMPTY_PACK, ] assert packets == expected_packets @@ -115,12 +115,16 @@ def test_pull_has_hook_messages(pygrack_ '0000', '0009done\n', ]) + + pre_pull = 'pre_pull_output' + post_pull = 'post_pull_output' + with mock.patch('vcsserver.hooks.git_pre_pull', - return_value=hooks.HookResponse(0, 'foo')): + return_value=hooks.HookResponse(0, pre_pull)): with mock.patch('vcsserver.hooks.git_post_pull', - return_value=hooks.HookResponse(1, 'bar')): + return_value=hooks.HookResponse(1, post_pull)): with mock.patch('vcsserver.subprocessio.SubprocessIOChunker', - 
return_value=['0008NAK\n0009subp\n0000']): + return_value=more_itertools.always_iterable([b'0008NAK\n0009subp\n0000'])): response = pygrack_app.post( '/git-upload-pack', params=request, content_type='application/x-git-upload-pack') @@ -129,13 +133,17 @@ def test_pull_has_hook_messages(pygrack_ proto = dulwich.protocol.Protocol(data.read, None) packets = list(proto.read_pkt_seq()) - assert packets == ['NAK\n', '\x02foo', 'subp\n', '\x02bar'] + assert packets == [b'NAK\n', + # pre-pull only outputs if IT FAILS as in != 0 ret code + #b'\x02pre_pull_output', + b'subp\n', + b'\x02post_pull_output'] def test_get_want_capabilities(pygrack_instance): data = io.BytesIO( - '0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' + - 'multi_ack side-band-64k ofs-delta\n00000009done\n') + b'0054want 74730d410fcb6603ace96f1dc55ea6196122532d ' + + b'multi_ack side-band-64k ofs-delta\n00000009done\n') request = webob.Request({ 'wsgi.input': data, @@ -146,20 +154,20 @@ def test_get_want_capabilities(pygrack_i capabilities = pygrack_instance._get_want_capabilities(request) assert capabilities == frozenset( - ('ofs-delta', 'multi_ack', 'side-band-64k')) + (b'ofs-delta', b'multi_ack', b'side-band-64k')) assert data.tell() == 0 @pytest.mark.parametrize('data,capabilities,expected', [ ('foo', [], []), - ('', ['side-band-64k'], []), - ('', ['side-band'], []), - ('foo', ['side-band-64k'], ['0008\x02foo']), - ('foo', ['side-band'], ['0008\x02foo']), - ('f'*1000, ['side-band-64k'], ['03ed\x02' + 'f' * 1000]), - ('f'*1000, ['side-band'], ['03e8\x02' + 'f' * 995, '000a\x02fffff']), - ('f'*65520, ['side-band-64k'], ['fff0\x02' + 'f' * 65515, '000a\x02fffff']), - ('f'*65520, ['side-band'], ['03e8\x02' + 'f' * 995] * 65 + ['0352\x02' + 'f' * 845]), + ('', [pygrack.CAPABILITY_SIDE_BAND_64K], []), + ('', [pygrack.CAPABILITY_SIDE_BAND], []), + ('foo', [pygrack.CAPABILITY_SIDE_BAND_64K], [b'0008\x02foo']), + ('foo', [pygrack.CAPABILITY_SIDE_BAND], [b'0008\x02foo']), + ('f'*1000, [pygrack.CAPABILITY_SIDE_BAND_64K], [b'03ed\x02' + b'f' * 1000]), + ('f'*1000, [pygrack.CAPABILITY_SIDE_BAND], [b'03e8\x02' + b'f' * 995, b'000a\x02fffff']), + ('f'*65520, [pygrack.CAPABILITY_SIDE_BAND_64K], [b'fff0\x02' + b'f' * 65515, b'000a\x02fffff']), + ('f'*65520, [pygrack.CAPABILITY_SIDE_BAND], [b'03e8\x02' + b'f' * 995] * 65 + [b'0352\x02' + b'f' * 845]), ], ids=[ 'foo-empty', 'empty-64k', 'empty', @@ -174,54 +182,59 @@ def test_get_messages(pygrack_instance, @pytest.mark.parametrize('response,capabilities,pre_pull_messages,post_pull_messages', [ # Unexpected response - ('unexpected_response', ['side-band-64k'], 'foo', 'bar'), + ([b'unexpected_response[no_initial_header]'], [pygrack.CAPABILITY_SIDE_BAND_64K], 'foo', 'bar'), # No sideband - ('no-sideband', [], 'foo', 'bar'), + ([b'no-sideband'], [], 'foo', 'bar'), # No messages - ('no-messages', ['side-band-64k'], '', ''), + ([b'no-messages'], [pygrack.CAPABILITY_SIDE_BAND_64K], '', ''), ]) def test_inject_messages_to_response_nothing_to_do( - pygrack_instance, response, capabilities, pre_pull_messages, - post_pull_messages): - new_response = pygrack_instance._inject_messages_to_response( - response, capabilities, pre_pull_messages, post_pull_messages) + pygrack_instance, response, capabilities, pre_pull_messages, post_pull_messages): - assert new_response == response + new_response = pygrack_instance._build_post_pull_response( + more_itertools.always_iterable(response), capabilities, pre_pull_messages, post_pull_messages) + + assert list(new_response) == response 
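The expected byte strings in these fixtures follow git's pkt-line side-band framing: four hex digits giving the total packet length (header included), one channel byte (\x01 for pack data, \x02 for progress messages), then the payload. A tiny illustrative encoder, not part of pygrack::

    def pkt_line(channel: int, payload: bytes) -> bytes:
        # the length field counts its own 4 hex digits plus the channel byte
        total = 4 + 1 + len(payload)
        return b'%04x' % total + bytes([channel]) + payload

    assert pkt_line(2, b'foo') == b'0008\x02foo'  # matches the fixtures above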
@pytest.mark.parametrize('capabilities', [ - ['side-band'], - ['side-band-64k'], + [pygrack.CAPABILITY_SIDE_BAND], + [pygrack.CAPABILITY_SIDE_BAND_64K], ]) -def test_inject_messages_to_response_single_element(pygrack_instance, - capabilities): - response = ['0008NAK\n0009subp\n0000'] - new_response = pygrack_instance._inject_messages_to_response( - response, capabilities, 'foo', 'bar') +def test_inject_messages_to_response_single_element(pygrack_instance, capabilities): + response = [b'0008NAK\n0009subp\n0000'] + new_response = pygrack_instance._build_post_pull_response( + more_itertools.always_iterable(response), capabilities, 'foo', 'bar') - expected_response = [ - '0008NAK\n', '0008\x02foo', '0009subp\n', '0008\x02bar', '0000'] + expected_response = b''.join([ + b'0008NAK\n', + b'0008\x02foo', + b'0009subp\n', + b'0008\x02bar', + b'0000']) - assert new_response == expected_response + assert b''.join(new_response) == expected_response @pytest.mark.parametrize('capabilities', [ - ['side-band'], - ['side-band-64k'], + [pygrack.CAPABILITY_SIDE_BAND], + [pygrack.CAPABILITY_SIDE_BAND_64K], ]) -def test_inject_messages_to_response_multi_element(pygrack_instance, - capabilities): - response = [ - '0008NAK\n000asubp1\n', '000asubp2\n', '000asubp3\n', '000asubp4\n0000'] - new_response = pygrack_instance._inject_messages_to_response( - response, capabilities, 'foo', 'bar') +def test_inject_messages_to_response_multi_element(pygrack_instance, capabilities): + response = more_itertools.always_iterable([ + b'0008NAK\n000asubp1\n', b'000asubp2\n', b'000asubp3\n', b'000asubp4\n0000' + ]) + new_response = pygrack_instance._build_post_pull_response(response, capabilities, 'foo', 'bar') - expected_response = [ - '0008NAK\n', '0008\x02foo', '000asubp1\n', '000asubp2\n', '000asubp3\n', - '000asubp4\n', '0008\x02bar', '0000' - ] + expected_response = b''.join([ + b'0008NAK\n', + b'0008\x02foo', + b'000asubp1\n', b'000asubp2\n', b'000asubp3\n', b'000asubp4\n', + b'0008\x02bar', + b'0000' + ]) - assert new_response == expected_response + assert b''.join(new_response) == expected_response def test_build_failed_pre_pull_response_no_sideband(pygrack_instance): @@ -231,19 +244,52 @@ def test_build_failed_pre_pull_response_ @pytest.mark.parametrize('capabilities', [ - ['side-band'], - ['side-band-64k'], - ['side-band-64k', 'no-progress'], + [pygrack.CAPABILITY_SIDE_BAND], + [pygrack.CAPABILITY_SIDE_BAND_64K], + [pygrack.CAPABILITY_SIDE_BAND_64K, b'no-progress'], ]) def test_build_failed_pre_pull_response(pygrack_instance, capabilities): - response = pygrack_instance._build_failed_pre_pull_response( - capabilities, 'foo') + response = pygrack_instance._build_failed_pre_pull_response(capabilities, 'foo') expected_response = [ - '0008NAK\n', '0008\x02foo', '0024\x02Pre pull hook failed: aborting\n', - '%04x\x01%s' % (len(pygrack.GitRepository.EMPTY_PACK) + 5, - pygrack.GitRepository.EMPTY_PACK), - '0000', + b'0008NAK\n', b'0008\x02foo', b'0024\x02Pre pull hook failed: aborting\n', + b'%04x\x01%s' % (len(pygrack.GitRepository.EMPTY_PACK) + 5, pygrack.GitRepository.EMPTY_PACK), + pygrack.GitRepository.FLUSH_PACKET, ] assert response == expected_response + + +def test_inject_messages_to_response_generator(pygrack_instance): + + def response_generator(): + response = [ + # protocol start + b'0008NAK\n', + ] + response += [ascii_bytes(f'000asubp{x}\n') for x in range(1000)] + response += [ + # protocol end + pygrack.GitRepository.FLUSH_PACKET + ] + for elem in response: + yield elem + + new_response = 
pygrack_instance._build_post_pull_response( + response_generator(), [pygrack.CAPABILITY_SIDE_BAND_64K, b'no-progress'], 'PRE_PULL_MSG\n', 'POST_PULL_MSG\n') + + assert iter(new_response) + + expected_response = b''.join([ + # start + b'0008NAK\n0012\x02PRE_PULL_MSG\n', + ] + [ + # ... rest + ascii_bytes(f'000asubp{x}\n') for x in range(1000) + ] + [ + # final message, + b'0013\x02POST_PULL_MSG\n0000', + + ]) + + assert b''.join(new_response) == expected_response diff --git a/vcsserver/tests/test_scm_app.py b/vcsserver/tests/test_scm_app.py --- a/vcsserver/tests/test_scm_app.py +++ b/vcsserver/tests/test_scm_app.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -25,10 +25,11 @@ import pytest import webtest from vcsserver import scm_app +from vcsserver.str_utils import ascii_bytes def test_hg_does_not_accept_invalid_cmd(tmpdir): - repo = mercurial.hg.repository(mercurial.ui.ui(), str(tmpdir), create=True) + repo = mercurial.hg.repository(mercurial.ui.ui(), ascii_bytes(str(tmpdir)), create=True) app = webtest.TestApp(scm_app.HgWeb(repo)) response = app.get('/repo?cmd=invalidcmd', expect_errors=True) @@ -37,7 +38,7 @@ def test_hg_does_not_accept_invalid_cmd( def test_create_hg_wsgi_app_requirement_error(tmpdir): - repo = mercurial.hg.repository(mercurial.ui.ui(), str(tmpdir), create=True) + repo = mercurial.hg.repository(mercurial.ui.ui(), ascii_bytes(str(tmpdir)), create=True) config = ( ('paths', 'default', ''), ) diff --git a/vcsserver/tests/test_server.py b/vcsserver/tests/test_server.py --- a/vcsserver/tests/test_server.py +++ b/vcsserver/tests/test_server.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. -# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/vcsserver/tests/test_subprocessio.py b/vcsserver/tests/test_subprocessio.py --- a/vcsserver/tests/test_subprocessio.py +++ b/vcsserver/tests/test_subprocessio.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. 
-# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,22 +22,23 @@ import sys import pytest from vcsserver import subprocessio +from vcsserver.str_utils import ascii_bytes -class KindaFilelike(object): # pragma: no cover +class FileLikeObj: # pragma: no cover - def __init__(self, data, size): - chunks = size / len(data) + def __init__(self, data: bytes, size): + chunks = size // len(data) self.stream = self._get_stream(data, chunks) def _get_stream(self, data, chunks): - for x in xrange(chunks): + for x in range(chunks): yield data def read(self, n): - buffer_stream = '' + buffer_stream = b'' for chunk in self.stream: buffer_stream += chunk if len(buffer_stream) >= n: @@ -51,7 +52,7 @@ class KindaFilelike(object): # pragma: def environ(): """Delete coverage variables, as they make the tests fail.""" env = dict(os.environ) - for key in env.keys(): + for key in list(env.keys()): if key.startswith('COV_CORE_'): del env[key] @@ -63,93 +64,92 @@ def _get_python_args(script): def test_raise_exception_on_non_zero_return_code(environ): - args = _get_python_args('sys.exit(1)') - with pytest.raises(EnvironmentError): - list(subprocessio.SubprocessIOChunker(args, shell=False, env=environ)) + call_args = _get_python_args('raise ValueError("fail")') + with pytest.raises(OSError): + b''.join(subprocessio.SubprocessIOChunker(call_args, shell=False, env=environ)) def test_does_not_fail_on_non_zero_return_code(environ): - args = _get_python_args('sys.exit(1)') - output = ''.join( - subprocessio.SubprocessIOChunker( - args, shell=False, fail_on_return_code=False, env=environ - ) - ) + call_args = _get_python_args('sys.stdout.write("hello"); sys.exit(1)') + proc = subprocessio.SubprocessIOChunker(call_args, shell=False, fail_on_return_code=False, env=environ) + output = b''.join(proc) - assert output == '' + assert output == b'hello' def test_raise_exception_on_stderr(environ): - args = _get_python_args('sys.stderr.write("X"); time.sleep(1);') - with pytest.raises(EnvironmentError) as excinfo: - list(subprocessio.SubprocessIOChunker(args, shell=False, env=environ)) + call_args = _get_python_args('sys.stderr.write("WRITE_TO_STDERR"); time.sleep(1);') - assert 'exited due to an error:\nX' in str(excinfo.value) + with pytest.raises(OSError) as excinfo: + b''.join(subprocessio.SubprocessIOChunker(call_args, shell=False, env=environ)) + + assert 'exited due to an error:\nWRITE_TO_STDERR' in str(excinfo.value) def test_does_not_fail_on_stderr(environ): - args = _get_python_args('sys.stderr.write("X"); time.sleep(1);') - output = ''.join( - subprocessio.SubprocessIOChunker( - args, shell=False, fail_on_stderr=False, env=environ - ) - ) + call_args = _get_python_args('sys.stderr.write("WRITE_TO_STDERR"); sys.stderr.flush; time.sleep(2);') + proc = subprocessio.SubprocessIOChunker(call_args, shell=False, fail_on_stderr=False, env=environ) + output = b''.join(proc) - assert output == '' + assert output == b'' -@pytest.mark.parametrize('size', [1, 10 ** 5]) +@pytest.mark.parametrize('size', [ + 1, + 10 ** 5 +]) def test_output_with_no_input(size, environ): - print(type(environ)) - data = 'X' - args = _get_python_args('sys.stdout.write("%s" * %d)' % (data, size)) - output = ''.join(subprocessio.SubprocessIOChunker(args, shell=False, env=environ)) + call_args = _get_python_args(f'sys.stdout.write("X" * {size});') + proc = 
subprocessio.SubprocessIOChunker(call_args, shell=False, env=environ) + output = b''.join(proc) - assert output == data * size + assert output == ascii_bytes("X" * size) -@pytest.mark.parametrize('size', [1, 10 ** 5]) +@pytest.mark.parametrize('size', [ + 1, + 10 ** 5 +]) def test_output_with_no_input_does_not_fail(size, environ): - data = 'X' - args = _get_python_args('sys.stdout.write("%s" * %d); sys.exit(1)' % (data, size)) - output = ''.join( - subprocessio.SubprocessIOChunker( - args, shell=False, fail_on_return_code=False, env=environ - ) - ) - print("{} {}".format(len(data * size), len(output))) - assert output == data * size + call_args = _get_python_args(f'sys.stdout.write("X" * {size}); sys.exit(1)') + proc = subprocessio.SubprocessIOChunker(call_args, shell=False, fail_on_return_code=False, env=environ) + output = b''.join(proc) + + assert output == ascii_bytes("X" * size) -@pytest.mark.parametrize('size', [1, 10 ** 5]) +@pytest.mark.parametrize('size', [ + 1, + 10 ** 5 +]) def test_output_with_input(size, environ): data_len = size - inputstream = KindaFilelike('X', size) + inputstream = FileLikeObj(b'X', size) # This acts like the cat command. - args = _get_python_args('shutil.copyfileobj(sys.stdin, sys.stdout)') - output = ''.join( - subprocessio.SubprocessIOChunker( - args, shell=False, inputstream=inputstream, env=environ - ) + call_args = _get_python_args('shutil.copyfileobj(sys.stdin, sys.stdout)') + # note: in this tests we explicitly don't assign chunker to a variable and let it stream directly + output = b''.join( + subprocessio.SubprocessIOChunker(call_args, shell=False, input_stream=inputstream, env=environ) ) assert len(output) == data_len -@pytest.mark.parametrize('size', [1, 10 ** 5]) +@pytest.mark.parametrize('size', [ + 1, + 10 ** 5 +]) def test_output_with_input_skipping_iterator(size, environ): data_len = size - inputstream = KindaFilelike('X', size) + inputstream = FileLikeObj(b'X', size) # This acts like the cat command. - args = _get_python_args('shutil.copyfileobj(sys.stdin, sys.stdout)') + call_args = _get_python_args('shutil.copyfileobj(sys.stdin, sys.stdout)') # Note: assigning the chunker makes sure that it is not deleted too early - chunker = subprocessio.SubprocessIOChunker( - args, shell=False, inputstream=inputstream, env=environ - ) - output = ''.join(chunker.output) + proc = subprocessio.SubprocessIOChunker(call_args, shell=False, input_stream=inputstream, env=environ) + output = b''.join(proc.stdout) assert len(output) == data_len diff --git a/vcsserver/tests/test_svn.py b/vcsserver/tests/test_svn.py --- a/vcsserver/tests/test_svn.py +++ b/vcsserver/tests/test_svn.py @@ -1,5 +1,5 @@ # RhodeCode VCSServer provides access to different vcs backends via network. 
-# Copyright (C) 2014-2020 RhodeCode GmbH +# Copyright (C) 2014-2023 RhodeCode GmbH # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -20,10 +20,12 @@ import mock import pytest import sys +from vcsserver.str_utils import ascii_bytes -class MockPopen(object): + +class MockPopen: def __init__(self, stderr): - self.stdout = io.BytesIO('') + self.stdout = io.BytesIO(b'') self.stderr = io.BytesIO(stderr) self.returncode = 1 @@ -44,44 +46,58 @@ INVALID_CERTIFICATE_STDERR = '\n'.join([ @pytest.mark.xfail(sys.platform == "cygwin", reason="SVN not packaged for Cygwin") def test_import_remote_repository_certificate_error(stderr, expected_reason): - from vcsserver import svn + from vcsserver.remote import svn_remote factory = mock.Mock() factory.repo = mock.Mock(return_value=mock.Mock()) - remote = svn.SvnRemote(factory) + remote = svn_remote.SvnRemote(factory) remote.is_path_valid_repository = lambda wire, path: True with mock.patch('subprocess.Popen', - return_value=MockPopen(stderr)): + return_value=MockPopen(ascii_bytes(stderr))): with pytest.raises(Exception) as excinfo: remote.import_remote_repository({'path': 'path'}, 'url') - expected_error_args = ( - 'Failed to dump the remote repository from url. Reason:{}'.format(expected_reason),) + expected_error_args = 'Failed to dump the remote repository from url. Reason:{}'.format(expected_reason) - assert excinfo.value.args == expected_error_args + assert excinfo.value.args[0] == expected_error_args def test_svn_libraries_can_be_imported(): - import svn - import svn.client + import svn.client # noqa assert svn.client is not None @pytest.mark.parametrize('example_url, parts', [ - ('http://server.com', (None, None, 'http://server.com')), - ('http://user@server.com', ('user', None, 'http://user@server.com')), + ('http://server.com', ('', '', 'http://server.com')), + ('http://user@server.com', ('user', '', 'http://user@server.com')), ('http://user:pass@server.com', ('user', 'pass', 'http://user:pass@server.com')), - ('