release: Release 5.2.0
r1292:50174967 merge v5.2.0 stable
@@ -0,0 +1,53 b''
1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
18 import hashlib
19 from vcsserver.lib.str_utils import safe_bytes, safe_str
20
21
22 def md5(s):
23 return hashlib.md5(s).hexdigest()
24
25
26 def md5_safe(s, return_type=''):
27
28 val = md5(safe_bytes(s))
29 if return_type == 'str':
30 val = safe_str(val)
31 return val
32
33
34 def sha1(s):
35 return hashlib.sha1(s).hexdigest()
36
37
38 def sha1_safe(s, return_type=''):
39 val = sha1(safe_bytes(s))
40 if return_type == 'str':
41 val = safe_str(val)
42 return val
43
44
45 def sha256(s):
46 return hashlib.sha256(s).hexdigest()
47
48
49 def sha256_safe(s, return_type=''):
50 val = sha256(safe_bytes(s))
51 if return_type == 'str':
52 val = safe_str(val)
53 return val
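The new hashing helpers above wrap hashlib so that callers can pass either str or bytes: the *_safe variants coerce input with safe_bytes() first, and hexdigest() already yields a str. A minimal usage sketch (the import path vcsserver.lib.hash_utils is an assumption, since the diff does not show the new file's name):

    # illustrative only; adjust the import to wherever this new module actually lives
    from vcsserver.lib.hash_utils import md5_safe, sha1_safe, sha256_safe

    cache_key = md5_safe('repo-group/repo-name')      # str input is coerced via safe_bytes()
    short_id = sha1_safe('refs/heads/master')[:12]    # hexdigest() output is already a str
    oid = sha256_safe(b'raw file content')            # bytes input works as well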
@@ -1,144 +1,130 b''
1 .DEFAULT_GOAL := help
2
3 # Pretty print values cf. https://misc.flogisoft.com/bash/tip_colors_and_formatting
4 RESET := \033[0m # Reset all formatting
5 GREEN := \033[0;32m # Resets before setting 16b colour (32 -- green)
6 YELLOW := \033[0;33m
7 ORANGE := \033[0;38;5;208m # Reset then set 256b colour (208 -- orange)
8 PEACH := \033[0;38;5;216m
9
10
11 ## ---------------------------------------------------------------------------------- ##
12 ## ------------------------- Help usage builder ------------------------------------- ##
13 ## ---------------------------------------------------------------------------------- ##
14 # use '# >>> Build commands' to create a section
15 # use '# target: target description' to create help for a target
16 .PHONY: help
17 help:
18 @echo "Usage:"
19 @cat $(MAKEFILE_LIST) | grep -E '^# >>>|^# [A-Za-z0-9_.-]+:' | sed -E 's/^# //' | awk ' \
20 BEGIN { \
21 green="\033[32m"; \
22 yellow="\033[33m"; \
23 reset="\033[0m"; \
24 section=""; \
25 } \
26 /^>>>/ { \
27 section=substr($$0, 5); \
28 printf "\n" green ">>> %s" reset "\n", section; \
29 next; \
30 } \
31 /^([A-Za-z0-9_.-]+):/ { \
32 target=$$1; \
33 gsub(/:$$/, "", target); \
34 description=substr($$0, index($$0, ":") + 2); \
35 if (description == "") { description="-"; } \
36 printf " - " yellow "%-35s" reset " %s\n", target, description; \
37 } \
38 '
39
1 40 # required for pushd to work..
2 41 SHELL = /bin/bash
3 42
4
5 # set by: PATH_TO_OUTDATED_PACKAGES=/some/path/outdated_packages.py
6 OUTDATED_PACKAGES = ${PATH_TO_OUTDATED_PACKAGES}
43 # >>> Tests commands
7 44
8 45 .PHONY: clean
9 ## Cleanup compiled and cache py files
46 # clean: Cleanup compiled and cache py files
10 47 clean:
11 48 make test-clean
12 49 find . -type f \( -iname '*.c' -o -iname '*.pyc' -o -iname '*.so' -o -iname '*.orig' \) -exec rm '{}' ';'
13 50 find . -type d -name "build" -prune -exec rm -rf '{}' ';'
14 51
15 52
16 53 .PHONY: test
17 ## run test-clean and tests
54 # test: run test-clean and tests
18 55 test:
19 56 make test-clean
20 make test-only
57 unset RC_SQLALCHEMY_DB1_URL && unset RC_DB_URL && make test-only
21 58
22 59
23 60 .PHONY: test-clean
24 ## run test-clean and tests
61 # test-clean: Cleanup test run artifacts (coverage, junit, pylint logs, caches)
25 62 test-clean:
26 63 rm -rf coverage.xml htmlcov junit.xml pylint.log result
27 64 find . -type d -name "__pycache__" -prune -exec rm -rf '{}' ';'
28 65 find . -type f \( -iname '.coverage.*' \) -exec rm '{}' ';'
29 66
30 67
31 68 .PHONY: test-only
32 ## Run tests only without cleanup
69 # test-only: Run tests only without cleanup
33 70 test-only:
34 71 PYTHONHASHSEED=random \
35 72 py.test -x -vv -r xw -p no:sugar \
36 73 --cov-report=term-missing --cov-report=html \
37 74 --cov=vcsserver vcsserver
38 75
39
40 .PHONY: ruff-check
41 ## run a ruff analysis
42 ruff-check:
43 ruff check --ignore F401 --ignore I001 --ignore E402 --ignore E501 --ignore F841 --exclude rhodecode/lib/dbmigrate --exclude .eggs --exclude .dev .
76 # >>> Dev commands
44 77
45 .PHONY: pip-packages
46 ## Show outdated packages
47 pip-packages:
48 python ${OUTDATED_PACKAGES}
49
50
51 .PHONY: build
52 ## Build sdist/egg
53 build:
54 python -m build
55 78
56 79
57 80 .PHONY: dev-sh
58 ## make dev-sh
81 # dev-sh: make dev-sh
59 82 dev-sh:
60 83 sudo echo "deb [trusted=yes] https://apt.fury.io/rsteube/ /" | sudo tee -a "/etc/apt/sources.list.d/fury.list"
61 84 sudo apt-get update
62 85 sudo apt-get install -y zsh carapace-bin
63 86 rm -rf /home/rhodecode/.oh-my-zsh
64 87 curl https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh | sh
65 88 @echo "source <(carapace _carapace)" > /home/rhodecode/.zsrc
66 89 @echo "${RC_DEV_CMD_HELP}"
67 90 @PROMPT='%(?.%F{green}√.%F{red}?%?)%f %B%F{240}%1~%f%b %# ' zsh
68 91
69 92
70 93 .PHONY: dev-cleanup
71 ## Cleanup: pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
94 # dev-cleanup: Cleanup: pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
72 95 dev-cleanup:
73 96 pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
74 97 rm -rf /tmp/*
75 98
76 99
77 100 .PHONY: dev-env
78 ## make dev-env based on the requirements files and install develop of packages
101 # dev-env: make dev-env based on the requirements files and install packages in development (editable) mode
79 102 ## Cleanup: pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
80 103 dev-env:
81 104 sudo -u root chown rhodecode:rhodecode /home/rhodecode/.cache/pip/
82 105 pip install build virtualenv
83 106 pip wheel --wheel-dir=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
84 107 pip install --no-index --find-links=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
85 108 pip install -e .
86 109
87 110
88 111 .PHONY: sh
89 ## shortcut for make dev-sh dev-env
112 # sh: shortcut for make dev-sh dev-env
90 113 sh:
91 114 make dev-env
92 115 make dev-sh
93 116
94 117
95 118 ## Allows changes of workers e.g. make dev-srv workers=2
96 119 workers?=1
97 120
98 121 .PHONY: dev-srv
99 ## run gunicorn web server with reloader, use workers=N to set multiworker mode
122 # dev-srv: run gunicorn web server with reloader; use workers=N to set multi-worker mode
100 123 dev-srv:
101 124 gunicorn --paste=.dev/dev.ini --bind=0.0.0.0:10010 --config=.dev/gunicorn_config.py --reload --workers=$(workers)
102 125
103
104 # Default command on calling make
105 .DEFAULT_GOAL := show-help
126 .PHONY: ruff-check
127 # ruff-check: run a ruff analysis
128 ruff-check:
129 ruff check --ignore F401 --ignore I001 --ignore E402 --ignore E501 --ignore F841 --exclude rhodecode/lib/dbmigrate --exclude .eggs --exclude .dev .
106 130
107 .PHONY: show-help
108 show-help:
109 @echo "$$(tput bold)Available rules:$$(tput sgr0)"
110 @echo
111 @sed -n -e "/^## / { \
112 h; \
113 s/.*//; \
114 :doc" \
115 -e "H; \
116 n; \
117 s/^## //; \
118 t doc" \
119 -e "s/:.*//; \
120 G; \
121 s/\\n## /---/; \
122 s/\\n/ /g; \
123 p; \
124 }" ${MAKEFILE_LIST} \
125 | LC_ALL='C' sort --ignore-case \
126 | awk -F '---' \
127 -v ncol=$$(tput cols) \
128 -v indent=19 \
129 -v col_on="$$(tput setaf 6)" \
130 -v col_off="$$(tput sgr0)" \
131 '{ \
132 printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
133 n = split($$2, words, " "); \
134 line_length = ncol - indent; \
135 for (i = 1; i <= n; i++) { \
136 line_length -= length(words[i]) + 1; \
137 if (line_length <= 0) { \
138 line_length = ncol - indent - length(words[i]) - 1; \
139 printf "\n%*s ", -indent, " "; \
140 } \
141 printf "%s ", words[i]; \
142 } \
143 printf "\n"; \
144 }'
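The new 'help' target near the top of this Makefile builds its output purely from '# >>> Section' and '# target: description' comments, replacing the sed/awk 'show-help' recipe removed above. For readers less comfortable with awk, a rough Python sketch of the same parsing logic (colors omitted; the file name is just an example):

    import re

    def print_make_help(makefile_path: str = "Makefile") -> None:
        """Approximate Python equivalent of the awk-based 'help' target."""
        section_re = re.compile(r"^# >>> (.+)")
        target_re = re.compile(r"^# ([A-Za-z0-9_.-]+): ?(.*)")
        print("Usage:")
        with open(makefile_path) as f:
            for line in f:
                if m := section_re.match(line):
                    print(f"\n>>> {m.group(1).strip()}")
                elif m := target_re.match(line):
                    target, description = m.group(1), (m.group(2).strip() or "-")
                    print(f"  - {target:<35} {description}")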
@@ -1,520 +1,545 b''
1 1 """
2 2 Gunicorn config extension and hooks. This config file adds some extra settings and memory management.
3 3 Gunicorn configuration should be managed by .ini files entries of RhodeCode or VCSServer
4 4 """
5 5
6 6 import gc
7 7 import os
8 8 import sys
9 9 import math
10 10 import time
11 11 import threading
12 12 import traceback
13 13 import random
14 14 import socket
15 15 import dataclasses
16 import json
16 17 from gunicorn.glogging import Logger
17 18
18 19
19 20 def get_workers():
20 21 import multiprocessing
21 22 return multiprocessing.cpu_count() * 2 + 1
22 23
23 24
24 25 bind = "127.0.0.1:10010"
25 26
26 27
27 28 # Error logging output for gunicorn (-) is stdout
28 29 errorlog = '-'
29 30
30 31 # Access logging output for gunicorn (-) is stdout
31 32 accesslog = '-'
32 33
33 34
34 35 # SERVER MECHANICS
35 36 # None == system temp dir
36 37 # worker_tmp_dir is recommended to be set to some tmpfs
37 38 worker_tmp_dir = None
38 39 tmp_upload_dir = None
39 40
40 # use re-use port logic
41 #reuse_port = True
41 # Use port re-use logic to let the Linux kernel load-balance incoming requests across workers.
42 reuse_port = True
42 43
43 44 # Custom log format
44 45 #access_log_format = (
45 46 # '%(t)s %(p)s INFO [GNCRN] %(h)-15s rqt:%(L)s %(s)s %(b)-6s "%(m)s:%(U)s %(q)s" usr:%(u)s "%(f)s" "%(a)s"')
46 47
47 48 # loki format for easier parsing in grafana
48 access_log_format = (
49 loki_access_log_format = (
49 50 'time="%(t)s" pid=%(p)s level="INFO" type="[GNCRN]" ip="%(h)-15s" rqt="%(L)s" response_code="%(s)s" response_bytes="%(b)-6s" uri="%(m)s:%(U)s %(q)s" user=":%(u)s" user_agent="%(a)s"')
50 51
52 # JSON format
53 json_access_log_format = json.dumps({
54 'time': r'%(t)s',
55 'pid': r'%(p)s',
56 'level': 'INFO',
57 'ip': r'%(h)s',
58 'request_time': r'%(L)s',
59 'remote_address': r'%(h)s',
60 'user_name': r'%(u)s',
61 'status': r'%(s)s',
62 'method': r'%(m)s',
63 'url_path': r'%(U)s',
64 'query_string': r'%(q)s',
65 'protocol': r'%(H)s',
66 'response_length': r'%(B)s',
67 'referer': r'%(f)s',
68 'user_agent': r'%(a)s',
69
70 })
71
72 access_log_format = loki_access_log_format
73 if os.environ.get('RC_LOGGING_FORMATTER') == 'json':
74 access_log_format = json_access_log_format
75
51 76 # Self-adjust workers based on CPU count, to use the available CPUs without over-allocating resources
52 77 # workers = get_workers()
53 78
54 79 # Gunicorn access log level
55 80 loglevel = 'info'
56 81
57 82 # Process name visible in a process list
58 83 proc_name = "rhodecode_vcsserver"
59 84
60 85 # Type of worker class, one of `sync`, `gevent` or `gthread`
61 86 # currently `sync` is the only option allowed for vcsserver, while for rhodecode all 3 are allowed
62 87 # gevent:
63 88 # In this case, the maximum number of concurrent requests is (N workers * X worker_connections)
64 89 # e.g. workers =3 worker_connections=10 = 3*10, 30 concurrent requests can be handled
65 90 # gthread:
66 91 # In this case, the maximum number of concurrent requests is (N workers * X threads)
67 92 # e.g. workers = 3 threads=3 = 3*3, 9 concurrent requests can be handled
68 93 worker_class = 'sync'
69 94
70 95 # Sets the number of process workers. More workers means more concurrent connections
71 96 # RhodeCode can handle at the same time. Each additional worker also increases
72 97 # memory usage, as each has its own set of caches.
73 98 # The recommended value is (2 * NUMBER_OF_CPUS + 1), e.g. 2 CPUs = 5 workers, but no more
74 99 # than 8-10 unless for huge deployments, e.g. 700-1000 users.
75 100 # `instance_id = *` must be set in the [app:main] section below (which is the default)
76 101 # when using more than 1 worker.
77 102 workers = 2
78 103
79 104 # Threads numbers for worker class gthread
80 105 threads = 1
81 106
82 107 # The maximum number of simultaneous clients. Valid only for gevent
83 108 # In this case, the maximum number of concurrent requests is (N workers * X worker_connections)
84 109 # e.g workers =3 worker_connections=10 = 3*10, 30 concurrent requests can be handled
85 110 worker_connections = 10
86 111
87 112 # Max number of requests that worker will handle before being gracefully restarted.
88 113 # Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
89 114 max_requests = 2000
90 115 max_requests_jitter = int(max_requests * 0.2) # 20% of max_requests
91 116
92 117 # The maximum number of pending connections.
93 118 # Exceeding this number results in the client getting an error when attempting to connect.
94 119 backlog = 64
95 120
96 121 # The amount of time a worker can spend handling a request before it
97 122 # gets killed and restarted. By default, set to 21600 (6hrs)
98 123 # Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
99 124 timeout = 21600
100 125
101 126 # The maximum size of HTTP request line in bytes.
102 127 # 0 for unlimited
103 128 limit_request_line = 0
104 129
105 130 # Limit the number of HTTP headers fields in a request.
106 131 # By default this value is 100 and can't be larger than 32768.
107 132 limit_request_fields = 32768
108 133
109 134 # Limit the allowed size of an HTTP request header field.
110 135 # Value is a positive number or 0.
111 136 # Setting it to 0 will allow unlimited header field sizes.
112 137 limit_request_field_size = 0
113 138
114 139 # Timeout for graceful workers restart.
115 140 # After receiving a restart signal, workers have this much time to finish
116 141 # serving requests. Workers still alive after the timeout (starting from the
117 142 # receipt of the restart signal) are force killed.
118 143 # Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
119 144 graceful_timeout = 21600
120 145
121 146 # The number of seconds to wait for requests on a Keep-Alive connection.
122 147 # Generally set in the 1-5 seconds range.
123 148 keepalive = 2
124 149
125 150 # Maximum memory usage that each worker can use before it will receive a
126 151 # graceful restart signal. 0 = memory monitoring is disabled
127 152 # Examples: 268435456 (256MB), 536870912 (512MB)
128 153 # 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
129 154 # Dynamic formula 1024 * 1024 * 256 == 256MBs
130 155 memory_max_usage = 0
131 156
132 157 # How often in seconds to check for memory usage for each gunicorn worker
133 158 memory_usage_check_interval = 60
134 159
135 160 # Threshold below which we don't recycle a worker if garbage collection
136 161 # frees up enough resources. Before each restart we try to run GC on the worker;
137 162 # if enough memory is freed after that, the restart will not happen.
138 163 memory_usage_recovery_threshold = 0.8
139 164
140 165
141 166 @dataclasses.dataclass
142 167 class MemoryCheckConfig:
143 168 max_usage: int
144 169 check_interval: int
145 170 recovery_threshold: float
146 171
147 172
148 173 def _get_process_rss(pid=None):
149 174 try:
150 175 import psutil
151 176 if pid:
152 177 proc = psutil.Process(pid)
153 178 else:
154 179 proc = psutil.Process()
155 180 return proc.memory_info().rss
156 181 except Exception:
157 182 return None
158 183
159 184
160 185 def _get_config(ini_path):
161 186 import configparser
162 187
163 188 try:
164 189 config = configparser.RawConfigParser()
165 190 config.read(ini_path)
166 191 return config
167 192 except Exception:
168 193 return None
169 194
170 195
171 196 def get_memory_usage_params(config=None):
172 197 # memory spec defaults
173 198 _memory_max_usage = memory_max_usage
174 199 _memory_usage_check_interval = memory_usage_check_interval
175 200 _memory_usage_recovery_threshold = memory_usage_recovery_threshold
176 201
177 202 if config:
178 203 ini_path = os.path.abspath(config)
179 204 conf = _get_config(ini_path)
180 205
181 206 section = 'server:main'
182 207 if conf and conf.has_section(section):
183 208
184 209 if conf.has_option(section, 'memory_max_usage'):
185 210 _memory_max_usage = conf.getint(section, 'memory_max_usage')
186 211
187 212 if conf.has_option(section, 'memory_usage_check_interval'):
188 213 _memory_usage_check_interval = conf.getint(section, 'memory_usage_check_interval')
189 214
190 215 if conf.has_option(section, 'memory_usage_recovery_threshold'):
191 216 _memory_usage_recovery_threshold = conf.getfloat(section, 'memory_usage_recovery_threshold')
192 217
193 218 _memory_max_usage = int(os.environ.get('RC_GUNICORN_MEMORY_MAX_USAGE', '')
194 219 or _memory_max_usage)
195 220 _memory_usage_check_interval = int(os.environ.get('RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL', '')
196 221 or _memory_usage_check_interval)
197 222 _memory_usage_recovery_threshold = float(os.environ.get('RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD', '')
198 223 or _memory_usage_recovery_threshold)
199 224
200 225 return MemoryCheckConfig(_memory_max_usage, _memory_usage_check_interval, _memory_usage_recovery_threshold)
201 226
202 227
203 228 def _time_with_offset(check_interval):
204 229 return time.time() - random.randint(0, check_interval/2.0)
205 230
206 231
207 232 def pre_fork(server, worker):
208 233 pass
209 234
210 235
211 236 def post_fork(server, worker):
212 237
213 238 memory_conf = get_memory_usage_params()
214 239 _memory_max_usage = memory_conf.max_usage
215 240 _memory_usage_check_interval = memory_conf.check_interval
216 241 _memory_usage_recovery_threshold = memory_conf.recovery_threshold
217 242
218 243 worker._memory_max_usage = int(os.environ.get('RC_GUNICORN_MEMORY_MAX_USAGE', '')
219 244 or _memory_max_usage)
220 245 worker._memory_usage_check_interval = int(os.environ.get('RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL', '')
221 246 or _memory_usage_check_interval)
222 247 worker._memory_usage_recovery_threshold = float(os.environ.get('RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD', '')
223 248 or _memory_usage_recovery_threshold)
224 249
225 250 # register memory last check time, with some random offset so we don't recycle all
226 251 # at once
227 252 worker._last_memory_check_time = _time_with_offset(_memory_usage_check_interval)
228 253
229 254 if _memory_max_usage:
230 255 server.log.info("pid=[%-10s] WORKER spawned with max memory set at %s", worker.pid,
231 256 _format_data_size(_memory_max_usage))
232 257 else:
233 258 server.log.info("pid=[%-10s] WORKER spawned", worker.pid)
234 259
235 260
236 261 def pre_exec(server):
237 262 server.log.info("Forked child, re-executing.")
238 263
239 264
240 265 def on_starting(server):
241 266 server_lbl = '{} {}'.format(server.proc_name, server.address)
242 267 server.log.info("Server %s is starting.", server_lbl)
243 268 server.log.info('Config:')
244 269 server.log.info(f"\n{server.cfg}")
245 270 server.log.info(get_memory_usage_params())
246 271
247 272
248 273 def when_ready(server):
249 274 server.log.info("Server %s is ready. Spawning workers", server)
250 275
251 276
252 277 def on_reload(server):
253 278 pass
254 279
255 280
256 281 def _format_data_size(size, unit="B", precision=1, binary=True):
257 282 """Format a number using SI units (kilo, mega, etc.).
258 283
259 284 ``size``: The number as a float or int.
260 285
261 286 ``unit``: The unit name in plural form. Examples: "bytes", "B".
262 287
263 288 ``precision``: How many digits to the right of the decimal point. Default
264 289 is 1. 0 suppresses the decimal point.
265 290
266 291 ``binary``: If false, use base-10 decimal prefixes (kilo = K = 1000).
267 292 If true, use base-2 binary prefixes (kibi = Ki = 1024).
268 293
269 294 ``full_name``: If false (default), use the prefix abbreviation ("k" or
270 295 "Ki"). If true, use the full prefix ("kilo" or "kibi").
272 297
273 298 """
274 299
275 300 if not binary:
276 301 base = 1000
277 302 multiples = ('', 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y')
278 303 else:
279 304 base = 1024
280 305 multiples = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi')
281 306
282 307 sign = ""
283 308 if size > 0:
284 309 m = int(math.log(size, base))
285 310 elif size < 0:
286 311 sign = "-"
287 312 size = -size
288 313 m = int(math.log(size, base))
289 314 else:
290 315 m = 0
291 316 if m > 8:
292 317 m = 8
293 318
294 319 if m == 0:
295 320 precision = '%.0f'
296 321 else:
297 322 precision = '%%.%df' % precision
298 323
299 324 size = precision % (size / math.pow(base, m))
300 325
301 326 return '%s%s %s%s' % (sign, size.strip(), multiples[m], unit)
302 327
303 328
304 329 def _check_memory_usage(worker):
305 330 _memory_max_usage = worker._memory_max_usage
306 331 if not _memory_max_usage:
307 332 return
308 333
309 334 _memory_usage_check_interval = worker._memory_usage_check_interval
310 335 _memory_usage_recovery_threshold = _memory_max_usage * worker._memory_usage_recovery_threshold
311 336
312 337 elapsed = time.time() - worker._last_memory_check_time
313 338 if elapsed > _memory_usage_check_interval:
314 339 mem_usage = _get_process_rss()
315 340 if mem_usage and mem_usage > _memory_max_usage:
316 341 worker.log.info(
317 342 "memory usage %s > %s, forcing gc",
318 343 _format_data_size(mem_usage), _format_data_size(_memory_max_usage))
319 344 # Try to clean it up by forcing a full collection.
320 345 gc.collect()
321 346 mem_usage = _get_process_rss()
322 347 if mem_usage > _memory_usage_recovery_threshold:
323 348 # Didn't clean up enough, we'll have to terminate.
324 349 worker.log.warning(
325 350 "memory usage %s > %s after gc, quitting",
326 351 _format_data_size(mem_usage), _format_data_size(_memory_max_usage))
327 352 # This will cause worker to auto-restart itself
328 353 worker.alive = False
329 354 worker._last_memory_check_time = time.time()
330 355
331 356
332 357 def worker_int(worker):
333 358 worker.log.info("pid=[%-10s] worker received INT or QUIT signal", worker.pid)
334 359
335 360 # get traceback info, when a worker crashes
336 361 def get_thread_id(t_id):
337 362 id2name = dict([(th.ident, th.name) for th in threading.enumerate()])
338 363 return id2name.get(t_id, "unknown_thread_id")
339 364
340 365 code = []
341 366 for thread_id, stack in sys._current_frames().items(): # noqa
342 367 code.append(
343 368 "\n# Thread: %s(%d)" % (get_thread_id(thread_id), thread_id))
344 369 for fname, lineno, name, line in traceback.extract_stack(stack):
345 370 code.append('File: "%s", line %d, in %s' % (fname, lineno, name))
346 371 if line:
347 372 code.append(" %s" % (line.strip()))
348 373 worker.log.debug("\n".join(code))
349 374
350 375
351 376 def worker_abort(worker):
352 377 worker.log.info("pid=[%-10s] worker received SIGABRT signal", worker.pid)
353 378
354 379
355 380 def worker_exit(server, worker):
356 381 worker.log.info("pid=[%-10s] worker exit", worker.pid)
357 382
358 383
359 384 def child_exit(server, worker):
360 385 worker.log.info("pid=[%-10s] worker child exit", worker.pid)
361 386
362 387
363 388 def pre_request(worker, req):
364 389 worker.start_time = time.time()
365 390 worker.log.debug(
366 391 "GNCRN PRE WORKER [cnt:%s]: %s %s", worker.nr, req.method, req.path)
367 392
368 393
369 394 def post_request(worker, req, environ, resp):
370 395 total_time = time.time() - worker.start_time
371 396 # Gunicorn sometimes has problems with reading the status_code
372 397 status_code = getattr(resp, 'status_code', '')
373 398 worker.log.debug(
374 399 "GNCRN POST WORKER [cnt:%s]: %s %s resp: %s, Load Time: %.4fs",
375 400 worker.nr, req.method, req.path, status_code, total_time)
376 401 _check_memory_usage(worker)
377 402
378 403
379 404 def _filter_proxy(ip):
380 405 """
381 406 IP addresses passed in headers can be in a special format of multiple,
382 407 comma-separated IPs appended by the various proxies in the chain of
383 408 request processing, with the left-most entry being the original client.
384 409 We only care about that first IP, which came from the original client.
385 410
386 411 :param ip: ip string from headers
387 412 """
388 413 if ',' in ip:
389 414 _ips = ip.split(',')
390 415 _first_ip = _ips[0].strip()
391 416 return _first_ip
392 417 return ip
393 418
394 419
395 420 def _filter_port(ip):
396 421 """
397 422 Removes a port from the IP; there are 4 main cases to handle here.
398 423 - ipv4 eg. 127.0.0.1
399 424 - ipv6 eg. ::1
400 425 - ipv4+port eg. 127.0.0.1:8080
401 426 - ipv6+port eg. [::1]:8080
402 427
403 428 :param ip:
404 429 """
405 430 def is_ipv6(ip_addr):
406 431 if hasattr(socket, 'inet_pton'):
407 432 try:
408 433 socket.inet_pton(socket.AF_INET6, ip_addr)
409 434 except socket.error:
410 435 return False
411 436 else:
412 437 return False
413 438 return True
414 439
415 440 if ':' not in ip: # must be ipv4 pure ip
416 441 return ip
417 442
418 443 if '[' in ip and ']' in ip: # ipv6 with port
419 444 return ip.split(']')[0][1:].lower()
420 445
421 446 # must be ipv6 or ipv4 with port
422 447 if is_ipv6(ip):
423 448 return ip
424 449 else:
425 450 ip, _port = ip.split(':')[:2] # means ipv4+port
426 451 return ip
427 452
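    # Editor's example (illustrative, not part of the file): expected behaviour of _filter_port() above:
    #     _filter_port('127.0.0.1')      -> '127.0.0.1'   # plain ipv4
    #     _filter_port('127.0.0.1:8080') -> '127.0.0.1'   # ipv4 + port
    #     _filter_port('::1')            -> '::1'         # plain ipv6
    #     _filter_port('[::1]:8080')     -> '::1'         # ipv6 + port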
428 453
429 454 def get_ip_addr(environ):
430 455 proxy_key = 'HTTP_X_REAL_IP'
431 456 proxy_key2 = 'HTTP_X_FORWARDED_FOR'
432 457 def_key = 'REMOTE_ADDR'
433 458
434 459 def _filters(x):
435 460 return _filter_port(_filter_proxy(x))
436 461
437 462 ip = environ.get(proxy_key)
438 463 if ip:
439 464 return _filters(ip)
440 465
441 466 ip = environ.get(proxy_key2)
442 467 if ip:
443 468 return _filters(ip)
444 469
445 470 ip = environ.get(def_key, '0.0.0.0')
446 471 return _filters(ip)
447 472
448 473
449 474 class RhodeCodeLogger(Logger):
450 475 """
451 476 Custom Logger that allows some customization that gunicorn doesn't allow
452 477 """
453 478
454 479 datefmt = r"%Y-%m-%d %H:%M:%S"
455 480
456 481 def __init__(self, cfg):
457 482 Logger.__init__(self, cfg)
458 483
459 484 def now(self):
460 485 """ return date in RhodeCode Log format """
461 486 now = time.time()
462 487 msecs = int((now - int(now)) * 1000)
463 488 return time.strftime(self.datefmt, time.localtime(now)) + '.{0:03d}'.format(msecs)
464 489
465 490 def atoms(self, resp, req, environ, request_time):
466 491 """ Gets atoms for log formatting.
467 492 """
468 493 status = resp.status
469 494 if isinstance(status, str):
470 495 status = status.split(None, 1)[0]
471 496 atoms = {
472 497 'h': get_ip_addr(environ),
473 498 'l': '-',
474 499 'u': self._get_user(environ) or '-',
475 500 't': self.now(),
476 501 'r': "%s %s %s" % (environ['REQUEST_METHOD'],
477 502 environ['RAW_URI'],
478 503 environ["SERVER_PROTOCOL"]),
479 504 's': status,
480 505 'm': environ.get('REQUEST_METHOD'),
481 506 'U': environ.get('PATH_INFO'),
482 507 'q': environ.get('QUERY_STRING'),
483 508 'H': environ.get('SERVER_PROTOCOL'),
484 509 'b': getattr(resp, 'sent', None) is not None and str(resp.sent) or '-',
485 510 'B': getattr(resp, 'sent', None),
486 511 'f': environ.get('HTTP_REFERER', '-'),
487 512 'a': environ.get('HTTP_USER_AGENT', '-'),
488 513 'T': request_time.seconds,
489 514 'D': (request_time.seconds * 1000000) + request_time.microseconds,
490 515 'M': (request_time.seconds * 1000) + int(request_time.microseconds/1000),
491 516 'L': "%d.%06d" % (request_time.seconds, request_time.microseconds),
492 517 'p': "<%s>" % os.getpid()
493 518 }
494 519
495 520 # add request headers
496 521 if hasattr(req, 'headers'):
497 522 req_headers = req.headers
498 523 else:
499 524 req_headers = req
500 525
501 526 if hasattr(req_headers, "items"):
502 527 req_headers = req_headers.items()
503 528
504 529 atoms.update({"{%s}i" % k.lower(): v for k, v in req_headers})
505 530
506 531 resp_headers = resp.headers
507 532 if hasattr(resp_headers, "items"):
508 533 resp_headers = resp_headers.items()
509 534
510 535 # add response headers
511 536 atoms.update({"{%s}o" % k.lower(): v for k, v in resp_headers})
512 537
513 538 # add environ variables
514 539 environ_variables = environ.items()
515 540 atoms.update({"{%s}e" % k.lower(): v for k, v in environ_variables})
516 541
517 542 return atoms
518 543
519 544
520 545 logger_class = RhodeCodeLogger
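To make the memory-recycling settings above concrete, here is a small worked sketch of the decision implemented in _check_memory_usage(), using hypothetical values (1 GB limit, 0.8 recovery threshold); it is illustrative only and not part of the shipped config:

    MB = 1024 * 1024
    memory_max_usage = 1024 * MB                  # hypothetical 1 GB hard limit per worker
    memory_usage_recovery_threshold = 0.8         # keep the worker if gc gets RSS below 80%

    recovery_bytes = memory_max_usage * memory_usage_recovery_threshold   # ~819 MB

    def should_recycle(rss_before_gc: int, rss_after_gc: int) -> bool:
        """Mirror of _check_memory_usage(): force gc when over the hard limit,
        and recycle the worker only if gc did not bring RSS back under the
        recovery threshold."""
        if rss_before_gc <= memory_max_usage:
            return False                          # under the limit, nothing to do
        return rss_after_gc > recovery_bytes      # gc was not enough -> worker restarts

    assert should_recycle(1100 * MB, 700 * MB) is False   # gc freed enough, worker keeps running
    assert should_recycle(1100 * MB, 900 * MB) is True    # still above ~819 MB, worker is restarted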
@@ -1,102 +1,102 b''
1 1 # deps, generated via pipdeptree --exclude setuptools,wheel,pipdeptree,pip -f | tr '[:upper:]' '[:lower:]'
2 2
3 3 async-timeout==4.0.3
4 4 atomicwrites==1.4.1
5 5 celery==5.3.6
6 6 billiard==4.2.0
7 7 click==8.1.3
8 8 click-didyoumean==0.3.0
9 9 click==8.1.3
10 10 click-plugins==1.1.1
11 11 click==8.1.3
12 12 click-repl==0.2.0
13 13 click==8.1.3
14 prompt-toolkit==3.0.38
15 wcwidth==0.2.6
14 prompt_toolkit==3.0.47
15 wcwidth==0.2.13
16 16 six==1.16.0
17 17 kombu==5.3.5
18 18 amqp==5.2.0
19 19 vine==5.1.0
20 20 vine==5.1.0
21 21 python-dateutil==2.8.2
22 22 six==1.16.0
23 23 tzdata==2024.1
24 24 vine==5.1.0
25 25 contextlib2==21.6.0
26 26 dogpile.cache==1.3.3
27 27 decorator==5.1.1
28 28 stevedore==5.1.0
29 29 pbr==5.11.1
30 30 dulwich==0.21.6
31 31 urllib3==1.26.14
32 fsspec==2024.6.0
33 gunicorn==21.2.0
34 packaging==24.0
32 fsspec==2024.9.0
33 gunicorn==23.0.0
34 packaging==24.1
35 35 hg-evolve==11.1.3
36 36 importlib-metadata==6.0.0
37 37 zipp==3.15.0
38 38 mercurial==6.7.4
39 39 more-itertools==9.1.0
40 40 msgpack==1.0.8
41 orjson==3.10.3
41 orjson==3.10.7
42 42 psutil==5.9.8
43 43 py==1.11.0
44 44 pygit2==1.13.3
45 45 cffi==1.16.0
46 46 pycparser==2.21
47 pygments==2.15.1
47 pygments==2.18.0
48 48 pyparsing==3.1.1
49 49 pyramid==2.0.2
50 50 hupper==1.12
51 51 plaster==1.1.2
52 52 plaster-pastedeploy==1.0.1
53 53 pastedeploy==3.1.0
54 54 plaster==1.1.2
55 55 translationstring==1.4
56 56 venusian==3.0.0
57 57 webob==1.8.7
58 58 zope.deprecation==5.0.0
59 zope.interface==6.3.0
60 redis==5.0.4
59 zope.interface==6.4.post2
60 redis==5.1.0
61 61 async-timeout==4.0.3
62 62 repoze.lru==0.7
63 s3fs==2024.6.0
63 s3fs==2024.9.0
64 64 aiobotocore==2.13.0
65 65 aiohttp==3.9.5
66 66 aiosignal==1.3.1
67 67 frozenlist==1.4.1
68 68 attrs==22.2.0
69 69 frozenlist==1.4.1
70 70 multidict==6.0.5
71 71 yarl==1.9.4
72 72 idna==3.4
73 73 multidict==6.0.5
74 74 aioitertools==0.11.0
75 75 botocore==1.34.106
76 76 jmespath==1.0.1
77 77 python-dateutil==2.8.2
78 78 six==1.16.0
79 79 urllib3==1.26.14
80 80 wrapt==1.16.0
81 81 aiohttp==3.9.5
82 82 aiosignal==1.3.1
83 83 frozenlist==1.4.1
84 84 attrs==22.2.0
85 85 frozenlist==1.4.1
86 86 multidict==6.0.5
87 87 yarl==1.9.4
88 88 idna==3.4
89 89 multidict==6.0.5
90 fsspec==2024.6.0
90 fsspec==2024.9.0
91 91 scandir==1.10.0
92 92 setproctitle==1.3.3
93 93 subvertpy==0.11.0
94 94 waitress==3.0.0
95 wcwidth==0.2.6
95 wcwidth==0.2.13
96 96
97 97
98 98 ## test related requirements
99 99 #-r requirements_test.txt
100 100
101 101 ## uncomment to add the debug libraries
102 102 #-r requirements_debug.txt
@@ -1,48 +1,48 b''
1 1 # test related requirements
2 2 mock==5.1.0
3 3 pytest-cov==4.1.0
4 4 coverage==7.4.3
5 5 pytest==8.1.1
6 6 iniconfig==2.0.0
7 packaging==24.0
7 packaging==24.1
8 8 pluggy==1.4.0
9 9 pytest-env==1.1.3
10 10 pytest==8.1.1
11 11 iniconfig==2.0.0
12 packaging==24.0
12 packaging==24.1
13 13 pluggy==1.4.0
14 14 pytest-profiling==1.7.0
15 15 gprof2dot==2022.7.29
16 16 pytest==8.1.1
17 17 iniconfig==2.0.0
18 packaging==24.0
18 packaging==24.1
19 19 pluggy==1.4.0
20 20 six==1.16.0
21 21 pytest-rerunfailures==13.0
22 packaging==24.0
22 packaging==24.1
23 23 pytest==8.1.1
24 24 iniconfig==2.0.0
25 packaging==24.0
25 packaging==24.1
26 26 pluggy==1.4.0
27 27 pytest-runner==6.0.1
28 28 pytest-sugar==1.0.0
29 packaging==24.0
29 packaging==24.1
30 30 pytest==8.1.1
31 31 iniconfig==2.0.0
32 packaging==24.0
32 packaging==24.1
33 33 pluggy==1.4.0
34 34 termcolor==2.4.0
35 35 pytest-timeout==2.3.1
36 36 pytest==8.1.1
37 37 iniconfig==2.0.0
38 packaging==24.0
38 packaging==24.1
39 39 pluggy==1.4.0
40 40 webtest==3.0.0
41 41 beautifulsoup4==4.12.3
42 42 soupsieve==2.5
43 43 waitress==3.0.0
44 44 webob==1.8.7
45 45
46 46 # RhodeCode test-data
47 47 rc_testdata @ https://code.rhodecode.com/upstream/rc-testdata-dist/raw/77378e9097f700b4c1b9391b56199fe63566b5c9/rc_testdata-0.11.0.tar.gz#egg=rc_testdata
48 48 rc_testdata==0.11.0
@@ -1,125 +1,131 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 """
19 19 Special exception handling over the wire.
20 20
21 21 Since we cannot assume that our client is able to import our exception classes,
22 22 this module provides a "wrapping" mechanism to raise plain exceptions
23 23 which contain an extra attribute `_vcs_kind` to allow a client to distinguish
24 24 different error conditions.
25 25 """
26 26
27 27 from pyramid.httpexceptions import HTTPLocked, HTTPForbidden
28 28
29 29
30 30 def _make_exception(kind, org_exc, *args):
31 31 """
32 32 Prepares a base `Exception` instance to be sent over the wire.
33 33
34 34 To give our caller a hint what this is about, it will attach an attribute
35 35 `_vcs_kind` to the exception.
36 36 """
37 37 exc = Exception(*args)
38 38 exc._vcs_kind = kind
39 39 exc._org_exc = org_exc
40 40 exc._org_exc_tb = getattr(org_exc, '_org_exc_tb', '')
41 41 return exc
42 42
43 43
44 44 def AbortException(org_exc=None):
45 45 def _make_exception_wrapper(*args):
46 46 return _make_exception('abort', org_exc, *args)
47 47 return _make_exception_wrapper
48 48
49 49
50 50 def ArchiveException(org_exc=None):
51 51 def _make_exception_wrapper(*args):
52 52 return _make_exception('archive', org_exc, *args)
53 53 return _make_exception_wrapper
54 54
55 55
56 def ClientNotSupportedException(org_exc=None):
57 def _make_exception_wrapper(*args):
58 return _make_exception('client_not_supported', org_exc, *args)
59 return _make_exception_wrapper
60
61
56 62 def LookupException(org_exc=None):
57 63 def _make_exception_wrapper(*args):
58 64 return _make_exception('lookup', org_exc, *args)
59 65 return _make_exception_wrapper
60 66
61 67
62 68 def VcsException(org_exc=None):
63 69 def _make_exception_wrapper(*args):
64 70 return _make_exception('error', org_exc, *args)
65 71 return _make_exception_wrapper
66 72
67 73
68 74 def RepositoryLockedException(org_exc=None):
69 75 def _make_exception_wrapper(*args):
70 76 return _make_exception('repo_locked', org_exc, *args)
71 77 return _make_exception_wrapper
72 78
73 79
74 80 def RepositoryBranchProtectedException(org_exc=None):
75 81 def _make_exception_wrapper(*args):
76 82 return _make_exception('repo_branch_protected', org_exc, *args)
77 83 return _make_exception_wrapper
78 84
79 85
80 86 def RequirementException(org_exc=None):
81 87 def _make_exception_wrapper(*args):
82 88 return _make_exception('requirement', org_exc, *args)
83 89 return _make_exception_wrapper
84 90
85 91
86 92 def UnhandledException(org_exc=None):
87 93 def _make_exception_wrapper(*args):
88 94 return _make_exception('unhandled', org_exc, *args)
89 95 return _make_exception_wrapper
90 96
91 97
92 98 def URLError(org_exc=None):
93 99 def _make_exception_wrapper(*args):
94 100 return _make_exception('url_error', org_exc, *args)
95 101 return _make_exception_wrapper
96 102
97 103
98 104 def SubrepoMergeException(org_exc=None):
99 105 def _make_exception_wrapper(*args):
100 106 return _make_exception('subrepo_merge_error', org_exc, *args)
101 107 return _make_exception_wrapper
102 108
103 109
104 110 class HTTPRepoLocked(HTTPLocked):
105 111 """
106 112 Subclass of HTTPLocked response that allows setting the title and status
107 113 code via constructor arguments.
108 114 """
109 115 def __init__(self, title, status_code=None, **kwargs):
110 116 self.code = status_code or HTTPLocked.code
111 117 self.title = title
112 118 super().__init__(**kwargs)
113 119
114 120
115 121 class HTTPRepoBranchProtected(HTTPForbidden):
116 122 def __init__(self, *args, **kwargs):
117 123 super(HTTPForbidden, self).__init__(*args, **kwargs)
118 124
119 125
120 126 class RefNotFoundException(KeyError):
121 127 pass
122 128
123 129
124 130 class NoContentException(ValueError):
125 131 pass
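Since the factories above return plain Exception instances tagged with `_vcs_kind`, a caller never needs to import vcsserver's exception classes. A minimal sketch of both sides (the import path follows the `from vcsserver import exceptions` usage seen elsewhere in this release):

    from vcsserver import exceptions

    # server side: wrap an original error so it can travel over the wire
    try:
        raise KeyError('unknown-ref')
    except KeyError as org_exc:
        wrapped = exceptions.LookupException(org_exc)('ref `unknown-ref` not found')

    # caller side: only the marker attribute is needed to tell error kinds apart
    kind = getattr(wrapped, '_vcs_kind', None)
    if kind == 'lookup':
        print('missing ref:', wrapped.args[0])
    elif kind == 'client_not_supported':
        print('client needs to be upgraded')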
@@ -1,296 +1,302 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import re
19 19 import logging
20 20
21 from gunicorn.http.errors import NoMoreData
21 22 from pyramid.config import Configurator
22 23 from pyramid.response import Response, FileIter
23 24 from pyramid.httpexceptions import (
24 25 HTTPBadRequest, HTTPNotImplemented, HTTPNotFound, HTTPForbidden,
25 26 HTTPUnprocessableEntity)
26 27
27 28 from vcsserver.lib.ext_json import json
28 29 from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
29 30 from vcsserver.git_lfs.utils import safe_result, get_cython_compat_decorator
30 31 from vcsserver.lib.str_utils import safe_int
31 32
32 33 log = logging.getLogger(__name__)
33 34
34 35
35 36 GIT_LFS_CONTENT_TYPE = 'application/vnd.git-lfs' # +json ?
36 37 GIT_LFS_PROTO_PAT = re.compile(r'^/(.+)/(info/lfs/(.+))')
37 38
38 39
39 40 def write_response_error(http_exception, text=None):
40 41 content_type = GIT_LFS_CONTENT_TYPE + '+json'
41 42 _exception = http_exception(content_type=content_type)
42 43 _exception.content_type = content_type
43 44 if text:
44 45 _exception.body = json.dumps({'message': text})
45 46 log.debug('LFS: writing response of type %s to client with text:%s',
46 47 http_exception, text)
47 48 return _exception
48 49
49 50
50 51 class AuthHeaderRequired:
51 52 """
52 53 Decorator to check if request has proper auth-header
53 54 """
54 55
55 56 def __call__(self, func):
56 57 return get_cython_compat_decorator(self.__wrapper, func)
57 58
58 59 def __wrapper(self, func, *fargs, **fkwargs):
59 60 request = fargs[1]
60 61 auth = request.authorization
61 62 if not auth:
62 63 return write_response_error(HTTPForbidden)
63 64 return func(*fargs[1:], **fkwargs)
64 65
65 66
66 67 # views
67 68
68 69 def lfs_objects(request):
69 70 # indicate not supported, V1 API
70 71 log.warning('LFS: v1 api not supported, reporting it back to client')
71 72 return write_response_error(HTTPNotImplemented, 'LFS: v1 api not supported')
72 73
73 74
74 75 @AuthHeaderRequired()
75 76 def lfs_objects_batch(request):
76 77 """
77 78 The client sends the following information to the Batch endpoint to transfer some objects:
78 79
79 80 operation - Should be download or upload.
80 81 transfers - An optional Array of String identifiers for transfer
81 82 adapters that the client has configured. If omitted, the basic
82 83 transfer adapter MUST be assumed by the server.
83 84 objects - An Array of objects to download.
84 85 oid - String OID of the LFS object.
85 86 size - Integer byte size of the LFS object. Must be at least zero.
86 87 """
87 88 request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
88 89 auth = request.authorization
89 90 repo = request.matchdict.get('repo')
90 91 data = request.json
91 92 operation = data.get('operation')
92 93 http_scheme = request.registry.git_lfs_http_scheme
93 94
94 95 if operation not in ('download', 'upload'):
95 96 log.debug('LFS: unsupported operation:%s', operation)
96 97 return write_response_error(
97 98 HTTPBadRequest, f'unsupported operation mode: `{operation}`')
98 99
99 100 if 'objects' not in data:
100 101 log.debug('LFS: missing objects data')
101 102 return write_response_error(
102 103 HTTPBadRequest, 'missing objects data')
103 104
104 105 log.debug('LFS: handling operation of type: %s', operation)
105 106
106 107 objects = []
107 108 for o in data['objects']:
108 109 try:
109 110 oid = o['oid']
110 111 obj_size = o['size']
111 112 except KeyError:
112 113 log.exception('LFS, failed to extract data')
113 114 return write_response_error(
114 115 HTTPBadRequest, 'unsupported data in objects')
115 116
116 117 obj_data = {'oid': oid}
117 118 if http_scheme == 'http':
118 119 # Note(marcink): when using http, we might have a custom port,
119 120 # so we skip setting the scheme; url dispatch then won't generate a port in the URL.
120 121 # We need this for development.
121 122 http_scheme = None
122 123
123 124 obj_href = request.route_url('lfs_objects_oid', repo=repo, oid=oid,
124 125 _scheme=http_scheme)
125 126 obj_verify_href = request.route_url('lfs_objects_verify', repo=repo,
126 127 _scheme=http_scheme)
127 128 store = LFSOidStore(
128 129 oid, repo, store_location=request.registry.git_lfs_store_path)
129 130 handler = OidHandler(
130 131 store, repo, auth, oid, obj_size, obj_data,
131 132 obj_href, obj_verify_href)
132 133
133 134 # this verifies also OIDs
134 135 actions, errors = handler.exec_operation(operation)
135 136 if errors:
136 137 log.warning('LFS: got following errors: %s', errors)
137 138 obj_data['errors'] = errors
138 139
139 140 if actions:
140 141 obj_data['actions'] = actions
141 142
142 143 obj_data['size'] = obj_size
143 144 obj_data['authenticated'] = True
144 145 objects.append(obj_data)
145 146
146 147 result = {'objects': objects, 'transfer': 'basic'}
147 148 log.debug('LFS Response %s', safe_result(result))
148 149
149 150 return result
150 151
151 152
152 153 def lfs_objects_oid_upload(request):
153 154 request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
154 155 repo = request.matchdict.get('repo')
155 156 oid = request.matchdict.get('oid')
156 157 store = LFSOidStore(
157 158 oid, repo, store_location=request.registry.git_lfs_store_path)
158 159 engine = store.get_engine(mode='wb')
159 160 log.debug('LFS: starting chunked write of LFS oid: %s to storage', oid)
160 161
161 162 body = request.environ['wsgi.input']
162 163
163 164 with engine as f:
164 165 blksize = 64 * 1024 # 64kb
165 166 while True:
166 167 # read in chunks as stream comes in from Gunicorn
167 168 # this is a specific Gunicorn support function.
168 169 # might work differently on waitress
169 chunk = body.read(blksize)
170 try:
171 chunk = body.read(blksize)
172 except NoMoreData:
173 chunk = None
174
170 175 if not chunk:
171 176 break
177
172 178 f.write(chunk)
173 179
174 180 return {'upload': 'ok'}
175 181
176 182
177 183 def lfs_objects_oid_download(request):
178 184 repo = request.matchdict.get('repo')
179 185 oid = request.matchdict.get('oid')
180 186
181 187 store = LFSOidStore(
182 188 oid, repo, store_location=request.registry.git_lfs_store_path)
183 189 if not store.has_oid():
184 190 log.debug('LFS: oid %s does not exist in store', oid)
185 191 return write_response_error(
186 192 HTTPNotFound, f'requested file with oid `{oid}` not found in store')
187 193
188 194 # TODO(marcink): support range header ?
189 195 # Range: bytes=0-, `bytes=(\d+)\-.*`
190 196
191 197 f = open(store.oid_path, 'rb')
192 198 response = Response(
193 199 content_type='application/octet-stream', app_iter=FileIter(f))
194 200 response.headers.add('X-RC-LFS-Response-Oid', str(oid))
195 201 return response
196 202
197 203
198 204 def lfs_objects_verify(request):
199 205 request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
200 206 repo = request.matchdict.get('repo')
201 207
202 208 data = request.json
203 209 oid = data.get('oid')
204 210 size = safe_int(data.get('size'))
205 211
206 212 if not (oid and size):
207 213 return write_response_error(
208 214 HTTPBadRequest, 'missing oid and size in request data')
209 215
210 216 store = LFSOidStore(
211 217 oid, repo, store_location=request.registry.git_lfs_store_path)
212 218 if not store.has_oid():
213 219 log.debug('LFS: oid %s does not exists in store', oid)
214 220 return write_response_error(
215 221 HTTPNotFound, f'oid `{oid}` does not exists in store')
216 222
217 223 store_size = store.size_oid()
218 224 if store_size != size:
219 225 msg = 'requested file size mismatch store size:{} requested:{}'.format(
220 226 store_size, size)
221 227 return write_response_error(
222 228 HTTPUnprocessableEntity, msg)
223 229
224 230 return {'message': {'size': 'ok', 'in_store': 'ok'}}
225 231
226 232
227 233 def lfs_objects_lock(request):
228 234 return write_response_error(
229 235 HTTPNotImplemented, 'GIT LFS locking api not supported')
230 236
231 237
232 238 def not_found(request):
233 239 return write_response_error(
234 240 HTTPNotFound, 'request path not found')
235 241
236 242
237 243 def lfs_disabled(request):
238 244 return write_response_error(
239 245 HTTPNotImplemented, 'GIT LFS disabled for this repo')
240 246
241 247
242 248 def git_lfs_app(config):
243 249
244 250 # v1 API deprecation endpoint
245 251 config.add_route('lfs_objects',
246 252 '/{repo:.*?[^/]}/info/lfs/objects')
247 253 config.add_view(lfs_objects, route_name='lfs_objects',
248 254 request_method='POST', renderer='json')
249 255
250 256 # locking API
251 257 config.add_route('lfs_objects_lock',
252 258 '/{repo:.*?[^/]}/info/lfs/locks')
253 259 config.add_view(lfs_objects_lock, route_name='lfs_objects_lock',
254 260 request_method=('POST', 'GET'), renderer='json')
255 261
256 262 config.add_route('lfs_objects_lock_verify',
257 263 '/{repo:.*?[^/]}/info/lfs/locks/verify')
258 264 config.add_view(lfs_objects_lock, route_name='lfs_objects_lock_verify',
259 265 request_method=('POST', 'GET'), renderer='json')
260 266
261 267 # batch API
262 268 config.add_route('lfs_objects_batch',
263 269 '/{repo:.*?[^/]}/info/lfs/objects/batch')
264 270 config.add_view(lfs_objects_batch, route_name='lfs_objects_batch',
265 271 request_method='POST', renderer='json')
266 272
267 273 # oid upload/download API
268 274 config.add_route('lfs_objects_oid',
269 275 '/{repo:.*?[^/]}/info/lfs/objects/{oid}')
270 276 config.add_view(lfs_objects_oid_upload, route_name='lfs_objects_oid',
271 277 request_method='PUT', renderer='json')
272 278 config.add_view(lfs_objects_oid_download, route_name='lfs_objects_oid',
273 279 request_method='GET', renderer='json')
274 280
275 281 # verification API
276 282 config.add_route('lfs_objects_verify',
277 283 '/{repo:.*?[^/]}/info/lfs/verify')
278 284 config.add_view(lfs_objects_verify, route_name='lfs_objects_verify',
279 285 request_method='POST', renderer='json')
280 286
281 287 # not found handler for API
282 288 config.add_notfound_view(not_found, renderer='json')
283 289
284 290
285 291 def create_app(git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme):
286 292 config = Configurator()
287 293 if git_lfs_enabled:
288 294 config.include(git_lfs_app)
289 295 config.registry.git_lfs_store_path = git_lfs_store_path
290 296 config.registry.git_lfs_http_scheme = git_lfs_http_scheme
291 297 else:
292 298 # not found handler for API, reporting disabled LFS support
293 299 config.add_notfound_view(lfs_disabled, renderer='json')
294 300
295 301 app = config.make_wsgi_app()
296 302 return app
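For reference, the request body the batch endpoint above expects (as described in the lfs_objects_batch() docstring) is sketched below. The TestApp wiring is illustrative only: it assumes the module is importable as vcsserver.git_lfs.app and uses WebTest, which is already pinned in requirements_test.txt; the store path, scheme and oid are example values.

    # Shape of a batch request POSTed to /<repo>/info/lfs/objects/batch
    batch_request = {
        "operation": "download",            # or "upload"
        "transfers": ["basic"],             # optional; "basic" is assumed if omitted
        "objects": [
            {"oid": "a" * 64, "size": 123},  # hypothetical oid/size
        ],
    }

    # The app factory can be exercised in tests; values below are examples only.
    from webtest import TestApp
    from vcsserver.git_lfs.app import create_app   # assumed import path for this module

    app = TestApp(create_app(git_lfs_enabled=True,
                             git_lfs_store_path='/tmp/lfs-store',
                             git_lfs_http_scheme='http'))
    # Requests must carry an Authorization header, otherwise HTTPForbidden is returned.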
@@ -1,822 +1,824 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import io
19 19 import os
20 20 import sys
21 21 import logging
22 22 import collections
23 23 import base64
24 24 import msgpack
25 25 import dataclasses
26 26 import pygit2
27 27
28 28 import http.client
29 29 from celery import Celery
30 30
31 31 import mercurial.scmutil
32 32 import mercurial.node
33 33
34 34 from vcsserver import exceptions, subprocessio, settings
35 35 from vcsserver.lib.ext_json import json
36 36 from vcsserver.lib.str_utils import ascii_str, safe_str
37 37 from vcsserver.lib.svn_txn_utils import get_txn_id_from_store
38 38 from vcsserver.remote.git_remote import Repository
39 39
40 40 celery_app = Celery('__vcsserver__')
41 41 log = logging.getLogger(__name__)
42 42
43 43
44 44 class HooksHttpClient:
45 45 proto = 'msgpack.v1'
46 46 connection = None
47 47
48 48 def __init__(self, hooks_uri):
49 49 self.hooks_uri = hooks_uri
50 50
51 51 def __repr__(self):
52 52 return f'{self.__class__}(hook_uri={self.hooks_uri}, proto={self.proto})'
53 53
54 54 def __call__(self, method, extras):
55 55 connection = http.client.HTTPConnection(self.hooks_uri)
56 56 # binary msgpack body
57 57 headers, body = self._serialize(method, extras)
58 58 log.debug('Doing a new hooks call using HTTPConnection to %s', self.hooks_uri)
59 59
60 60 try:
61 61 try:
62 62 connection.request('POST', '/', body, headers)
63 63 except Exception as error:
64 64 log.error('Hooks calling Connection failed on %s, org error: %s', connection.__dict__, error)
65 65 raise
66 66
67 67 response = connection.getresponse()
68 68 try:
69 69 return msgpack.load(response)
70 70 except Exception:
71 71 response_data = response.read()
72 72 log.exception('Failed to decode hook response json data. '
73 73 'response_code:%s, raw_data:%s',
74 74 response.status, response_data)
75 75 raise
76 76 finally:
77 77 connection.close()
78 78
79 79 @classmethod
80 80 def _serialize(cls, hook_name, extras):
81 81 data = {
82 82 'method': hook_name,
83 83 'extras': extras
84 84 }
85 85 headers = {
86 86 "rc-hooks-protocol": cls.proto,
87 87 "Connection": "keep-alive"
88 88 }
89 89 return headers, msgpack.packb(data)
90 90
91 91
92 92 class HooksCeleryClient:
93 93 TASK_TIMEOUT = 60 # time in seconds
94 94
95 95 def __init__(self, queue, backend):
96 96 celery_app.config_from_object({
97 97 'broker_url': queue, 'result_backend': backend,
98 98 'broker_connection_retry_on_startup': True,
99 99 'task_serializer': 'json',
100 100 'accept_content': ['json', 'msgpack'],
101 101 'result_serializer': 'json',
102 102 'result_accept_content': ['json', 'msgpack']
103 103 })
104 104 self.celery_app = celery_app
105 105
106 106 def __call__(self, method, extras):
107 107 inquired_task = self.celery_app.signature(
108 108 f'rhodecode.lib.celerylib.tasks.{method}'
109 109 )
110 110 return inquired_task.delay(extras).get(timeout=self.TASK_TIMEOUT)
111 111
112 112
113 113 class HooksShadowRepoClient:
114 114
115 115 def __call__(self, hook_name, extras):
116 116 return {'output': '', 'status': 0}
117 117
118 118
119 119 class RemoteMessageWriter:
120 120 """Writer base class."""
121 121 def write(self, message):
122 122 raise NotImplementedError()
123 123
124 124
125 125 class HgMessageWriter(RemoteMessageWriter):
126 126 """Writer that knows how to send messages to mercurial clients."""
127 127
128 128 def __init__(self, ui):
129 129 self.ui = ui
130 130
131 131 def write(self, message: str):
132 132 # TODO: Check why the quiet flag is set by default.
133 133 old = self.ui.quiet
134 134 self.ui.quiet = False
135 135 self.ui.status(message.encode('utf-8'))
136 136 self.ui.quiet = old
137 137
138 138
139 139 class GitMessageWriter(RemoteMessageWriter):
140 140 """Writer that knows how to send messages to git clients."""
141 141
142 142 def __init__(self, stdout=None):
143 143 self.stdout = stdout or sys.stdout
144 144
145 145 def write(self, message: str):
146 146 self.stdout.write(message)
147 147
148 148
149 149 class SvnMessageWriter(RemoteMessageWriter):
150 150 """Writer that knows how to send messages to svn clients."""
151 151
152 152 def __init__(self, stderr=None):
153 153 # SVN needs data sent to stderr for back-to-client messaging
154 154 self.stderr = stderr or sys.stderr
155 155
156 156 def write(self, message):
157 157 self.stderr.write(message)
158 158
159 159
160 160 def _handle_exception(result):
161 161 exception_class = result.get('exception')
162 162 exception_traceback = result.get('exception_traceback')
163 163 log.debug('Handling hook-call exception: %s', exception_class)
164 164
165 165 if exception_traceback:
166 166 log.error('Got traceback from remote call:%s', exception_traceback)
167 167
168 168 if exception_class == 'HTTPLockedRC':
169 169 raise exceptions.RepositoryLockedException()(*result['exception_args'])
170 elif exception_class == 'ClientNotSupportedError':
171 raise exceptions.ClientNotSupportedException()(*result['exception_args'])
170 172 elif exception_class == 'HTTPBranchProtected':
171 173 raise exceptions.RepositoryBranchProtectedException()(*result['exception_args'])
172 174 elif exception_class == 'RepositoryError':
173 175 raise exceptions.VcsException()(*result['exception_args'])
174 176 elif exception_class:
175 177 raise Exception(
176 178 f"""Got remote exception "{exception_class}" with args "{result['exception_args']}" """
177 179 )
178 180
179 181
180 182 def _get_hooks_client(extras):
181 183 hooks_uri = extras.get('hooks_uri')
182 184 task_queue = extras.get('task_queue')
183 185 task_backend = extras.get('task_backend')
184 186 is_shadow_repo = extras.get('is_shadow_repo')
185 187
186 188 if hooks_uri:
187 189 return HooksHttpClient(hooks_uri)
188 190 elif task_queue and task_backend:
189 191 return HooksCeleryClient(task_queue, task_backend)
190 192 elif is_shadow_repo:
191 193 return HooksShadowRepoClient()
192 194 else:
193 195 raise Exception("Hooks client not found!")
194 196
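    # Editor's note (illustrative, not part of the file): which client
    # _get_hooks_client() above returns, depending on the extras it receives:
    #     {'hooks_uri': '127.0.0.1:10020'}                        -> HooksHttpClient   (hypothetical host:port)
    #     {'task_queue': <broker url>, 'task_backend': <backend>} -> HooksCeleryClient
    #     {'is_shadow_repo': True}                                -> HooksShadowRepoClient
    #     {}                                                      -> Exception("Hooks client not found!")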
195 197
196 198 def _call_hook(hook_name, extras, writer):
197 199 hooks_client = _get_hooks_client(extras)
198 200 log.debug('Hooks, using client:%s', hooks_client)
199 201 result = hooks_client(hook_name, extras)
200 202 log.debug('Hooks got result: %s', result)
201 203 _handle_exception(result)
202 204 writer.write(result['output'])
203 205
204 206 return result['status']
205 207
206 208
207 209 def _extras_from_ui(ui):
208 210 hook_data = ui.config(b'rhodecode', b'RC_SCM_DATA')
209 211 if not hook_data:
210 212 # maybe it's inside environ ?
211 213 env_hook_data = os.environ.get('RC_SCM_DATA')
212 214 if env_hook_data:
213 215 hook_data = env_hook_data
214 216
215 217 extras = {}
216 218 if hook_data:
217 219 extras = json.loads(hook_data)
218 220 return extras
219 221
220 222
221 223 def _rev_range_hash(repo, node, check_heads=False):
222 224 from vcsserver.hgcompat import get_ctx
223 225
224 226 commits = []
225 227 revs = []
226 228 start = get_ctx(repo, node).rev()
227 229 end = len(repo)
228 230 for rev in range(start, end):
229 231 revs.append(rev)
230 232 ctx = get_ctx(repo, rev)
231 233 commit_id = ascii_str(mercurial.node.hex(ctx.node()))
232 234 branch = safe_str(ctx.branch())
233 235 commits.append((commit_id, branch))
234 236
235 237 parent_heads = []
236 238 if check_heads:
237 239 parent_heads = _check_heads(repo, start, end, revs)
238 240 return commits, parent_heads
239 241
240 242
241 243 def _check_heads(repo, start, end, commits):
242 244 from vcsserver.hgcompat import get_ctx
243 245 changelog = repo.changelog
244 246 parents = set()
245 247
246 248 for new_rev in commits:
247 249 for p in changelog.parentrevs(new_rev):
248 250 if p == mercurial.node.nullrev:
249 251 continue
250 252 if p < start:
251 253 parents.add(p)
252 254
253 255 for p in parents:
254 256 branch = get_ctx(repo, p).branch()
255 257 # The heads descending from that parent, on the same branch
256 258 parent_heads = {p}
257 259 reachable = {p}
258 260 for x in range(p + 1, end):
259 261 if get_ctx(repo, x).branch() != branch:
260 262 continue
261 263 for pp in changelog.parentrevs(x):
262 264 if pp in reachable:
263 265 reachable.add(x)
264 266 parent_heads.discard(pp)
265 267 parent_heads.add(x)
266 268 # More than one head? Suggest merging
267 269 if len(parent_heads) > 1:
268 270 return list(parent_heads)
269 271
270 272 return []
271 273
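# Descriptive note on _check_heads (an interpretation of the code above, not
# original documentation): for every parent revision that lies below the pushed
# range, the loop walks forward over revisions on the same branch and keeps only
# the heads that no later same-branch revision descends from. If more than one
# head survives for any parent, the push created multiple heads on that branch
# and the caller (pre_push with detect_force_push) reports them so a merge can
# be suggested.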
272 274
273 275 def _get_git_env():
274 276 env = {}
275 277 for k, v in os.environ.items():
276 278 if k.startswith('GIT'):
277 279 env[k] = v
278 280
279 281 # serialized version
280 282 return [(k, v) for k, v in env.items()]
281 283
282 284
283 285 def _get_hg_env(old_rev, new_rev, txnid, repo_path):
284 286 env = {}
285 287 for k, v in os.environ.items():
286 288 if k.startswith('HG'):
287 289 env[k] = v
288 290
289 291 env['HG_NODE'] = old_rev
290 292 env['HG_NODE_LAST'] = new_rev
291 293 env['HG_TXNID'] = txnid
292 294 env['HG_PENDING'] = repo_path
293 295
294 296 return [(k, v) for k, v in env.items()]
295 297
296 298
297 299 def _get_ini_settings(ini_file):
298 300 from vcsserver.http_main import sanitize_settings_and_apply_defaults
299 301 from vcsserver.lib.config_utils import get_app_config_lightweight, configure_and_store_settings
300 302
301 303 global_config = {'__file__': ini_file}
302 304 ini_settings = get_app_config_lightweight(ini_file)
303 305 sanitize_settings_and_apply_defaults(global_config, ini_settings)
304 306 configure_and_store_settings(global_config, ini_settings)
305 307
306 308 return ini_settings
307 309
308 310
309 311 def _fix_hooks_executables(ini_path=''):
310 312 """
311 313     This is a trick to set proper settings.EXECUTABLE paths for certain execution patterns,
312 314     especially for subversion, where hooks strip the entire env and calling just the 'svn' command
313 315     will most likely fail because svn is not on PATH
314 316 """
315 317 # set defaults, in case we can't read from ini_file
316 318 core_binary_dir = settings.BINARY_DIR or '/usr/local/bin/rhodecode_bin/vcs_bin'
317 319 if ini_path:
318 320 ini_settings = _get_ini_settings(ini_path)
319 321 core_binary_dir = ini_settings['core.binary_dir']
320 322
321 323 settings.BINARY_DIR = core_binary_dir
322 324
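# Hedged example of the ini option read above; the section name and path are
# assumptions, only the 'core.binary_dir' key is taken from the code:
#
#   [app:main]
#   core.binary_dir = /usr/local/bin/rhodecode_bin/vcs_bin
#
# When no ini file is passed, the hard-coded default above is used instead.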
323 325
324 326 def repo_size(ui, repo, **kwargs):
325 327 extras = _extras_from_ui(ui)
326 328 return _call_hook('repo_size', extras, HgMessageWriter(ui))
327 329
328 330
329 331 def pre_pull(ui, repo, **kwargs):
330 332 extras = _extras_from_ui(ui)
331 333 return _call_hook('pre_pull', extras, HgMessageWriter(ui))
332 334
333 335
334 336 def pre_pull_ssh(ui, repo, **kwargs):
335 337 extras = _extras_from_ui(ui)
336 338 if extras and extras.get('SSH'):
337 339 return pre_pull(ui, repo, **kwargs)
338 340 return 0
339 341
340 342
341 343 def post_pull(ui, repo, **kwargs):
342 344 extras = _extras_from_ui(ui)
343 345 return _call_hook('post_pull', extras, HgMessageWriter(ui))
344 346
345 347
346 348 def post_pull_ssh(ui, repo, **kwargs):
347 349 extras = _extras_from_ui(ui)
348 350 if extras and extras.get('SSH'):
349 351 return post_pull(ui, repo, **kwargs)
350 352 return 0
351 353
352 354
353 355 def pre_push(ui, repo, node=None, **kwargs):
354 356 """
355 357 Mercurial pre_push hook
356 358 """
357 359 extras = _extras_from_ui(ui)
358 360 detect_force_push = extras.get('detect_force_push')
359 361
360 362 rev_data = []
361 363 hook_type: str = safe_str(kwargs.get('hooktype'))
362 364
363 365 if node and hook_type == 'pretxnchangegroup':
364 366 branches = collections.defaultdict(list)
365 367 commits, _heads = _rev_range_hash(repo, node, check_heads=detect_force_push)
366 368 for commit_id, branch in commits:
367 369 branches[branch].append(commit_id)
368 370
369 371 for branch, commits in branches.items():
370 372 old_rev = ascii_str(kwargs.get('node_last')) or commits[0]
371 373 rev_data.append({
372 374 'total_commits': len(commits),
373 375 'old_rev': old_rev,
374 376 'new_rev': commits[-1],
375 377 'ref': '',
376 378 'type': 'branch',
377 379 'name': branch,
378 380 })
379 381
380 382 for push_ref in rev_data:
381 383 push_ref['multiple_heads'] = _heads
382 384
383 385 repo_path = os.path.join(
384 386 extras.get('repo_store', ''), extras.get('repository', ''))
385 387 push_ref['hg_env'] = _get_hg_env(
386 388 old_rev=push_ref['old_rev'],
387 389 new_rev=push_ref['new_rev'], txnid=ascii_str(kwargs.get('txnid')),
388 390 repo_path=repo_path)
389 391
390 392 extras['hook_type'] = hook_type or 'pre_push'
391 393 extras['commit_ids'] = rev_data
392 394
393 395 return _call_hook('pre_push', extras, HgMessageWriter(ui))
394 396
395 397
396 398 def pre_push_ssh(ui, repo, node=None, **kwargs):
397 399 extras = _extras_from_ui(ui)
398 400 if extras.get('SSH'):
399 401 return pre_push(ui, repo, node, **kwargs)
400 402
401 403 return 0
402 404
403 405
404 406 def pre_push_ssh_auth(ui, repo, node=None, **kwargs):
405 407 """
406 408 Mercurial pre_push hook for SSH
407 409 """
408 410 extras = _extras_from_ui(ui)
409 411 if extras.get('SSH'):
410 412 permission = extras['SSH_PERMISSIONS']
411 413
412 414 if 'repository.write' == permission or 'repository.admin' == permission:
413 415 return 0
414 416
415 417 # non-zero ret code
416 418 return 1
417 419
418 420 return 0
419 421
420 422
421 423 def post_push(ui, repo, node, **kwargs):
422 424 """
423 425 Mercurial post_push hook
424 426 """
425 427 extras = _extras_from_ui(ui)
426 428
427 429 commit_ids = []
428 430 branches = []
429 431 bookmarks = []
430 432 tags = []
431 433 hook_type: str = safe_str(kwargs.get('hooktype'))
432 434
433 435 commits, _heads = _rev_range_hash(repo, node)
434 436 for commit_id, branch in commits:
435 437 commit_ids.append(commit_id)
436 438 if branch not in branches:
437 439 branches.append(branch)
438 440
439 441 if hasattr(ui, '_rc_pushkey_bookmarks'):
440 442 bookmarks = ui._rc_pushkey_bookmarks
441 443
442 444 extras['hook_type'] = hook_type or 'post_push'
443 445 extras['commit_ids'] = commit_ids
444 446
445 447 extras['new_refs'] = {
446 448 'branches': branches,
447 449 'bookmarks': bookmarks,
448 450 'tags': tags
449 451 }
450 452
451 453 return _call_hook('post_push', extras, HgMessageWriter(ui))
452 454
453 455
454 456 def post_push_ssh(ui, repo, node, **kwargs):
455 457 """
456 458 Mercurial post_push hook for SSH
457 459 """
458 460 if _extras_from_ui(ui).get('SSH'):
459 461 return post_push(ui, repo, node, **kwargs)
460 462 return 0
461 463
462 464
463 465 def key_push(ui, repo, **kwargs):
464 466 from vcsserver.hgcompat import get_ctx
465 467
466 468 if kwargs['new'] != b'0' and kwargs['namespace'] == b'bookmarks':
467 469 # store new bookmarks in our UI object propagated later to post_push
468 470 ui._rc_pushkey_bookmarks = get_ctx(repo, kwargs['key']).bookmarks()
469 471 return
470 472
471 473
472 474 # backward compat
473 475 log_pull_action = post_pull
474 476
475 477 # backward compat
476 478 log_push_action = post_push
477 479
478 480
479 481 def handle_git_pre_receive(unused_repo_path, unused_revs, unused_env):
480 482 """
481 483 Old hook name: keep here for backward compatibility.
482 484
483 485 This is only required when the installed git hooks are not upgraded.
484 486 """
485 487 pass
486 488
487 489
488 490 def handle_git_post_receive(unused_repo_path, unused_revs, unused_env):
489 491 """
490 492 Old hook name: keep here for backward compatibility.
491 493
492 494 This is only required when the installed git hooks are not upgraded.
493 495 """
494 496 pass
495 497
496 498
497 499 @dataclasses.dataclass
498 500 class HookResponse:
499 501 status: int
500 502 output: str
501 503
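# Small usage sketch for the dataclass above (illustrative only):
#
#   response = git_pre_pull(extras)
#   if response.status != 0:
#       sys.stderr.write(response.output)
#
# callers of the git_* pull helpers below would typically inspect both fields.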
502 504
503 505 def git_pre_pull(extras) -> HookResponse:
504 506 """
505 507 Pre pull hook.
506 508
507 509 :param extras: dictionary containing the keys defined in simplevcs
508 510 :type extras: dict
509 511
510 512     :return: HookResponse with the hook status code (0 for success) and the captured output.
511 513     :rtype: HookResponse
512 514 """
513 515
514 516 if 'pull' not in extras['hooks']:
515 517 return HookResponse(0, '')
516 518
517 519 stdout = io.StringIO()
518 520 try:
519 521 status_code = _call_hook('pre_pull', extras, GitMessageWriter(stdout))
520 522
521 523 except Exception as error:
522 524 log.exception('Failed to call pre_pull hook')
523 525 status_code = 128
524 526 stdout.write(f'ERROR: {error}\n')
525 527
526 528 return HookResponse(status_code, stdout.getvalue())
527 529
528 530
529 531 def git_post_pull(extras) -> HookResponse:
530 532 """
531 533 Post pull hook.
532 534
533 535 :param extras: dictionary containing the keys defined in simplevcs
534 536 :type extras: dict
535 537
536 538     :return: HookResponse with the hook status code (0 for success) and the captured output.
537 539     :rtype: HookResponse
538 540 """
539 541 if 'pull' not in extras['hooks']:
540 542 return HookResponse(0, '')
541 543
542 544 stdout = io.StringIO()
543 545 try:
544 546 status = _call_hook('post_pull', extras, GitMessageWriter(stdout))
545 547 except Exception as error:
546 548 status = 128
547 549 stdout.write(f'ERROR: {error}\n')
548 550
549 551 return HookResponse(status, stdout.getvalue())
550 552
551 553
552 554 def _parse_git_ref_lines(revision_lines):
553 555 rev_data = []
554 556 for revision_line in revision_lines or []:
555 557 old_rev, new_rev, ref = revision_line.strip().split(' ')
556 558 ref_data = ref.split('/', 2)
557 559 if ref_data[1] in ('tags', 'heads'):
558 560 rev_data.append({
559 561 # NOTE(marcink):
560 562 # we're unable to tell total_commits for git at this point
561 563                 # but we set the variable for consistency with the other VCS backends
562 564 'total_commits': -1,
563 565 'old_rev': old_rev,
564 566 'new_rev': new_rev,
565 567 'ref': ref,
566 568 'type': ref_data[1],
567 569 'name': ref_data[2],
568 570 })
569 571 return rev_data
570 572
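# Illustrative example of the ref-line parsing above (SHAs are hypothetical):
#
#   line = '0000000000000000000000000000000000000000 1b2c3d4e... refs/heads/main'
#   _parse_git_ref_lines([line])
#   # -> [{'total_commits': -1,
#   #      'old_rev': '0000000000000000000000000000000000000000',
#   #      'new_rev': '1b2c3d4e...',
#   #      'ref': 'refs/heads/main', 'type': 'heads', 'name': 'main'}]
#
# lines whose second ref segment is neither 'tags' nor 'heads' are skipped.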
571 573
572 574 def git_pre_receive(unused_repo_path, revision_lines, env) -> int:
573 575 """
574 576 Pre push hook.
575 577
576 578 :return: status code of the hook. 0 for success.
577 579 """
578 580 extras = json.loads(env['RC_SCM_DATA'])
579 581 rev_data = _parse_git_ref_lines(revision_lines)
580 582 if 'push' not in extras['hooks']:
581 583 return 0
582 584 _fix_hooks_executables(env.get('RC_INI_FILE'))
583 585
584 586 empty_commit_id = '0' * 40
585 587
586 588 detect_force_push = extras.get('detect_force_push')
587 589
588 590 for push_ref in rev_data:
589 591 # store our git-env which holds the temp store
590 592 push_ref['git_env'] = _get_git_env()
591 593 push_ref['pruned_sha'] = ''
592 594 if not detect_force_push:
593 595 # don't check for forced-push when we don't need to
594 596 continue
595 597
596 598 type_ = push_ref['type']
597 599 new_branch = push_ref['old_rev'] == empty_commit_id
598 600 delete_branch = push_ref['new_rev'] == empty_commit_id
599 601 if type_ == 'heads' and not (new_branch or delete_branch):
600 602 old_rev = push_ref['old_rev']
601 603 new_rev = push_ref['new_rev']
602 604 cmd = [settings.GIT_EXECUTABLE(), 'rev-list', old_rev, f'^{new_rev}']
603 605 stdout, stderr = subprocessio.run_command(
604 606 cmd, env=os.environ.copy())
605 607             # non-empty output means there are non-reachable objects, i.e. a forced push was used
606 608 if stdout:
607 609 push_ref['pruned_sha'] = stdout.splitlines()
608 610
609 611 extras['hook_type'] = 'pre_receive'
610 612 extras['commit_ids'] = rev_data
611 613
612 614 stdout = sys.stdout
613 615 status_code = _call_hook('pre_push', extras, GitMessageWriter(stdout))
614 616
615 617 return status_code
616 618
617 619
618 620 def git_post_receive(unused_repo_path, revision_lines, env) -> int:
619 621 """
620 622 Post push hook.
621 623
622 624 :return: status code of the hook. 0 for success.
623 625 """
624 626 extras = json.loads(env['RC_SCM_DATA'])
625 627 if 'push' not in extras['hooks']:
626 628 return 0
627 629
628 630 _fix_hooks_executables(env.get('RC_INI_FILE'))
629 631
630 632 rev_data = _parse_git_ref_lines(revision_lines)
631 633
632 634 git_revs = []
633 635
634 636 # N.B.(skreft): it is ok to just call git, as git before calling a
635 637     # subcommand sets the PATH environment variable so that it points to the
636 638 # correct version of the git executable.
637 639 empty_commit_id = '0' * 40
638 640 branches = []
639 641 tags = []
640 642 for push_ref in rev_data:
641 643 type_ = push_ref['type']
642 644
643 645 if type_ == 'heads':
644 646 # starting new branch case
645 647 if push_ref['old_rev'] == empty_commit_id:
646 648 push_ref_name = push_ref['name']
647 649
648 650 if push_ref_name not in branches:
649 651 branches.append(push_ref_name)
650 652
651 653 need_head_set = ''
652 654 with Repository(os.getcwd()) as repo:
653 655 try:
654 656 repo.head
655 657 except pygit2.GitError:
656 658 need_head_set = f'refs/heads/{push_ref_name}'
657 659
658 660 if need_head_set:
659 661 repo.set_head(need_head_set)
660 662 print(f"Setting default branch to {push_ref_name}")
661 663
662 664 cmd = [settings.GIT_EXECUTABLE(), 'for-each-ref', '--format=%(refname)', 'refs/heads/*']
663 665 stdout, stderr = subprocessio.run_command(
664 666 cmd, env=os.environ.copy())
665 667 heads = safe_str(stdout)
666 668 heads = heads.replace(push_ref['ref'], '')
667 669 heads = ' '.join(head for head
668 670 in heads.splitlines() if head) or '.'
669 671 cmd = [settings.GIT_EXECUTABLE(), 'log', '--reverse',
670 672 '--pretty=format:%H', '--', push_ref['new_rev'],
671 673 '--not', heads]
672 674 stdout, stderr = subprocessio.run_command(
673 675 cmd, env=os.environ.copy())
674 676 git_revs.extend(list(map(ascii_str, stdout.splitlines())))
675 677
676 678 # delete branch case
677 679 elif push_ref['new_rev'] == empty_commit_id:
678 680 git_revs.append(f'delete_branch=>{push_ref["name"]}')
679 681 else:
680 682 if push_ref['name'] not in branches:
681 683 branches.append(push_ref['name'])
682 684
683 685 cmd = [settings.GIT_EXECUTABLE(), 'log',
684 686 f'{push_ref["old_rev"]}..{push_ref["new_rev"]}',
685 687 '--reverse', '--pretty=format:%H']
686 688 stdout, stderr = subprocessio.run_command(
687 689 cmd, env=os.environ.copy())
688 690 # we get bytes from stdout, we need str to be consistent
689 691 log_revs = list(map(ascii_str, stdout.splitlines()))
690 692 git_revs.extend(log_revs)
691 693
692 694 # Pure pygit2 impl. but still 2-3x slower :/
693 695 # results = []
694 696 #
695 697 # with Repository(os.getcwd()) as repo:
696 698 # repo_new_rev = repo[push_ref['new_rev']]
697 699 # repo_old_rev = repo[push_ref['old_rev']]
698 700 # walker = repo.walk(repo_new_rev.id, pygit2.GIT_SORT_TOPOLOGICAL)
699 701 #
700 702 # for commit in walker:
701 703 # if commit.id == repo_old_rev.id:
702 704 # break
703 705 # results.append(commit.id.hex)
704 706 # # reverse the order, can't use GIT_SORT_REVERSE
705 707 # log_revs = results[::-1]
706 708
707 709 elif type_ == 'tags':
708 710 if push_ref['name'] not in tags:
709 711 tags.append(push_ref['name'])
710 712 git_revs.append(f'tag=>{push_ref["name"]}')
711 713
712 714 extras['hook_type'] = 'post_receive'
713 715 extras['commit_ids'] = git_revs
714 716 extras['new_refs'] = {
715 717 'branches': branches,
716 718 'bookmarks': [],
717 719 'tags': tags,
718 720 }
719 721
720 722 stdout = sys.stdout
721 723
722 724 if 'repo_size' in extras['hooks']:
723 725 try:
724 726 _call_hook('repo_size', extras, GitMessageWriter(stdout))
725 727 except Exception:
726 728 pass
727 729
728 730 status_code = _call_hook('post_push', extras, GitMessageWriter(stdout))
729 731 return status_code
730 732
731 733
732 734 def get_extras_from_txn_id(repo_path, txn_id):
733 735 extras = get_txn_id_from_store(repo_path, txn_id)
734 736 return extras
735 737
736 738
737 739 def svn_pre_commit(repo_path, commit_data, env):
738 740
739 741 path, txn_id = commit_data
740 742 branches = []
741 743 tags = []
742 744
743 745 if env.get('RC_SCM_DATA'):
744 746 extras = json.loads(env['RC_SCM_DATA'])
745 747 else:
746 748 ini_path = env.get('RC_INI_FILE')
747 749 if ini_path:
748 750 _get_ini_settings(ini_path)
749 751 # fallback method to read from TXN-ID stored data
750 752 extras = get_extras_from_txn_id(path, txn_id)
751 753
752 754 if not extras:
753 755         raise ValueError('SVN-PRE-COMMIT: Failed to extract context data from extras for hook execution')
754 756
755 757 if extras.get('rc_internal_commit'):
756 758 # special marker for internal commit, we don't call hooks client
757 759 return 0
758 760
759 761 extras['hook_type'] = 'pre_commit'
760 762 extras['commit_ids'] = [txn_id]
761 763 extras['txn_id'] = txn_id
762 764 extras['new_refs'] = {
763 765 'total_commits': 1,
764 766 'branches': branches,
765 767 'bookmarks': [],
766 768 'tags': tags,
767 769 }
768 770
769 771 return _call_hook('pre_push', extras, SvnMessageWriter())
770 772
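# Descriptive note for svn_pre_commit above and svn_post_commit below (an
# interpretation of the code, not original documentation): extras come from the
# RC_SCM_DATA environment variable when the hook is triggered through RhodeCode
# itself; otherwise they are recovered from the txn-id store via
# get_extras_from_txn_id(), optionally bootstrapping settings from RC_INI_FILE.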
771 773
772 774 def svn_post_commit(repo_path, commit_data, env):
773 775 """
774 776 commit_data is path, rev, txn_id
775 777 """
776 778
777 779 if len(commit_data) == 3:
778 780 path, commit_id, txn_id = commit_data
779 781 elif len(commit_data) == 2:
780 782 log.error('Failed to extract txn_id from commit_data using legacy method. '
781 783 'Some functionality might be limited')
782 784 path, commit_id = commit_data
783 785 txn_id = None
784 786 else:
785 787 return 0
786 788
787 789 branches = []
788 790 tags = []
789 791
790 792 if env.get('RC_SCM_DATA'):
791 793 extras = json.loads(env['RC_SCM_DATA'])
792 794 else:
793 795 ini_path = env.get('RC_INI_FILE')
794 796 if ini_path:
795 797 _get_ini_settings(ini_path)
796 798 # fallback method to read from TXN-ID stored data
797 799 extras = get_extras_from_txn_id(path, txn_id)
798 800
799 801 if not extras and txn_id:
800 802 raise ValueError('SVN-POST-COMMIT: Failed to extract context data in called extras for hook execution')
801 803
802 804 if extras.get('rc_internal_commit'):
803 805 # special marker for internal commit, we don't call hooks client
804 806 return 0
805 807
806 808 extras['hook_type'] = 'post_commit'
807 809 extras['commit_ids'] = [commit_id]
808 810 extras['txn_id'] = txn_id
809 811 extras['new_refs'] = {
810 812 'branches': branches,
811 813 'bookmarks': [],
812 814 'tags': tags,
813 815 'total_commits': 1,
814 816 }
815 817
816 818 if 'repo_size' in extras['hooks']:
817 819 try:
818 820 _call_hook('repo_size', extras, SvnMessageWriter())
819 821 except Exception:
820 822 pass
821 823
822 824 return _call_hook('post_push', extras, SvnMessageWriter())
@@ -1,173 +1,173 b''
1 1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 2 #
3 3 # This program is free software: you can redistribute it and/or modify
4 4 # it under the terms of the GNU Affero General Public License, version 3
5 5 # (only), as published by the Free Software Foundation.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU Affero General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 #
15 15 # This program is dual-licensed. If you wish to learn more about the
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18
19 19 import codecs
20 20 import hashlib
21 21 import logging
22 22 import os
23 23 import typing
24 24
25 25 import fsspec
26 26
27 27 from .base import BaseCache, BaseShard
28 28 from ..utils import ShardFileReader, NOT_GIVEN
29 29 from ...type_utils import str2bool
30 30
31 31 log = logging.getLogger(__name__)
32 32
33 33
34 34 class S3Shard(BaseShard):
35 35
36 36 def __init__(self, index, bucket, bucket_folder, fs, **settings):
37 37 self._index: int = index
38 38 self._bucket_folder: str = bucket_folder
39 39 self.storage_type: str = 'bucket'
40 40 self._bucket_main: str = bucket
41 41
42 42 self.fs = fs
43 43
44 44 @property
45 45 def bucket(self) -> str:
46 46 """Cache bucket final path."""
47 47 return os.path.join(self._bucket_main, self._bucket_folder)
48 48
49 49 def _get_keyfile(self, archive_key) -> tuple[str, str]:
50 50 key_file: str = f'{archive_key}-{self.key_suffix}'
51 51 return key_file, os.path.join(self.bucket, key_file)
52 52
53 53 def _get_writer(self, path, mode):
54 54 return self.fs.open(path, 'wb')
55 55
56 56 def _write_file(self, full_path, iterator, mode):
57 57
58 58 # ensure folder in bucket exists
59 59 destination = self.bucket
60 60 if not self.fs.exists(destination):
61 self.fs.mkdir(destination, s3_additional_kwargs={})
61 self.fs.mkdir(destination)
62 62
63 63 writer = self._get_writer(full_path, mode)
64 64
65 65 digest = hashlib.sha256()
66 66 with writer:
67 67 size = 0
68 68 for chunk in iterator:
69 69 size += len(chunk)
70 70 digest.update(chunk)
71 71 writer.write(chunk)
72 72
73 73 sha256 = digest.hexdigest()
74 74 log.debug('written new archive cache under %s, sha256: %s', full_path, sha256)
75 75 return size, sha256
76 76
77 77 def store(self, key, value_reader, metadata: dict | None = None):
78 78 return self._store(key, value_reader, metadata, mode='wb')
79 79
80 80 def fetch(self, key, retry=NOT_GIVEN,
81 81 retry_attempts=NOT_GIVEN, retry_backoff=1,
82 82 presigned_url_expires: int = 0) -> tuple[ShardFileReader, dict]:
83 83 return self._fetch(key, retry, retry_attempts, retry_backoff, presigned_url_expires=presigned_url_expires)
84 84
85 85 def remove(self, key):
86 86 return self._remove(key)
87 87
88 88 def random_filename(self):
89 89 """Return filename and full-path tuple for file storage.
90 90
91 91 Filename will be a randomly generated 28 character hexadecimal string
92 92 with ".archive_cache" suffixed. Two levels of sub-directories will be used to
93 93 reduce the size of directories. On older filesystems, lookups in
94 94 directories with many files may be slow.
95 95 """
96 96
97 97 hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8')
98 98
99 99 archive_name = hex_name[4:] + '.archive_cache'
100 100 filename = f"{hex_name[:2]}-{hex_name[2:4]}-{archive_name}"
101 101
102 102 full_path = os.path.join(self.bucket, filename)
103 103 return archive_name, full_path
104 104
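    # Illustrative shape of the values returned above (hex digits hypothetical):
    #
    #   archive_name = '8f3a...c1.archive_cache'                  # hex_name[4:]
    #   full_path    = '<bucket>/<shard-folder>/ab-cd-8f3a...c1.archive_cache'
    #
    # the first two hex pairs form the 'ab-cd-' prefix that stands in for the
    # two sub-directory levels mentioned in the docstring.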
105 105 def __repr__(self):
106 106 return f'{self.__class__.__name__}(index={self._index}, bucket={self.bucket})'
107 107
108 108
109 109 class ObjectStoreCache(BaseCache):
110 110 shard_name: str = 'shard-{:03d}'
111 111 shard_cls = S3Shard
112 112
113 113 def __init__(self, locking_url, **settings):
114 114 """
115 115 Initialize objectstore cache instance.
116 116
117 117 :param str locking_url: redis url for a lock
118 118 :param settings: settings dict
119 119
120 120 """
121 121 self._locking_url = locking_url
122 122 self._config = settings
123 123
124 124 objectstore_url = self.get_conf('archive_cache.objectstore.url')
125 125 self._storage_path = objectstore_url # common path for all from BaseCache
126 126
127 127 self._shard_count = int(self.get_conf('archive_cache.objectstore.bucket_shards', pop=True))
128 128 if self._shard_count < 1:
129 129 raise ValueError('cache_shards must be 1 or more')
130 130
131 131 self._bucket = settings.pop('archive_cache.objectstore.bucket')
132 132 if not self._bucket:
133 133 raise ValueError('archive_cache.objectstore.bucket needs to have a value')
134 134
135 135 self._eviction_policy = self.get_conf('archive_cache.objectstore.eviction_policy', pop=True)
136 136 self._cache_size_limit = self.gb_to_bytes(int(self.get_conf('archive_cache.objectstore.cache_size_gb')))
137 137
138 138 self.retry = str2bool(self.get_conf('archive_cache.objectstore.retry', pop=True))
139 139 self.retry_attempts = int(self.get_conf('archive_cache.objectstore.retry_attempts', pop=True))
140 140 self.retry_backoff = int(self.get_conf('archive_cache.objectstore.retry_backoff', pop=True))
141 141
142 142 endpoint_url = settings.pop('archive_cache.objectstore.url')
143 143 key = settings.pop('archive_cache.objectstore.key')
144 144 secret = settings.pop('archive_cache.objectstore.secret')
145 145 region = settings.pop('archive_cache.objectstore.region')
146 146
147 147 log.debug('Initializing %s archival cache instance', self)
148 148
149 149 fs = fsspec.filesystem(
150 150 's3', anon=False, endpoint_url=endpoint_url, key=key, secret=secret, client_kwargs={'region_name': region}
151 151 )
152 152
153 153 # init main bucket
154 154 if not fs.exists(self._bucket):
155 155 fs.mkdir(self._bucket)
156 156
157 157 self._shards = tuple(
158 158 self.shard_cls(
159 159 index=num,
160 160 bucket=self._bucket,
161 161 bucket_folder=self.shard_name.format(num),
162 162 fs=fs,
163 163 **settings,
164 164 )
165 165 for num in range(self._shard_count)
166 166 )
167 167 self._hash = self._shards[0].hash
168 168
169 169 def _get_size(self, shard, archive_path):
170 170 return shard.fs.info(archive_path)['size']
171 171
172 172 def set_presigned_url_expiry(self, val: int) -> None:
173 173 self.presigned_url_expires = val
@@ -1,303 +1,310 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 #import errno
19 19 import fcntl
20 20 import functools
21 21 import logging
22 22 import os
23 23 import pickle
24 24 #import time
25 25
26 26 #import gevent
27 27 import msgpack
28 28 import redis
29 29
30 30 flock_org = fcntl.flock
31 31 from typing import Union
32 32
33 33 from dogpile.cache.api import Deserializer, Serializer
34 34 from dogpile.cache.backends import file as file_backend
35 35 from dogpile.cache.backends import memory as memory_backend
36 36 from dogpile.cache.backends import redis as redis_backend
37 37 from dogpile.cache.backends.file import FileLock
38 38 from dogpile.cache.util import memoized_property
39 39
40 from vcsserver.lib.memory_lru_dict import LRUDict, LRUDictDebug
41 from vcsserver.lib.str_utils import safe_bytes, safe_str
42 from vcsserver.lib.type_utils import str2bool
40 from ...lib.memory_lru_dict import LRUDict, LRUDictDebug
41 from ...lib.str_utils import safe_bytes, safe_str
42 from ...lib.type_utils import str2bool
43 43
44 44 _default_max_size = 1024
45 45
46 46 log = logging.getLogger(__name__)
47 47
48 48
49 49 class LRUMemoryBackend(memory_backend.MemoryBackend):
50 50 key_prefix = 'lru_mem_backend'
51 51 pickle_values = False
52 52
53 53 def __init__(self, arguments):
54 54 self.max_size = arguments.pop('max_size', _default_max_size)
55 55
56 56 LRUDictClass = LRUDict
57 57 if arguments.pop('log_key_count', None):
58 58 LRUDictClass = LRUDictDebug
59 59
60 60 arguments['cache_dict'] = LRUDictClass(self.max_size)
61 61 super().__init__(arguments)
62 62
63 63 def __repr__(self):
64 64 return f'{self.__class__}(maxsize=`{self.max_size}`)'
65 65
66 66 def __str__(self):
67 67 return self.__repr__()
68 68
69 69 def delete(self, key):
70 70 try:
71 71 del self._cache[key]
72 72 except KeyError:
73 73 # we don't care if key isn't there at deletion
74 74 pass
75 75
76 76 def list_keys(self, prefix):
77 77 return list(self._cache.keys())
78 78
79 79 def delete_multi(self, keys):
80 80 for key in keys:
81 81 self.delete(key)
82 82
83 83 def delete_multi_by_prefix(self, prefix):
84 84 cache_keys = self.list_keys(prefix=prefix)
85 85 num_affected_keys = len(cache_keys)
86 86 if num_affected_keys:
87 87 self.delete_multi(cache_keys)
88 88 return num_affected_keys
89 89
90 90
91 91 class PickleSerializer:
92 92 serializer: None | Serializer = staticmethod( # type: ignore
93 93 functools.partial(pickle.dumps, protocol=pickle.HIGHEST_PROTOCOL)
94 94 )
95 95 deserializer: None | Deserializer = staticmethod( # type: ignore
96 96 functools.partial(pickle.loads)
97 97 )
98 98
99 99
100 100 class MsgPackSerializer:
101 101 serializer: None | Serializer = staticmethod( # type: ignore
102 102 msgpack.packb
103 103 )
104 104 deserializer: None | Deserializer = staticmethod( # type: ignore
105 105 functools.partial(msgpack.unpackb, use_list=False)
106 106 )
107 107
108 108
109 109 class CustomLockFactory(FileLock):
110 110
111 111 pass
112 112
113 113
114 114 class FileNamespaceBackend(PickleSerializer, file_backend.DBMBackend):
115 115 key_prefix = 'file_backend'
116 116
117 117 def __init__(self, arguments):
118 118 arguments['lock_factory'] = CustomLockFactory
119 119 db_file = arguments.get('filename')
120 120
121 121         log.debug('initializing cache-backend=%s db in %s', self.__class__.__name__, db_file)
122 122 db_file_dir = os.path.dirname(db_file)
123 123 if not os.path.isdir(db_file_dir):
124 124 os.makedirs(db_file_dir)
125 125
126 126 try:
127 127 super().__init__(arguments)
128 128 except Exception:
129 129 log.exception('Failed to initialize db at: %s', db_file)
130 130 raise
131 131
132 132 def __repr__(self):
133 133 return f'{self.__class__}(file=`{self.filename}`)'
134 134
135 135 def __str__(self):
136 136 return self.__repr__()
137 137
138 138 def _get_keys_pattern(self, prefix: bytes = b''):
139 139 return b'%b:%b' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
140 140
141 141 def list_keys(self, prefix: bytes = b''):
142 142 prefix = self._get_keys_pattern(prefix)
143 143
144 144 def cond(dbm_key: bytes):
145 145 if not prefix:
146 146 return True
147 147
148 148 if dbm_key.startswith(prefix):
149 149 return True
150 150 return False
151 151
152 152 with self._dbm_file(True) as dbm:
153 153 try:
154 154 return list(filter(cond, dbm.keys()))
155 155 except Exception:
156 156 log.error('Failed to fetch DBM keys from DB: %s', self.get_store())
157 157 raise
158 158
159 159 def delete_multi_by_prefix(self, prefix):
160 160 cache_keys = self.list_keys(prefix=prefix)
161 161 num_affected_keys = len(cache_keys)
162 162 if num_affected_keys:
163 163 self.delete_multi(cache_keys)
164 164 return num_affected_keys
165 165
166 166 def get_store(self):
167 167 return self.filename
168 168
169 def cleanup_store(self):
170 for ext in ("db", "dat", "pag", "dir"):
171 final_filename = self.filename + os.extsep + ext
172 if os.path.exists(final_filename):
173 os.remove(final_filename)
174 log.warning('Removed dbm file %s', final_filename)
175
169 176
170 177 class BaseRedisBackend(redis_backend.RedisBackend):
171 178 key_prefix = ''
172 179
173 180 def __init__(self, arguments):
174 181 self.db_conn = arguments.get('host', '') or arguments.get('url', '') or 'redis-host'
175 182 super().__init__(arguments)
176 183
177 184 self._lock_timeout = self.lock_timeout
178 185 self._lock_auto_renewal = str2bool(arguments.pop("lock_auto_renewal", True))
179 186
180 187 if self._lock_auto_renewal and not self._lock_timeout:
181 188 # set default timeout for auto_renewal
182 189 self._lock_timeout = 30
183 190
184 191 def __repr__(self):
185 192 return f'{self.__class__}(conn=`{self.db_conn}`)'
186 193
187 194 def __str__(self):
188 195 return self.__repr__()
189 196
190 197 def _create_client(self):
191 198 args = {}
192 199
193 200 if self.url is not None:
194 201 args.update(url=self.url)
195 202
196 203 else:
197 204 args.update(
198 205 host=self.host, password=self.password,
199 206 port=self.port, db=self.db
200 207 )
201 208
202 209 connection_pool = redis.ConnectionPool(**args)
203 210 self.writer_client = redis.StrictRedis(
204 211 connection_pool=connection_pool
205 212 )
206 213 self.reader_client = self.writer_client
207 214
208 215 def _get_keys_pattern(self, prefix: bytes = b''):
209 216 return b'%b:%b*' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
210 217
211 218 def list_keys(self, prefix: bytes = b''):
212 219 prefix = self._get_keys_pattern(prefix)
213 220 return self.reader_client.keys(prefix)
214 221
215 222 def delete_multi_by_prefix(self, prefix, use_lua=False):
216 223 if use_lua:
217 224             # highly efficient Lua script to delete ALL keys by prefix...
218 225 lua = """local keys = redis.call('keys', ARGV[1])
219 226 for i=1,#keys,5000 do
220 227 redis.call('del', unpack(keys, i, math.min(i+(5000-1), #keys)))
221 228 end
222 229 return #keys"""
223 230 num_affected_keys = self.writer_client.eval(
224 231 lua,
225 232 0,
226 233 f"{prefix}*")
227 234 else:
228 235 cache_keys = self.list_keys(prefix=prefix)
229 236 num_affected_keys = len(cache_keys)
230 237 if num_affected_keys:
231 238 self.delete_multi(cache_keys)
232 239 return num_affected_keys
233 240
234 241 def get_store(self):
235 242 return self.reader_client.connection_pool
236 243
237 244 def get_mutex(self, key):
238 245 if self.distributed_lock:
239 246 lock_key = f'_lock_{safe_str(key)}'
240 247 return get_mutex_lock(
241 248 self.writer_client, lock_key,
242 249 self._lock_timeout,
243 250 auto_renewal=self._lock_auto_renewal
244 251 )
245 252 else:
246 253 return None
247 254
248 255
249 256 class RedisPickleBackend(PickleSerializer, BaseRedisBackend):
250 257 key_prefix = 'redis_pickle_backend'
251 258 pass
252 259
253 260
254 261 class RedisMsgPackBackend(MsgPackSerializer, BaseRedisBackend):
255 262 key_prefix = 'redis_msgpack_backend'
256 263 pass
257 264
258 265
259 266 def get_mutex_lock(client, lock_key, lock_timeout, auto_renewal=False):
260 from vcsserver.lib._vendor import redis_lock
267 from ...lib._vendor import redis_lock
261 268
262 269 class _RedisLockWrapper:
263 270 """LockWrapper for redis_lock"""
264 271
265 272 @classmethod
266 273 def get_lock(cls):
267 274 return redis_lock.Lock(
268 275 redis_client=client,
269 276 name=lock_key,
270 277 expire=lock_timeout,
271 278 auto_renewal=auto_renewal,
272 279 strict=True,
273 280 )
274 281
275 282 def __repr__(self):
276 283 return f"{self.__class__.__name__}:{lock_key}"
277 284
278 285 def __str__(self):
279 286 return f"{self.__class__.__name__}:{lock_key}"
280 287
281 288 def __init__(self):
282 289 self.lock = self.get_lock()
283 290 self.lock_key = lock_key
284 291
285 292 def acquire(self, wait=True):
286 293 log.debug('Trying to acquire Redis lock for key %s', self.lock_key)
287 294 try:
288 295 acquired = self.lock.acquire(wait)
289 296 log.debug('Got lock for key %s, %s', self.lock_key, acquired)
290 297 return acquired
291 298 except redis_lock.AlreadyAcquired:
292 299 return False
293 300 except redis_lock.AlreadyStarted:
294 301 # refresh thread exists, but it also means we acquired the lock
295 302 return True
296 303
297 304 def release(self):
298 305 try:
299 306 self.lock.release()
300 307 except redis_lock.NotAcquired:
301 308 pass
302 309
303 310 return _RedisLockWrapper()
@@ -1,245 +1,245 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import functools
19 19 import logging
20 20 import os
21 21 import threading
22 22 import time
23 23
24 24 import decorator
25 25 from dogpile.cache import CacheRegion
26 26
27 27
28 from vcsserver.utils import sha1
29 from vcsserver.lib.str_utils import safe_bytes
30 from vcsserver.lib.type_utils import str2bool # noqa :required by imports from .utils
28 from ...lib.hash_utils import sha1
29 from ...lib.str_utils import safe_bytes
30 from ...lib.type_utils import str2bool # noqa :required by imports from .utils
31 31
32 32 from . import region_meta
33 33
34 34 log = logging.getLogger(__name__)
35 35
36 36
37 37 class RhodeCodeCacheRegion(CacheRegion):
38 38
39 39 def __repr__(self):
40 40 return f'`{self.__class__.__name__}(name={self.name}, backend={self.backend.__class__})`'
41 41
42 42 def conditional_cache_on_arguments(
43 43 self, namespace=None,
44 44 expiration_time=None,
45 45 should_cache_fn=None,
46 46 to_str=str,
47 47 function_key_generator=None,
48 48 condition=True):
49 49 """
50 50         Custom conditional decorator that will not touch any dogpile internals if
51 51         the condition isn't met. This works a bit differently from should_cache_fn,
52 52         and it's faster in cases where we don't ever want to compute cached values
53 53 """
54 54 expiration_time_is_callable = callable(expiration_time)
55 55 if not namespace:
56 56 namespace = getattr(self, '_default_namespace', None)
57 57
58 58 if function_key_generator is None:
59 59 function_key_generator = self.function_key_generator
60 60
61 61 def get_or_create_for_user_func(func_key_generator, user_func, *arg, **kw):
62 62
63 63 if not condition:
64 64 log.debug('Calling un-cached method:%s', user_func.__name__)
65 65 start = time.time()
66 66 result = user_func(*arg, **kw)
67 67 total = time.time() - start
68 68 log.debug('un-cached method:%s took %.4fs', user_func.__name__, total)
69 69 return result
70 70
71 71 key = func_key_generator(*arg, **kw)
72 72
73 73 timeout = expiration_time() if expiration_time_is_callable \
74 74 else expiration_time
75 75
76 76 log.debug('Calling cached method:`%s`', user_func.__name__)
77 77 return self.get_or_create(key, user_func, timeout, should_cache_fn, (arg, kw))
78 78
79 79 def cache_decorator(user_func):
80 80 if to_str is str:
81 81 # backwards compatible
82 82 key_generator = function_key_generator(namespace, user_func)
83 83 else:
84 84 key_generator = function_key_generator(namespace, user_func, to_str=to_str)
85 85
86 86 def refresh(*arg, **kw):
87 87 """
88 88 Like invalidate, but regenerates the value instead
89 89 """
90 90 key = key_generator(*arg, **kw)
91 91 value = user_func(*arg, **kw)
92 92 self.set(key, value)
93 93 return value
94 94
95 95 def invalidate(*arg, **kw):
96 96 key = key_generator(*arg, **kw)
97 97 self.delete(key)
98 98
99 99 def set_(value, *arg, **kw):
100 100 key = key_generator(*arg, **kw)
101 101 self.set(key, value)
102 102
103 103 def get(*arg, **kw):
104 104 key = key_generator(*arg, **kw)
105 105 return self.get(key)
106 106
107 107 user_func.set = set_
108 108 user_func.invalidate = invalidate
109 109 user_func.get = get
110 110 user_func.refresh = refresh
111 111 user_func.key_generator = key_generator
112 112 user_func.original = user_func
113 113
114 114 # Use `decorate` to preserve the signature of :param:`user_func`.
115 115 return decorator.decorate(user_func, functools.partial(
116 116 get_or_create_for_user_func, key_generator))
117 117
118 118 return cache_decorator
119 119
120 120
121 121 def make_region(*arg, **kw):
122 122 return RhodeCodeCacheRegion(*arg, **kw)
123 123
124 124
125 125 def get_default_cache_settings(settings, prefixes=None):
126 126 prefixes = prefixes or []
127 127 cache_settings = {}
128 128 for key in settings.keys():
129 129 for prefix in prefixes:
130 130 if key.startswith(prefix):
131 131 name = key.split(prefix)[1].strip()
132 132 val = settings[key]
133 133 if isinstance(val, str):
134 134 val = val.strip()
135 135 cache_settings[name] = val
136 136 return cache_settings
137 137
138 138
139 139 def compute_key_from_params(*args):
140 140 """
141 141 Helper to compute key from given params to be used in cache manager
142 142 """
143 143 return sha1(safe_bytes("_".join(map(str, args))))
144 144
145 145
146 146 def custom_key_generator(backend, namespace, fn):
147 147 func_name = fn.__name__
148 148
149 149 def generate_key(*args):
150 150 backend_pref = getattr(backend, 'key_prefix', None) or 'backend_prefix'
151 151 namespace_pref = namespace or 'default_namespace'
152 152 arg_key = compute_key_from_params(*args)
153 153 final_key = f"{backend_pref}:{namespace_pref}:{func_name}_{arg_key}"
154 154
155 155 return final_key
156 156
157 157 return generate_key
158 158
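# Hedged example of the key layout produced by generate_key above; the backend,
# namespace and function names are hypothetical:
#
#   arg_key   = compute_key_from_params('repo1', 42)    # sha1(b'repo1_42')
#   final_key = f"file_backend:my_namespace:get_commit_{arg_key}"
#
# i.e. "<backend key_prefix>:<namespace>:<function-name>_<sha1-of-args>".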
159 159
160 160 def backend_key_generator(backend):
161 161 """
162 162 Special wrapper that also sends over the backend to the key generator
163 163 """
164 164 def wrapper(namespace, fn):
165 165 return custom_key_generator(backend, namespace, fn)
166 166 return wrapper
167 167
168 168
169 169 def get_or_create_region(region_name, region_namespace: str = None, use_async_runner=False):
170 170 from .backends import FileNamespaceBackend
171 171 from . import async_creation_runner
172 172
173 173 region_obj = region_meta.dogpile_cache_regions.get(region_name)
174 174 if not region_obj:
175 175 reg_keys = list(region_meta.dogpile_cache_regions.keys())
176 176         raise OSError(f'Region `{region_name}` not in configured regions: {reg_keys}.')
177 177
178 178 region_uid_name = f'{region_name}:{region_namespace}'
179 179
180 180 # Special case for ONLY the FileNamespaceBackend backend. We register one-file-per-region
181 181 if isinstance(region_obj.actual_backend, FileNamespaceBackend):
182 182 if not region_namespace:
183 183             raise ValueError(f'{FileNamespaceBackend} requires specifying the region_namespace param')
184 184
185 185 region_exist = region_meta.dogpile_cache_regions.get(region_namespace)
186 186 if region_exist:
187 187 log.debug('Using already configured region: %s', region_namespace)
188 188 return region_exist
189 189
190 190 expiration_time = region_obj.expiration_time
191 191
192 192 cache_dir = region_meta.dogpile_config_defaults['cache_dir']
193 193 namespace_cache_dir = cache_dir
194 194
195 195 # we default the namespace_cache_dir to our default cache dir.
196 196     # however, if this backend is configured with the filename= param, we prioritize that,
197 197     # so all caches within that particular region, even namespaced ones, end up in the same path
198 198 if region_obj.actual_backend.filename:
199 199 namespace_cache_dir = os.path.dirname(region_obj.actual_backend.filename)
200 200
201 201 if not os.path.isdir(namespace_cache_dir):
202 202 os.makedirs(namespace_cache_dir)
203 203 new_region = make_region(
204 204 name=region_uid_name,
205 205 function_key_generator=backend_key_generator(region_obj.actual_backend)
206 206 )
207 207
208 208 namespace_filename = os.path.join(
209 209 namespace_cache_dir, f"{region_name}_{region_namespace}.cache_db")
210 210 # special type that allows 1db per namespace
211 211 new_region.configure(
212 212 backend='dogpile.cache.rc.file_namespace',
213 213 expiration_time=expiration_time,
214 214 arguments={"filename": namespace_filename}
215 215 )
216 216
217 217 # create and save in region caches
218 218 log.debug('configuring new region: %s', region_uid_name)
219 219 region_obj = region_meta.dogpile_cache_regions[region_namespace] = new_region
220 220
221 221 region_obj._default_namespace = region_namespace
222 222 if use_async_runner:
223 223 region_obj.async_creation_runner = async_creation_runner
224 224 return region_obj
225 225
226 226
227 227 def clear_cache_namespace(cache_region: str | RhodeCodeCacheRegion, cache_namespace_uid: str, method: str) -> int:
228 228 from . import CLEAR_DELETE, CLEAR_INVALIDATE
229 229
230 230 if not isinstance(cache_region, RhodeCodeCacheRegion):
231 231 cache_region = get_or_create_region(cache_region, cache_namespace_uid)
232 232 log.debug('clearing cache region: %s [prefix:%s] with method=%s',
233 233 cache_region, cache_namespace_uid, method)
234 234
235 235 num_affected_keys = 0
236 236
237 237 if method == CLEAR_INVALIDATE:
238 238 # NOTE: The CacheRegion.invalidate() method’s default mode of
239 239 # operation is to set a timestamp local to this CacheRegion in this Python process only.
240 240 # It does not impact other Python processes or regions as the timestamp is only stored locally in memory.
241 241 cache_region.invalidate(hard=True)
242 242
243 243 if method == CLEAR_DELETE:
244 244 num_affected_keys = cache_region.backend.delete_multi_by_prefix(prefix=cache_namespace_uid)
245 245 return num_affected_keys
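# Hedged usage sketch for clear_cache_namespace; region and namespace names are
# hypothetical:
#
#   from . import CLEAR_DELETE
#   removed = clear_cache_namespace('repo_object', 'repo_object.repo1', CLEAR_DELETE)
#
# CLEAR_INVALIDATE only marks the region stale within the current process, while
# CLEAR_DELETE physically removes every key sharing the namespace prefix.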
@@ -1,123 +1,123 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17 import base64
18 18 import logging
19 19 import time
20 20
21 21 import msgpack
22 22
23 23 import vcsserver
24 24 from vcsserver.lib.str_utils import safe_str
25 25
26 26 log = logging.getLogger(__name__)
27 27
28 28
29 29 def get_access_path(environ):
30 30 path = environ.get('PATH_INFO')
31 31 return path
32 32
33 33
34 34 def get_user_agent(environ):
35 35 return environ.get('HTTP_USER_AGENT')
36 36
37 37
38 38 def get_call_context(request) -> dict:
39 39 cc = {}
40 40 registry = request.registry
41 41 if hasattr(registry, 'vcs_call_context'):
42 42 cc.update({
43 43 'X-RC-Method': registry.vcs_call_context.get('method'),
44 44 'X-RC-Repo-Name': registry.vcs_call_context.get('repo_name')
45 45 })
46 46
47 47 return cc
48 48
49 49
50 50 def get_headers_call_context(environ, strict=True):
51 51 if 'HTTP_X_RC_VCS_STREAM_CALL_CONTEXT' in environ:
52 52 packed_cc = base64.b64decode(environ['HTTP_X_RC_VCS_STREAM_CALL_CONTEXT'])
53 53 return msgpack.unpackb(packed_cc)
54 54 elif strict:
55 55 raise ValueError('Expected header HTTP_X_RC_VCS_STREAM_CALL_CONTEXT not found')
56 56
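# Minimal sketch of how the call-context header is produced on the client side
# (an assumption consistent with the decoding above, not code from this file):
#
#   packed = base64.b64encode(msgpack.packb({'method': 'pull', 'repo_name': 'repo1'}))
#   environ['HTTP_X_RC_VCS_STREAM_CALL_CONTEXT'] = packed.decode()
#
# get_headers_call_context() simply reverses the base64 + msgpack encoding.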
57 57
58 58 class RequestWrapperTween:
59 59 def __init__(self, handler, registry):
60 60 self.handler = handler
61 61 self.registry = registry
62 62
63 63 # one-time configuration code goes here
64 64
65 65 def __call__(self, request):
66 66 start = time.time()
67 log.debug('Starting request time measurement')
67 log.debug('Starting request processing')
68 68 response = None
69 69
70 70 try:
71 71 response = self.handler(request)
72 72 finally:
73 73 ua = get_user_agent(request.environ)
74 74 call_context = get_call_context(request)
75 75 vcs_method = call_context.get('X-RC-Method', '_NO_VCS_METHOD')
76 76 repo_name = call_context.get('X-RC-Repo-Name', '')
77 77
78 78 count = request.request_count()
79 79 _ver_ = vcsserver.get_version()
80 80 _path = safe_str(get_access_path(request.environ))
81 81
82 82 ip = '127.0.0.1'
83 83 match_route = request.matched_route.name if request.matched_route else "NOT_FOUND"
84 84 resp_code = getattr(response, 'status_code', 'UNDEFINED')
85 85
86 86 _view_path = f"{repo_name}@{_path}/{vcs_method}"
87 87
88 88 total = time.time() - start
89 89
90 90 log.info(
91 'Req[%4s] IP: %s %s Request to %s time: %.4fs [%s], VCSServer %s',
91                 'Finished request processing: req[%4s] IP: %s %s Request to %s time: %.4fs [%s], VCSServer %s',
92 92 count, ip, request.environ.get('REQUEST_METHOD'),
93 93 _view_path, total, ua, _ver_,
94 94 extra={"time": total, "ver": _ver_, "code": resp_code,
95 95 "path": _path, "view_name": match_route, "user_agent": ua,
96 96 "vcs_method": vcs_method, "repo_name": repo_name}
97 97 )
98 98
99 99 statsd = request.registry.statsd
100 100 if statsd:
101 101 match_route = request.matched_route.name if request.matched_route else _path
102 102 elapsed_time_ms = round(1000.0 * total) # use ms only
103 103 statsd.timing(
104 104 "vcsserver_req_timing.histogram", elapsed_time_ms,
105 105 tags=[
106 106 f"view_name:{match_route}",
107 107 f"code:{resp_code}"
108 108 ],
109 109 use_decimals=False
110 110 )
111 111 statsd.incr(
112 112 "vcsserver_req_total", tags=[
113 113 f"view_name:{match_route}",
114 114 f"code:{resp_code}"
115 115 ])
116 116
117 117 return response
118 118
119 119
120 120 def includeme(config):
121 121 config.add_tween(
122 122 'vcsserver.tweens.request_wrapper.RequestWrapperTween',
123 123 )