rhodecode-vcsserver Commit - r1219:e9ee7632

merged default branch into stable

super-admin -

r1219:e9ee7632 stable

parent child

The requested changes are too big and content was truncated. Show full diff

configs/logging.ini

0 created 644 +53 0

			@@ -0,0 +1,53 b''
		1	; #####################
		2	; LOGGING CONFIGURATION
		3	; #####################
		4	; Logging template, used to configure logging
		5	; some variables here are replaced by RhodeCode with default values
		6
		7	[loggers]
		8	keys = root, vcsserver
		9
		10	[handlers]
		11	keys = console
		12
		13	[formatters]
		14	keys = generic, json
		15
		16	; #######
		17	; LOGGERS
		18	; #######
		19	[logger_root]
		20	level = NOTSET
		21	handlers = console
		22
		23	[logger_vcsserver]
		24	level = $RC_LOGGING_LEVEL
		25	handlers =
		26	qualname = vcsserver
		27	propagate = 1
		28
		29	; ########
		30	; HANDLERS
		31	; ########
		32
		33	[handler_console]
		34	class = StreamHandler
		35	args = (sys.stderr, )
		36	level = $RC_LOGGING_LEVEL
		37	; To enable JSON formatted logs replace generic with json
		38	; This allows sending properly formatted logs to grafana loki or elasticsearch
		39	#formatter = json
		40	#formatter = generic
		41	formatter = $RC_LOGGING_FORMATTER
		42
		43	; ##########
		44	; FORMATTERS
		45	; ##########
		46
		47	[formatter_generic]
		48	format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
		49	datefmt = %Y-%m-%d %H:%M:%S
		50
		51	[formatter_json]
		52	format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
		53	class = vcsserver.lib._vendor.jsonlogger.JsonFormatter

pyproject.toml

0 created 644 +73 0

			@@ -0,0 +1,73 b''
		1	[build-system]
		2	requires = ["setuptools>=61.0.0", "wheel"]
		3	build-backend = "setuptools.build_meta"
		4
		5	[project]
		6	name = "rhodecode-vcsserver"
		7	description = "Version Control System Server for RhodeCode"
		8	authors = [
		9	{name = "RhodeCode GmbH", email = "support@rhodecode.com"},
		10	]
		11
		12	license = {text = "GPL V3"}
		13	requires-python = ">=3.10"
		14	dynamic = ["version", "readme", "dependencies", "optional-dependencies"]
		15	classifiers = [
		16	'Development Status :: 6 - Mature',
		17	'Intended Audience :: Developers',
		18	'Operating System :: OS Independent',
		19	'Topic :: Software Development :: Version Control',
		20	'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)',
		21	'Programming Language :: Python :: 3.10',
		22	]
		23
		24	[project.entry-points."paste.app_factory"]
		25	main = "vcsserver.http_main:main"
		26
		27
		28	[tool.setuptools]
		29	packages = ["vcsserver"]
		30
		31	[tool.setuptools.dynamic]
		32	readme = {file = ["README.rst"], content-type = "text/rst"}
		33	version = {file = "vcsserver/VERSION"}
		34	dependencies = {file = ["requirements.txt"]}
		35	optional-dependencies.tests = {file = ["requirements_test.txt"]}
		36
		37	[tool.ruff]
		38
		39	select = [
		40	# Pyflakes
		41	"F",
		42	# Pycodestyle
		43	"E",
		44	"W",
		45	# isort
		46	"I001"
		47	]
		48
		49	ignore = [
		50	"E501", # line too long, handled by black
		51	]
		52
		53	# Same as Black.
		54	line-length = 120
		55
		56	[tool.ruff.isort]
		57
		58	known-first-party = ["vcsserver"]
		59
		60	[tool.ruff.format]
		61
		62	# Like Black, use double quotes for strings.
		63	quote-style = "double"
		64
		65	# Like Black, indent with spaces, rather than tabs.
		66	indent-style = "space"
		67
		68	# Like Black, respect magic trailing commas.
		69	skip-magic-trailing-comma = false
		70
		71	# Like Black, automatically detect the appropriate line ending.
		72	line-ending = "auto"
		73

vcsserver/config/__init__.py

0 created 644 +1 0

			@@ -0,0 +1,1 b''
		1	# Copyright (C) 2014-2023 RhodeCode GmbH

vcsserver/config/hooks.py

0 created 644 +27 0

			@@ -0,0 +1,27 b''
		1	# Copyright (C) 2010-2023 RhodeCode GmbH
		2	#
		3	# This program is free software: you can redistribute it and/or modify
		4	# it under the terms of the GNU Affero General Public License, version 3
		5	# (only), as published by the Free Software Foundation.
		6	#
		7	# This program is distributed in the hope that it will be useful,
		8	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		9	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		10	# GNU General Public License for more details.
		11	#
		12	# You should have received a copy of the GNU Affero General Public License
		13	# along with this program. If not, see <http://www.gnu.org/licenses/>.
		14	#
		15	# This program is dual-licensed. If you wish to learn more about the
		16	# RhodeCode Enterprise Edition, including its added features, Support services,
		17	# and proprietary license terms, please see https://rhodecode.com/licenses/
		18
			# Hook identifier constants.
		19	HOOK_REPO_SIZE = 'changegroup.repo_size'
		20
		21	# HG (Mercurial) hook identifiers, each a dotted two-part string
		22	HOOK_PRE_PULL = 'preoutgoing.pre_pull'
		23	HOOK_PULL = 'outgoing.pull_logger'
		24	HOOK_PRE_PUSH = 'prechangegroup.pre_push'
		25	HOOK_PRETX_PUSH = 'pretxnchangegroup.pre_push'
		26	HOOK_PUSH = 'changegroup.push_logger'
		27	HOOK_PUSH_KEY = 'pushkey.key_push'

vcsserver/config/settings_maker.py

0 created 644 +168 0

			@@ -0,0 +1,168 b''
		1	# Copyright (C) 2010-2023 RhodeCode GmbH
		2	#
		3	# This program is free software: you can redistribute it and/or modify
		4	# it under the terms of the GNU Affero General Public License, version 3
		5	# (only), as published by the Free Software Foundation.
		6	#
		7	# This program is distributed in the hope that it will be useful,
		8	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		9	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		10	# GNU General Public License for more details.
		11	#
		12	# You should have received a copy of the GNU Affero General Public License
		13	# along with this program. If not, see <http://www.gnu.org/licenses/>.
		14	#
		15	# This program is dual-licensed. If you wish to learn more about the
		16	# RhodeCode Enterprise Edition, including its added features, Support services,
		17	# and proprietary license terms, please see https://rhodecode.com/licenses/
		18
		19	import os
		20	import textwrap
		21	import string
		22	import functools
		23	import logging
		24	import tempfile
		25	import logging.config
		26
		27	from vcsserver.type_utils import str2bool, aslist
		28
		29	log = logging.getLogger(__name__)
		30
		31	# Registry of keys that were already resolved, so they are not processed twice;
			# SettingsMaker.env_expand() skips every key that is present in this mapping.
		32	set_keys = {
		33	'__file__': ''
		34	}
		35
		36
		37	class SettingsMaker:
		38
		39	def __init__(self, app_settings):
		40	self.settings = app_settings
		41
		42	@classmethod
		43	def _bool_func(cls, input_val):
		44	if isinstance(input_val, bytes):
		45	# decode to str
		46	input_val = input_val.decode('utf8')
		47	return str2bool(input_val)
		48
		49	@classmethod
		50	def _int_func(cls, input_val):
		51	return int(input_val)
		52
		53	@classmethod
		54	def _list_func(cls, input_val, sep=','):
		55	return aslist(input_val, sep=sep)
		56
		57	@classmethod
		58	def _string_func(cls, input_val, lower=True):
		59	if lower:
		60	input_val = input_val.lower()
		61	return input_val
		62
		63	@classmethod
		64	def _float_func(cls, input_val):
		65	return float(input_val)
		66
		67	@classmethod
		68	def _dir_func(cls, input_val, ensure_dir=False, mode=0o755):
		69
		70	# ensure we have our dir created
		71	if not os.path.isdir(input_val) and ensure_dir:
		72	os.makedirs(input_val, mode=mode, exist_ok=True)
		73
		74	if not os.path.isdir(input_val):
		75	raise Exception(f'Dir at {input_val} does not exist')
		76	return input_val
		77
		78	@classmethod
		79	def _file_path_func(cls, input_val, ensure_dir=False, mode=0o755):
		80	dirname = os.path.dirname(input_val)
		81	cls._dir_func(dirname, ensure_dir=ensure_dir)
		82	return input_val
		83
		84	@classmethod
		85	def _key_transformator(cls, key):
		86	return "{}_{}".format('RC'.upper(), key.upper().replace('.', '_').replace('-', '_'))
		87
		88	def maybe_env_key(self, key):
		89	# now maybe we have this KEY in env, search and use the value with higher priority.
		90	transformed_key = self._key_transformator(key)
		91	envvar_value = os.environ.get(transformed_key)
		92	if envvar_value:
		93	log.debug('using `%s` key instead of `%s` key for config', transformed_key, key)
		94
		95	return envvar_value
		96
		97	def env_expand(self):
		98	replaced = {}
		99	for k, v in self.settings.items():
		100	if k not in set_keys:
		101	envvar_value = self.maybe_env_key(k)
		102	if envvar_value:
		103	replaced[k] = envvar_value
		104	set_keys[k] = envvar_value
		105
		106	# replace ALL keys updated
		107	self.settings.update(replaced)
		108
		109	def enable_logging(self, logging_conf=None, level='INFO', formatter='generic'):
		110	"""
		111	Helper to enable debug on running instance
		112	:return:
		113	"""
		114
		115	if not str2bool(self.settings.get('logging.autoconfigure')):
		116	log.info('logging configuration based on main .ini file')
		117	return
		118
		119	if logging_conf is None:
		120	logging_conf = self.settings.get('logging.logging_conf_file') or ''
		121
		122	if not os.path.isfile(logging_conf):
		123	log.error('Unable to setup logging based on %s, '
		124	'file does not exist.... specify path using logging.logging_conf_file= config setting. ', logging_conf)
		125	return
		126
		127	with open(logging_conf, 'rt') as f:
		128	ini_template = textwrap.dedent(f.read())
		129	ini_template = string.Template(ini_template).safe_substitute(
		130	RC_LOGGING_LEVEL=os.environ.get('RC_LOGGING_LEVEL', '') or level,
		131	RC_LOGGING_FORMATTER=os.environ.get('RC_LOGGING_FORMATTER', '') or formatter
		132	)
		133
		134	with tempfile.NamedTemporaryFile(prefix='rc_logging_', suffix='.ini', delete=False) as f:
		135	log.info('Saved Temporary LOGGING config at %s', f.name)
		136	f.write(ini_template)
		137
		138	logging.config.fileConfig(f.name)
		139	os.remove(f.name)
		140
		141	def make_setting(self, key, default, lower=False, default_when_empty=False, parser=None):
		142	input_val = self.settings.get(key, default)
		143
		144	if default_when_empty and not input_val:
		145	# use default value when value is set in the config but it is empty
		146	input_val = default
		147
		148	parser_func = {
		149	'bool': self._bool_func,
		150	'int': self._int_func,
		151	'list': self._list_func,
		152	'list:newline': functools.partial(self._list_func, sep='/n'),
		153	'list:spacesep': functools.partial(self._list_func, sep=' '),
		154	'string': functools.partial(self._string_func, lower=lower),
		155	'dir': self._dir_func,
		156	'dir:ensured': functools.partial(self._dir_func, ensure_dir=True),
		157	'file': self._file_path_func,
		158	'file:ensured': functools.partial(self._file_path_func, ensure_dir=True),
		159	None: lambda i: i
		160	}[parser]
		161
		162	envvar_value = self.maybe_env_key(key)
		163	if envvar_value:
		164	input_val = envvar_value
		165	set_keys[key] = input_val
		166
		167	self.settings[key] = parser_func(input_val)
		168	return self.settings[key]

vcsserver/lib/_vendor/jsonlogger/__init__.py

0 created 644 +243 0

			@@ -0,0 +1,243 b''
		1	'''
		2	This library is provided to allow standard python logging
		3	to output log data as JSON formatted strings
		4	'''
		5	import logging
		6	import json
		7	import re
		8	from datetime import date, datetime, time, tzinfo, timedelta
		9	import traceback
		10	import importlib
		11
		12	from inspect import istraceback
		13
		14	from collections import OrderedDict
		15
		16
		17	def _inject_req_id(record, args, *kwargs):
		18	return record
		19
		20
		21	ExceptionAwareFormatter = logging.Formatter
		22
		23
		24	ZERO = timedelta(0)
		25	HOUR = timedelta(hours=1)
		26
		27
		28	class UTC(tzinfo):
		29	"""UTC"""
		30
		31	def utcoffset(self, dt):
		32	return ZERO
		33
		34	def tzname(self, dt):
		35	return "UTC"
		36
		37	def dst(self, dt):
		38	return ZERO
		39
		40	utc = UTC()
		41
		42
		43	# skip natural LogRecord attributes
		44	# http://docs.python.org/library/logging.html#logrecord-attributes
		45	RESERVED_ATTRS = (
		46	'args', 'asctime', 'created', 'exc_info', 'exc_text', 'filename',
		47	'funcName', 'levelname', 'levelno', 'lineno', 'module',
		48	'msecs', 'message', 'msg', 'name', 'pathname', 'process',
		49	'processName', 'relativeCreated', 'stack_info', 'thread', 'threadName')
		50
		51
		52	def merge_record_extra(record, target, reserved):
		53	"""
		54	Merges extra attributes from LogRecord object into target dictionary
		55
		56	:param record: logging.LogRecord
		57	:param target: dict to update
		58	:param reserved: dict or list with reserved keys to skip
		59	"""
		60	for key, value in record.__dict__.items():
		61	# this allows to have numeric keys
		62	if (key not in reserved
		63	and not (hasattr(key, "startswith")
		64	and key.startswith('_'))):
		65	target[key] = value
		66	return target
		67
		68
		69	class JsonEncoder(json.JSONEncoder):
		70	"""
		71	A custom encoder extending the default JSONEncoder
		72	"""
		73
		74	def default(self, obj):
		75	if isinstance(obj, (date, datetime, time)):
		76	return self.format_datetime_obj(obj)
		77
		78	elif istraceback(obj):
		79	return ''.join(traceback.format_tb(obj)).strip()
		80
		81	elif type(obj) == Exception \
		82	or isinstance(obj, Exception) \
		83	or type(obj) == type:
		84	return str(obj)
		85
		86	try:
		87	return super().default(obj)
		88
		89	except TypeError:
		90	try:
		91	return str(obj)
		92
		93	except Exception:
		94	return None
		95
		96	def format_datetime_obj(self, obj):
		97	return obj.isoformat()
		98
		99
		100	class JsonFormatter(ExceptionAwareFormatter):
		101	"""
		102	A custom formatter to format logging records as json strings.
		103	Extra values will be formatted as str() if not supported by
		104	json default encoder
		105	"""
		106
		107	def __init__(self, args, *kwargs):
		108	"""
		109	:param json_default: a function for encoding non-standard objects
		110	as outlined in http://docs.python.org/2/library/json.html
		111	:param json_encoder: optional custom encoder
		112	:param json_serializer: a :meth:`json.dumps`-compatible callable
		113	that will be used to serialize the log record.
		114	:param json_indent: an optional :meth:`json.dumps`-compatible numeric value
		115	that will be used to customize the indent of the output json.
		116	:param prefix: an optional string prefix added at the beginning of
		117	the formatted string
		118	:param json_indent: indent parameter for json.dumps
		119	:param json_ensure_ascii: ensure_ascii parameter for json.dumps
		120	:param reserved_attrs: an optional list of fields that will be skipped when
		121	outputting json log record. Defaults to all log record attributes:
		122	http://docs.python.org/library/logging.html#logrecord-attributes
		123	:param timestamp: an optional string/boolean field to add a timestamp when
		124	outputting the json log record. If string is passed, timestamp will be added
		125	to log record using string as key. If True boolean is passed, timestamp key
		126	will be "timestamp". Defaults to False/off.
		127	"""
		128	self.json_default = self._str_to_fn(kwargs.pop("json_default", None))
		129	self.json_encoder = self._str_to_fn(kwargs.pop("json_encoder", None))
		130	self.json_serializer = self._str_to_fn(kwargs.pop("json_serializer", json.dumps))
		131	self.json_indent = kwargs.pop("json_indent", None)
		132	self.json_ensure_ascii = kwargs.pop("json_ensure_ascii", True)
		133	self.prefix = kwargs.pop("prefix", "")
		134	reserved_attrs = kwargs.pop("reserved_attrs", RESERVED_ATTRS)
		135	self.reserved_attrs = dict(list(zip(reserved_attrs, reserved_attrs)))
		136	self.timestamp = kwargs.pop("timestamp", True)
		137
		138	# super(JsonFormatter, self).__init__(args, *kwargs)
		139	logging.Formatter.__init__(self, args, *kwargs)
		140	if not self.json_encoder and not self.json_default:
		141	self.json_encoder = JsonEncoder
		142
		143	self._required_fields = self.parse()
		144	self._skip_fields = dict(list(zip(self._required_fields,
		145	self._required_fields)))
		146	self._skip_fields.update(self.reserved_attrs)
		147
		148	def _str_to_fn(self, fn_as_str):
		149	"""
		150	If the argument is not a string, return whatever was passed in.
		151	Parses a string such as package.module.function, imports the module
		152	and returns the function.
		153
		154	:param fn_as_str: The string to parse. If not a string, return it.
		155	"""
		156	if not isinstance(fn_as_str, str):
		157	return fn_as_str
		158
		159	path, _, function = fn_as_str.rpartition('.')
		160	module = importlib.import_module(path)
		161	return getattr(module, function)
		162
		163	def parse(self):
		164	"""
		165	Parses format string looking for substitutions
		166
		167	This method is responsible for returning a list of fields (as strings)
		168	to include in all log messages.
		169	"""
		170	standard_formatters = re.compile(r'$(.+?)$', re.IGNORECASE)
		171	return standard_formatters.findall(self._fmt)
		172
		173	def add_fields(self, log_record, record, message_dict):
		174	"""
		175	Override this method to implement custom logic for adding fields.
		176	"""
		177	for field in self._required_fields:
		178	log_record[field] = record.__dict__.get(field)
		179	log_record.update(message_dict)
		180	merge_record_extra(record, log_record, reserved=self._skip_fields)
		181
		182	if self.timestamp:
		183	key = self.timestamp if type(self.timestamp) == str else 'timestamp'
		184	log_record[key] = datetime.fromtimestamp(record.created, tz=utc)
		185
		186	def process_log_record(self, log_record):
		187	"""
		188	Override this method to implement custom logic
		189	on the possibly ordered dictionary.
		190	"""
		191	return log_record
		192
		193	def jsonify_log_record(self, log_record):
		194	"""Returns a json string of the log record."""
		195	return self.json_serializer(log_record,
		196	default=self.json_default,
		197	cls=self.json_encoder,
		198	indent=self.json_indent,
		199	ensure_ascii=self.json_ensure_ascii)
		200
		201	def serialize_log_record(self, log_record):
		202	"""Returns the final representation of the log record."""
		203	return "{}{}".format(self.prefix, self.jsonify_log_record(log_record))
		204
		205	def format(self, record):
		206	"""Formats a log record and serializes to json"""
		207	message_dict = {}
		208	# FIXME: logging.LogRecord.msg and logging.LogRecord.message in typeshed
		209	# are always type of str. We shouldn't need to override that.
		210	if isinstance(record.msg, dict):
		211	message_dict = record.msg
		212	record.message = None
		213	else:
		214	record.message = record.getMessage()
		215	# only format time if needed
		216	if "asctime" in self._required_fields:
		217	record.asctime = self.formatTime(record, self.datefmt)
		218
		219	# Display formatted exception, but allow overriding it in the
		220	# user-supplied dict.
		221	if record.exc_info and not message_dict.get('exc_info'):
		222	message_dict['exc_info'] = self.formatException(record.exc_info)
		223	if not message_dict.get('exc_info') and record.exc_text:
		224	message_dict['exc_info'] = record.exc_text
		225	# Display formatted record of stack frames
		226	# default format is a string returned from :func:`traceback.print_stack`
		227	try:
		228	if record.stack_info and not message_dict.get('stack_info'):
		229	message_dict['stack_info'] = self.formatStack(record.stack_info)
		230	except AttributeError:
		231	# Python2.7 doesn't have stack_info.
		232	pass
		233
		234	try:
		235	log_record = OrderedDict()
		236	except NameError:
		237	log_record = {}
		238
		239	_inject_req_id(record, with_prefix=False)
		240	self.add_fields(log_record, record, message_dict)
		241	log_record = self.process_log_record(log_record)
		242
		243	return self.serialize_log_record(log_record)

vcsserver/lib/logging_formatter.py

0 created 644 +53 0

			@@ -0,0 +1,53 b''
		1	# Copyright (C) 2010-2023 RhodeCode GmbH
		2	#
		3	# This program is free software: you can redistribute it and/or modify
		4	# it under the terms of the GNU Affero General Public License, version 3
		5	# (only), as published by the Free Software Foundation.
		6	#
		7	# This program is distributed in the hope that it will be useful,
		8	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		9	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		10	# GNU General Public License for more details.
		11	#
		12	# You should have received a copy of the GNU Affero General Public License
		13	# along with this program. If not, see <http://www.gnu.org/licenses/>.
		14	#
		15	# This program is dual-licensed. If you wish to learn more about the
		16	# RhodeCode Enterprise Edition, including its added features, Support services,
		17	# and proprietary license terms, please see https://rhodecode.com/licenses/
		18
		19	import sys
		20	import logging
		21
		22
		23	BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = list(range(30, 38))
		24
		25	# Sequences
		26	RESET_SEQ = "\033[0m"
		27	COLOR_SEQ = "\033[0;%dm"
		28	BOLD_SEQ = "\033[1m"
		29
		30	COLORS = {
		31	'CRITICAL': MAGENTA,
		32	'ERROR': RED,
		33	'WARNING': CYAN,
		34	'INFO': GREEN,
		35	'DEBUG': BLUE,
		36	'SQL': YELLOW
		37	}
		38
		39
		40	class ColorFormatter(logging.Formatter):
		41
		42	def format(self, record):
		43	"""
		44	Change record's levelname to use with COLORS enum
		45	"""
		46	def_record = super().format(record)
		47
		48	levelname = record.levelname
		49	start = COLOR_SEQ % (COLORS[levelname])
		50	end = RESET_SEQ
		51
		52	colored_record = ''.join([start, def_record, end])
		53	return colored_record

vcsserver/lib/rc_cache/archive_cache.py

0 created 644 +87 0

			@@ -0,0 +1,87 b''
		1	# RhodeCode VCSServer provides access to different vcs backends via network.
		2	# Copyright (C) 2014-2023 RhodeCode GmbH
		3	#
		4	# This program is free software; you can redistribute it and/or modify
		5	# it under the terms of the GNU General Public License as published by
		6	# the Free Software Foundation; either version 3 of the License, or
		7	# (at your option) any later version.
		8	#
		9	# This program is distributed in the hope that it will be useful,
		10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		12	# GNU General Public License for more details.
		13	#
		14	# You should have received a copy of the GNU General Public License
		15	# along with this program; if not, write to the Free Software Foundation,
		16	# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		17
		18	import logging
		19	import os
		20	import diskcache
		21	from diskcache import RLock
		22
		23	log = logging.getLogger(__name__)
		24
			# Module-level slot for the cache store; get_archival_cache_store() fills it
			# on the first call and returns the same object afterwards.
		25	cache_meta = None
		26
		27
		28	class ReentrantLock(RLock):
		29	def __enter__(self):
		30	reentrant_lock_key = self._key
		31
		32	log.debug('Acquire ReentrantLock(key=%s) for archive cache generation...', reentrant_lock_key)
		33	#self.acquire()
		34	log.debug('Lock for key=%s acquired', reentrant_lock_key)
		35
		36	def __exit__(self, *exc_info):
		37	#self.release()
		38	pass
		39
		40
		41	def get_archival_config(config):
		42
		43	final_config = {
		44	'archive_cache.eviction_policy': 'least-frequently-used'
		45	}
		46
		47	for k, v in config.items():
		48	if k.startswith('archive_cache'):
		49	final_config[k] = v
		50
		51	return final_config
		52
		53
		54	def get_archival_cache_store(config):
		55
		56	global cache_meta
		57	if cache_meta is not None:
		58	return cache_meta
		59
		60	config = get_archival_config(config)
		61
		62	archive_cache_dir = config['archive_cache.store_dir']
		63	archive_cache_size_gb = config['archive_cache.cache_size_gb']
		64	archive_cache_shards = config['archive_cache.cache_shards']
		65	archive_cache_eviction_policy = config['archive_cache.eviction_policy']
		66
		67	log.debug('Initializing archival cache instance under %s', archive_cache_dir)
		68
		69	# check if it's ok to write, and re-create the archive cache
		70	if not os.path.isdir(archive_cache_dir):
		71	os.makedirs(archive_cache_dir, exist_ok=True)
		72
		73	d_cache = diskcache.FanoutCache(
		74	archive_cache_dir, shards=archive_cache_shards,
		75	cull_limit=0, # manual eviction required
		76	size_limit=archive_cache_size_gb * 1024 * 1024 * 1024,
		77	eviction_policy=archive_cache_eviction_policy,
		78	timeout=30
		79	)
		80	cache_meta = d_cache
		81	return cache_meta
		82
		83
		84	def includeme(config):
		85	# init our cache at start, for vcsserver we don't init at runtime
		86	# because our cache config is sent via wire on make archive call, this call just lazy-enables the client
		87	return

vcsserver/lib/rc_json.py

0 created 644 +2 0

			@@ -0,0 +1,2 b''
		1	# use orjson by default
		2	import orjson as json

vcsserver/lib/statsd_client.py

0 created 644 +70 0

			@@ -0,0 +1,70 b''
		1	# RhodeCode VCSServer provides access to different vcs backends via network.
		2	# Copyright (C) 2014-2023 RhodeCode GmbH
		3	#
		4	# This program is free software; you can redistribute it and/or modify
		5	# it under the terms of the GNU General Public License as published by
		6	# the Free Software Foundation; either version 3 of the License, or
		7	# (at your option) any later version.
		8	#
		9	# This program is distributed in the hope that it will be useful,
		10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		12	# GNU General Public License for more details.
		13	#
		14	# You should have received a copy of the GNU General Public License
		15	# along with this program; if not, write to the Free Software Foundation,
		16	# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		17
		18	from vcsserver.lib._vendor.statsd import client_from_config
		19
		20
		21	class StatsdClientNotInitialised(Exception):
		22	pass
		23
		24
		25	class _Singleton(type):
		26	"""A metaclass that creates a Singleton base class when called."""
		27
		28	_instances = {}
		29
		30	def __call__(cls, args, *kwargs):
		31	if cls not in cls._instances:
		32	cls._instances[cls] = super().__call__(args, *kwargs)
		33	return cls._instances[cls]
		34
		35
		36	class Singleton(_Singleton("SingletonMeta", (object,), {})):
		37	pass
		38
		39
		40	class StatsdClientClass(Singleton):
		41	setup_run = False
		42	statsd_client = None
		43	statsd = None
		44	strict_mode_init = False
		45
		46	def __getattribute__(self, name):
		47
		48	if name.startswith("statsd"):
		49	if self.setup_run:
		50	return super().__getattribute__(name)
		51	else:
		52	if self.strict_mode_init:
		53	raise StatsdClientNotInitialised(f"requested key was {name}")
		54	return None
		55
		56	return super().__getattribute__(name)
		57
		58	def setup(self, settings):
		59	"""
		60	Initialize the client
		61	"""
		62	strict_init_mode = settings.pop('statsd_strict_init', False)
		63
		64	statsd = client_from_config(settings)
		65	self.statsd = statsd
		66	self.statsd_client = statsd
		67	self.setup_run = True
		68
		69
		70	StatsdClient = StatsdClientClass()

vcsserver/lib/svnremoterepo.py

0 created 644 +160 0

			@@ -0,0 +1,160 b''
		1	# RhodeCode VCSServer provides access to different vcs backends via network.
		2	# Copyright (C) 2014-2023 RhodeCode GmbH
		3	#
		4	# This program is free software; you can redistribute it and/or modify
		5	# it under the terms of the GNU General Public License as published by
		6	# the Free Software Foundation; either version 3 of the License, or
		7	# (at your option) any later version.
		8	#
		9	# This program is distributed in the hope that it will be useful,
		10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		12	# GNU General Public License for more details.
		13	#
		14	# You should have received a copy of the GNU General Public License
		15	# along with this program; if not, write to the Free Software Foundation,
		16	# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		17
		18	import os
		19	import tempfile
		20
		21	from svn import client
		22	from svn import core
		23	from svn import ra
		24
		25	from mercurial import error
		26
		27	from vcsserver.str_utils import safe_bytes
		28
			# Executed at import time; svn_config is later handed to ra.open2()
			# in SubversionRepo.init_ra_and_client().
		29	core.svn_config_ensure(None)
		30	svn_config = core.svn_config_get_config(None)
		31
		32
		33	class RaCallbacks(ra.Callbacks):
		34	@staticmethod
		35	def open_tmp_file(pool): # pragma: no cover
		36	(fd, fn) = tempfile.mkstemp()
		37	os.close(fd)
		38	return fn
		39
		40	@staticmethod
		41	def get_client_string(pool):
		42	return b'RhodeCode-subversion-url-checker'
		43
		44
		45	class SubversionException(Exception):
		46	pass
		47
		48
		49	class SubversionConnectionException(SubversionException):
		50	"""Exception raised when a generic error occurs when connecting to a repository."""
		51
		52
		53	def normalize_url(url):
		54	if not url:
		55	return url
		56	if url.startswith(b'svn+http://') or url.startswith(b'svn+https://'):
		57	url = url[4:]
		58	url = url.rstrip(b'/')
		59	return url
		60
		61
		62	def _create_auth_baton(pool):
		63	"""Create a Subversion authentication baton. """
		64	# Give the client context baton a suite of authentication
		65	# providers.h
		66	platform_specific = [
		67	'svn_auth_get_gnome_keyring_simple_provider',
		68	'svn_auth_get_gnome_keyring_ssl_client_cert_pw_provider',
		69	'svn_auth_get_keychain_simple_provider',
		70	'svn_auth_get_keychain_ssl_client_cert_pw_provider',
		71	'svn_auth_get_kwallet_simple_provider',
		72	'svn_auth_get_kwallet_ssl_client_cert_pw_provider',
		73	'svn_auth_get_ssl_client_cert_file_provider',
		74	'svn_auth_get_windows_simple_provider',
		75	'svn_auth_get_windows_ssl_server_trust_provider',
		76	]
		77
		78	providers = []
		79
		80	for p in platform_specific:
		81	if getattr(core, p, None) is not None:
		82	try:
		83	providers.append(getattr(core, p)())
		84	except RuntimeError:
		85	pass
		86
		87	providers += [
		88	client.get_simple_provider(),
		89	client.get_username_provider(),
		90	client.get_ssl_client_cert_file_provider(),
		91	client.get_ssl_client_cert_pw_file_provider(),
		92	client.get_ssl_server_trust_file_provider(),
		93	]
		94
		95	return core.svn_auth_open(providers, pool)
		96
		97
		98	class SubversionRepo:
		99	"""Wrapper for a Subversion repository.
		100
		101	It uses the SWIG Python bindings, see above for requirements.
		102	"""
		103	def __init__(self, svn_url: bytes = b'', username: bytes = b'', password: bytes = b''):
		104
		105	self.username = username
		106	self.password = password
		107	self.svn_url = core.svn_path_canonicalize(svn_url)
		108
		109	self.auth_baton_pool = core.Pool()
		110	self.auth_baton = _create_auth_baton(self.auth_baton_pool)
		111	# self.init_ra_and_client() assumes that a pool already exists
		112	self.pool = core.Pool()
		113
		114	self.ra = self.init_ra_and_client()
		115	self.uuid = ra.get_uuid(self.ra, self.pool)
		116
		117	def init_ra_and_client(self):
		118	"""Initializes the RA and client layers, because sometimes getting
		119	unified diffs runs the remote server out of open files.
		120	"""
		121
		122	if self.username:
		123	core.svn_auth_set_parameter(self.auth_baton,
		124	core.SVN_AUTH_PARAM_DEFAULT_USERNAME,
		125	self.username)
		126	if self.password:
		127	core.svn_auth_set_parameter(self.auth_baton,
		128	core.SVN_AUTH_PARAM_DEFAULT_PASSWORD,
		129	self.password)
		130
		131	callbacks = RaCallbacks()
		132	callbacks.auth_baton = self.auth_baton
		133
		134	try:
		135	return ra.open2(self.svn_url, callbacks, svn_config, self.pool)
		136	except SubversionException as e:
		137	# e.child contains a detailed error messages
		138	msglist = []
		139	svn_exc = e
		140	while svn_exc:
		141	if svn_exc.args[0]:
		142	msglist.append(svn_exc.args[0])
		143	svn_exc = svn_exc.child
		144	msg = '\n'.join(msglist)
		145	raise SubversionConnectionException(msg)
		146
		147
		148	class svnremoterepo:
		149	""" the dumb wrapper for actual Subversion repositories """
		150
		151	def __init__(self, username: bytes = b'', password: bytes = b'', svn_url: bytes = b''):
		152	self.username = username or b''
		153	self.password = password or b''
		154	self.path = normalize_url(svn_url)
		155
		156	def svn(self):
		157	try:
		158	return SubversionRepo(self.path, self.username, self.password)
		159	except SubversionConnectionException as e:
		160	raise error.Abort(safe_bytes(e))

vcsserver/remote/__init__.py

0 created 644 +17 0

			@@ -0,0 +1,17 b''
		1	# RhodeCode VCSServer provides access to different vcs backends via network.
		2	# Copyright (C) 2014-2023 RhodeCode GmbH
		3	#
		4	# This program is free software; you can redistribute it and/or modify
		5	# it under the terms of the GNU General Public License as published by
		6	# the Free Software Foundation; either version 3 of the License, or
		7	# (at your option) any later version.
		8	#
		9	# This program is distributed in the hope that it will be useful,
		10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		12	# GNU General Public License for more details.
		13	#
		14	# You should have received a copy of the GNU General Public License
		15	# along with this program; if not, write to the Free Software Foundation,
		16	# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		17

vcsserver/str_utils.py

0 created 644 0 0

	1		NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_utils.py

0 created 644 0 0

	1		NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/type_utils.py

0 created 644 0 0

	1		NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff

.bumpversion.cfg

0 +1 -2

              [bumpversion]
-             current_version = 4.27.1
+             current_version = 5.0.0
              message = release: Bump version {current_version} to {new_version}
              [bumpversion:file:vcsserver/VERSION]

.hgignore

0 +4 0

              syntax: glob
              *.orig
              *.pyc
              *.swp
              *.sqlite
              *.tox
              *.egg-info
              *.egg
              *.eggs
              *.idea
              .DS_Store*
              syntax: regexp
              #.filename
              ^\.settings$
              ^\.project$
              ^\.pydevproject$
              ^\.coverage$
              ^\.cache.*$
+             ^\.venv.*$
+             ^\.ruff_cache.*$
              ^\.rhodecode$
              ^.dev
              ^build/
              ^coverage\.xml$
              ^data$
              ^dev.ini$
              ^acceptance_tests/dev.*\.ini$
              ^dist/
              ^fabfile.py
              ^htmlcov
              ^junit\.xml$
              ^node_modules/
              ^pylint.log$
              ^build$
              ^result$

MANIFEST.in

0 +7 0

              # top level files
              include *.rst
              include *.txt
              # package extras
              include vcsserver/VERSION
+             # all python files inside vcsserver
+             graft vcsserver
              # all config files
              recursive-include configs *
              # hook templates
              recursive-include vcsserver/hook_utils/hook_templates *
              # skip any tests files
              recursive-exclude vcsserver/tests *
+             recursive-exclude docs/_build *
+             recursive-exclude * __pycache__
+             recursive-exclude * *.py[co]
+             recursive-exclude * .*.sw[a-z]

Makefile

0 +108 -14

		@@ -1,45 +1,139 b''
1		.DEFAULT_GOAL := help
	1	# required for pushd to work..
	2	SHELL = /bin/bash
	3
2	4
3	5	# set by: PATH_TO_OUTDATED_PACKAGES=/some/path/outdated_packages.py
4	6	OUTDATED_PACKAGES = ${PATH_TO_OUTDATED_PACKAGES}
5	7
6	8	.PHONY: clean
7		clean: ## full clean
	9	## Cleanup compiled and cache py files
	10	clean:
8	11	make test-clean
9	12	find . -type f $ -iname '.c' -o -iname '.pyc' -o -iname '.so' -o -iname '.orig' $ -exec rm '{}' ';'
	13	find . -type d -name "build" -prune -exec rm -rf '{}' ';'
10	14
11	15
12	16	.PHONY: test
13		~~test~~: ## run test-clean and tests
	17	## run test-clean and tests
	18	test:
14	19	make test-clean
15	20	make test-only
16	21
17	22
18	23	.PHONY:test-clean
19		~~test-clean~~: ## run test-clean and tests
	24	## Cleanup test artifacts (coverage reports, junit/pylint output, __pycache__ dirs)
	25	test-clean:
20	26	rm -rf coverage.xml htmlcov junit.xml pylint.log result
21	27	find . -type d -name "__pycache__" -prune -exec rm -rf '{}' ';'
22	28	find . -type f $ -iname '.coverage.*' $ -exec rm '{}' ';'
23	29
24	30
25	31	.PHONY: test-only
26		test-only: ## run tests
	32	## Run tests only without cleanup
	33	test-only:
27	34	PYTHONHASHSEED=random \
28	35	py.test -x -vv -r xw -p no:sugar \
29		~~--cov~~=~~vcsserver~~ --cov-report=term-missing --cov-report=html \
30		vcsserver
	36	--cov-report=term-missing --cov-report=html \
	37	--cov=vcsserver vcsserver
31	38
32	39
33		.PHONY: generate-pkgs
34		generate-pkgs: ## generate new python packages
35		nix-shell pkgs/shell-generate.nix --command "pip2nix generate --licenses"
	40	.PHONY: ruff-check
	41	## run a ruff analysis
	42	ruff-check:
	43	ruff check --ignore F401 --ignore I001 --ignore E402 --ignore E501 --ignore F841 --exclude rhodecode/lib/dbmigrate --exclude .eggs --exclude .dev .
36	44
37	45
38	46	.PHONY: pip-packages
39		~~pip-packages~~: ~~## s~~how outdated packages
	47	## Show outdated packages
	48	pip-packages:
40	49	python ${OUTDATED_PACKAGES}
41	50
42	51
43		.PHONY: ~~help~~
44		help:
45		@grep -E '^[a-zA-Z_-]+:.?## .$$' $(MAKEFILE_LIST) \| sort \| awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-24s\033[0m %s\n", $$1, $$2}'
	52	.PHONY: build
	53	## Build sdist/egg
	54	build:
	55	python -m build
	56
	57
	58	.PHONY: dev-sh
	59	## make dev-sh
	60	dev-sh:
	61	sudo echo "deb [trusted=yes] https://apt.fury.io/rsteube/ /" \| sudo tee -a "/etc/apt/sources.list.d/fury.list"
	62	sudo apt-get update
	63	sudo apt-get install -y zsh carapace-bin
	64	rm -rf /home/rhodecode/.oh-my-zsh
	65	curl https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh \| sh
	66	echo "source <(carapace _carapace)" > /home/rhodecode/.zsrc
	67	PROMPT='%(?.%F{green}√.%F{red}?%?)%f %B%F{240}%1~%f%b %# ' zsh
	68
	69
	70	.PHONY: dev-env
	71	## make dev-env based on the requirements files and install develop of packages
	72	## Cleanup: pip freeze \| grep -v "^-e" \| grep -v "@" \| xargs pip uninstall -y
	73	dev-env:
	74	pip install build virtualenv
	75	pip wheel --wheel-dir=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
	76	pip install --no-index --find-links=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
	77	pip install -e .
	78
	79
	80	.PHONY: sh
	81	## shortcut for make dev-sh dev-env
	82	sh:
	83	make dev-env
	84	make dev-sh
	85
	86
	87	.PHONY: dev-srv
	88	## run develop server instance, docker exec -it $(docker ps -q --filter 'name=dev-enterprise-ce') /bin/bash
	89	dev-srv:
	90	pserve --reload .dev/dev.ini
	91
	92
	93	.PHONY: dev-srv-g
	94	## run gunicorn multi process workers
	95	dev-srv-g:
	96	gunicorn --workers=4 --paste .dev/dev.ini --bind=0.0.0.0:10010 --config=.dev/gunicorn_config.py
	97
	98
	99	# Default command on calling make
	100	.DEFAULT_GOAL := show-help
	101
	102	.PHONY: show-help
	103	show-help:
	104	@echo "$$(tput bold)Available rules:$$(tput sgr0)"
	105	@echo
	106	@sed -n -e "/^## / { \
	107	h; \
	108	s/.*//; \
	109	:doc" \
	110	-e "H; \
	111	n; \
	112	s/^## //; \
	113	t doc" \
	114	-e "s/:.*//; \
	115	G; \
	116	s/\\n## /---/; \
	117	s/\\n/ /g; \
	118	p; \
	119	}" ${MAKEFILE_LIST} \
	120	\| LC_ALL='C' sort --ignore-case \
	121	\| awk -F '---' \
	122	-v ncol=$$(tput cols) \
	123	-v indent=19 \
	124	-v col_on="$$(tput setaf 6)" \
	125	-v col_off="$$(tput sgr0)" \
	126	'{ \
	127	printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
	128	n = split($$2, words, " "); \
	129	line_length = ncol - indent; \
	130	for (i = 1; i <= n; i++) { \
	131	line_length -= length(words[i]) + 1; \
	132	if (line_length <= 0) { \
	133	line_length = ncol - indent - length(words[i]) - 1; \
	134	printf "\n%*s ", -indent, " "; \
	135	} \
	136	printf "%s ", words[i]; \
	137	} \
	138	printf "\n"; \
	139	}'

configs/development.ini

0 +42 -84

-             ## -*- coding: utf-8 -*-
+             #
              ; #################################
              ; RHODECODE VCSSERVER CONFIGURATION
              ; #################################
              [server:main]
              ; COMMON HOST/IP CONFIG
              host = 0.0.0.0
-             port = 9900
+             port = 10010
              ; ##################################################
              ; WAITRESS WSGI SERVER - Recommended for Development
              ; ##################################################
              ; use server type
              use = egg:waitress#main
              ; number of worker threads
              threads = 5
              ; MAX BODY SIZE 100GB
              max_request_body_size = 107374182400
              ; Use poll instead of select, fixes file descriptors limits problems.
              ; May not work on old windows systems.
              asyncore_use_poll = true
              ; ###########################
              ; GUNICORN APPLICATION SERVER
              ; ###########################
-             ; run with gunicorn --log-config rhodecode.ini --paste rhodecode.ini
+             ; run with gunicorn --paste rhodecode.ini
              ; Module to use, this setting shouldn't be changed
              #use = egg:gunicorn#main
-             ; Sets the number of process workers. More workers means more concurrent connections
-             ; RhodeCode can handle at the same time. Each additional worker also it increases
-             ; memory usage as each has it's own set of caches.
-             ; Recommended value is (2 * NUMBER_OF_CPUS + 1), eg 2CPU = 5 workers, but no more
-             ; than 8-10 unless for really big deployments .e.g 700-1000 users.
-             ; `instance_id = *` must be set in the [app:main] section below (which is the default)
-             ; when using more than 1 worker.
-             #workers = 2
-             ; Gunicorn access log level
-             #loglevel = info
-             ; Process name visible in process list
-             #proc_name = rhodecode_vcsserver
-             ; Type of worker class, one of `sync`, `gevent`
-             ; currently `sync` is the only option allowed.
-             #worker_class = sync
-             ; The maximum number of simultaneous clients. Valid only for gevent
-             #worker_connections = 10
-             ; Max number of requests that worker will handle before being gracefully restarted.
-             ; Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
-             #max_requests = 1000
-             #max_requests_jitter = 30
-             ; Amount of time a worker can spend with handling a request before it
-             ; gets killed and restarted. By default set to 21600 (6hrs)
-             ; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
-             #timeout = 21600
-             ; The maximum size of HTTP request line in bytes.
-             ; 0 for unlimited
-             #limit_request_line = 0
-             ; Limit the number of HTTP headers fields in a request.
-             ; By default this value is 100 and can't be larger than 32768.
-             #limit_request_fields = 32768
-             ; Limit the allowed size of an HTTP request header field.
-             ; Value is a positive number or 0.
-             ; Setting it to 0 will allow unlimited header field sizes.
-             #limit_request_field_size = 0
-             ; Timeout for graceful workers restart.
-             ; After receiving a restart signal, workers have this much time to finish
-             ; serving requests. Workers still alive after the timeout (starting from the
-             ; receipt of the restart signal) are force killed.
-             ; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
-             #graceful_timeout = 3600
-             # The number of seconds to wait for requests on a Keep-Alive connection.
-             # Generally set in the 1-5 seconds range.
-             #keepalive = 2
-             ; Maximum memory usage that each worker can use before it will receive a
-             ; graceful restart signal 0 = memory monitoring is disabled
-             ; Examples: 268435456 (256MB), 536870912 (512MB)
-             ; 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
-             #memory_max_usage = 0
-             ; How often in seconds to check for memory usage for each gunicorn worker
-             #memory_usage_check_interval = 60
-             ; Threshold value for which we don't recycle worker if GarbageCollection
-             ; frees up enough resources. Before each restart we try to run GC on worker
-             ; in case we get enough free memory after that, restart will not happen.
-             #memory_usage_recovery_threshold = 0.8
              [app:main]
              ; The %(here)s variable will be replaced with the absolute path of parent directory
              ; of this file
+             ; Each option in the app:main section can be overridden by an environment variable
+             ;
+             ;To override an option:
+             ;
+             ;RC_<KeyName>
+             ;Everything should be uppercase, . and - should be replaced by _.
+             ;For example, if you have these configuration settings:
+             ;rc_cache.repo_object.backend = foo
+             ;can be overridden by
+             ;export RC_CACHE_REPO_OBJECT_BACKEND=foo
              use = egg:rhodecode-vcsserver
              ; #############
              ; DEBUG OPTIONS
              ; #############
              # During development we want to have the debug toolbar enabled
              pyramid.includes =
                  pyramid_debugtoolbar
              debugtoolbar.hosts = 0.0.0.0/0
              debugtoolbar.exclude_prefixes =
                  /css
                  /fonts
                  /images
                  /js
              ; #################
              ; END DEBUG OPTIONS
              ; #################
              ; Pyramid default locales, we need this to be set
-             pyramid.default_locale_name = en
+             #pyramid.default_locale_name = en
              ; default locale used by VCS systems
-             locale = en_US.UTF-8
+             #locale = en_US.UTF-8
              ; path to binaries for vcsserver, it should be set by the installer
-             ; at installation time, e.g /home/user/vcsserver-1/profile/bin
+             ; at installation time, e.g /home/user/.rccontrol/vcsserver-1/profile/bin
              ; it can also be a path to nix-build output in case of development
              core.binary_dir = ""
              ; Custom exception store path, defaults to TMPDIR
              ; This is used to store exception from RhodeCode in shared directory
              #exception_tracker.store_path =
              ; #############
              ; DOGPILE CACHE
              ; #############
              ; Default cache dir for caches. Putting this into a ramdisk can boost performance.
              ; eg. /tmpfs/data_ramdisk, however this directory might require large amount of space
-             cache_dir = %(here)s/data
+             #cache_dir = %(here)s/data
              ; ***************************************
              ; `repo_object` cache, default file based
              ; ***************************************
              ; `repo_object` cache settings for vcs methods for repositories
-             rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
+             #rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
              ; cache auto-expires after N seconds
              ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
-             rc_cache.repo_object.expiration_time = 2592000
+             #rc_cache.repo_object.expiration_time = 2592000
              ; file cache store path. Defaults to `cache_dir =` value or tempdir if both values are not set
-             #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache.db
+             #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache_repo_object.db
              ; ***********************************************************
              ; `repo_object` cache with redis backend
              ; recommended for larger instance, and for better performance
              ; ***********************************************************
              ; `repo_object` cache settings for vcs methods for repositories
              #rc_cache.repo_object.backend = dogpile.cache.rc.redis_msgpack
              ; cache auto-expires after N seconds
              ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
              #rc_cache.repo_object.expiration_time = 2592000
              ; redis_expiration_time needs to be greater than expiration_time
              #rc_cache.repo_object.arguments.redis_expiration_time = 3592000
              #rc_cache.repo_object.arguments.host = localhost
              #rc_cache.repo_object.arguments.port = 6379
              #rc_cache.repo_object.arguments.db = 5
              #rc_cache.repo_object.arguments.socket_timeout = 30
              ; more Redis options: https://dogpilecache.sqlalchemy.org/en/latest/api.html#redis-backends
              #rc_cache.repo_object.arguments.distributed_lock = true
              ; auto-renew lock to prevent stale locks, slower but safer. Use only if problems happen
              #rc_cache.repo_object.arguments.lock_auto_renewal = true
-             ; Statsd client config
+             ; Statsd client config, this is used to send metrics to statsd
+             ; We recommend setting up statsd_exporter and scraping the metrics using Prometheus
              #statsd.enabled = false
              #statsd.statsd_host = 0.0.0.0
              #statsd.statsd_port = 8125
              #statsd.statsd_prefix =
              #statsd.statsd_ipv6 = false
+             ; configure logging automatically at server startup; set to false
+             ; to use the custom logging config below.
+             ; RC_LOGGING_FORMATTER
+             ; RC_LOGGING_LEVEL
+             ; env variables can control the settings for logging in case of autoconfigure
+             #logging.autoconfigure = true
+             ; specify your own custom logging config file to configure logging
+             #logging.logging_conf_file = /path/to/custom_logging.ini
              ; #####################
              ; LOGGING CONFIGURATION
              ; #####################
              [loggers]
              keys = root, vcsserver
              [handlers]
              keys = console
              [formatters]
-             keys = generic
+             keys = generic, json
              ; #######
              ; LOGGERS
              ; #######
              [logger_root]
              level = NOTSET
              handlers = console
              [logger_vcsserver]
              level = DEBUG
              handlers =
              qualname = vcsserver
              propagate = 1
              ; ########
              ; HANDLERS
              ; ########
              [handler_console]
              class = StreamHandler
              args = (sys.stderr, )
              level = DEBUG
+             ; To enable JSON formatted logs replace 'generic' with 'json'
+             ; This allows sending properly formatted logs to grafana loki or elasticsearch
              formatter = generic
              ; ##########
              ; FORMATTERS
              ; ##########
              [formatter_generic]
              format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
              datefmt = %Y-%m-%d %H:%M:%S
+             [formatter_json]
+             format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
+             class = vcsserver.lib._vendor.jsonlogger.JsonFormatter

configs/gunicorn_config.py

0 +300 -45

              """
              Gunicorn config extension and hooks. This config file adds some extra settings and memory management.
              Gunicorn configuration should be managed by .ini files entries of RhodeCode or VCSServer
              """
              import gc
              import os
              import sys
              import math
              import time
              import threading
              import traceback
              import random
+             import socket
+             import dataclasses
              from gunicorn.glogging import Logger
              def get_workers():
                  import multiprocessing
                  return multiprocessing.cpu_count() * 2 + 1
-             # GLOBAL
+             bind = "127.0.0.1:10010"
+             # Error logging output for gunicorn (-) is stderr
              errorlog = '-'
+             # Access logging output for gunicorn (-) is stdout
              accesslog = '-'
              # SERVER MECHANICS
              # None == system temp dir
              # worker_tmp_dir is recommended to be set to some tmpfs
              worker_tmp_dir = None
              tmp_upload_dir = None
+             # use re-use port logic
+             #reuse_port = True
              # Custom log format
+             #access_log_format = (
+             #    '%(t)s %(p)s INFO  [GNCRN] %(h)-15s rqt:%(L)s %(s)s %(b)-6s "%(m)s:%(U)s %(q)s" usr:%(u)s "%(f)s" "%(a)s"')
+             # loki format for easier parsing in grafana
              access_log_format = (
-                 '%(t)s %(p)s INFO  [GNCRN] %(h)-15s rqt:%(L)s %(s)s %(b)-6s "%(m)s:%(U)s %(q)s" usr:%(u)s "%(f)s" "%(a)s"')
+                 'time="%(t)s" pid=%(p)s level="INFO" type="[GNCRN]" ip="%(h)-15s" rqt="%(L)s" response_code="%(s)s" response_bytes="%(b)-6s" uri="%(m)s:%(U)s %(q)s" user=":%(u)s" user_agent="%(a)s"')
+             # self adjust workers based on CPU count, to use the maximum of CPU without exceeding resource quotas
+             # workers = get_workers()
+             # Gunicorn log level (granularity of the error log output)
+             loglevel = 'info'
+             # Process name visible in a process list
+             proc_name = "rhodecode_vcsserver"
+             # Type of worker class, one of `sync`, `gevent` or `gthread`
+             # currently `sync` is the only option allowed for vcsserver; for rhodecode all three are allowed
+             # gevent:
+             # In this case, the maximum number of concurrent requests is (N workers * X worker_connections)
+             # e.g. workers =3 worker_connections=10 = 3*10, 30 concurrent requests can be handled
+             # gthread:
+             # In this case, the maximum number of concurrent requests is (N workers * X threads)
+             # e.g. workers = 3 threads=3 = 3*3, 9 concurrent requests can be handled
+             worker_class = 'sync'
+             # Sets the number of process workers. More workers means more concurrent connections
+             # RhodeCode can handle at the same time. Each additional worker also increases
+             # memory usage as each has its own set of caches.
+             # The recommended value is (2 * NUMBER_OF_CPUS + 1), e.g. 2 CPUs = 5 workers, but no more
+             # than 8-10 unless for huge deployments, e.g. 700-1000 users.
+             # `instance_id = *` must be set in the [app:main] section below (which is the default)
+             # when using more than 1 worker.
+             workers = 2
+             # Threads numbers for worker class gthread
+             threads = 1
+             # The maximum number of simultaneous clients. Valid only for gevent
+             # In this case, the maximum number of concurrent requests is (N workers * X worker_connections)
+             # e.g workers =3 worker_connections=10 = 3*10, 30 concurrent requests can be handled
+             worker_connections = 10
+             # Max number of requests that worker will handle before being gracefully restarted.
+             # Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
+             max_requests = 2000
+             max_requests_jitter = int(max_requests * 0.2)  # 20% of max_requests
+             # The maximum number of pending connections.
+             # Exceeding this number results in the client getting an error when attempting to connect.
+             backlog = 64
-             # self adjust workers based on CPU count
-             # workers = get_workers()
+             # The amount of time a worker can spend handling a request before it
+             # gets killed and restarted. By default, set to 21600 (6hrs)
+             # Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
+             timeout = 21600
+             # The maximum size of HTTP request line in bytes.
+             # 0 for unlimited
+             limit_request_line = 0
+             # Limit the number of HTTP headers fields in a request.
+             # By default this value is 100 and can't be larger than 32768.
+             limit_request_fields = 32768
+             # Limit the allowed size of an HTTP request header field.
+             # Value is a positive number or 0.
+             # Setting it to 0 will allow unlimited header field sizes.
+             limit_request_field_size = 0
+             # Timeout for graceful workers restart.
+             # After receiving a restart signal, workers have this much time to finish
+             # serving requests. Workers still alive after the timeout (starting from the
+             # receipt of the restart signal) are force killed.
+             # Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
+             graceful_timeout = 21600
+             # The number of seconds to wait for requests on a Keep-Alive connection.
+             # Generally set in the 1-5 seconds range.
+             keepalive = 2
+             # Maximum memory usage that each worker can use before it will receive a
+             # graceful restart signal 0 = memory monitoring is disabled
+             # Examples: 268435456 (256MB), 536870912 (512MB)
+             # 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
+             # Dynamic formula 1024 * 1024 * 256 == 256MBs
+             memory_max_usage = 0
+             # How often in seconds to check for memory usage for each gunicorn worker
+             memory_usage_check_interval = 60
+             # Threshold value for which we don't recycle worker if GarbageCollection
+             # frees up enough resources. Before each restart, we try to run GC on worker
+             # in case we get enough free memory after that; restart will not happen.
+             memory_usage_recovery_threshold = 0.8
+             @dataclasses.dataclass
+             class MemoryCheckConfig:
+                 max_usage: int
+                 check_interval: int
+                 recovery_threshold: float
              def _get_process_rss(pid=None):
                  try:
                      import psutil
                      if pid:
                          proc = psutil.Process(pid)
                      else:
                          proc = psutil.Process()
                      return proc.memory_info().rss
                  except Exception:
                      return None
              def _get_config(ini_path):
+                 import configparser
                  try:
-                     import configparser
-                 except ImportError:
-                     import ConfigParser as configparser
-                 try:
                      config = configparser.RawConfigParser()
                      config.read(ini_path)
                      return config
                  except Exception:
                      return None
-             def _time_with_offset(memory_usage_check_interval):
-                 return time.time() - random.randint(0, memory_usage_check_interval/2.0)
-             def pre_fork(server, worker):
-                 pass
+             def get_memory_usage_params(config=None):
+                 # memory spec defaults
+                 _memory_max_usage = memory_max_usage
+                 _memory_usage_check_interval = memory_usage_check_interval
+                 _memory_usage_recovery_threshold = memory_usage_recovery_threshold
-             def post_fork(server, worker):
-                 # memory spec defaults
-                 _memory_max_usage = 0
-                 _memory_usage_check_interval = 60
-                 _memory_usage_recovery_threshold = 0.8
-                 ini_path = os.path.abspath(server.cfg.paste)
+                 if config:
+                     ini_path = os.path.abspath(config)
                  conf = _get_config(ini_path)
                  section = 'server:main'
                  if conf and conf.has_section(section):
                      if conf.has_option(section, 'memory_max_usage'):
                          _memory_max_usage = conf.getint(section, 'memory_max_usage')
                      if conf.has_option(section, 'memory_usage_check_interval'):
                          _memory_usage_check_interval = conf.getint(section, 'memory_usage_check_interval')
                      if conf.has_option(section, 'memory_usage_recovery_threshold'):
                          _memory_usage_recovery_threshold = conf.getfloat(section, 'memory_usage_recovery_threshold')
-                 worker._memory_max_usage = _memory_max_usage
-                 worker._memory_usage_check_interval = _memory_usage_check_interval
-                 worker._memory_usage_recovery_threshold = _memory_usage_recovery_threshold
+                 _memory_max_usage = int(os.environ.get('RC_GUNICORN_MEMORY_MAX_USAGE', '')
+                                         or _memory_max_usage)
+                 _memory_usage_check_interval = int(os.environ.get('RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL', '')
+                                                    or _memory_usage_check_interval)
+                 _memory_usage_recovery_threshold = float(os.environ.get('RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD', '')
+                                                          or _memory_usage_recovery_threshold)
+                 return MemoryCheckConfig(_memory_max_usage, _memory_usage_check_interval, _memory_usage_recovery_threshold)
+             def _time_with_offset(check_interval):
+                 return time.time() - random.randint(0, check_interval/2.0)
+             def pre_fork(server, worker):
+                 pass
+             def post_fork(server, worker):
+                 memory_conf = get_memory_usage_params()
+                 _memory_max_usage = memory_conf.max_usage
+                 _memory_usage_check_interval = memory_conf.check_interval
+                 _memory_usage_recovery_threshold = memory_conf.recovery_threshold
+                 worker._memory_max_usage = int(os.environ.get('RC_GUNICORN_MEMORY_MAX_USAGE', '')
+                                                or _memory_max_usage)
+                 worker._memory_usage_check_interval = int(os.environ.get('RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL', '')
+                                                           or _memory_usage_check_interval)
+                 worker._memory_usage_recovery_threshold = float(os.environ.get('RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD', '')
+                                                                 or _memory_usage_recovery_threshold)
                  # register memory last check time, with some random offset so we don't recycle all
                  # at once
                  worker._last_memory_check_time = _time_with_offset(_memory_usage_check_interval)
                  if _memory_max_usage:
-                     server.log.info("[%-10s] WORKER spawned with max memory set at %s", worker.pid,
+                     server.log.info("pid=[%-10s] WORKER spawned with max memory set at %s", worker.pid,
                                      _format_data_size(_memory_max_usage))
                  else:
-                     server.log.info("[%-10s] WORKER spawned", worker.pid)
+                     server.log.info("pid=[%-10s] WORKER spawned", worker.pid)
              def pre_exec(server):
                  """
                  Log that the forked child process is about to re-execute.

                  :param server: object exposing a ``log`` logger (only ``server.log.info`` is used)
                  """
                  server.log.info("Forked child, re-executing.")
              def on_starting(server):
                  server_lbl = '{} {}'.format(server.proc_name, server.address)
                  server.log.info("Server %s is starting.", server_lbl)
+                 server.log.info('Config:')
+                 server.log.info(f"\n{server.cfg}")
+                 server.log.info(get_memory_usage_params())
              def when_ready(server):
                  """
                  Log that the server is ready and that workers are about to be spawned.

                  The ``server`` object itself is interpolated into the log message.

                  :param server: object exposing a ``log`` logger (only ``server.log.info`` is used)
                  """
                  server.log.info("Server %s is ready. Spawning workers", server)
              def on_reload(server):
                  """
                  Reload hook; intentionally does nothing.

                  :param server: unused
                  """
                  pass
              def _format_data_size(size, unit="B", precision=1, binary=True):
                  """Format a number using SI units (kilo, mega, etc.).
                  ``size``: The number as a float or int.
                  ``unit``: The unit name in plural form. Examples: "bytes", "B".
                  ``precision``: How many digits to the right of the decimal point. Default
                  is 1.  0 suppresses the decimal point.
                  ``binary``: If false, use base-10 decimal prefixes (kilo = K = 1000).
                  If true, use base-2 binary prefixes (kibi = Ki = 1024).
                  ``full_name``: If false (default), use the prefix abbreviation ("k" or
                  "Ki").  If true, use the full prefix ("kilo" or "kibi"). If false,
                  use abbreviation ("k" or "Ki").
                  """
                  if not binary:
                      base = 1000
                      multiples = ('', 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y')
                  else:
                      base = 1024
                      multiples = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi')
                  sign = ""
                  if size > 0:
                      m = int(math.log(size, base))
                  elif size < 0:
                      sign = "-"
                      size = -size
                      m = int(math.log(size, base))
                  else:
                      m = 0
                  if m > 8:
                      m = 8
                  if m == 0:
                      precision = '%.0f'
                  else:
                      precision = '%%.%df' % precision
                  size = precision % (size / math.pow(base, m))
                  return '%s%s %s%s' % (sign, size.strip(), multiples[m], unit)
              def _check_memory_usage(worker):
+                 """
+                 Check the worker's memory usage and flag it for restart when it stays too high.
+
+                 Does nothing when ``worker._memory_max_usage`` is falsy. Otherwise, once more than
+                 ``worker._memory_usage_check_interval`` seconds have passed since the last check,
+                 the process RSS is compared with the limit; when it is exceeded a full garbage
+                 collection is forced, and if usage is still above
+                 ``limit * worker._memory_usage_recovery_threshold`` the worker is marked as not
+                 alive. The last-check timestamp is refreshed after every performed check.
+
+                 :param worker: worker object carrying the ``_memory_*`` settings, ``log`` and ``alive``
+                 """
-                 memory_max_usage = worker._memory_max_usage
-                 if not memory_max_usage:
+                 _memory_max_usage = worker._memory_max_usage
+                 if not _memory_max_usage:
                      return
-                 memory_usage_check_interval = worker._memory_usage_check_interval
-                 memory_usage_recovery_threshold = memory_max_usage * worker._memory_usage_recovery_threshold
+                 _memory_usage_check_interval = worker._memory_usage_check_interval
+                 # use the renamed local; the old ``memory_max_usage`` name is no longer bound here
+                 _memory_usage_recovery_threshold = _memory_max_usage * worker._memory_usage_recovery_threshold
                  elapsed = time.time() - worker._last_memory_check_time
-                 if elapsed > memory_usage_check_interval:
+                 if elapsed > _memory_usage_check_interval:
                      mem_usage = _get_process_rss()
-                     if mem_usage and mem_usage > memory_max_usage:
+                     if mem_usage and mem_usage > _memory_max_usage:
                          worker.log.info(
                              "memory usage %s > %s, forcing gc",
-                             _format_data_size(mem_usage), _format_data_size(memory_max_usage))
+                             _format_data_size(mem_usage), _format_data_size(_memory_max_usage))
                          # Try to clean it up by forcing a full collection.
                          gc.collect()
                          mem_usage = _get_process_rss()
-                         if mem_usage > memory_usage_recovery_threshold:
+                         if mem_usage > _memory_usage_recovery_threshold:
                              # Didn't clean up enough, we'll have to terminate.
                              worker.log.warning(
                                  "memory usage %s > %s after gc, quitting",
-                                 _format_data_size(mem_usage), _format_data_size(memory_max_usage))
+                                 _format_data_size(mem_usage), _format_data_size(_memory_max_usage))
                              # This will cause worker to auto-restart itself
                              worker.alive = False
                      worker._last_memory_check_time = time.time()
              def worker_int(worker):
-                 worker.log.info("[%-10s] worker received INT or QUIT signal", worker.pid)
+                 worker.log.info("pid=[%-10s] worker received INT or QUIT signal", worker.pid)
-                 # get traceback info, on worker crash
+                 # get traceback info, when a worker crashes
+                 def get_thread_id(t_id):
                  id2name = dict([(th.ident, th.name) for th in threading.enumerate()])
+                     return id2name.get(t_id, "unknown_thread_id")
                  code = []
-                 for thread_id, stack in sys._current_frames().items():
+                 for thread_id, stack in sys._current_frames().items():  # noqa
                      code.append(
-                         "\n# Thread: %s(%d)" % (id2name.get(thread_id, ""), thread_id))
+                         "\n# Thread: %s(%d)" % (get_thread_id(thread_id), thread_id))
                      for fname, lineno, name, line in traceback.extract_stack(stack):
                          code.append('File: "%s", line %d, in %s' % (fname, lineno, name))
                          if line:
                              code.append("  %s" % (line.strip()))
                  worker.log.debug("\n".join(code))
              def worker_abort(worker):
-                 worker.log.info("[%-10s] worker received SIGABRT signal", worker.pid)
+                 worker.log.info("pid=[%-10s] worker received SIGABRT signal", worker.pid)
              def worker_exit(server, worker):
-                 worker.log.info("[%-10s] worker exit", worker.pid)
+                 worker.log.info("pid=[%-10s] worker exit", worker.pid)
              def child_exit(server, worker):
-                 worker.log.info("[%-10s] worker child exit", worker.pid)
+                 worker.log.info("pid=[%-10s] worker child exit", worker.pid)
              def pre_request(worker, req):
                  """
                  Stamp the request start time on the worker and emit a debug line.

                  :param worker: worker object; gets ``start_time`` set, ``nr`` and ``log`` are read
                  :param req: request object; ``method`` and ``path`` are logged
                  """
                  worker.start_time = time.time()
                  log_args = (worker.nr, req.method, req.path)
                  worker.log.debug("GNCRN PRE  WORKER [cnt:%s]: %s %s", *log_args)
              def post_request(worker, req, environ, resp):
                  """
                  Log the request outcome with its duration, then run the memory check.

                  :param worker: worker object; ``start_time``, ``nr`` and ``log`` are read
                  :param req: request object; ``method`` and ``path`` are logged
                  :param environ: unused here
                  :param resp: response object; ``status_code`` is logged when present
                  """
                  elapsed = time.time() - worker.start_time
                  # Gunicorn sometimes has problems with reading the status_code
                  resp_status = getattr(resp, 'status_code', '')
                  log_args = (worker.nr, req.method, req.path, resp_status, elapsed)
                  worker.log.debug(
                      "GNCRN POST WORKER [cnt:%s]: %s %s resp: %s, Load Time: %.4fs",
                      *log_args)
                  _check_memory_usage(worker)
+             def _filter_proxy(ip):
+                 """
+                 Passed in IP addresses in HEADERS can be in a special format of multiple
+                 ips. Those comma separated IPs are passed from various proxies in the
+                 chain of request processing. The left-most being the original client.
+                 We only care about the first IP which came from the org. client.
+                 :param ip: ip string from headers
+                 """
+                 if ',' in ip:
+                     _ips = ip.split(',')
+                     _first_ip = _ips[0].strip()
+                     return _first_ip
+                 return ip
+             def _filter_port(ip):
+                 """
+                 Removes a port from ip, there are 4 main cases to handle here.
+                 - ipv4 eg. 127.0.0.1
+                 - ipv6 eg. ::1
+                 - ipv4+port eg. 127.0.0.1:8080
+                 - ipv6+port eg. [::1]:8080
+                 :param ip:
+                 """
+                 def is_ipv6(ip_addr):
+                     if hasattr(socket, 'inet_pton'):
+                         try:
+                             socket.inet_pton(socket.AF_INET6, ip_addr)
+                         except socket.error:
+                             return False
+                     else:
+                         return False
+                     return True
+                 if ':' not in ip:  # must be ipv4 pure ip
+                     return ip
+                 if '[' in ip and ']' in ip:  # ipv6 with port
+                     return ip.split(']')[0][1:].lower()
+                 # must be ipv6 or ipv4 with port
+                 if is_ipv6(ip):
+                     return ip
+                 else:
+                     ip, _port = ip.split(':')[:2]  # means ipv4+port
+                     return ip
+             def get_ip_addr(environ):
+                 proxy_key = 'HTTP_X_REAL_IP'
+                 proxy_key2 = 'HTTP_X_FORWARDED_FOR'
+                 def_key = 'REMOTE_ADDR'
+                 def _filters(x):
+                     return _filter_port(_filter_proxy(x))
+                 ip = environ.get(proxy_key)
+                 if ip:
+                     return _filters(ip)
+                 ip = environ.get(proxy_key2)
+                 if ip:
+                     return _filters(ip)
+                 ip = environ.get(def_key, '0.0.0.0')
+                 return _filters(ip)
              class RhodeCodeLogger(Logger):
                  """
                  Custom Logger that allows some customization that gunicorn doesn't allow
                  """
                  datefmt = r"%Y-%m-%d %H:%M:%S"
                  def __init__(self, cfg):
                      Logger.__init__(self, cfg)
                  def now(self):
                      """ return date in RhodeCode Log format """
                      now = time.time()
-                     msecs = int((now - long(now)) * 1000)
+                     msecs = int((now - int(now)) * 1000)
                      return time.strftime(self.datefmt, time.localtime(now)) + '.{0:03d}'.format(msecs)
+                 def atoms(self, resp, req, environ, request_time):
+                     """ Gets atoms for log formatting.
+                     """
+                     status = resp.status
+                     if isinstance(status, str):
+                         status = status.split(None, 1)[0]
+                     atoms = {
+                         'h': get_ip_addr(environ),
+                         'l': '-',
+                         'u': self._get_user(environ) or '-',
+                         't': self.now(),
+                         'r': "%s %s %s" % (environ['REQUEST_METHOD'],
+                                            environ['RAW_URI'],
+                                            environ["SERVER_PROTOCOL"]),
+                         's': status,
+                         'm': environ.get('REQUEST_METHOD'),
+                         'U': environ.get('PATH_INFO'),
+                         'q': environ.get('QUERY_STRING'),
+                         'H': environ.get('SERVER_PROTOCOL'),
+                         'b': getattr(resp, 'sent', None) is not None and str(resp.sent) or '-',
+                         'B': getattr(resp, 'sent', None),
+                         'f': environ.get('HTTP_REFERER', '-'),
+                         'a': environ.get('HTTP_USER_AGENT', '-'),
+                         'T': request_time.seconds,
+                         'D': (request_time.seconds * 1000000) + request_time.microseconds,
+                         'M': (request_time.seconds * 1000) + int(request_time.microseconds/1000),
+                         'L': "%d.%06d" % (request_time.seconds, request_time.microseconds),
+                         'p': "<%s>" % os.getpid()
+                     }
+                     # add request headers
+                     if hasattr(req, 'headers'):
+                         req_headers = req.headers
+                     else:
+                         req_headers = req
+                     if hasattr(req_headers, "items"):
+                         req_headers = req_headers.items()
+                     atoms.update({"{%s}i" % k.lower(): v for k, v in req_headers})
+                     resp_headers = resp.headers
+                     if hasattr(resp_headers, "items"):
+                         resp_headers = resp_headers.items()
+                     # add response headers
+                     atoms.update({"{%s}o" % k.lower(): v for k, v in resp_headers})
+                     # add environ variables
+                     environ_variables = environ.items()
+                     atoms.update({"{%s}e" % k.lower(): v for k, v in environ_variables})
+                     return atoms
              logger_class = RhodeCodeLogger

configs/production.ini

0 +43 -85

-             ## -*- coding: utf-8 -*-
+             #
              ; #################################
              ; RHODECODE VCSSERVER CONFIGURATION
              ; #################################
              [server:main]
              ; COMMON HOST/IP CONFIG
              host = 127.0.0.1
-             port = 9900
+             port = 10010
              ; ###########################
              ; GUNICORN APPLICATION SERVER
              ; ###########################
-             ; run with gunicorn --log-config rhodecode.ini --paste rhodecode.ini
+             ; run with gunicorn --paste rhodecode.ini
              ; Module to use, this setting shouldn't be changed
              use = egg:gunicorn#main
-             ; Sets the number of process workers. More workers means more concurrent connections
-             ; RhodeCode can handle at the same time. Each additional worker also it increases
-             ; memory usage as each has it's own set of caches.
-             ; Recommended value is (2 * NUMBER_OF_CPUS + 1), eg 2CPU = 5 workers, but no more
-             ; than 8-10 unless for really big deployments .e.g 700-1000 users.
-             ; `instance_id = *` must be set in the [app:main] section below (which is the default)
-             ; when using more than 1 worker.
-             workers = 2
-             ; Gunicorn access log level
-             loglevel = info
-             ; Process name visible in process list
-             proc_name = rhodecode_vcsserver
-             ; Type of worker class, one of `sync`, `gevent`
-             ; currently `sync` is the only option allowed.
-             worker_class = sync
-             ; The maximum number of simultaneous clients. Valid only for gevent
-             worker_connections = 10
-             ; Max number of requests that worker will handle before being gracefully restarted.
-             ; Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
-             max_requests = 1000
-             max_requests_jitter = 30
-             ; Amount of time a worker can spend with handling a request before it
-             ; gets killed and restarted. By default set to 21600 (6hrs)
-             ; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
-             timeout = 21600
-             ; The maximum size of HTTP request line in bytes.
-             ; 0 for unlimited
-             limit_request_line = 0
-             ; Limit the number of HTTP headers fields in a request.
-             ; By default this value is 100 and can't be larger than 32768.
-             limit_request_fields = 32768
-             ; Limit the allowed size of an HTTP request header field.
-             ; Value is a positive number or 0.
-             ; Setting it to 0 will allow unlimited header field sizes.
-             limit_request_field_size = 0
-             ; Timeout for graceful workers restart.
-             ; After receiving a restart signal, workers have this much time to finish
-             ; serving requests. Workers still alive after the timeout (starting from the
-             ; receipt of the restart signal) are force killed.
-             ; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
-             graceful_timeout = 3600
-             # The number of seconds to wait for requests on a Keep-Alive connection.
-             # Generally set in the 1-5 seconds range.
-             keepalive = 2
-             ; Maximum memory usage that each worker can use before it will receive a
-             ; graceful restart signal 0 = memory monitoring is disabled
-             ; Examples: 268435456 (256MB), 536870912 (512MB)
-             ; 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
-             memory_max_usage = 0
-             ; How often in seconds to check for memory usage for each gunicorn worker
-             memory_usage_check_interval = 60
-             ; Threshold value for which we don't recycle worker if GarbageCollection
-             ; frees up enough resources. Before each restart we try to run GC on worker
-             ; in case we get enough free memory after that, restart will not happen.
-             memory_usage_recovery_threshold = 0.8
              [app:main]
              ; The %(here)s variable will be replaced with the absolute path of parent directory
              ; of this file
+             ; Each option in the app:main section can be overridden by an environment variable
+             ;
+             ;To override an option:
+             ;
+             ;RC_<KeyName>
+             ;Everything should be uppercase, . and - should be replaced by _.
+             ;For example, if you have these configuration settings:
+             ;rc_cache.repo_object.backend = foo
+             ;can be overridden by
+             ;export RC_CACHE_REPO_OBJECT_BACKEND=foo
              use = egg:rhodecode-vcsserver
              ; Pyramid default locales, we need this to be set
-             pyramid.default_locale_name = en
+             #pyramid.default_locale_name = en
              ; default locale used by VCS systems
-             locale = en_US.UTF-8
+             #locale = en_US.UTF-8
              ; path to binaries for vcsserver, it should be set by the installer
-             ; at installation time, e.g /home/user/vcsserver-1/profile/bin
+             ; at installation time, e.g /home/user/.rccontrol/vcsserver-1/profile/bin
              ; it can also be a path to nix-build output in case of development
              core.binary_dir = ""
              ; Custom exception store path, defaults to TMPDIR
              ; This is used to store exception from RhodeCode in shared directory
              #exception_tracker.store_path =
              ; #############
              ; DOGPILE CACHE
              ; #############
              ; Default cache dir for caches. Putting this into a ramdisk can boost performance.
              ; eg. /tmpfs/data_ramdisk, however this directory might require large amount of space
-             cache_dir = %(here)s/data
+             #cache_dir = %(here)s/data
              ; ***************************************
              ; `repo_object` cache, default file based
              ; ***************************************
              ; `repo_object` cache settings for vcs methods for repositories
-             rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
+             #rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
              ; cache auto-expires after N seconds
              ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
-             rc_cache.repo_object.expiration_time = 2592000
+             #rc_cache.repo_object.expiration_time = 2592000
              ; file cache store path. Defaults to `cache_dir =` value or tempdir if both values are not set
-             #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache.db
+             #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache_repo_object.db
              ; ***********************************************************
              ; `repo_object` cache with redis backend
              ; recommended for larger instance, and for better performance
              ; ***********************************************************
              ; `repo_object` cache settings for vcs methods for repositories
              #rc_cache.repo_object.backend = dogpile.cache.rc.redis_msgpack
              ; cache auto-expires after N seconds
              ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
              #rc_cache.repo_object.expiration_time = 2592000
              ; redis_expiration_time needs to be greater than expiration_time
              #rc_cache.repo_object.arguments.redis_expiration_time = 3592000
              #rc_cache.repo_object.arguments.host = localhost
              #rc_cache.repo_object.arguments.port = 6379
              #rc_cache.repo_object.arguments.db = 5
              #rc_cache.repo_object.arguments.socket_timeout = 30
              ; more Redis options: https://dogpilecache.sqlalchemy.org/en/latest/api.html#redis-backends
              #rc_cache.repo_object.arguments.distributed_lock = true
              ; auto-renew lock to prevent stale locks, slower but safer. Use only if problems happen
              #rc_cache.repo_object.arguments.lock_auto_renewal = true
-             ; Statsd client config
+             ; Statsd client config, this is used to send metrics to statsd
+             ; We recommend setting up statsd_exporter and scraping the metrics using Prometheus
              #statsd.enabled = false
              #statsd.statsd_host = 0.0.0.0
              #statsd.statsd_port = 8125
              #statsd.statsd_prefix =
              #statsd.statsd_ipv6 = false
+             ; Configure logging automatically at server startup. Set to false
+             ; to use the custom logging config below.
+             ; RC_LOGGING_FORMATTER
+             ; RC_LOGGING_LEVEL
+             ; env variables can control the settings for logging in case of autoconfigure
+             #logging.autoconfigure = true
+             ; specify your own custom logging config file to configure logging
+             #logging.logging_conf_file = /path/to/custom_logging.ini
              ; #####################
              ; LOGGING CONFIGURATION
              ; #####################
              [loggers]
              keys = root, vcsserver
              [handlers]
              keys = console
              [formatters]
-             keys = generic
+             keys = generic, json
              ; #######
              ; LOGGERS
              ; #######
              [logger_root]
              level = NOTSET
              handlers = console
              [logger_vcsserver]
-             level = DEBUG
+             level = INFO
              handlers =
              qualname = vcsserver
              propagate = 1
              ; ########
              ; HANDLERS
              ; ########
              [handler_console]
              class = StreamHandler
              args = (sys.stderr, )
              level = INFO
+             ; To enable JSON formatted logs replace 'generic' with 'json'
+             ; This allows sending properly formatted logs to grafana loki or elasticsearch
              formatter = generic
              ; ##########
              ; FORMATTERS
              ; ##########
              [formatter_generic]
              format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
              datefmt = %Y-%m-%d %H:%M:%S
+             [formatter_json]
+             format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
+             class = vcsserver.lib._vendor.jsonlogger.JsonFormatter

conftest.py ~~vcsserver/tests/conftest.py~~

0 renamed +4 -5

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              import socket
              import pytest
              def pytest_addoption(parser):
                  parser.addoption(
-                     '--repeat', type=int, default=100,
+                     '--perf-repeat-vcs', type=int, default=100,
                      help="Number of repetitions in performance tests.")
              @pytest.fixture(scope='session')
              def repeat(request):
                  """
                  The number of repetitions is based on this fixture.
                  Slower calls may divide it by 10 or 100. It is chosen in a way so that the
                  tests are not too slow in our default test suite.
                  """
-                 return request.config.getoption('--repeat')
+                 return request.config.getoption('--perf-repeat-vcs')
              @pytest.fixture(scope='session')
              def vcsserver_port(request):
                  port = get_available_port()
-                 print('Using vcsserver port %s' % (port, ))
+                 print(f'Using vcsserver port {port}')
                  return port
              def get_available_port():
                  """
                  Return a TCP port number that was free on 127.0.0.1 at the time of the call.

                  A socket is bound to port 0 so that a port gets assigned automatically, the
                  assigned number is read back and the socket is closed again. The port is not
                  reserved after this function returns.
                  """
                  host = '127.0.0.1'
                  # the context manager closes the socket even when bind()/getsockname() raises
                  with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
                      probe.bind((host, 0))
                      return probe.getsockname()[1]

requirements.txt

0 +69 -40

		@@ -1,48 +1,77 b''
1		## dependencies
2
3		# our custom configobj
4		https://code.rhodecode.com/upstream/configobj/artifacts/download/0-012de99a-b1e1-4f64-a5c0-07a98a41b324.tar.gz?md5=6a513f51fe04b2c18cf84c1395a7c626#egg=configobj==5.0.6
5
6		dogpile.cache==0.9.0
7		dogpile.core==0.4.1
8		decorator==4.1.2
9		dulwich==0.13.0
10		hgsubversion==1.9.3
11		hg-evolve==9.1.0
12		mako==1.1.0
13		markupsafe==1.1.1
14		mercurial==5.1.1
15		msgpack-python==0.5.6
16
17		pastedeploy==2.1.0
18		pyramid==1.10.4
19		pyramid-mako==1.1.0
20		pygit2==0.28.2
	1	# deps, generated via pipdeptree --exclude setuptools,wheel,pipdeptree,pip -f \| tr '[:upper:]' '[:lower:]'
21	2
	3	async-timeout==4.0.3
	4	atomicwrites==1.4.1
	5	celery==5.3.6
	6	billiard==4.2.0
	7	click==8.1.3
	8	click-didyoumean==0.3.0
	9	click==8.1.3
	10	click-plugins==1.1.1
	11	click==8.1.3
	12	click-repl==0.2.0
	13	click==8.1.3
	14	prompt-toolkit==3.0.38
	15	wcwidth==0.2.6
	16	six==1.16.0
	17	kombu==5.3.5
	18	amqp==5.2.0
	19	vine==5.1.0
	20	vine==5.1.0
	21	python-dateutil==2.8.2
	22	six==1.16.0
	23	tzdata==2023.4
	24	vine==5.1.0
	25	contextlib2==21.6.0
	26	cov-core==1.15.0
	27	coverage==7.2.3
	28	diskcache==5.6.3
	29	dogpile.cache==1.3.0
	30	decorator==5.1.1
	31	stevedore==5.1.0
	32	pbr==5.11.1
	33	dulwich==0.21.6
	34	urllib3==1.26.14
	35	gunicorn==21.2.0
	36	packaging==23.1
	37	hg-evolve==11.0.2
	38	importlib-metadata==6.0.0
	39	zipp==3.15.0
	40	mercurial==6.3.3
	41	mock==5.0.2
	42	more-itertools==9.1.0
	43	msgpack==1.0.7
	44	orjson==3.9.13
	45	psutil==5.9.8
	46	py==1.11.0
	47	pygit2==1.13.3
	48	cffi==1.16.0
	49	pycparser==2.21
	50	pygments==2.15.1
	51	pyparsing==3.1.1
	52	pyramid==2.0.2
	53	hupper==1.12
	54	plaster==1.1.2
	55	plaster-pastedeploy==1.0.1
	56	pastedeploy==3.1.0
	57	plaster==1.1.2
	58	translationstring==1.4
	59	venusian==3.0.0
	60	webob==1.8.7
	61	zope.deprecation==5.0.0
	62	zope.interface==6.1.0
	63	redis==5.0.1
	64	async-timeout==4.0.3
22	65	repoze.lru==0.7
23		redis==3.5.3
24		simplejson==3.16.0
25		subprocess32==3.5.4
26		subvertpy==0.10.1
	66	scandir==1.10.0
	67	setproctitle==1.3.3
	68	subvertpy==0.11.0
	69	waitress==3.0.0
	70	wcwidth==0.2.6
27	71
28		six==1.11.0
29		translationstring==1.3
30		webob==1.8.5
31		zope.deprecation==4.4.0
32		zope.interface==4.6.0
33
34		## http servers
35		gevent==1.5.0
36		greenlet==0.4.15
37		gunicorn==19.9.0
38		waitress==1.3.1
39
40		## debug
41		ipdb==0.13.2
42		ipython==5.1.0
43	72
44	73	## test related requirements
45		-r requirements_test.txt
	74	#-r requirements_test.txt
46	75
47	76	## uncomment to add the debug libraries
48	77	#-r requirements_debug.txt

requirements_debug.txt

0 +23 -3

		@@ -1,8 +1,28 b''
1	1	## special libraries we could extend the requirements.txt file with to add some
2		## custom libraries useful for debug and memory tracing
3
4		## uncomment inclusion of this file in requirements.txt run make generate-pkgs and nix-shell
	2	## custom libraries useful for debug and memory tracing
5	3
6	4	objgraph
7	5	memory-profiler
8	6	pympler
	7
	8	## debug
	9	ipdb
	10	ipython
	11	rich
	12
	13	# format
	14	flake8
	15	ruff
	16
	17	pipdeptree==2.7.1
	18	invoke==2.0.0
	19	bumpversion==0.6.0
	20	bump2version==1.0.1
	21
	22	docutils-stubs
	23	types-redis
	24	types-requests==2.31.0.6
	25	types-sqlalchemy
	26	types-psutil
	27	types-pycurl
	28	types-ujson

requirements_test.txt

0 +42 -13

		@@ -1,16 +1,45 b''
1	1	# test related requirements
2		pytest==4.6.5
3		py==1.8.0
4		pytest-cov==2.7.1
5		pytest-sugar==0.9.2
6		pytest-runner==5.1.0
	2
	3	cov-core==1.15.0
	4	coverage==7.2.3
	5	mock==5.0.2
	6	py==1.11.0
	7	pytest-cov==4.0.0
	8	coverage==7.2.3
	9	pytest==7.3.1
	10	attrs==22.2.0
	11	iniconfig==2.0.0
	12	packaging==23.1
	13	pluggy==1.0.0
7	14	pytest-profiling==1.7.0
8		pytest-timeout==1.3.3
9		gprof2dot==2017.9.19
	15	gprof2dot==2022.7.29
	16	pytest==7.3.1
	17	attrs==22.2.0
	18	iniconfig==2.0.0
	19	packaging==23.1
	20	pluggy==1.0.0
	21	six==1.16.0
	22	pytest-runner==6.0.0
	23	pytest-sugar==0.9.7
	24	packaging==23.1
	25	pytest==7.3.1
	26	attrs==22.2.0
	27	iniconfig==2.0.0
	28	packaging==23.1
	29	pluggy==1.0.0
	30	termcolor==2.3.0
	31	pytest-timeout==2.1.0
	32	pytest==7.3.1
	33	attrs==22.2.0
	34	iniconfig==2.0.0
	35	packaging==23.1
	36	pluggy==1.0.0
	37	webtest==3.0.0
	38	beautifulsoup4==4.11.2
	39	soupsieve==2.4
	40	waitress==3.0.0
	41	webob==1.8.7
10	42
11		mock==3.0.5
12		cov-core==1.15.0
13		coverage==4.5.4
14
15		webtest==2.0.34
16		beautifulsoup4==4.6.3
	43	# RhodeCode test-data
	44	rc_testdata @ https://code.rhodecode.com/upstream/rc-testdata-dist/raw/77378e9097f700b4c1b9391b56199fe63566b5c9/rc_testdata-0.11.0.tar.gz#egg=rc_testdata
	45	rc_testdata==0.11.0

vcsserver/VERSION

0 +1 -1

		@@ -1,1 +1,1 b''
1		4.27.1 No newline at end of file
	1	5.0.0 No newline at end of file

vcsserver/__init__.py

0 +16 -3

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
-             import pkgutil
+             import os
+             __version__ = ''
-             __version__ = pkgutil.get_data('vcsserver', 'VERSION').strip()
+             def get_version():
+                 global __version__
+                 if __version__:
+                     return __version__
+                 here = os.path.abspath(os.path.dirname(__file__))
+                 ver_file = os.path.join(here, "VERSION")
+                 with open(ver_file, "rt") as f:
+                     version = f.read().strip()
+                 __version__ = version
+                 return version
              # link to config for pyramid
              CONFIG = {}
              # Populated with the settings dictionary from application init in
              #
              PYRAMID_SETTINGS = {}

vcsserver/base.py

0 +92 -29

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              import os
              import sys
-             import traceback
+             import tempfile
              import logging
-             import urlparse
+             import urllib.parse
+             from vcsserver.lib.rc_cache.archive_cache import get_archival_cache_store
              from vcsserver import exceptions
              from vcsserver.exceptions import NoContentException
-             from vcsserver.hgcompat import (archival)
+             from vcsserver.hgcompat import archival
+             from vcsserver.str_utils import safe_bytes
+             from vcsserver.lib.exc_tracking import format_exc
              log = logging.getLogger(__name__)
-             class RepoFactory(object):
+             class RepoFactory:
                  """
                  Utility to create instances of repository
                  It provides internal caching of the `repo` object based on
                  the :term:`call context`.
                  """
                  repo_type = None
                  def __init__(self):
                      pass
                  def _create_config(self, path, config):
                      config = {}
                      return config
                  def _create_repo(self, wire, create):
                      raise NotImplementedError()
                  def repo(self, wire, create=False):
                      """Not implemented here; always raises NotImplementedError."""
                      raise NotImplementedError
              def obfuscate_qs(query_string):
                  if query_string is None:
                      return None
                  parsed = []
-                 for k, v in urlparse.parse_qsl(query_string, keep_blank_values=True):
+                 for k, v in urllib.parse.parse_qsl(query_string, keep_blank_values=True):
                      if k in ['auth_token', 'api_key']:
                          v = "*****"
                      parsed.append((k, v))
                  return '&'.join('{}{}'.format(
-                     k, '={}'.format(v) if v else '') for k, v in parsed)
+                     k, f'={v}' if v else '') for k, v in parsed)
-             def raise_from_original(new_type):
+             def raise_from_original(new_type, org_exc: Exception):
                  """
                  Raise a new exception type with original args and traceback.
                  """
-                 exc_type, exc_value, exc_traceback = sys.exc_info()
+                 exc_info = sys.exc_info()
+                 exc_type, exc_value, exc_traceback = exc_info
                  new_exc = new_type(*exc_value.args)
                  # store the original traceback into the new exc
-                 new_exc._org_exc_tb = traceback.format_exc(exc_traceback)
+                 new_exc._org_exc_tb = format_exc(exc_info)
                  try:
-                     raise new_exc, None, exc_traceback
+                     raise new_exc.with_traceback(exc_traceback)
                  finally:
                      del exc_traceback
-             class ArchiveNode(object):
+             class ArchiveNode:
                  def __init__(self, path, mode, is_link, raw_bytes):
                      self.path = path
                      self.mode = mode
                      self.is_link = is_link
                      self.raw_bytes = raw_bytes
-             def archive_repo(walker, archive_dest_path, kind, mtime, archive_at_path,
-                              archive_dir_name, commit_id, write_metadata=True, extra_metadata=None):
+             def store_archive_in_cache(node_walker, archive_key, kind, mtime, archive_at_path, archive_dir_name,
+                                        commit_id, write_metadata=True, extra_metadata=None, cache_config=None):
                  """
-                 walker should be a file walker, for example:
-                     def walker():
+                 Function that generates an archive and stores it in a dedicated backend store
+                 In here we use diskcache
+                 :param node_walker: a generator returning nodes to add to archive
+                 :param archive_key: key used to store the path
+                 :param kind: archive kind
+                 :param mtime: time of creation
+                 :param archive_at_path: default '/' the path at archive was started.
+                     If this is not '/' it means it's a partial archive
+                 :param archive_dir_name: inside dir name when creating an archive
+                 :param commit_id: commit sha of revision archive was created at
+                 :param write_metadata:
+                 :param extra_metadata:
+                 :param cache_config:
+                 walker should be a file walker, for example,
+                     def node_walker():
                          for file_info in files:
                              yield ArchiveNode(fn, mode, is_link, ctx[fn].data)
                  """
                  extra_metadata = extra_metadata or {}
+                 d_cache = get_archival_cache_store(config=cache_config)
+                 if archive_key in d_cache:
+                     with d_cache as d_cache_reader:
+                         reader, tag = d_cache_reader.get(archive_key, read=True, tag=True, retry=True)
+                         return reader.name
+                 archive_tmp_path = safe_bytes(tempfile.mkstemp()[1])
+                 log.debug('Creating new temp archive in %s', archive_tmp_path)
                  if kind == "tgz":
-                     archiver = archival.tarit(archive_dest_path, mtime, "gz")
+                     archiver = archival.tarit(archive_tmp_path, mtime, b"gz")
                  elif kind == "tbz2":
-                     archiver = archival.tarit(archive_dest_path, mtime, "bz2")
+                     archiver = archival.tarit(archive_tmp_path, mtime, b"bz2")
                  elif kind == 'zip':
-                     archiver = archival.zipit(archive_dest_path, mtime)
+                     archiver = archival.zipit(archive_tmp_path, mtime)
                  else:
                      raise exceptions.ArchiveException()(
-                         'Remote does not support: "%s" archive type.' % kind)
+                         f'Remote does not support: "{kind}" archive type.')
-                 for f in walker(commit_id, archive_at_path):
-                     f_path = os.path.join(archive_dir_name, f.path.lstrip('/'))
+                 for f in node_walker(commit_id, archive_at_path):
+                     f_path = os.path.join(safe_bytes(archive_dir_name), safe_bytes(f.path).lstrip(b'/'))
                      try:
                          archiver.addfile(f_path, f.mode, f.is_link, f.raw_bytes())
                      except NoContentException:
                          # NOTE(marcink): this is a special case for SVN so we can create "empty"
-                         # directories which arent supported by archiver
-                         archiver.addfile(os.path.join(f_path, '.dir'), f.mode, f.is_link, '')
+                         # directories which are not supported by archiver
+                         archiver.addfile(os.path.join(f_path, b'.dir'), f.mode, f.is_link, b'')
                  if write_metadata:
                      metadata = dict([
                          ('commit_id', commit_id),
                          ('mtime', mtime),
                      ])
                      metadata.update(extra_metadata)
-                     meta = ["%s:%s" % (f_name, value) for f_name, value in metadata.items()]
-                     f_path = os.path.join(archive_dir_name, '.archival.txt')
-                     archiver.addfile(f_path, 0o644, False, '\n'.join(meta))
+                     meta = [safe_bytes(f"{f_name}:{value}") for f_name, value in metadata.items()]
+                     f_path = os.path.join(safe_bytes(archive_dir_name), b'.archival.txt')
+                     archiver.addfile(f_path, 0o644, False, b'\n'.join(meta))
+                 archiver.done()
+                 # ensure set & get are atomic
+                 with d_cache.transact():
+                     with open(archive_tmp_path, 'rb') as archive_file:
+                         add_result = d_cache.set(archive_key, archive_file, read=True, tag='db-name', retry=True)
+                         if not add_result:
+                             log.error('Failed to store cache for key=%s', archive_key)
+                     os.remove(archive_tmp_path)
-                 return archiver.done()
+                     reader, tag = d_cache.get(archive_key, read=True, tag=True, retry=True)
+                     if not reader:
+                         raise AssertionError(f'empty reader on key={archive_key} added={add_result}')
+                     return reader.name
+             class BinaryEnvelope:
+                 def __init__(self, val):
+                     self.val = val
+             class BytesEnvelope(bytes):
+                 def __new__(cls, content):
+                     if isinstance(content, bytes):
+                         return super().__new__(cls, content)
+                     else:
+                         raise TypeError('BytesEnvelope content= param must be bytes. Use BinaryEnvelope to wrap other types')
+             class BinaryBytesEnvelope(BytesEnvelope):
+                 pass

vcsserver/echo_stub/__init__.py

0 +2 0

+             # Copyright (C) 2014-2023 RhodeCode GmbH
              """
              Provides a stub implementation for VCS operations.
              Intended usage is to help in performance measurements. The basic idea is to
              implement an `EchoApp` which sends back what it gets. Based on a configuration
              parameter this app can be activated, so that it replaced the endpoints for Git
              and Mercurial.
              """

vcsserver/echo_stub/echo_app.py

0 +7 -5

+             # Copyright (C) 2014-2023 RhodeCode GmbH
              """
              Implementation of :class:`EchoApp`.
              This WSGI application will just echo back the data which it receives.
              """
              import logging
              log = logging.getLogger(__name__)
-             class EchoApp(object):
+             class EchoApp:
                  def __init__(self, repo_path, repo_name, config):
                      """
                      Remember `repo_path` and log the initialization.
                      `repo_name` and `config` are accepted but not used here.
                      """
                      log.info("EchoApp initialized for %s", repo_path)
                      self._repo_path = repo_path
                  def __call__(self, environ, start_response):
                      log.debug("EchoApp called for %s", self._repo_path)
                      log.debug("Content-Length: %s", environ.get('CONTENT_LENGTH'))
                      environ['wsgi.input'].read()
                      status = '200 OK'
                      headers = [('Content-Type', 'text/plain')]
                      start_response(status, headers)
-                     return ["ECHO"]
+                     return [b"ECHO"]
-             class EchoAppStream(object):
+             class EchoAppStream:
                  def __init__(self, repo_path, repo_name, config):
                      """
                      Remember `repo_path` and log the initialization.
                      `repo_name` and `config` are accepted but not used here.
                      """
                      log.info("EchoApp initialized for %s", repo_path)
                      self._repo_path = repo_path
                  def __call__(self, environ, start_response):
                      log.debug("EchoApp called for %s", self._repo_path)
                      log.debug("Content-Length: %s", environ.get('CONTENT_LENGTH'))
                      environ['wsgi.input'].read()
                      status = '200 OK'
                      headers = [('Content-Type', 'text/plain')]
                      start_response(status, headers)
                      def generator():
-                         for _ in xrange(1000000):
-                             yield "ECHO"
+                         for _ in range(1000000):
+                             yield b"ECHO_STREAM"
                      return generator()
              def create_app():
                  """
                  Build an `EchoApp` with stub path, stub name and an empty config,
                  so this app can be run directly in a WSGI server.
                  """
                  return EchoApp('stub_path', 'stub_name', {})

vcsserver/echo_stub/remote_wsgi.py

0 +4 -2

+             # Copyright (C) 2014-2023 RhodeCode GmbH
              """
              Provides the same API as :mod:`remote_wsgi`.
              Uses the `EchoApp` instead of real implementations.
              """
              import logging
              from .echo_app import EchoApp
              from vcsserver import wsgi_app_caller
              log = logging.getLogger(__name__)
-             class GitRemoteWsgi(object):
+             class GitRemoteWsgi:
                  def handle(self, environ, input_data, *args, **kwargs):
                      """
                      Build an echo WSGI app from the extra arguments and run the request
                      (`environ`, `input_data`) through it via `WSGIAppCaller`.
                      """
                      echo_app = create_echo_wsgi_app(*args, **kwargs)
                      caller = wsgi_app_caller.WSGIAppCaller(echo_app)
                      return caller.handle(environ, input_data)
-             class HgRemoteWsgi(object):
+             class HgRemoteWsgi:
                  def handle(self, environ, input_data, *args, **kwargs):
                      """
                      Build an echo WSGI app from the extra arguments and run the request
                      (`environ`, `input_data`) through it via `WSGIAppCaller`.
                      """
                      echo_app = create_echo_wsgi_app(*args, **kwargs)
                      caller = wsgi_app_caller.WSGIAppCaller(echo_app)
                      return caller.handle(environ, input_data)
              def create_echo_wsgi_app(repo_path, repo_name, config):
                  """
                  Validate `config` and return an `EchoApp` for the given repository.
                  Raises whatever `_assert_valid_config` raises for an invalid config.
                  """
                  log.debug("Creating EchoApp WSGI application")
                  _assert_valid_config(config)
                  # The full config is passed on so the extras stay available
                  echo_app = EchoApp(repo_path, repo_name, config=config)
                  return echo_app
              def _assert_valid_config(config):
                  config = config.copy()
                  # This is what git needs from config at this stage
                  config.pop('git_update_server_info')

vcsserver/exceptions.py

0 +2 -2

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              """
              Special exception handling over the wire.
              Since we cannot assume that our client is able to import our exception classes,
              this module provides a "wrapping" mechanism to raise plain exceptions
              which contain an extra attribute `_vcs_kind` to allow a client to distinguish
              different error conditions.
              """
              from pyramid.httpexceptions import HTTPLocked, HTTPForbidden
              def _make_exception(kind, org_exc, *args):
                  """
                  Prepares a base `Exception` instance to be sent over the wire.
                  To give our caller a hint what this is about, it will attach an attribute
                  `_vcs_kind` to the exception.
                  """
                  exc = Exception(*args)
                  exc._vcs_kind = kind
                  exc._org_exc = org_exc
                  exc._org_exc_tb = getattr(org_exc, '_org_exc_tb', '')
                  return exc
              def AbortException(org_exc=None):
                  """
                  Return a factory for wire exceptions of kind 'abort' bound to `org_exc`.
                  Positional arguments given to the factory become the exception args.
                  """
                  kind = 'abort'
                  def _make_exception_wrapper(*args):
                      return _make_exception(kind, org_exc, *args)
                  return _make_exception_wrapper
              def ArchiveException(org_exc=None):
                  """
                  Return a factory for wire exceptions of kind 'archive' bound to `org_exc`.
                  Positional arguments given to the factory become the exception args.
                  """
                  kind = 'archive'
                  def _make_exception_wrapper(*args):
                      return _make_exception(kind, org_exc, *args)
                  return _make_exception_wrapper
              def LookupException(org_exc=None):
                  """
                  Return a factory for wire exceptions of kind 'lookup' bound to `org_exc`.
                  Positional arguments given to the factory become the exception args.
                  """
                  kind = 'lookup'
                  def _make_exception_wrapper(*args):
                      return _make_exception(kind, org_exc, *args)
                  return _make_exception_wrapper
              def VcsException(org_exc=None):
                  """
                  Return a factory for wire exceptions of kind 'error' bound to `org_exc`.
                  Positional arguments given to the factory become the exception args.
                  """
                  kind = 'error'
                  def _make_exception_wrapper(*args):
                      return _make_exception(kind, org_exc, *args)
                  return _make_exception_wrapper
              def RepositoryLockedException(org_exc=None):
                  """
                  Return a factory for wire exceptions of kind 'repo_locked' bound to `org_exc`.
                  Positional arguments given to the factory become the exception args.
                  """
                  kind = 'repo_locked'
                  def _make_exception_wrapper(*args):
                      return _make_exception(kind, org_exc, *args)
                  return _make_exception_wrapper
              def RepositoryBranchProtectedException(org_exc=None):
                  """
                  Return a factory for wire exceptions of kind 'repo_branch_protected'
                  bound to `org_exc`.
                  Positional arguments given to the factory become the exception args.
                  """
                  kind = 'repo_branch_protected'
                  def _make_exception_wrapper(*args):
                      return _make_exception(kind, org_exc, *args)
                  return _make_exception_wrapper
              def RequirementException(org_exc=None):
                  """
                  Return a factory for wire exceptions of kind 'requirement' bound to `org_exc`.
                  Positional arguments given to the factory become the exception args.
                  """
                  kind = 'requirement'
                  def _make_exception_wrapper(*args):
                      return _make_exception(kind, org_exc, *args)
                  return _make_exception_wrapper
              def UnhandledException(org_exc=None):
                  """
                  Return a factory for wire exceptions of kind 'unhandled' bound to `org_exc`.
                  Positional arguments given to the factory become the exception args.
                  """
                  kind = 'unhandled'
                  def _make_exception_wrapper(*args):
                      return _make_exception(kind, org_exc, *args)
                  return _make_exception_wrapper
              def URLError(org_exc=None):
                  """
                  Return a factory for wire exceptions of kind 'url_error' bound to `org_exc`.
                  Positional arguments given to the factory become the exception args.
                  """
                  kind = 'url_error'
                  def _make_exception_wrapper(*args):
                      return _make_exception(kind, org_exc, *args)
                  return _make_exception_wrapper
              def SubrepoMergeException(org_exc=None):
                  """
                  Return a factory for wire exceptions of kind 'subrepo_merge_error'
                  bound to `org_exc`.
                  Positional arguments given to the factory become the exception args.
                  """
                  kind = 'subrepo_merge_error'
                  def _make_exception_wrapper(*args):
                      return _make_exception(kind, org_exc, *args)
                  return _make_exception_wrapper
              class HTTPRepoLocked(HTTPLocked):
                  """
                  Subclass of HTTPLocked response that allows to set the title and status
                  code via constructor arguments.
                  """
                  def __init__(self, title, status_code=None, **kwargs):
                      self.code = status_code or HTTPLocked.code
                      self.title = title
-                     super(HTTPRepoLocked, self).__init__(**kwargs)
+                     super().__init__(**kwargs)
              class HTTPRepoBranchProtected(HTTPForbidden):
                  """
                  Subclass of HTTPForbidden whose constructor forwards all positional
                  and keyword arguments unchanged.
                  """
                  def __init__(self, *args, **kwargs):
                      # NOTE(review): super(HTTPForbidden, self) starts the method lookup
                      # *after* HTTPForbidden in the MRO, so HTTPForbidden.__init__ itself
                      # is not run. Unclear whether this is intentional - confirm before
                      # switching to a plain super().__init__().
                      super(HTTPForbidden, self).__init__(*args, **kwargs)
              class RefNotFoundException(KeyError):
                  """KeyError subclass; adds no behavior of its own."""
              class NoContentException(ValueError):
                  """ValueError subclass; adds no behavior of its own."""

vcsserver/git_lfs/__init__.py

0 +2 -2

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
-             from app import create_app
+             from .app import create_app  # noqa

vcsserver/git_lfs/app.py

0 +13 -9

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              import re
              import logging
-             from wsgiref.util import FileWrapper
-             import simplejson as json
              from pyramid.config import Configurator
              from pyramid.response import Response, FileIter
              from pyramid.httpexceptions import (
                  HTTPBadRequest, HTTPNotImplemented, HTTPNotFound, HTTPForbidden,
                  HTTPUnprocessableEntity)
+             from vcsserver.lib.rc_json import json
              from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
              from vcsserver.git_lfs.utils import safe_result, get_cython_compat_decorator
-             from vcsserver.utils import safe_int
+             from vcsserver.str_utils import safe_int
              log = logging.getLogger(__name__)
              GIT_LFS_CONTENT_TYPE = 'application/vnd.git-lfs' #+json ?
              GIT_LFS_PROTO_PAT = re.compile(r'^/(.+)/(info/lfs/(.+))')
              def write_response_error(http_exception, text=None):
                  """
                  Instantiate `http_exception` with the git-lfs JSON content type and
                  return the instance (it is not raised here).
                  When `text` is truthy, the body is set to the JSON dump of
                  ``{'message': text}``.
                  """
                  lfs_json_type = GIT_LFS_CONTENT_TYPE + '+json'
                  error_response = http_exception(content_type=lfs_json_type)
                  # content type is set once more on the created instance
                  error_response.content_type = lfs_json_type
                  if text:
                      error_response.body = json.dumps({'message': text})
                  log.debug('LFS: writing response of type %s to client with text:%s',
                            http_exception, text)
                  return error_response
-             class AuthHeaderRequired(object):
+             class AuthHeaderRequired:
                  """
                  Decorator to check if request has proper auth-header
                  """
                  def __call__(self, func):
                      """
                      Decorate `func`: hand it together with this instance's `__wrapper`
                      to `get_cython_compat_decorator` and return the result.
                      """
                      auth_check = self.__wrapper
                      return get_cython_compat_decorator(auth_check, func)
                  def __wrapper(self, func, *fargs, **fkwargs):
                      """
                      Call `func` only when the request carries an authorization value.
                      The request is read from ``fargs[1]``; `func` is then invoked with
                      ``fargs[1:]`` (the first positional argument is dropped). Without
                      authorization an HTTPForbidden error response is returned instead.
                      """
                      request = fargs[1]
                      if request.authorization:
                          return func(*fargs[1:], **fkwargs)
                      return write_response_error(HTTPForbidden)
              # views
              def lfs_objects(request):
                  """
                  View for the LFS v1 API, which is not supported: logs a warning and
                  returns an HTTPNotImplemented error response. `request` is not used.
                  """
                  log.warning('LFS: v1 api not supported, reporting it back to client')
                  not_supported = write_response_error(
                      HTTPNotImplemented, 'LFS: v1 api not supported')
                  return not_supported
              @AuthHeaderRequired()
              def lfs_objects_batch(request):
                  """
                  The client sends the following information to the Batch endpoint to transfer some objects:
                      operation - Should be download or upload.
                      transfers - An optional Array of String identifiers for transfer
                          adapters that the client has configured. If omitted, the basic
                          transfer adapter MUST be assumed by the server.
                      objects - An Array of objects to download.
                      oid - String OID of the LFS object.
                      size - Integer byte size of the LFS object. Must be at least zero.
                  """
                  request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
                  auth = request.authorization
                  repo = request.matchdict.get('repo')
                  data = request.json
                  operation = data.get('operation')
                  http_scheme = request.registry.git_lfs_http_scheme
                  if operation not in ('download', 'upload'):
                      log.debug('LFS: unsupported operation:%s', operation)
                      return write_response_error(
-                         HTTPBadRequest, 'unsupported operation mode: `%s`' % operation)
+                         HTTPBadRequest, f'unsupported operation mode: `{operation}`')
                  if 'objects' not in data:
                      log.debug('LFS: missing objects data')
                      return write_response_error(
                          HTTPBadRequest, 'missing objects data')
                  log.debug('LFS: handling operation of type: %s', operation)
                  objects = []
                  for o in data['objects']:
                      try:
                          oid = o['oid']
                          obj_size = o['size']
                      except KeyError:
                          log.exception('LFS, failed to extract data')
                          return write_response_error(
                              HTTPBadRequest, 'unsupported data in objects')
                      obj_data = {'oid': oid}
+                     if http_scheme == 'http':
+                         # Note(marcink): when using http, we might have a custom port
+                         # so we skip setting it to http, url dispatch then won't generate a port in URL
+                         # for development we need this
+                         http_scheme = None
-                     obj_href = request.route_url('lfs_objects_oid', repo=repo, oid=oid,
                                                   _scheme=http_scheme)
                      obj_verify_href = request.route_url('lfs_objects_verify', repo=repo,
                                                          _scheme=http_scheme)
                      store = LFSOidStore(
                          oid, repo, store_location=request.registry.git_lfs_store_path)
                      handler = OidHandler(
                          store, repo, auth, oid, obj_size, obj_data,
                          obj_href, obj_verify_href)
                      # this verifies also OIDs
                      actions, errors = handler.exec_operation(operation)
                      if errors:
                          log.warning('LFS: got following errors: %s', errors)
                          obj_data['errors'] = errors
                      if actions:
                          obj_data['actions'] = actions
                      obj_data['size'] = obj_size
                      obj_data['authenticated'] = True
                      objects.append(obj_data)
                  result = {'objects': objects, 'transfer': 'basic'}
                  log.debug('LFS Response %s', safe_result(result))
                  return result
              def lfs_objects_oid_upload(request):
                  """
                  Stream the request body into the LFS store.
                  The `repo` and `oid` come from the route match dict; the body is read
                  from ``request.environ['wsgi.input']`` in 64kb chunks and written to
                  the store engine opened in 'wb' mode. Returns ``{'upload': 'ok'}``
                  once the input is exhausted.
                  """
                  request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
                  repo = request.matchdict.get('repo')
                  oid = request.matchdict.get('oid')
                  store = LFSOidStore(
                      oid, repo, store_location=request.registry.git_lfs_store_path)
                  engine = store.get_engine(mode='wb')
                  log.debug('LFS: starting chunked write of LFS oid: %s to storage', oid)
                  body = request.environ['wsgi.input']
                  blksize = 64 * 1024  # 64kb
                  with engine as f:
                      # read in chunks as stream comes in from Gunicorn
                      # this is a specific Gunicorn support function.
                      # might work differently on waitress
                      chunk = body.read(blksize)
                      while chunk:
                          f.write(chunk)
                          chunk = body.read(blksize)
                  return {'upload': 'ok'}
              def lfs_objects_oid_download(request):
                  repo = request.matchdict.get('repo')
                  oid = request.matchdict.get('oid')
                  store = LFSOidStore(
                      oid, repo, store_location=request.registry.git_lfs_store_path)
                  if not store.has_oid():
                      log.debug('LFS: oid %s does not exists in store', oid)
                      return write_response_error(
-                         HTTPNotFound, 'requested file with oid `%s` not found in store' % oid)
+                         HTTPNotFound, f'requested file with oid `{oid}` not found in store')
                  # TODO(marcink): support range header ?
                  # Range: bytes=0-, `bytes=(\d+)\-.*`
                  f = open(store.oid_path, 'rb')
                  response = Response(
                      content_type='application/octet-stream', app_iter=FileIter(f))
                  response.headers.add('X-RC-LFS-Response-Oid', str(oid))
                  return response
              def lfs_objects_verify(request):
                  request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
                  repo = request.matchdict.get('repo')
                  data = request.json
                  oid = data.get('oid')
                  size = safe_int(data.get('size'))
                  if not (oid and size):
                      return write_response_error(
                          HTTPBadRequest, 'missing oid and size in request data')
                  store = LFSOidStore(
                      oid, repo, store_location=request.registry.git_lfs_store_path)
                  if not store.has_oid():
                      log.debug('LFS: oid %s does not exists in store', oid)
                      return write_response_error(
-                         HTTPNotFound, 'oid `%s` does not exists in store' % oid)
+                         HTTPNotFound, f'oid `{oid}` does not exists in store')
                  store_size = store.size_oid()
                  if store_size != size:
-                     msg = 'requested file size mismatch store size:%s requested:%s' % (
+                     msg = 'requested file size mismatch store size:{} requested:{}'.format(
                          store_size, size)
                      return write_response_error(
                          HTTPUnprocessableEntity, msg)
                  return {'message': {'size': 'ok', 'in_store': 'ok'}}
              def lfs_objects_lock(request):
                  """Answer any locking API call with an ``HTTPNotImplemented`` error response."""
                  message = 'GIT LFS locking api not supported'
                  return write_response_error(HTTPNotImplemented, message)
              def not_found(request):
                  """Answer with an ``HTTPNotFound`` error response for an unknown request path."""
                  message = 'request path not found'
                  return write_response_error(HTTPNotFound, message)
              def lfs_disabled(request):
                  """Answer with an ``HTTPNotImplemented`` error response stating LFS is disabled."""
                  message = 'GIT LFS disabled for this repo'
                  return write_response_error(HTTPNotImplemented, message)
              def git_lfs_app(config):
                  """
                  Register the Git LFS routes and their JSON views on ``config``.

                  Routes are added in the order of the table below, each one followed
                  directly by its views. A JSON not-found view is registered last.

                  :param config: object providing ``add_route``, ``add_view`` and
                      ``add_notfound_view``
                  """
                  post_or_get = ('POST', 'GET')

                  # (route name, url pattern, ((view callable, request method), ...))
                  route_table = (
                      # v1 API deprecation endpoint
                      ('lfs_objects',
                       '/{repo:.*?[^/]}/info/lfs/objects',
                       ((lfs_objects, 'POST'),)),
                      # locking API
                      ('lfs_objects_lock',
                       '/{repo:.*?[^/]}/info/lfs/locks',
                       ((lfs_objects_lock, post_or_get),)),
                      ('lfs_objects_lock_verify',
                       '/{repo:.*?[^/]}/info/lfs/locks/verify',
                       ((lfs_objects_lock, post_or_get),)),
                      # batch API
                      ('lfs_objects_batch',
                       '/{repo:.*?[^/]}/info/lfs/objects/batch',
                       ((lfs_objects_batch, 'POST'),)),
                      # oid upload/download API
                      ('lfs_objects_oid',
                       '/{repo:.*?[^/]}/info/lfs/objects/{oid}',
                       ((lfs_objects_oid_upload, 'PUT'),
                        (lfs_objects_oid_download, 'GET'))),
                      # verification API
                      ('lfs_objects_verify',
                       '/{repo:.*?[^/]}/info/lfs/verify',
                       ((lfs_objects_verify, 'POST'),)),
                  )

                  for route_name, pattern, views in route_table:
                      config.add_route(route_name, pattern)
                      for view, method in views:
                          config.add_view(view, route_name=route_name,
                                          request_method=method, renderer='json')

                  # not found handler for API
                  config.add_notfound_view(not_found, renderer='json')
              def create_app(git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme):
                  """
                  Build the WSGI application serving the Git LFS API.

                  :param git_lfs_enabled: when truthy the LFS routes are included, otherwise
                      every request is answered by the ``lfs_disabled`` not-found view
                  :param git_lfs_store_path: value stored on the registry as ``git_lfs_store_path``
                  :param git_lfs_http_scheme: value stored on the registry as ``git_lfs_http_scheme``
                  :return: the application produced by ``Configurator.make_wsgi_app()``
                  """
                  config = Configurator()

                  if not git_lfs_enabled:
                      # not found handler for API, reporting disabled LFS support
                      config.add_notfound_view(lfs_disabled, renderer='json')
                      return config.make_wsgi_app()

                  config.include(git_lfs_app)
                  registry = config.registry
                  registry.git_lfs_store_path = git_lfs_store_path
                  registry.git_lfs_http_scheme = git_lfs_http_scheme
                  return config.make_wsgi_app()

vcsserver/git_lfs/lib.py

0 +8 -6

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              import os
              import shutil
              import logging
              from collections import OrderedDict
              log = logging.getLogger(__name__)
-             class OidHandler(object):
+             class OidHandler:
                  def __init__(self, store, repo_name, auth, oid, obj_size, obj_data, obj_href,
                               obj_verify_href=None):
                      self.current_store = store
                      self.repo_name = repo_name
                      self.auth = auth
                      self.oid = oid
                      self.obj_size = obj_size
                      self.obj_data = obj_data
                      self.obj_href = obj_href
                      self.obj_verify_href = obj_verify_href
                  def get_store(self, mode=None):
                      return self.current_store
                  def get_auth(self):
                      """returns auth header for re-use in upload/download"""
                      return " ".join(self.auth)
                  def download(self):
                      store = self.get_store()
                      response = None
                      has_errors = None
                      if not store.has_oid():
                          # error reply back to client that something is wrong with dl
-                         err_msg = 'object: {} does not exist in store'.format(store.oid)
+                         err_msg = f'object: {store.oid} does not exist in store'
                          has_errors = OrderedDict(
                              error=OrderedDict(
                                  code=404,
                                  message=err_msg
                              )
                          )
                      download_action = OrderedDict(
                          href=self.obj_href,
                          header=OrderedDict([("Authorization", self.get_auth())])
                      )
                      if not has_errors:
                          response = OrderedDict(download=download_action)
                      return response, has_errors
                  def upload(self, skip_existing=True):
                      """
                      Write upload action for git-lfs server
                      """
                      store = self.get_store()
                      response = None
                      has_errors = None
                      # verify if we have the OID before, if we do, reply with empty
                      if store.has_oid():
                          log.debug('LFS: store already has oid %s', store.oid)
                          # validate size
                          store_size = store.size_oid()
                          size_match = store_size == self.obj_size
                          if not size_match:
                              log.warning(
                                  'LFS: size mismatch for oid:%s, in store:%s expected: %s',
                                  self.oid, store_size, self.obj_size)
                          elif skip_existing:
                              log.debug('LFS: skipping further action as oid is existing')
                              return response, has_errors
                      chunked = ("Transfer-Encoding", "chunked")
                      upload_action = OrderedDict(
                          href=self.obj_href,
                          header=OrderedDict([("Authorization", self.get_auth()), chunked])
                      )
                      if not has_errors:
                          response = OrderedDict(upload=upload_action)
                          # if specified in handler, return the verification endpoint
                          if self.obj_verify_href:
                              verify_action = OrderedDict(
                                  href=self.obj_verify_href,
                                  header=OrderedDict([("Authorization", self.get_auth())])
                              )
                              response['verify'] = verify_action
                      return response, has_errors
                  def exec_operation(self, operation, *args, **kwargs):
                      """
                      Call the method of this handler whose name is ``operation``.

                      :param operation: attribute name looked up on this handler
                      :return: whatever the looked-up method returns for ``*args, **kwargs``
                      """
                      bound_handler = getattr(self, operation)
                      log.debug('LFS: handling request using %s handler', bound_handler)
                      outcome = bound_handler(*args, **kwargs)
                      return outcome
-             class LFSOidStore(object):
+             class LFSOidStore:
                  def __init__(self, oid, repo, store_location=None):
                      self.oid = oid
                      self.repo = repo
-                     self.store_path = store_location or self.get_default_store()
+                     defined_store_path = store_location or self.get_default_store()
+                     self.store_suffix = f"/objects/{oid[:2]}/{oid[2:4]}"
+                     self.store_path = f"{defined_store_path.rstrip('/')}{self.store_suffix}"
                      self.tmp_oid_path = os.path.join(self.store_path, oid + '.tmp')
                      self.oid_path = os.path.join(self.store_path, oid)
                      self.fd = None
                  def get_engine(self, mode):
                      """
                      engine = .get_engine(mode='wb')
                      with engine as f:
                          f.write('...')
                      """
-                     class StoreEngine(object):
+                     class StoreEngine:
                          def __init__(self, mode, store_path, oid_path, tmp_oid_path):
                              self.mode = mode
                              self.store_path = store_path
                              self.oid_path = oid_path
                              self.tmp_oid_path = tmp_oid_path
                          def __enter__(self):
                              if not os.path.isdir(self.store_path):
                                  os.makedirs(self.store_path)
                              # TODO(marcink): maybe write metadata here with size/oid ?
                              fd = open(self.tmp_oid_path, self.mode)
                              self.fd = fd
                              return fd
                          def __exit__(self, exc_type, exc_value, traceback):
                              # close tmp file, and rename to final destination
                              self.fd.close()
                              shutil.move(self.tmp_oid_path, self.oid_path)
                      return StoreEngine(
                          mode, self.store_path, self.oid_path, self.tmp_oid_path)
                  def get_default_store(self):
                      """
                      Return the default store location: ``.cache/lfs-store`` under the
                      home directory of the current user.

                      This mirrors the default of the Mercurial large files store,
                      which is /home/username/.cache/largefiles
                      """
                      return os.path.join(os.path.expanduser("~"), '.cache', 'lfs-store')
                  def has_oid(self):
                      return os.path.exists(os.path.join(self.store_path, self.oid))
                  def size_oid(self):
                      size = -1
                      if self.has_oid():
                          oid = os.path.join(self.store_path, self.oid)
                          size = os.stat(oid).st_size
                      return size

vcsserver/git_lfs/tests/__init__.py

0 +1 -1

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA

vcsserver/git_lfs/tests/test_lfs_app.py

0 +62 -60

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              import os
              import pytest
              from webtest.app import TestApp as WebObTestApp
-             import simplejson as json
+             from vcsserver.lib.rc_json import json
+             from vcsserver.str_utils import safe_bytes
              from vcsserver.git_lfs.app import create_app
+             from vcsserver.git_lfs.lib import LFSOidStore
              @pytest.fixture(scope='function')
              def git_lfs_app(tmpdir):
                  """Test app with LFS enabled, ``tmpdir`` as store path and the ``http`` scheme."""
                  store_path = str(tmpdir)
                  wsgi_app = create_app(
                      git_lfs_enabled=True, git_lfs_store_path=store_path,
                      git_lfs_http_scheme='http')
                  custom_app = WebObTestApp(wsgi_app)
                  # tests read the store location back from the app object
                  custom_app._store = store_path
                  return custom_app
              @pytest.fixture(scope='function')
              def git_lfs_https_app(tmpdir):
                  """Test app with LFS enabled, ``tmpdir`` as store path and the ``https`` scheme."""
                  store_path = str(tmpdir)
                  wsgi_app = create_app(
                      git_lfs_enabled=True, git_lfs_store_path=store_path,
                      git_lfs_http_scheme='https')
                  custom_app = WebObTestApp(wsgi_app)
                  # tests read the store location back from the app object
                  custom_app._store = store_path
                  return custom_app
              @pytest.fixture()
              def http_auth():
                  """Extra WSGI environ carrying a Basic ``HTTP_AUTHORIZATION`` value."""
                  environ = {'HTTP_AUTHORIZATION': "Basic XXXXX"}
                  return environ
-             class TestLFSApplication(object):
+             class TestLFSApplication:
                  def test_app_wrong_path(self, git_lfs_app):
                      """A GET on a path under ``info/lfs`` that matches no route must answer 404."""
                      unknown_path = '/repo/info/lfs/xxx'
                      git_lfs_app.get(unknown_path, status=404)
                  def test_app_deprecated_endpoint(self, git_lfs_app):
                      response = git_lfs_app.post('/repo/info/lfs/objects', status=501)
                      assert response.status_code == 501
-                     assert json.loads(response.text) == {u'message': u'LFS: v1 api not supported'}
+                     assert json.loads(response.text) == {'message': 'LFS: v1 api not supported'}
                  def test_app_lock_verify_api_not_available(self, git_lfs_app):
                      response = git_lfs_app.post('/repo/info/lfs/locks/verify', status=501)
                      assert response.status_code == 501
                      assert json.loads(response.text) == {
-                         u'message': u'GIT LFS locking api not supported'}
+                         'message': 'GIT LFS locking api not supported'}
                  def test_app_lock_api_not_available(self, git_lfs_app):
                      response = git_lfs_app.post('/repo/info/lfs/locks', status=501)
                      assert response.status_code == 501
                      assert json.loads(response.text) == {
-                         u'message': u'GIT LFS locking api not supported'}
+                         'message': 'GIT LFS locking api not supported'}
                  def test_app_batch_api_missing_auth(self, git_lfs_app):
                      """A batch request sent without an auth environ must answer 403."""
                      batch_url = '/repo/info/lfs/objects/batch'
                      git_lfs_app.post_json(batch_url, params={}, status=403)
                  def test_app_batch_api_unsupported_operation(self, git_lfs_app, http_auth):
                      response = git_lfs_app.post_json(
                          '/repo/info/lfs/objects/batch', params={}, status=400,
                          extra_environ=http_auth)
                      assert json.loads(response.text) == {
-                         u'message': u'unsupported operation mode: `None`'}
+                         'message': 'unsupported operation mode: `None`'}
                  def test_app_batch_api_missing_objects(self, git_lfs_app, http_auth):
                      response = git_lfs_app.post_json(
                          '/repo/info/lfs/objects/batch', params={'operation': 'download'},
                          status=400, extra_environ=http_auth)
                      assert json.loads(response.text) == {
-                         u'message': u'missing objects data'}
+                         'message': 'missing objects data'}
                  def test_app_batch_api_unsupported_data_in_objects(
                          self, git_lfs_app, http_auth):
                      params = {'operation': 'download',
                                'objects': [{}]}
                      response = git_lfs_app.post_json(
                          '/repo/info/lfs/objects/batch', params=params, status=400,
                          extra_environ=http_auth)
                      assert json.loads(response.text) == {
-                         u'message': u'unsupported data in objects'}
+                         'message': 'unsupported data in objects'}
                  def test_app_batch_api_download_missing_object(
                          self, git_lfs_app, http_auth):
                      params = {'operation': 'download',
                                'objects': [{'oid': '123', 'size': '1024'}]}
                      response = git_lfs_app.post_json(
                          '/repo/info/lfs/objects/batch', params=params,
                          extra_environ=http_auth)
                      expected_objects = [
-                         {u'authenticated': True,
-                          u'errors': {u'error': {
-                              u'code': 404,
-                              u'message': u'object: 123 does not exist in store'}},
-                          u'oid': u'123',
-                          u'size': u'1024'}
+                         {'authenticated': True,
+                          'errors': {'error': {
+                              'code': 404,
+                              'message': 'object: 123 does not exist in store'}},
+                          'oid': '123',
+                          'size': '1024'}
                      ]
                      assert json.loads(response.text) == {
                          'objects': expected_objects, 'transfer': 'basic'}
                  def test_app_batch_api_download(self, git_lfs_app, http_auth):
                      oid = '456'
-                     oid_path = os.path.join(git_lfs_app._store, oid)
+                     oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
                      if not os.path.isdir(os.path.dirname(oid_path)):
                          os.makedirs(os.path.dirname(oid_path))
                      with open(oid_path, 'wb') as f:
-                         f.write('OID_CONTENT')
+                         f.write(safe_bytes('OID_CONTENT'))
                      params = {'operation': 'download',
                                'objects': [{'oid': oid, 'size': '1024'}]}
                      response = git_lfs_app.post_json(
                          '/repo/info/lfs/objects/batch', params=params,
                          extra_environ=http_auth)
                      expected_objects = [
-                         {u'authenticated': True,
-                          u'actions': {
-                              u'download': {
-                                  u'header': {u'Authorization': u'Basic XXXXX'},
-                                  u'href': u'http://localhost/repo/info/lfs/objects/456'},
+                         {'authenticated': True,
+                          'actions': {
+                              'download': {
+                                  'header': {'Authorization': 'Basic XXXXX'},
+                                  'href': 'http://localhost/repo/info/lfs/objects/456'},
                           },
-                          u'oid': u'456',
-                          u'size': u'1024'}
+                          'oid': '456',
+                          'size': '1024'}
                      ]
                      assert json.loads(response.text) == {
                          'objects': expected_objects, 'transfer': 'basic'}
                  def test_app_batch_api_upload(self, git_lfs_app, http_auth):
                      params = {'operation': 'upload',
                                'objects': [{'oid': '123', 'size': '1024'}]}
                      response = git_lfs_app.post_json(
                          '/repo/info/lfs/objects/batch', params=params,
                          extra_environ=http_auth)
                      expected_objects = [
-                         {u'authenticated': True,
-                          u'actions': {
-                              u'upload': {
-                                  u'header': {u'Authorization': u'Basic XXXXX',
-                                              u'Transfer-Encoding': u'chunked'},
-                                  u'href': u'http://localhost/repo/info/lfs/objects/123'},
-                              u'verify': {
-                                  u'header': {u'Authorization': u'Basic XXXXX'},
-                                  u'href': u'http://localhost/repo/info/lfs/verify'}
+                         {'authenticated': True,
+                          'actions': {
+                              'upload': {
+                                  'header': {'Authorization': 'Basic XXXXX',
+                                              'Transfer-Encoding': 'chunked'},
+                                  'href': 'http://localhost/repo/info/lfs/objects/123'},
+                              'verify': {
+                                  'header': {'Authorization': 'Basic XXXXX'},
+                                  'href': 'http://localhost/repo/info/lfs/verify'}
                           },
-                          u'oid': u'123',
-                          u'size': u'1024'}
+                          'oid': '123',
+                          'size': '1024'}
                      ]
                      assert json.loads(response.text) == {
                          'objects': expected_objects, 'transfer': 'basic'}
                  def test_app_batch_api_upload_for_https(self, git_lfs_https_app, http_auth):
                      params = {'operation': 'upload',
                                'objects': [{'oid': '123', 'size': '1024'}]}
                      response = git_lfs_https_app.post_json(
                          '/repo/info/lfs/objects/batch', params=params,
                          extra_environ=http_auth)
                      expected_objects = [
-                         {u'authenticated': True,
-                          u'actions': {
-                              u'upload': {
-                                  u'header': {u'Authorization': u'Basic XXXXX',
-                                              u'Transfer-Encoding': u'chunked'},
-                                  u'href': u'https://localhost/repo/info/lfs/objects/123'},
-                              u'verify': {
-                                  u'header': {u'Authorization': u'Basic XXXXX'},
-                                  u'href': u'https://localhost/repo/info/lfs/verify'}
+                         {'authenticated': True,
+                          'actions': {
+                              'upload': {
+                                  'header': {'Authorization': 'Basic XXXXX',
+                                              'Transfer-Encoding': 'chunked'},
+                                  'href': 'https://localhost/repo/info/lfs/objects/123'},
+                              'verify': {
+                                  'header': {'Authorization': 'Basic XXXXX'},
+                                  'href': 'https://localhost/repo/info/lfs/verify'}
                           },
-                          u'oid': u'123',
-                          u'size': u'1024'}
+                          'oid': '123',
+                          'size': '1024'}
                      ]
                      assert json.loads(response.text) == {
                          'objects': expected_objects, 'transfer': 'basic'}
                  def test_app_verify_api_missing_data(self, git_lfs_app):
                      params = {'oid': 'missing'}
                      response = git_lfs_app.post_json(
                          '/repo/info/lfs/verify', params=params,
                          status=400)
                      assert json.loads(response.text) == {
-                         u'message': u'missing oid and size in request data'}
+                         'message': 'missing oid and size in request data'}
                  def test_app_verify_api_missing_obj(self, git_lfs_app):
                      params = {'oid': 'missing', 'size': '1024'}
                      response = git_lfs_app.post_json(
                          '/repo/info/lfs/verify', params=params,
                          status=404)
                      assert json.loads(response.text) == {
-                         u'message': u'oid `missing` does not exists in store'}
+                         'message': 'oid `missing` does not exists in store'}
                  def test_app_verify_api_size_mismatch(self, git_lfs_app):
                      oid = 'existing'
-                     oid_path = os.path.join(git_lfs_app._store, oid)
+                     oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
                      if not os.path.isdir(os.path.dirname(oid_path)):
                          os.makedirs(os.path.dirname(oid_path))
                      with open(oid_path, 'wb') as f:
-                         f.write('OID_CONTENT')
+                         f.write(safe_bytes('OID_CONTENT'))
                      params = {'oid': oid, 'size': '1024'}
                      response = git_lfs_app.post_json(
                          '/repo/info/lfs/verify', params=params, status=422)
                      assert json.loads(response.text) == {
-                         u'message': u'requested file size mismatch '
-                                     u'store size:11 requested:1024'}
+                         'message': 'requested file size mismatch '
+                                     'store size:11 requested:1024'}
                  def test_app_verify_api(self, git_lfs_app):
                      oid = 'existing'
-                     oid_path = os.path.join(git_lfs_app._store, oid)
+                     oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
                      if not os.path.isdir(os.path.dirname(oid_path)):
                          os.makedirs(os.path.dirname(oid_path))
                      with open(oid_path, 'wb') as f:
-                         f.write('OID_CONTENT')
+                         f.write(safe_bytes('OID_CONTENT'))
                      params = {'oid': oid, 'size': 11}
                      response = git_lfs_app.post_json(
                          '/repo/info/lfs/verify', params=params)
                      assert json.loads(response.text) == {
-                         u'message': {u'size': u'ok', u'in_store': u'ok'}}
+                         'message': {'size': 'ok', 'in_store': 'ok'}}
                  def test_app_download_api_oid_not_existing(self, git_lfs_app):
                      oid = 'missing'
                      response = git_lfs_app.get(
                          '/repo/info/lfs/objects/{oid}'.format(oid=oid), status=404)
                      assert json.loads(response.text) == {
-                         u'message': u'requested file with oid `missing` not found in store'}
+                         'message': 'requested file with oid `missing` not found in store'}
                  def test_app_download_api(self, git_lfs_app):
                      oid = 'existing'
-                     oid_path = os.path.join(git_lfs_app._store, oid)
+                     oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
                      if not os.path.isdir(os.path.dirname(oid_path)):
                          os.makedirs(os.path.dirname(oid_path))
                      with open(oid_path, 'wb') as f:
-                         f.write('OID_CONTENT')
+                         f.write(safe_bytes('OID_CONTENT'))
                      response = git_lfs_app.get(
                          '/repo/info/lfs/objects/{oid}'.format(oid=oid))
                      assert response
                  def test_app_upload(self, git_lfs_app):
                      oid = 'uploaded'
                      response = git_lfs_app.put(
                          '/repo/info/lfs/objects/{oid}'.format(oid=oid), params='CONTENT')
-                     assert json.loads(response.text) == {u'upload': u'ok'}
+                     assert json.loads(response.text) == {'upload': 'ok'}
                      # verify that we actually wrote that OID
-                     oid_path = os.path.join(git_lfs_app._store, oid)
+                     oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
                      assert os.path.isfile(oid_path)
                      assert 'CONTENT' == open(oid_path).read()

vcsserver/git_lfs/tests/test_lib.py

0 +10 -9

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              import os
              import pytest
+             from vcsserver.str_utils import safe_bytes
              from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
              @pytest.fixture()
              def lfs_store(tmpdir):
                  """
                  Provide an ``LFSOidStore`` for repo ``test`` and oid ``123456789``
                  whose store location is the per-test ``tmpdir``.
                  """
                  return LFSOidStore(
                      oid='123456789', repo='test', store_location=str(tmpdir))
              @pytest.fixture()
              def oid_handler(lfs_store):
                  """
                  Provide an ``OidHandler`` bound to the ``lfs_store`` fixture, reusing
                  the store's own repo name and oid, with fixed auth and href values.
                  """
                  return OidHandler(
                      store=lfs_store,
                      repo_name=lfs_store.repo,
                      auth=('basic', 'xxxx'),
                      oid=lfs_store.oid,
                      obj_size='1024',
                      obj_data={},
                      obj_href='http://localhost/handle_oid',
                      obj_verify_href='http://localhost/verify')
-             class TestOidHandler(object):
+             class TestOidHandler:
                  @pytest.mark.parametrize('exec_action', [
                      'download',
                      'upload',
                  ])
                  def test_exec_action(self, exec_action, oid_handler):
                      handler = oid_handler.exec_operation(exec_action)
                      assert handler
                  def test_exec_action_undefined(self, oid_handler):
                      with pytest.raises(AttributeError):
                          oid_handler.exec_operation('wrong')
                  def test_download_oid_not_existing(self, oid_handler):
                      response, has_errors = oid_handler.exec_operation('download')
                      assert response is None
                      assert has_errors['error'] == {
                          'code': 404,
                          'message': 'object: 123456789 does not exist in store'}
                  def test_download_oid(self, oid_handler):
                      store = oid_handler.get_store()
                      if not os.path.isdir(os.path.dirname(store.oid_path)):
                          os.makedirs(os.path.dirname(store.oid_path))
                      with open(store.oid_path, 'wb') as f:
-                         f.write('CONTENT')
+                         f.write(safe_bytes('CONTENT'))
                      response, has_errors = oid_handler.exec_operation('download')
                      assert has_errors is None
                      assert response['download'] == {
                          'header': {'Authorization': 'basic xxxx'},
                          'href': 'http://localhost/handle_oid'
                      }
                  def test_upload_oid_that_exists(self, oid_handler):
                      store = oid_handler.get_store()
                      if not os.path.isdir(os.path.dirname(store.oid_path)):
                          os.makedirs(os.path.dirname(store.oid_path))
                      with open(store.oid_path, 'wb') as f:
-                         f.write('CONTENT')
+                         f.write(safe_bytes('CONTENT'))
                      oid_handler.obj_size = 7
                      response, has_errors = oid_handler.exec_operation('upload')
                      assert has_errors is None
                      assert response is None
                  def test_upload_oid_that_exists_but_has_wrong_size(self, oid_handler):
                      store = oid_handler.get_store()
                      if not os.path.isdir(os.path.dirname(store.oid_path)):
                          os.makedirs(os.path.dirname(store.oid_path))
                      with open(store.oid_path, 'wb') as f:
-                         f.write('CONTENT')
+                         f.write(safe_bytes('CONTENT'))
                      oid_handler.obj_size = 10240
                      response, has_errors = oid_handler.exec_operation('upload')
                      assert has_errors is None
                      assert response['upload'] == {
                          'header': {'Authorization': 'basic xxxx',
                                     'Transfer-Encoding': 'chunked'},
                          'href': 'http://localhost/handle_oid',
                      }
                  def test_upload_oid(self, oid_handler):
                      response, has_errors = oid_handler.exec_operation('upload')
                      assert has_errors is None
                      assert response['upload'] == {
                          'header': {'Authorization': 'basic xxxx',
                                     'Transfer-Encoding': 'chunked'},
                          'href': 'http://localhost/handle_oid'
                      }
-             class TestLFSStore(object):
+             class TestLFSStore:
                  def test_write_oid(self, lfs_store):
                      oid_location = lfs_store.oid_path
                      assert not os.path.isfile(oid_location)
                      engine = lfs_store.get_engine(mode='wb')
                      with engine as f:
-                         f.write('CONTENT')
+                         f.write(safe_bytes('CONTENT'))
                      assert os.path.isfile(oid_location)
                  def test_detect_has_oid(self, lfs_store):
                      assert lfs_store.has_oid() is False
                      engine = lfs_store.get_engine(mode='wb')
                      with engine as f:
-                         f.write('CONTENT')
+                         f.write(safe_bytes('CONTENT'))
-                     assert lfs_store.has_oid() is True
  No newline at end of file
+                     assert lfs_store.has_oid() is True

vcsserver/git_lfs/utils.py

0 +1 -1

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              import copy
              from functools import wraps
              def get_cython_compat_decorator(wrapper, func):
                  """
                  Build a cython compatible decorator around ``func``.
                  Replaces the previously used decorator.decorator() helper, which
                  seems to be incompatible with cython.
                  :param wrapper: __wrapper method of the decorator class; it is called
                      as ``wrapper(func, *args, **kwds)``
                  :param func: decorated function
                  :return: callable carrying the metadata of ``func`` that forwards
                      every call to ``wrapper``
                  """
                  def _forward(*call_args, **call_kwargs):
                      return wrapper(func, *call_args, **call_kwargs)
                  decorated = wraps(func)(_forward)
                  # keep an explicit reference to the undecorated callable
                  decorated.__wrapped__ = func
                  return decorated
              def safe_result(result):
                  """
                  Clean result for better representation in logs.
                  Works on a deep copy: every action entry under ``objects`` that has a
                  ``header`` key gets it replaced by a masked Authorization header.
                  :param result: response structure to clean; it is never modified
                  :return: the masked copy, or the original ``result`` when masking
                      raises any exception
                  """
                  sanitized = copy.deepcopy(result)
                  try:
                      if 'objects' not in sanitized:
                          return sanitized
                      for entry in sanitized['objects']:
                          if 'actions' not in entry:
                              continue
                          for action_data in entry['actions'].values():
                              if 'header' in action_data:
                                  action_data['header'] = {'Authorization': '*****'}
                  except Exception:
                      # unexpected shape: fall back to the untouched input
                      return result
                  return sanitized

vcsserver/hgcompat.py

0 +18 -5

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              """
              Mercurial libs compatibility
              """
              import mercurial
              from mercurial import demandimport
              # patch demandimport, due to bug in mercurial when it always triggers
              # demandimport.enable()
+             from vcsserver.str_utils import safe_bytes
              demandimport.enable = lambda *args, **kwargs: 1
              from mercurial import ui
              from mercurial import patch
              from mercurial import config
              from mercurial import extensions
              from mercurial import scmutil
              from mercurial import archival
              from mercurial import discovery
              from mercurial import unionrepo
              from mercurial import localrepo
              from mercurial import merge as hg_merge
              from mercurial import subrepo
              from mercurial import subrepoutil
              from mercurial import tags as hg_tag
              from mercurial import util as hgutil
-             from mercurial.commands import clone, nullid, pull
+             from mercurial.commands import clone, pull
+             from mercurial.node import nullid
              from mercurial.context import memctx, memfilectx
              from mercurial.error import (
                  LookupError, RepoError, RepoLookupError, Abort, InterventionRequired,
                  RequirementError, ProgrammingError)
              from mercurial.hgweb import hgweb_mod
              from mercurial.localrepo import instance
              from mercurial.match import match, alwaysmatcher, patternmatcher
              from mercurial.mdiff import diffopts
              from mercurial.node import bin, hex
              from mercurial.encoding import tolocal
              from mercurial.discovery import findcommonoutgoing
              from mercurial.hg import peer
              from mercurial.httppeer import makepeer
-             from mercurial.util import url as hg_url
+             from mercurial.utils.urlutil import url as hg_url
              from mercurial.scmutil import revrange, revsymbol
              from mercurial.node import nullrev
              from mercurial import exchange
              from hgext import largefiles
              # those auth handlers are patched for a python 2.6.5 bug and
              # infinite looping when given invalid resources
              from mercurial.url import httpbasicauthhandler, httpdigestauthhandler
+             # hg strip is in core now
+             from mercurial import strip as hgext_strip
              def get_ctx(repo, ref):
+                 if not isinstance(ref, int):
+                     ref = safe_bytes(ref)
                  try:
                      ctx = repo[ref]
+                     return ctx
                  except (ProgrammingError, TypeError):
                      # we're unable to find the rev using a regular lookup, we fallback
                      # to slower, but backward compat revsymbol usage
-                     ctx = revsymbol(repo, ref)
+                     pass
                  except (LookupError, RepoLookupError):
                      # Similar case as above but only for refs that are not numeric
-                     if isinstance(ref, (int, long)):
+                     if isinstance(ref, int):
                          raise
-                     ctx = revsymbol(repo, ref)
                  return ctx

vcsserver/hgpatches.py

0 +2 -2

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              """
              Adjustments to Mercurial
              Intentionally kept separate from `hgcompat` and `hg`, so that these patches can
              be applied without having to import the whole Mercurial machinery.
              Imports are function local, so that just importing this module does not cause
              side-effects other than these functions being defined.
              """
              import logging
              def patch_largefiles_capabilities():
                  """
                  Patches the capabilities function in the largefiles extension.
                  The ``_capabilities`` attribute of the largefiles ``proto`` module is
                  replaced by the wrapper built by ``_dynamic_capabilities_wrapper``.
                  """
                  # function-local import, as in the rest of this module
                  from vcsserver import hgcompat
                  proto_module = hgcompat.largefiles.proto
                  proto_module._capabilities = _dynamic_capabilities_wrapper(
                      proto_module, hgcompat.extensions.extensions)
              def _dynamic_capabilities_wrapper(lfproto, extensions):
                  """
                  Build a replacement for ``lfproto._capabilities``.
                  :param lfproto: object whose current ``_capabilities`` is captured
                      once, at the time this function is called
                  :param extensions: callable taking ``repo.ui`` and returning an
                      iterable of pairs accepted by ``dict()``
                  :return: the ``_dynamic_capabilities`` closure
                  """
                  original_capabilities = lfproto._capabilities
                  hg_log = logging.getLogger('vcsserver.hg')
                  def _dynamic_capabilities(orig, repo, proto):
                      """
                      Adds dynamic behavior, so that the capability is only added if the
                      extension is enabled in the current ui object.
                      """
                      enabled_extensions = dict(extensions(repo.ui))
                      if 'largefiles' not in enabled_extensions:
                          hg_log.debug('Extension largefiles disabled')
                          return orig(repo, proto)
                      hg_log.debug('Extension largefiles enabled')
                      return original_capabilities(orig, repo, proto)
                  return _dynamic_capabilities
              def patch_subrepo_type_mapping():
                  from collections import defaultdict
-                 from hgcompat import subrepo, subrepoutil
+                 from .hgcompat import subrepo, subrepoutil
                  from vcsserver.exceptions import SubrepoMergeException
                  class NoOpSubrepo(subrepo.abstractsubrepo):
                      """
                      Subrepo implementation whose operations do nothing: it always
                      reports a clean state and refuses to merge.
                      """
                      def __init__(self, ctx, path, *args, **kwargs):
                          """
                          Store the context, the path and the ui of the parent repository.
                          :param ctx: context referring this subrepository in the parent
                              repository
                          :param path: path to this subrepository as seen from the
                              innermost repository
                          Extra positional and keyword arguments are accepted and ignored.
                          """
                          self._ctx = ctx
                          self._path = path
                          self.ui = ctx.repo().ui
                      def storeclean(self, path):
                          """
                          Always report the repository as unchanged since it was last
                          cloned from or pushed to the given repository.
                          """
                          return True
                      def dirty(self, ignoreupdate=False, missing=False):
                          """
                          Always report the subrepo as not dirty; both flags are ignored.
                          """
                          return False
                      def basestate(self):
                          """
                          Return the revision recorded for this subrepo path in the state
                          computed by ``subrepoutil.state`` for the stored context.
                          """
                          state_by_path = subrepoutil.state(self._ctx, self.ui)
                          # each entry unpacks into exactly three values; only the second is used
                          _source, revision, _kind = state_by_path.get(self._path)
                          return revision
                      def remove(self):
                          """Removing the subrepo is a no-op."""
                          return None
                      def get(self, state, overwrite=False):
                          """Putting the subrepo into ``state`` is a no-op."""
                          return None
                      def merge(self, state):
                          """Merging is not supported; always raises."""
                          # NOTE(review): SubrepoMergeException is called twice, so it is
                          # presumably an exception factory rather than a class - confirm
                          # against vcsserver.exceptions
                          raise SubrepoMergeException()()
                      def push(self, opts):
                          """The analogue of 'hg push' is a no-op here."""
                          return None
                  # Patch subrepo type mapping to always return our NoOpSubrepo class
                  # whenever a subrepo class is looked up.
                  subrepo.types = {
                      'hg': NoOpSubrepo,
                      'git': NoOpSubrepo,
                      'svn': NoOpSubrepo
                  }

vcsserver/hook_utils/__init__.py

0 +42 -27

-             # -*- coding: utf-8 -*-
-             # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              import re
              import os
              import sys
              import datetime
              import logging
              import pkg_resources
              import vcsserver
+             from vcsserver.str_utils import safe_bytes
              log = logging.getLogger(__name__)
+             HOOKS_DIR_MODE = 0o755
+             HOOKS_FILE_MODE = 0o755
+             def set_permissions_if_needed(path_to_check, perms: oct):
+                 # Get current permissions
+                 current_permissions = os.stat(path_to_check).st_mode & 0o777  # Extract permission bits
+                 # Check if current permissions are lower than required
+                 if current_permissions < int(perms):
+                     # Change the permissions if they are lower than required
+                     os.chmod(path_to_check, perms)
              def get_git_hooks_path(repo_path, bare):
                  """
                  Return the hooks directory of a git repository.
                  :param repo_path: path to repository
                  :param bare: when true the hooks live directly under ``repo_path``,
                      otherwise under its ``.git`` directory
                  """
                  if bare:
                      return os.path.join(repo_path, 'hooks')
                  return os.path.join(repo_path, '.git', 'hooks')
              def install_git_hooks(repo_path, bare, executable=None, force_create=False):
                  """
                  Creates a RhodeCode hook inside a git repository
                  :param repo_path: path to repository
+                 :param bare: defines if repository is considered a bare git repo
                  :param executable: binary executable to put in the hooks
-                 :param force_create: Create even if same name hook exists
+                 :param force_create: Creates even if the same name hook exists
                  """
                  executable = executable or sys.executable
                  hooks_path = get_git_hooks_path(repo_path, bare)
-                 if not os.path.isdir(hooks_path):
-                     os.makedirs(hooks_path, mode=0o777)
+                 # we always call it to ensure dir exists and it has a proper mode
+                 if not os.path.exists(hooks_path):
+                     # If it doesn't exist, create a new directory with the specified mode
+                     os.makedirs(hooks_path, mode=HOOKS_DIR_MODE, exist_ok=True)
+                 # If it exists, change the directory's mode to the specified mode
+                 set_permissions_if_needed(hooks_path, perms=HOOKS_DIR_MODE)
                  tmpl_post = pkg_resources.resource_string(
                      'vcsserver', '/'.join(
                          ('hook_utils', 'hook_templates', 'git_post_receive.py.tmpl')))
                  tmpl_pre = pkg_resources.resource_string(
                      'vcsserver', '/'.join(
                          ('hook_utils', 'hook_templates', 'git_pre_receive.py.tmpl')))
                  path = ''  # not used for now
                  timestamp = datetime.datetime.utcnow().isoformat()
                  for h_type, template in [('pre', tmpl_pre), ('post', tmpl_post)]:
                      log.debug('Installing git hook in repo %s', repo_path)
-                     _hook_file = os.path.join(hooks_path, '%s-receive' % h_type)
+                     _hook_file = os.path.join(hooks_path, f'{h_type}-receive')
                      _rhodecode_hook = check_rhodecode_hook(_hook_file)
                      if _rhodecode_hook or force_create:
                          log.debug('writing git %s hook file at %s !', h_type, _hook_file)
                          try:
                              with open(_hook_file, 'wb') as f:
-                                 template = template.replace(
-                                     '_TMPL_', vcsserver.__version__)
-                                 template = template.replace('_DATE_', timestamp)
-                                 template = template.replace('_ENV_', executable)
-                                 template = template.replace('_PATH_', path)
+                                 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
+                                 template = template.replace(b'_DATE_', safe_bytes(timestamp))
+                                 template = template.replace(b'_ENV_', safe_bytes(executable))
+                                 template = template.replace(b'_PATH_', safe_bytes(path))
                                  f.write(template)
-                             os.chmod(_hook_file, 0o755)
-                         except IOError:
+                             set_permissions_if_needed(_hook_file, perms=HOOKS_FILE_MODE)
+                         except OSError:
                              log.exception('error writing hook file %s', _hook_file)
                      else:
                          log.debug('skipping writing hook file')
                  return True
              def get_svn_hooks_path(repo_path):
                  """
                  Return the ``hooks`` directory located directly under ``repo_path``.
                  :param repo_path: path to repository
                  """
                  return os.path.join(repo_path, 'hooks')
              def install_svn_hooks(repo_path, executable=None, force_create=False):
                  """
                  Creates RhodeCode hooks inside a svn repository
                  :param repo_path: path to repository
                  :param executable: binary executable to put in the hooks
                  :param force_create: Create even if same name hook exists
                  """
                  executable = executable or sys.executable
                  hooks_path = get_svn_hooks_path(repo_path)
                  if not os.path.isdir(hooks_path):
-                     os.makedirs(hooks_path, mode=0o777)
+                     os.makedirs(hooks_path, mode=0o777, exist_ok=True)
                  tmpl_post = pkg_resources.resource_string(
                      'vcsserver', '/'.join(
                          ('hook_utils', 'hook_templates', 'svn_post_commit_hook.py.tmpl')))
                  tmpl_pre = pkg_resources.resource_string(
                      'vcsserver', '/'.join(
                          ('hook_utils', 'hook_templates', 'svn_pre_commit_hook.py.tmpl')))
                  path = ''  # not used for now
                  timestamp = datetime.datetime.utcnow().isoformat()
                  for h_type, template in [('pre', tmpl_pre), ('post', tmpl_post)]:
                      log.debug('Installing svn hook in repo %s', repo_path)
-                     _hook_file = os.path.join(hooks_path, '%s-commit' % h_type)
+                     _hook_file = os.path.join(hooks_path, f'{h_type}-commit')
                      _rhodecode_hook = check_rhodecode_hook(_hook_file)
                      if _rhodecode_hook or force_create:
                          log.debug('writing svn %s hook file at %s !', h_type, _hook_file)
                          try:
                              with open(_hook_file, 'wb') as f:
-                                 template = template.replace(
-                                     '_TMPL_', vcsserver.__version__)
-                                 template = template.replace('_DATE_', timestamp)
-                                 template = template.replace('_ENV_', executable)
-                                 template = template.replace('_PATH_', path)
+                                 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
+                                 template = template.replace(b'_DATE_', safe_bytes(timestamp))
+                                 template = template.replace(b'_ENV_', safe_bytes(executable))
+                                 template = template.replace(b'_PATH_', safe_bytes(path))
                                  f.write(template)
                              os.chmod(_hook_file, 0o755)
-                         except IOError:
+                         except OSError:
                              log.exception('error writing hook file %s', _hook_file)
                      else:
                          log.debug('skipping writing hook file')
                  return True
              def get_version_from_hook(hook_path):
-                 version = ''
+                 version = b''
                  hook_content = read_hook_content(hook_path)
-                 matches = re.search(r'(?:RC_HOOK_VER)\s*=\s*(.*)', hook_content)
+                 matches = re.search(rb'RC_HOOK_VER\s*=\s*(.*)', hook_content)
                  if matches:
                      try:
                          version = matches.groups()[0]
                          log.debug('got version %s from hooks.', version)
                      except Exception:
                          log.exception("Exception while reading the hook version.")
-                 return version.replace("'", "")
+                 return version.replace(b"'", b"")
              def check_rhodecode_hook(hook_path):
                  """
                  Check if the hook at ``hook_path`` was created by RhodeCode.

                  A path that does not exist yields True. For an existing path the
                  answer is True only when get_version_from_hook returns a non-empty value.
                  """
                  hook_present = os.path.exists(hook_path)
                  if hook_present:
                      log.debug('hook exists, checking if it is from RhodeCode')
                      return bool(get_version_from_hook(hook_path))
                  return True
-             def read_hook_content(hook_path):
-                 content = ''
+             def read_hook_content(hook_path) -> bytes:
+                 content = b''
                  if os.path.isfile(hook_path):
                      with open(hook_path, 'rb') as f:
                          content = f.read()
                  return content
              def get_git_pre_hook_version(repo_path, bare):
                  """Return the version read from the repository's git ``pre-receive`` hook file."""
                  hook_file = os.path.join(get_git_hooks_path(repo_path, bare), 'pre-receive')
                  return get_version_from_hook(hook_file)
              def get_git_post_hook_version(repo_path, bare):
                  """Return the version read from the repository's git ``post-receive`` hook file."""
                  hook_file = os.path.join(get_git_hooks_path(repo_path, bare), 'post-receive')
                  return get_version_from_hook(hook_file)
              def get_svn_pre_hook_version(repo_path):
                  """Return the version read from the repository's svn ``pre-commit`` hook file."""
                  hook_file = os.path.join(get_svn_hooks_path(repo_path), 'pre-commit')
                  return get_version_from_hook(hook_file)
              def get_svn_post_hook_version(repo_path):
                  """Return the version read from the repository's svn ``post-commit`` hook file."""
                  hook_file = os.path.join(get_svn_hooks_path(repo_path), 'post-commit')
                  return get_version_from_hook(hook_file)

vcsserver/hook_utils/hook_templates/git_post_receive.py.tmpl

0 +2 -2

              #!_ENV_
              import os
              import sys
              path_adjust = [_PATH_]
              if path_adjust:
                  sys.path = path_adjust
              try:
                  from vcsserver import hooks
              except ImportError:
                  if os.environ.get('RC_DEBUG_GIT_HOOK'):
                      import traceback
-                     print traceback.format_exc()
+                     print(traceback.format_exc())
                  hooks = None
              # TIMESTAMP: _DATE_
              RC_HOOK_VER = '_TMPL_'
              def main():
                  if hooks is None:
                      # exit with success if we cannot import vcsserver.hooks !!
                      # this allows simply push to this repo even without rhodecode
                      sys.exit(0)
                  if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_GIT_HOOKS'):
                      sys.exit(0)
                  repo_path = os.getcwd()
                  push_data = sys.stdin.readlines()
                  os.environ['RC_HOOK_VER'] = RC_HOOK_VER
                  # os.environ is modified here by a subprocess call that
                  # runs git and later git executes this hook.
                  # Environ gets some additional info from rhodecode system
                  # like IP or username from basic-auth
                  try:
                      result = hooks.git_post_receive(repo_path, push_data, os.environ)
                      sys.exit(result)
                  except Exception as error:
                      # TODO: johbo: Improve handling of this special case
                      if not getattr(error, '_vcs_kind', None) == 'repo_locked':
                          raise
-                     print 'ERROR:', error
+                     print(f'ERROR: {error}')
                      sys.exit(1)
                  sys.exit(0)
              if __name__ == '__main__':
                  main()

vcsserver/hook_utils/hook_templates/git_pre_receive.py.tmpl

0 +2 -2

              #!_ENV_
              import os
              import sys
              path_adjust = [_PATH_]
              if path_adjust:
                  sys.path = path_adjust
              try:
                  from vcsserver import hooks
              except ImportError:
                  if os.environ.get('RC_DEBUG_GIT_HOOK'):
                      import traceback
-                     print traceback.format_exc()
+                     print(traceback.format_exc())
                  hooks = None
              # TIMESTAMP: _DATE_
              RC_HOOK_VER = '_TMPL_'
              def main():
                  if hooks is None:
                      # exit with success if we cannot import vcsserver.hooks !!
                      # this allows simply push to this repo even without rhodecode
                      sys.exit(0)
                  if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_GIT_HOOKS'):
                      sys.exit(0)
                  repo_path = os.getcwd()
                  push_data = sys.stdin.readlines()
                  os.environ['RC_HOOK_VER'] = RC_HOOK_VER
                  # os.environ is modified here by a subprocess call that
                  # runs git and later git executes this hook.
                  # Environ gets some additional info from rhodecode system
                  # like IP or username from basic-auth
                  try:
                      result = hooks.git_pre_receive(repo_path, push_data, os.environ)
                      sys.exit(result)
                  except Exception as error:
                      # TODO: johbo: Improve handling of this special case
                      if not getattr(error, '_vcs_kind', None) == 'repo_locked':
                          raise
-                     print 'ERROR:', error
+                     print(f'ERROR: {error}')
                      sys.exit(1)
                  sys.exit(0)
              if __name__ == '__main__':
                  main()

vcsserver/hook_utils/hook_templates/svn_post_commit_hook.py.tmpl

0 +2 -2

              #!_ENV_
              import os
              import sys
              path_adjust = [_PATH_]
              if path_adjust:
                  sys.path = path_adjust
              try:
                  from vcsserver import hooks
              except ImportError:
                  if os.environ.get('RC_DEBUG_SVN_HOOK'):
                      import traceback
-                     print traceback.format_exc()
+                     print(traceback.format_exc())
                  hooks = None
              # TIMESTAMP: _DATE_
              RC_HOOK_VER = '_TMPL_'
              def main():
                  if hooks is None:
                      # exit with success if we cannot import vcsserver.hooks !!
                      # this allows simply push to this repo even without rhodecode
                      sys.exit(0)
                  if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_SVN_HOOKS'):
                      sys.exit(0)
                  repo_path = os.getcwd()
                  push_data = sys.argv[1:]
                  os.environ['RC_HOOK_VER'] = RC_HOOK_VER
                  try:
                      result = hooks.svn_post_commit(repo_path, push_data, os.environ)
                      sys.exit(result)
                  except Exception as error:
                      # TODO: johbo: Improve handling of this special case
                      if not getattr(error, '_vcs_kind', None) == 'repo_locked':
                          raise
-                     print 'ERROR:', error
+                     print(f'ERROR: {error}')
                      sys.exit(1)
                  sys.exit(0)
              if __name__ == '__main__':
                  main()

vcsserver/hook_utils/hook_templates/svn_pre_commit_hook.py.tmpl

0 +2 -2

              #!_ENV_
              import os
              import sys
              path_adjust = [_PATH_]
              if path_adjust:
                  sys.path = path_adjust
              try:
                  from vcsserver import hooks
              except ImportError:
                  if os.environ.get('RC_DEBUG_SVN_HOOK'):
                      import traceback
-                     print traceback.format_exc()
+                     print(traceback.format_exc())
                  hooks = None
              # TIMESTAMP: _DATE_
              RC_HOOK_VER = '_TMPL_'
              def main():
                  if os.environ.get('SSH_READ_ONLY') == '1':
                      sys.stderr.write('Only read-only access is allowed')
                      sys.exit(1)
                  if hooks is None:
                      # exit with success if we cannot import vcsserver.hooks !!
                      # this allows simply push to this repo even without rhodecode
                      sys.exit(0)
                  if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_SVN_HOOKS'):
                      sys.exit(0)
                  repo_path = os.getcwd()
                  push_data = sys.argv[1:]
                  os.environ['RC_HOOK_VER'] = RC_HOOK_VER
                  try:
                      result = hooks.svn_pre_commit(repo_path, push_data, os.environ)
                      sys.exit(result)
                  except Exception as error:
                      # TODO: johbo: Improve handling of this special case
                      if not getattr(error, '_vcs_kind', None) == 'repo_locked':
                          raise
-                     print 'ERROR:', error
+                     print(f'ERROR: {error}')
                      sys.exit(1)
                  sys.exit(0)
              if __name__ == '__main__':
                  main()

vcsserver/hooks.py

0 +163 -97

-             # -*- coding: utf-8 -*-
-             # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              import io
              import os
              import sys
              import logging
              import collections
-             import importlib
              import base64
+             import msgpack
+             import dataclasses
+             import pygit2
-             from httplib import HTTPConnection
+             import http.client
+             from celery import Celery
              import mercurial.scmutil
              import mercurial.node
-             import simplejson as json
+             from vcsserver.lib.rc_json import json
              from vcsserver import exceptions, subprocessio, settings
+             from vcsserver.str_utils import ascii_str, safe_str
+             from vcsserver.remote.git_remote import Repository
+             celery_app = Celery('__vcsserver__')
              log = logging.getLogger(__name__)
-             class HooksHttpClient(object):
+             class HooksHttpClient:
+                 proto = 'msgpack.v1'
                  connection = None
                  def __init__(self, hooks_uri):
                      self.hooks_uri = hooks_uri
+                 def __repr__(self):
+                     return f'{self.__class__}(hook_uri={self.hooks_uri}, proto={self.proto})'
                  def __call__(self, method, extras):
-                     connection = HTTPConnection(self.hooks_uri)
-                     body = self._serialize(method, extras)
-                     try:
-                         connection.request('POST', '/', body)
-                     except Exception:
-                         log.error('Hooks calling Connection failed on %s', connection.__dict__)
-                         raise
-                     response = connection.getresponse()
-                     response_data = response.read()
+                     connection = http.client.HTTPConnection(self.hooks_uri)
+                     # binary msgpack body
+                     headers, body = self._serialize(method, extras)
+                     log.debug('Doing a new hooks call using HTTPConnection to %s', self.hooks_uri)
                      try:
-                         return json.loads(response_data)
+                         try:
+                             connection.request('POST', '/', body, headers)
+                         except Exception as error:
+                             log.error('Hooks calling Connection failed on %s, org error: %s', connection.__dict__, error)
+                             raise
+                         response = connection.getresponse()
+                         try:
+                             return msgpack.load(response)
                      except Exception:
+                             response_data = response.read()
                          log.exception('Failed to decode hook response json data. '
                                        'response_code:%s, raw_data:%s',
                                        response.status, response_data)
                          raise
+                     finally:
+                         connection.close()
-                 def _serialize(self, hook_name, extras):
+                 @classmethod
+                 def _serialize(cls, hook_name, extras):
                      data = {
                          'method': hook_name,
                          'extras': extras
                      }
-                     return json.dumps(data)
+                     headers = {
+                         "rc-hooks-protocol": cls.proto,
+                         "Connection": "keep-alive"
+                     }
+                     return headers, msgpack.packb(data)
-             class HooksDummyClient(object):
-                 def __init__(self, hooks_module):
-                     self._hooks_module = importlib.import_module(hooks_module)
+             class HooksCeleryClient:
+                 TASK_TIMEOUT = 60  # time in seconds
-                 def __call__(self, hook_name, extras):
-                     with self._hooks_module.Hooks() as hooks:
-                         return getattr(hooks, hook_name)(extras)
+                 def __init__(self, queue, backend):
+                     celery_app.config_from_object({
+                         'broker_url': queue, 'result_backend': backend,
+                         'broker_connection_retry_on_startup': True,
+                         'task_serializer': 'msgpack',
+                         'accept_content': ['json', 'msgpack'],
+                         'result_serializer': 'msgpack',
+                         'result_accept_content': ['json', 'msgpack']
+                     })
+                     self.celery_app = celery_app
+                 def __call__(self, method, extras):
+                     inquired_task = self.celery_app.signature(
+                         f'rhodecode.lib.celerylib.tasks.{method}'
+                     )
+                     return inquired_task.delay(extras).get(timeout=self.TASK_TIMEOUT)
-             class HooksShadowRepoClient(object):
+             class HooksShadowRepoClient:
                  def __call__(self, hook_name, extras):
                      return {'output': '', 'status': 0}
-             class RemoteMessageWriter(object):
+             class RemoteMessageWriter:
                  """Writer base class."""
                  def write(self, message):
                      raise NotImplementedError()
              class HgMessageWriter(RemoteMessageWriter):
                  """Writer that knows how to send messages to mercurial clients."""
                  def __init__(self, ui):
                      self.ui = ui
-                 def write(self, message):
+                 def write(self, message: str):
                      # TODO: Check why the quiet flag is set by default.
                      old = self.ui.quiet
                      self.ui.quiet = False
                      self.ui.status(message.encode('utf-8'))
                      self.ui.quiet = old
              class GitMessageWriter(RemoteMessageWriter):
                  """Writer that knows how to send messages to git clients."""
                  def __init__(self, stdout=None):
                      self.stdout = stdout or sys.stdout
-                 def write(self, message):
-                     self.stdout.write(message.encode('utf-8'))
+                 def write(self, message: str):
+                     self.stdout.write(message)
              class SvnMessageWriter(RemoteMessageWriter):
                  """Writer that knows how to send messages to svn clients."""
                  def __init__(self, stderr=None):
                      # SVN needs data sent to stderr for back-to-client messaging
                      self.stderr = stderr or sys.stderr
                  def write(self, message):
                      """
                      Send ``message`` to the svn client through the configured stream.

                      Text streams, including the default ``sys.stderr``, accept only ``str``
                      under Python 3, so the message is written as text. It is encoded to
                      UTF-8 only when the target is a binary stream.

                      :param message: text (or UTF-8 encoded bytes) to emit
                      """
                      targets_binary = isinstance(self.stderr, (io.RawIOBase, io.BufferedIOBase))
                      if targets_binary:
                          # explicitly supplied binary stream: keep emitting UTF-8 bytes
                          if isinstance(message, str):
                              message = message.encode('utf-8')
                      elif isinstance(message, bytes):
                          # text stream: never hand it bytes, that raises TypeError
                          message = message.decode('utf-8', errors='replace')
                      self.stderr.write(message)
              def _handle_exception(result):
                  exception_class = result.get('exception')
                  exception_traceback = result.get('exception_traceback')
+                 log.debug('Handling hook-call exception: %s', exception_class)
                  if exception_traceback:
                      log.error('Got traceback from remote call:%s', exception_traceback)
                  if exception_class == 'HTTPLockedRC':
                      raise exceptions.RepositoryLockedException()(*result['exception_args'])
                  elif exception_class == 'HTTPBranchProtected':
                      raise exceptions.RepositoryBranchProtectedException()(*result['exception_args'])
                  elif exception_class == 'RepositoryError':
                      raise exceptions.VcsException()(*result['exception_args'])
                  elif exception_class:
-                     raise Exception('Got remote exception "%s" with args "%s"' %
-                                     (exception_class, result['exception_args']))
+                     raise Exception(
+                         f"""Got remote exception "{exception_class}" with args "{result['exception_args']}" """
+                     )
              def _get_hooks_client(extras):
                  hooks_uri = extras.get('hooks_uri')
+                 task_queue = extras.get('task_queue')
+                 task_backend = extras.get('task_backend')
                  is_shadow_repo = extras.get('is_shadow_repo')
                  if hooks_uri:
-                     return HooksHttpClient(extras['hooks_uri'])
+                     return HooksHttpClient(hooks_uri)
+                 elif task_queue and task_backend:
+                     return HooksCeleryClient(task_queue, task_backend)
                  elif is_shadow_repo:
                      return HooksShadowRepoClient()
                  else:
-                     return HooksDummyClient(extras['hooks_module'])
+                     raise Exception("Hooks client not found!")
              def _call_hook(hook_name, extras, writer):
                  """
                  Run ``hook_name`` through the hooks client selected from ``extras``.

                  The call result is first handed to _handle_exception, then its
                  'output' entry is passed to ``writer.write`` and its 'status' entry
                  is returned.
                  """
                  client = _get_hooks_client(extras)
                  log.debug('Hooks, using client:%s', client)
                  hook_result = client(hook_name, extras)
                  log.debug('Hooks got result: %s', hook_result)
                  # raises when the result carries a remote exception
                  _handle_exception(hook_result)
                  writer.write(hook_result['output'])
                  return hook_result['status']
              def _extras_from_ui(ui):
-                 hook_data = ui.config('rhodecode', 'RC_SCM_DATA')
+                 hook_data = ui.config(b'rhodecode', b'RC_SCM_DATA')
                  if not hook_data:
                      # maybe it's inside environ ?
                      env_hook_data = os.environ.get('RC_SCM_DATA')
                      if env_hook_data:
                          hook_data = env_hook_data
                  extras = {}
                  if hook_data:
                      extras = json.loads(hook_data)
                  return extras
              def _rev_range_hash(repo, node, check_heads=False):
                  from vcsserver.hgcompat import get_ctx
                  commits = []
                  revs = []
                  start = get_ctx(repo, node).rev()
                  end = len(repo)
                  for rev in range(start, end):
                      revs.append(rev)
                      ctx = get_ctx(repo, rev)
-                     commit_id = mercurial.node.hex(ctx.node())
-                     branch = ctx.branch()
+                     commit_id = ascii_str(mercurial.node.hex(ctx.node()))
+                     branch = safe_str(ctx.branch())
                      commits.append((commit_id, branch))
                  parent_heads = []
                  if check_heads:
                      parent_heads = _check_heads(repo, start, end, revs)
                  return commits, parent_heads
              def _check_heads(repo, start, end, commits):
                  from vcsserver.hgcompat import get_ctx
                  changelog = repo.changelog
                  parents = set()
                  for new_rev in commits:
                      for p in changelog.parentrevs(new_rev):
                          if p == mercurial.node.nullrev:
                              continue
                          if p < start:
                              parents.add(p)
                  for p in parents:
                      branch = get_ctx(repo, p).branch()
                      # The heads descending from that parent, on the same branch
-                     parent_heads = set([p])
-                     reachable = set([p])
-                     for x in xrange(p + 1, end):
+                     parent_heads = {p}
+                     reachable = {p}
+                     for x in range(p + 1, end):
                          if get_ctx(repo, x).branch() != branch:
                              continue
                          for pp in changelog.parentrevs(x):
                              if pp in reachable:
                                  reachable.add(x)
                                  parent_heads.discard(pp)
                                  parent_heads.add(x)
                      # More than one head? Suggest merging
                      if len(parent_heads) > 1:
                          return list(parent_heads)
                  return []
              def _get_git_env():
                  """
                  Collect the variables of the process environment whose name starts with ``GIT``.

                  :return: list of ``(name, value)`` tuples (serialized form of the mapping)
                  """
                  return [
                      (name, value)
                      for name, value in os.environ.items()
                      if name.startswith('GIT')
                  ]
              def _get_hg_env(old_rev, new_rev, txnid, repo_path):
                  """
                  Build the Mercurial environment for a push, in serialized form.

                  Starts from the process variables whose name begins with ``HG`` and
                  then sets HG_NODE, HG_NODE_LAST, HG_TXNID and HG_PENDING from the
                  arguments, overriding any inherited values.

                  :return: list of ``(name, value)`` tuples
                  """
                  hg_env = {
                      name: value
                      for name, value in os.environ.items()
                      if name.startswith('HG')
                  }
                  hg_env.update({
                      'HG_NODE': old_rev,
                      'HG_NODE_LAST': new_rev,
                      'HG_TXNID': txnid,
                      'HG_PENDING': repo_path,
                  })
                  return list(hg_env.items())
              def repo_size(ui, repo, **kwargs):
                  """Dispatch the 'repo_size' hook call, writing its output through ``ui``."""
                  return _call_hook('repo_size', _extras_from_ui(ui), HgMessageWriter(ui))
              def pre_pull(ui, repo, **kwargs):
                  """Dispatch the 'pre_pull' hook call, writing its output through ``ui``."""
                  return _call_hook('pre_pull', _extras_from_ui(ui), HgMessageWriter(ui))
              def pre_pull_ssh(ui, repo, **kwargs):
                  """
                  Run pre_pull only when the extras read from ``ui`` carry a truthy 'SSH' entry.

                  :return: the pre_pull status, or 0 when the hook is skipped
                  """
                  extras = _extras_from_ui(ui)
                  if not extras or not extras.get('SSH'):
                      return 0
                  return pre_pull(ui, repo, **kwargs)
              def post_pull(ui, repo, **kwargs):
                  """Dispatch the 'post_pull' hook call, writing its output through ``ui``."""
                  return _call_hook('post_pull', _extras_from_ui(ui), HgMessageWriter(ui))
              def post_pull_ssh(ui, repo, **kwargs):
                  """
                  Run post_pull only when the extras read from ``ui`` carry a truthy 'SSH' entry.

                  :return: the post_pull status, or 0 when the hook is skipped
                  """
                  extras = _extras_from_ui(ui)
                  if not extras or not extras.get('SSH'):
                      return 0
                  return post_pull(ui, repo, **kwargs)
              def pre_push(ui, repo, node=None, **kwargs):
                  """
                  Mercurial pre_push hook
                  """
                  extras = _extras_from_ui(ui)
                  detect_force_push = extras.get('detect_force_push')
                  rev_data = []
-                 if node and kwargs.get('hooktype') == 'pretxnchangegroup':
+                 hook_type: str = safe_str(kwargs.get('hooktype'))
+                 if node and hook_type == 'pretxnchangegroup':
                      branches = collections.defaultdict(list)
                      commits, _heads = _rev_range_hash(repo, node, check_heads=detect_force_push)
                      for commit_id, branch in commits:
                          branches[branch].append(commit_id)
                      for branch, commits in branches.items():
-                         old_rev = kwargs.get('node_last') or commits[0]
+                         old_rev = ascii_str(kwargs.get('node_last')) or commits[0]
                          rev_data.append({
                              'total_commits': len(commits),
                              'old_rev': old_rev,
                              'new_rev': commits[-1],
                              'ref': '',
                              'type': 'branch',
                              'name': branch,
                          })
                      for push_ref in rev_data:
                          push_ref['multiple_heads'] = _heads
                          repo_path = os.path.join(
                              extras.get('repo_store', ''), extras.get('repository', ''))
                          push_ref['hg_env'] = _get_hg_env(
                              old_rev=push_ref['old_rev'],
-                             new_rev=push_ref['new_rev'], txnid=kwargs.get('txnid'),
+                             new_rev=push_ref['new_rev'], txnid=ascii_str(kwargs.get('txnid')),
                              repo_path=repo_path)
-                 extras['hook_type'] = kwargs.get('hooktype', 'pre_push')
+                 extras['hook_type'] = hook_type or 'pre_push'
                  extras['commit_ids'] = rev_data
                  return _call_hook('pre_push', extras, HgMessageWriter(ui))
              def pre_push_ssh(ui, repo, node=None, **kwargs):
                  """
                  Run pre_push only when the extras read from ``ui`` carry a truthy 'SSH' entry.

                  :return: the pre_push status, or 0 when the hook is skipped
                  """
                  via_ssh = _extras_from_ui(ui).get('SSH')
                  return pre_push(ui, repo, node, **kwargs) if via_ssh else 0
              def pre_push_ssh_auth(ui, repo, node=None, **kwargs):
                  """
                  Mercurial pre_push hook for SSH

                  When the extras carry a truthy 'SSH' entry, the push is accepted (0)
                  only if 'SSH_PERMISSIONS' is 'repository.write' or 'repository.admin';
                  any other permission gives a non-zero code. Without 'SSH' the result is 0.
                  """
                  extras = _extras_from_ui(ui)
                  if not extras.get('SSH'):
                      return 0
                  push_permissions = ('repository.write', 'repository.admin')
                  # non-zero ret code when the permission does not allow pushing
                  return 0 if extras['SSH_PERMISSIONS'] in push_permissions else 1
              def post_push(ui, repo, node, **kwargs):
                  """
                  Mercurial post_push hook
                  """
                  extras = _extras_from_ui(ui)
                  commit_ids = []
                  branches = []
                  bookmarks = []
                  tags = []
+                 hook_type: str = safe_str(kwargs.get('hooktype'))
                  commits, _heads = _rev_range_hash(repo, node)
                  for commit_id, branch in commits:
                      commit_ids.append(commit_id)
                      if branch not in branches:
                          branches.append(branch)
-                 if hasattr(ui, '_rc_pushkey_branches'):
-                     bookmarks = ui._rc_pushkey_branches
+                 if hasattr(ui, '_rc_pushkey_bookmarks'):
+                     bookmarks = ui._rc_pushkey_bookmarks
-                 extras['hook_type'] = kwargs.get('hooktype', 'post_push')
+                 extras['hook_type'] = hook_type or 'post_push'
                  extras['commit_ids'] = commit_ids
                  extras['new_refs'] = {
                      'branches': branches,
                      'bookmarks': bookmarks,
                      'tags': tags
                  }
                  return _call_hook('post_push', extras, HgMessageWriter(ui))
              def post_push_ssh(ui, repo, node, **kwargs):
                  """
                  Mercurial post_push hook for SSH

                  Delegates to post_push when the extras read from ``ui`` carry a
                  truthy 'SSH' entry; otherwise returns 0.
                  """
                  extras = _extras_from_ui(ui)
                  if not extras.get('SSH'):
                      return 0
                  return post_push(ui, repo, node, **kwargs)
              def key_push(ui, repo, **kwargs):
                  from vcsserver.hgcompat import get_ctx
-                 if kwargs['new'] != '0' and kwargs['namespace'] == 'bookmarks':
+                 if kwargs['new'] != b'0' and kwargs['namespace'] == b'bookmarks':
                      # store new bookmarks in our UI object propagated later to post_push
-                     ui._rc_pushkey_branches = get_ctx(repo, kwargs['key']).bookmarks()
+                     ui._rc_pushkey_bookmarks = get_ctx(repo, kwargs['key']).bookmarks()
                  return
              # backward compat
              log_pull_action = post_pull
              # backward compat
              log_push_action = post_push
              def handle_git_pre_receive(unused_repo_path, unused_revs, unused_env):
                  """
                  Legacy hook name, retained for backward compatibility.

                  Does nothing; it is only needed while the installed git hooks
                  have not been upgraded.
                  """
                  return None
              def handle_git_post_receive(unused_repo_path, unused_revs, unused_env):
                  """
                  Legacy hook name, retained for backward compatibility.

                  Does nothing; it is only needed while the installed git hooks
                  have not been upgraded.
                  """
                  return None
-             HookResponse = collections.namedtuple('HookResponse', ('status', 'output'))
+             @dataclasses.dataclass
+             class HookResponse:
+                 status: int
+                 output: str
-             def git_pre_pull(extras):
+             def git_pre_pull(extras) -> HookResponse:
                  """
                  Pre pull hook.
                  :param extras: dictionary containing the keys defined in simplevcs
                  :type extras: dict
                  :return: status code of the hook. 0 for success.
                  :rtype: int
                  """
                  if 'pull' not in extras['hooks']:
                      return HookResponse(0, '')
-                 stdout = io.BytesIO()
+                 stdout = io.StringIO()
                  try:
-                     status = _call_hook('pre_pull', extras, GitMessageWriter(stdout))
+                     status_code = _call_hook('pre_pull', extras, GitMessageWriter(stdout))
                  except Exception as error:
-                     status = 128
-                     stdout.write('ERROR: %s\n' % str(error))
+                     log.exception('Failed to call pre_pull hook')
+                     status_code = 128
+                     stdout.write(f'ERROR: {error}\n')
-                 return HookResponse(status, stdout.getvalue())
+                 return HookResponse(status_code, stdout.getvalue())
-             def git_post_pull(extras):
+             def git_post_pull(extras) -> HookResponse:
                  """
                  Post pull hook.
                  :param extras: dictionary containing the keys defined in simplevcs
                  :type extras: dict
                  :return: status code of the hook. 0 for success.
                  :rtype: int
                  """
                  if 'pull' not in extras['hooks']:
                      return HookResponse(0, '')
-                 stdout = io.BytesIO()
+                 stdout = io.StringIO()
                  try:
                      status = _call_hook('post_pull', extras, GitMessageWriter(stdout))
                  except Exception as error:
                      status = 128
-                     stdout.write('ERROR: %s\n' % error)
+                     stdout.write(f'ERROR: {error}\n')
                  return HookResponse(status, stdout.getvalue())
              def _parse_git_ref_lines(revision_lines):
                  rev_data = []
                  for revision_line in revision_lines or []:
                      old_rev, new_rev, ref = revision_line.strip().split(' ')
                      ref_data = ref.split('/', 2)
                      if ref_data[1] in ('tags', 'heads'):
                          rev_data.append({
                              # NOTE(marcink):
                              # we're unable to tell total_commits for git at this point
                              # but we set the variable for consistency with GIT
                              'total_commits': -1,
                              'old_rev': old_rev,
                              'new_rev': new_rev,
                              'ref': ref,
                              'type': ref_data[1],
                              'name': ref_data[2],
                          })
                  return rev_data
-             def git_pre_receive(unused_repo_path, revision_lines, env):
+             def git_pre_receive(unused_repo_path, revision_lines, env) -> int:
                  """
                  Pre push hook.
-                 :param extras: dictionary containing the keys defined in simplevcs
-                 :type extras: dict
                  :return: status code of the hook. 0 for success.
-                 :rtype: int
                  """
                  extras = json.loads(env['RC_SCM_DATA'])
                  rev_data = _parse_git_ref_lines(revision_lines)
                  if 'push' not in extras['hooks']:
                      return 0
                  empty_commit_id = '0' * 40
                  detect_force_push = extras.get('detect_force_push')
                  for push_ref in rev_data:
                      # store our git-env which holds the temp store
                      push_ref['git_env'] = _get_git_env()
                      push_ref['pruned_sha'] = ''
                      if not detect_force_push:
                          # don't check for forced-push when we don't need to
                          continue
                      type_ = push_ref['type']
                      new_branch = push_ref['old_rev'] == empty_commit_id
                      delete_branch = push_ref['new_rev'] == empty_commit_id
                      if type_ == 'heads' and not (new_branch or delete_branch):
                          old_rev = push_ref['old_rev']
                          new_rev = push_ref['new_rev']
-                         cmd = [settings.GIT_EXECUTABLE, 'rev-list', old_rev, '^{}'.format(new_rev)]
+                         cmd = [settings.GIT_EXECUTABLE, 'rev-list', old_rev, f'^{new_rev}']
                          stdout, stderr = subprocessio.run_command(
                              cmd, env=os.environ.copy())
                          # means we're having some non-reachable objects, this forced push was used
                          if stdout:
                              push_ref['pruned_sha'] = stdout.splitlines()
                  extras['hook_type'] = 'pre_receive'
                  extras['commit_ids'] = rev_data
-                 return _call_hook('pre_push', extras, GitMessageWriter())
+                 stdout = sys.stdout
+                 status_code = _call_hook('pre_push', extras, GitMessageWriter(stdout))
+                 return status_code
-             def git_post_receive(unused_repo_path, revision_lines, env):
+             def git_post_receive(unused_repo_path, revision_lines, env) -> int:
                  """
                  Post push hook.
-                 :param extras: dictionary containing the keys defined in simplevcs
-                 :type extras: dict
                  :return: status code of the hook. 0 for success.
-                 :rtype: int
                  """
                  extras = json.loads(env['RC_SCM_DATA'])
                  if 'push' not in extras['hooks']:
                      return 0
                  rev_data = _parse_git_ref_lines(revision_lines)
                  git_revs = []
                  # N.B.(skreft): it is ok to just call git, as git before calling a
                  # subcommand sets the PATH environment variable so that it point to the
                  # correct version of the git executable.
                  empty_commit_id = '0' * 40
                  branches = []
                  tags = []
                  for push_ref in rev_data:
                      type_ = push_ref['type']
                      if type_ == 'heads':
+                         # starting new branch case
                          if push_ref['old_rev'] == empty_commit_id:
-                             # starting new branch case
-                             if push_ref['name'] not in branches:
-                                 branches.append(push_ref['name'])
+                             push_ref_name = push_ref['name']
-                             # Fix up head revision if needed
-                             cmd = [settings.GIT_EXECUTABLE, 'show', 'HEAD']
+                             if push_ref_name not in branches:
+                                 branches.append(push_ref_name)
+                             need_head_set = ''
+                             with Repository(os.getcwd()) as repo:
                              try:
-                                 subprocessio.run_command(cmd, env=os.environ.copy())
-                             except Exception:
-                                 cmd = [settings.GIT_EXECUTABLE, 'symbolic-ref', 'HEAD',
-                                        'refs/heads/%s' % push_ref['name']]
-                                 print("Setting default branch to %s" % push_ref['name'])
-                                 subprocessio.run_command(cmd, env=os.environ.copy())
+                                     repo.head
+                                 except pygit2.GitError:
+                                     need_head_set = f'refs/heads/{push_ref_name}'
-                             cmd = [settings.GIT_EXECUTABLE, 'for-each-ref',
-                                    '--format=%(refname)', 'refs/heads/*']
+                                 if need_head_set:
+                                     repo.set_head(need_head_set)
+                                     print(f"Setting default branch to {push_ref_name}")
+                             cmd = [settings.GIT_EXECUTABLE, 'for-each-ref', '--format=%(refname)', 'refs/heads/*']
                              stdout, stderr = subprocessio.run_command(
                                  cmd, env=os.environ.copy())
-                             heads = stdout
+                             heads = safe_str(stdout)
                              heads = heads.replace(push_ref['ref'], '')
                              heads = ' '.join(head for head
                                               in heads.splitlines() if head) or '.'
                              cmd = [settings.GIT_EXECUTABLE, 'log', '--reverse',
                                     '--pretty=format:%H', '--', push_ref['new_rev'],
                                     '--not', heads]
                              stdout, stderr = subprocessio.run_command(
                                  cmd, env=os.environ.copy())
-                             git_revs.extend(stdout.splitlines())
+                             git_revs.extend(list(map(ascii_str, stdout.splitlines())))
+                         # delete branch case
                          elif push_ref['new_rev'] == empty_commit_id:
-                             # delete branch case
-                             git_revs.append('delete_branch=>%s' % push_ref['name'])
+                             git_revs.append(f'delete_branch=>{push_ref["name"]}')
                          else:
                              if push_ref['name'] not in branches:
                                  branches.append(push_ref['name'])
                              cmd = [settings.GIT_EXECUTABLE, 'log',
-                                    '{old_rev}..{new_rev}'.format(**push_ref),
+                                    f'{push_ref["old_rev"]}..{push_ref["new_rev"]}',
                                     '--reverse', '--pretty=format:%H']
                              stdout, stderr = subprocessio.run_command(
                                  cmd, env=os.environ.copy())
-                             git_revs.extend(stdout.splitlines())
+                             # we get bytes from stdout, we need str to be consistent
+                             log_revs = list(map(ascii_str, stdout.splitlines()))
+                             git_revs.extend(log_revs)
+                             # Pure pygit2 impl. but still 2-3x slower :/
+                             # results = []
+                             #
+                             # with Repository(os.getcwd()) as repo:
+                             #     repo_new_rev = repo[push_ref['new_rev']]
+                             #     repo_old_rev = repo[push_ref['old_rev']]
+                             #     walker = repo.walk(repo_new_rev.id, pygit2.GIT_SORT_TOPOLOGICAL)
+                             #
+                             #     for commit in walker:
+                             #         if commit.id == repo_old_rev.id:
+                             #             break
+                             #         results.append(commit.id.hex)
+                             #     # reverse the order, can't use GIT_SORT_REVERSE
+                             #     log_revs = results[::-1]
                      elif type_ == 'tags':
                          if push_ref['name'] not in tags:
                              tags.append(push_ref['name'])
-                         git_revs.append('tag=>%s' % push_ref['name'])
+                         git_revs.append(f'tag=>{push_ref["name"]}')
                  extras['hook_type'] = 'post_receive'
                  extras['commit_ids'] = git_revs
                  extras['new_refs'] = {
                      'branches': branches,
                      'bookmarks': [],
                      'tags': tags,
                  }
+                 stdout = sys.stdout
                  if 'repo_size' in extras['hooks']:
                      try:
-                         _call_hook('repo_size', extras, GitMessageWriter())
-                     except:
+                         _call_hook('repo_size', extras, GitMessageWriter(stdout))
+                     except Exception:
                          pass
-                 return _call_hook('post_push', extras, GitMessageWriter())
+                 status_code = _call_hook('post_push', extras, GitMessageWriter(stdout))
+                 return status_code
              def _get_extras_from_txn_id(path, txn_id):
                  """
                  Read the ``rc-scm-extras`` revision property stored on a transaction.

                  Runs ``settings.SVNLOOK_EXECUTABLE`` with ``pget -t <txn_id> --revprop
                  <path> rc-scm-extras`` and decodes its output as url-safe base64
                  wrapped JSON.

                  :param path: path handed to the command
                  :param txn_id: transaction id handed to the command via ``-t``
                  :return: the decoded extras, or an empty dict when any step fails
                      (the failure is logged together with its traceback)
                  """
                  try:
                      pget_cmd = [
                          settings.SVNLOOK_EXECUTABLE, 'pget',
                          '-t', txn_id,
                          '--revprop', path, 'rc-scm-extras',
                      ]
                      raw_extras, _stderr = subprocessio.run_command(
                          pget_cmd, env=os.environ.copy())
                      return json.loads(base64.urlsafe_b64decode(raw_extras))
                  except Exception:
                      log.exception('Failed to extract extras info from txn_id')
                  return {}
              def _get_extras_from_commit_id(commit_id, path):
                  """
                  Read the ``rc-scm-extras`` revision property stored on a revision.

                  Runs ``settings.SVNLOOK_EXECUTABLE`` with ``pget -r <commit_id>
                  --revprop <path> rc-scm-extras`` and decodes its output as url-safe
                  base64 wrapped JSON.

                  :param commit_id: revision handed to the command via ``-r``
                  :param path: path handed to the command
                  :return: the decoded extras, or an empty dict when any step fails
                      (the failure is logged together with its traceback)
                  """
                  try:
                      pget_cmd = [
                          settings.SVNLOOK_EXECUTABLE, 'pget',
                          '-r', commit_id,
                          '--revprop', path, 'rc-scm-extras',
                      ]
                      raw_extras, _stderr = subprocessio.run_command(
                          pget_cmd, env=os.environ.copy())
                      return json.loads(base64.urlsafe_b64decode(raw_extras))
                  except Exception:
                      log.exception('Failed to extract extras info from commit_id')
                  return {}
              def svn_pre_commit(repo_path, commit_data, env):
                  """
                  Pre commit hook for svn.

                  :param repo_path: not used by this function
                  :param commit_data: two item sequence ``(path, txn_id)``
                  :param env: mapping that may carry the JSON encoded extras under
                      ``RC_SCM_DATA``
                  :return: 0 when no extras could be obtained, otherwise the result of
                      the ``pre_push`` hook call
                  """
                  path, txn_id = commit_data
                  if env.get('RC_SCM_DATA'):
                      extras = json.loads(env['RC_SCM_DATA'])
                  else:
                      # fallback method to read from TXN-ID stored data
                      extras = _get_extras_from_txn_id(path, txn_id)
                      if not extras:
                          return 0
                  new_refs = {
                      'total_commits': 1,
                      'branches': [],
                      'bookmarks': [],
                      'tags': [],
                  }
                  extras['hook_type'] = 'pre_commit'
                  extras['commit_ids'] = [txn_id]
                  extras['txn_id'] = txn_id
                  extras['new_refs'] = new_refs
                  writer = SvnMessageWriter()
                  return _call_hook('pre_push', extras, writer)
              def svn_post_commit(repo_path, commit_data, env):
                  """
                  Post commit hook for svn.

                  :param repo_path: not used by this function
                  :param commit_data: ``(path, commit_id, txn_id)``; the legacy two item
                      form ``(path, commit_id)`` is accepted as well, in which case
                      ``txn_id`` is set to None and an error is logged
                  :param env: mapping that may carry the JSON encoded extras under
                      ``RC_SCM_DATA``
                  :return: 0 when no extras could be obtained, otherwise the result of
                      the ``post_push`` hook call
                  :raises ValueError: when ``commit_data`` has neither 2 nor 3 items
                  """
                  if len(commit_data) == 3:
                      path, commit_id, txn_id = commit_data
                  elif len(commit_data) == 2:
                      log.error('Failed to extract txn_id from commit_data using legacy method. '
                                'Some functionality might be limited')
                      path, commit_id = commit_data
                      txn_id = None
                  else:
                      # fail early and readable instead of hitting unbound locals below
                      raise ValueError(
                          f'commit_data must have 2 or 3 items, got {len(commit_data)}')
                  branches = []
                  tags = []
                  if env.get('RC_SCM_DATA'):
                      extras = json.loads(env['RC_SCM_DATA'])
                  else:
                      # fallback method to read from TXN-ID stored data
                      extras = _get_extras_from_commit_id(commit_id, path)
                      if not extras:
                          return 0
                  extras['hook_type'] = 'post_commit'
                  extras['commit_ids'] = [commit_id]
                  extras['txn_id'] = txn_id
                  extras['new_refs'] = {
                      'branches': branches,
                      'bookmarks': [],
                      'tags': tags,
                      'total_commits': 1,
                  }
                  if 'repo_size' in extras['hooks']:
                      try:
                          _call_hook('repo_size', extras, SvnMessageWriter())
                      except Exception:
                          # repo_size stays best-effort: record the failure, but never
                          # let it prevent the post_push hook from running
                          log.exception('Failed to call repo_size hook')
                  return _call_hook('post_push', extras, SvnMessageWriter())

vcsserver/http_main.py

0 +230 -160

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
+             import io
              import os
+             import platform
              import sys
-             import base64
              import locale
              import logging
              import uuid
+             import time
              import wsgiref.util
-             import traceback
              import tempfile
              import psutil
              from itertools import chain
-             from cStringIO import StringIO
-             import simplejson as json
              import msgpack
+             import configparser
              from pyramid.config import Configurator
-             from pyramid.settings import asbool, aslist
              from pyramid.wsgi import wsgiapp
-             from pyramid.compat import configparser
              from pyramid.response import Response
-             from vcsserver.utils import safe_int
+             from vcsserver.base import BytesEnvelope, BinaryEnvelope
+             from vcsserver.lib.rc_json import json
+             from vcsserver.config.settings_maker import SettingsMaker
+             from vcsserver.str_utils import safe_int
+             from vcsserver.lib.statsd_client import StatsdClient
+             from vcsserver.tweens.request_wrapper import get_headers_call_context
+             import vcsserver
+             from vcsserver import remote_wsgi, scm_app, settings, hgpatches
+             from vcsserver.git_lfs.app import GIT_LFS_CONTENT_TYPE, GIT_LFS_PROTO_PAT
+             from vcsserver.echo_stub import remote_wsgi as remote_wsgi_stub
+             from vcsserver.echo_stub.echo_app import EchoApp
+             from vcsserver.exceptions import HTTPRepoLocked, HTTPRepoBranchProtected
+             from vcsserver.lib.exc_tracking import store_exception, format_exc
+             from vcsserver.server import VcsServer
+             strict_vcs = True
+             git_import_err = None
+             try:
+                 from vcsserver.remote.git_remote import GitFactory, GitRemote
+             except ImportError as e:
+                 GitFactory = None
+                 GitRemote = None
+                 git_import_err = e
+                 if strict_vcs:
+                     raise
+             hg_import_err = None
+             try:
+                 from vcsserver.remote.hg_remote import MercurialFactory, HgRemote
+             except ImportError as e:
+                 MercurialFactory = None
+                 HgRemote = None
+                 hg_import_err = e
+                 if strict_vcs:
+                     raise
+             svn_import_err = None
+             try:
+                 from vcsserver.remote.svn_remote import SubversionFactory, SvnRemote
+             except ImportError as e:
+                 SubversionFactory = None
+                 SvnRemote = None
+                 svn_import_err = e
+                 if strict_vcs:
+                     raise
              # Module level logger, named after this module.
              log = logging.getLogger(__name__)
              # Due to Mercurial/glibc2.27 problems we need to detect whether the locale
              # settings are causing trouble. Setting LC_ALL from the environment is
              # attempted here; when that fails the error is logged and the process
              # environment falls back to LC_ALL=C.
              try:
                  locale.setlocale(locale.LC_ALL, '')
              except locale.Error as e:
                  log.error(
                      'LOCALE ERROR: failed to set LC_ALL, fallback to LC_ALL=C, org error: %s', e)
                  os.environ['LC_ALL'] = 'C'
-             import vcsserver
-             from vcsserver import remote_wsgi, scm_app, settings, hgpatches
-             from vcsserver.git_lfs.app import GIT_LFS_CONTENT_TYPE, GIT_LFS_PROTO_PAT
-             from vcsserver.echo_stub import remote_wsgi as remote_wsgi_stub
-             from vcsserver.echo_stub.echo_app import EchoApp
-             from vcsserver.exceptions import HTTPRepoLocked, HTTPRepoBranchProtected
-             from vcsserver.lib.exc_tracking import store_exception
-             from vcsserver.server import VcsServer
-             try:
-                 from vcsserver.git import GitFactory, GitRemote
-             except ImportError:
-                 GitFactory = None
-                 GitRemote = None
-             try:
-                 from vcsserver.hg import MercurialFactory, HgRemote
-             except ImportError:
-                 MercurialFactory = None
-                 HgRemote = None
-             try:
-                 from vcsserver.svn import SubversionFactory, SvnRemote
-             except ImportError:
-                 SubversionFactory = None
-                 SvnRemote = None
              def _is_request_chunked(environ):
                  stream = environ.get('HTTP_TRANSFER_ENCODING', '') == 'chunked'
                  return stream
-             def _int_setting(settings, name, default):
-                 settings[name] = int(settings.get(name, default))
-                 return settings[name]
-             def _bool_setting(settings, name, default):
-                 input_val = settings.get(name, default)
-                 if isinstance(input_val, unicode):
-                     input_val = input_val.encode('utf8')
-                 settings[name] = asbool(input_val)
-                 return settings[name]
-             def _list_setting(settings, name, default):
-                 raw_value = settings.get(name, default)
-                 # Otherwise we assume it uses pyramids space/newline separation.
-                 settings[name] = aslist(raw_value)
-                 return settings[name]
-             def _string_setting(settings, name, default, lower=True, default_when_empty=False):
-                 value = settings.get(name, default)
-                 if default_when_empty and not value:
-                     # use default value when value is empty
-                     value = default
-                 if lower:
-                     value = value.lower()
-                 settings[name] = value
-                 return settings[name]
              def log_max_fd():
                  """
                  Log the second value of the current process' ``RLIMIT_NOFILE`` limit.

                  Purely informational: any error raised while reading or logging the
                  limit is swallowed and the function returns normally.
                  """
                  try:
                      nofile_limits = psutil.Process().rlimit(psutil.RLIMIT_NOFILE)
                      log.info('Max file descriptors value: %s', nofile_limits[1])
                  except Exception:
                      return
-             class VCS(object):
+             class VCS:
                  def __init__(self, locale_conf=None, cache_config=None):
                      self.locale = locale_conf
                      self.cache_config = cache_config
                      self._configure_locale()
                      log_max_fd()
                      if GitFactory and GitRemote:
                          git_factory = GitFactory()
                          self._git_remote = GitRemote(git_factory)
                      else:
-                         log.info("Git client import failed")
+                         log.error("Git client import failed: %s", git_import_err)
                      if MercurialFactory and HgRemote:
                          hg_factory = MercurialFactory()
                          self._hg_remote = HgRemote(hg_factory)
                      else:
-                         log.info("Mercurial client import failed")
+                         log.error("Mercurial client import failed: %s", hg_import_err)
                      if SubversionFactory and SvnRemote:
                          svn_factory = SubversionFactory()
                          # hg factory is used for svn url validation
                          hg_factory = MercurialFactory()
                          self._svn_remote = SvnRemote(svn_factory, hg_factory=hg_factory)
                      else:
-                         log.info("Subversion client import failed")
+                         log.error("Subversion client import failed: %s", svn_import_err)
                      self._vcsserver = VcsServer()
                  def _configure_locale(self):
                      if self.locale:
                          log.info('Settings locale: `LC_ALL` to %s', self.locale)
                      else:
-                         log.info(
-                             'Configuring locale subsystem based on environment variables')
+                         log.info('Configuring locale subsystem based on environment variables')
                      try:
                          # If self.locale is the empty string, then the locale
                          # module will use the environment variables. See the
                          # documentation of the package `locale`.
                          locale.setlocale(locale.LC_ALL, self.locale)
                          language_code, encoding = locale.getlocale()
                          log.info(
                              'Locale set to language code "%s" with encoding "%s".',
                              language_code, encoding)
                      except locale.Error:
-                         log.exception(
-                             'Cannot set locale, not configuring the locale system')
+                         log.exception('Cannot set locale, not configuring the locale system')
-             class WsgiProxy(object):
+             class WsgiProxy:
                  def __init__(self, wsgi):
                      # Wrapped object; its `handle(...)` method is invoked for every
                      # call that goes through this proxy.
                      self.wsgi = wsgi
                  def __call__(self, environ, start_response):
                      """
                      Decode the msgpack request body, run the wrapped handler and return
                      an iterator over the msgpack encoded outcome.

                      The body must unpack to a mapping with the keys ``environment``,
                      ``input_data``, ``args`` and ``kwargs``. An exception raised by the
                      handler is not propagated; it is reported as the first item of the
                      returned iterator instead.
                      """
                      payload = msgpack.unpackb(environ['wsgi.input'].read())
                      try:
                          data, status, headers = self.wsgi.handle(
                              payload['environment'], payload['input_data'],
                              *payload['args'], **payload['kwargs'])
                      except Exception as exc:
                          data, status, headers = [], None, None
                          error = {
                              'message': str(exc),
                              '_vcs_kind': getattr(exc, '_vcs_kind', None)
                          }
                      else:
                          error = None
                      # the response is always started the same way; the handler's own
                      # status and headers travel inside the returned iterator
                      start_response(200, {})
                      return self._iterator(error, status, headers, data)
                  def _iterator(self, error, status, headers, data):
                      """
                      Yield ``error``, ``status`` and ``headers`` (in that order) followed
                      by every item of ``data``, each one packed with msgpack.
                      """
                      preamble = (error, status, headers)
                      for item in chain(preamble, data):
                          packed_item = msgpack.packb(item)
                          yield packed_item
              def not_found(request):
                  """
                  Return the fixed "not found" payload; ``request`` is ignored.

                  :return: a new dict ``{'status': '404 NOT FOUND'}`` on every call
                  """
                  payload = {'status': '404 NOT FOUND'}
                  return payload
-             class VCSViewPredicate(object):
+             class VCSViewPredicate:
                  def __init__(self, val, config):
                      # `val` is stored as the container of supported remotes that
                      # `__call__` checks the backend against; `config` is not used here.
                      self.remotes = val
                  def text(self):
-                     return 'vcs view method = %s' % (self.remotes.keys(),)
+                     return f'vcs view method = {list(self.remotes.keys())}'
                  phash = text
                  def __call__(self, context, request):
                      """
                      View predicate that returns true if given backend is supported by
                      defined remotes.
                      """
                      backend = request.matchdict.get('backend')
                      return backend in self.remotes
-             class HTTPApplication(object):
+             class HTTPApplication:
                  ALLOWED_EXCEPTIONS = ('KeyError', 'URLError')
                  remote_wsgi = remote_wsgi
                  _use_echo_app = False
                  def __init__(self, settings=None, global_config=None):
-                     self._sanitize_settings_and_apply_defaults(settings)
                      self.config = Configurator(settings=settings)
+                     # Init our statsd at very start
+                     self.config.registry.statsd = StatsdClient.statsd
+                     self.config.registry.vcs_call_context = {}
                      self.global_config = global_config
                      self.config.include('vcsserver.lib.rc_cache')
+                     self.config.include('vcsserver.lib.rc_cache.archive_cache')
                      settings_locale = settings.get('locale', '') or 'en_US.UTF-8'
                      vcs = VCS(locale_conf=settings_locale, cache_config=settings)
                      self._remotes = {
                          'hg': vcs._hg_remote,
                          'git': vcs._git_remote,
                          'svn': vcs._svn_remote,
                          'server': vcs._vcsserver,
                      }
                      if settings.get('dev.use_echo_app', 'false').lower() == 'true':
                          self._use_echo_app = True
                          log.warning("Using EchoApp for VCS operations.")
                          self.remote_wsgi = remote_wsgi_stub
                      self._configure_settings(global_config, settings)
                      self._configure()
                  def _configure_settings(self, global_config, app_settings):
                      """
                      Publish the configuration on the ``settings`` and ``vcsserver`` modules.

                      ``app_settings`` is layered on top of a copy of ``global_config``; the
                      merged mapping is stored as ``vcsserver.PYRAMID_SETTINGS`` and
                      ``vcsserver.CONFIG``. Non-empty ``git_path`` and ``core.binary_dir``
                      values of ``app_settings`` override ``settings.GIT_EXECUTABLE`` and
                      ``settings.BINARY_DIR``.
                      """
                      merged = global_config.copy()
                      merged.update(app_settings)
                      # optional overrides, applied only when a non-empty value is set
                      overrides = (
                          ('git_path', 'GIT_EXECUTABLE'),
                          ('core.binary_dir', 'BINARY_DIR'),
                      )
                      for setting_key, attr_name in overrides:
                          configured_value = app_settings.get(setting_key, None)
                          if configured_value:
                              setattr(settings, attr_name, configured_value)
                      # Store the settings to make them available to other modules.
                      vcsserver.PYRAMID_SETTINGS = merged
                      vcsserver.CONFIG = merged
-                 def _sanitize_settings_and_apply_defaults(self, settings):
-                     temp_store = tempfile.gettempdir()
-                     default_cache_dir = os.path.join(temp_store, 'rc_cache')
-                     # save default, cache dir, and use it for all backends later.
-                     default_cache_dir = _string_setting(
-                         settings,
-                         'cache_dir',
-                         default_cache_dir, lower=False, default_when_empty=True)
-                     # ensure we have our dir created
-                     if not os.path.isdir(default_cache_dir):
-                         os.makedirs(default_cache_dir, mode=0o755)
-                     # exception store cache
-                     _string_setting(
-                         settings,
-                         'exception_tracker.store_path',
-                         temp_store, lower=False, default_when_empty=True)
-                     # repo_object cache
-                     _string_setting(
-                         settings,
-                         'rc_cache.repo_object.backend',
-                         'dogpile.cache.rc.file_namespace', lower=False)
-                     _int_setting(
-                         settings,
-                         'rc_cache.repo_object.expiration_time',
-                         30 * 24 * 60 * 60)
-                     _string_setting(
-                         settings,
-                         'rc_cache.repo_object.arguments.filename',
-                         os.path.join(default_cache_dir, 'vcsserver_cache_1'), lower=False)
                  def _configure(self):
                      """
                      Register renderer, routes, views, tween and request helpers
                      on ``self.config``.

                      Routes and views are added in a fixed sequence; the route table
                      below is iterated top to bottom.
                      """
                      config = self.config
                      config.add_renderer(name='msgpack', factory=self._msgpack_renderer_factory)

                      routes = (
                          ('service', '/_service'),
                          ('status', '/status'),
                          ('hg_proxy', '/proxy/hg'),
                          ('git_proxy', '/proxy/git'),
                          # rpc methods
                          ('vcs', '/{backend}'),
                          # streaming rpc remote methods
                          ('vcs_stream', '/{backend}/stream'),
                          # vcs operations clone/push as streaming
                          ('stream_git', '/stream/git/*repo_name'),
                          ('stream_hg', '/stream/hg/*repo_name'),
                      )
                      for route_name, pattern in routes:
                          config.add_route(route_name, pattern)

                      # plain views
                      config.add_view(self.status_view, route_name='status', renderer='json')
                      config.add_view(self.service_view, route_name='service', renderer='msgpack')

                      # proxy views, each factory returns the WSGI callable to register
                      config.add_view(self.hg_proxy(), route_name='hg_proxy')
                      config.add_view(self.git_proxy(), route_name='git_proxy')

                      # rpc views, restricted through the custom `vcs_view` predicate
                      config.add_view(
                          self.vcs_view, route_name='vcs', renderer='msgpack',
                          vcs_view=self._remotes)
                      config.add_view(
                          self.vcs_stream_view, route_name='vcs_stream',
                          vcs_view=self._remotes)

                      # streaming clone/push views
                      config.add_view(self.hg_stream(), route_name='stream_hg')
                      config.add_view(self.git_stream(), route_name='stream_git')

                      config.add_view_predicate('vcs_view', VCSViewPredicate)

                      # error handling
                      config.add_notfound_view(not_found, renderer='json')
                      config.add_view(self.handle_vcs_exception, context=Exception)

                      config.add_tween(
                          'vcsserver.tweens.request_wrapper.RequestWrapperTween',
                      )
                      config.add_request_method(
                          'vcsserver.lib.request_counter.get_request_counter',
                          'request_count')
-                     self.config.add_request_method(
-                         'vcsserver.lib._vendor.statsd.get_statsd_client',
-                         'statsd', reify=True)
                  def wsgi_app(self):
                      """Build the WSGI application from ``self.config`` and return it."""
                      application = self.config.make_wsgi_app()
                      return application
                  def _vcs_view_params(self, request):
                      remote = self._remotes[request.matchdict['backend']]
                      payload = msgpack.unpackb(request.body, use_list=True)
                      method = payload.get('method')
                      params = payload['params']
                      wire = params.get('wire')
                      args = params.get('args')
                      kwargs = params.get('kwargs')
                      context_uid = None
+                     request.registry.vcs_call_context = {
+                         'method': method,
+                         'repo_name': payload.get('_repo_name'),
+                     }
                      if wire:
                          try:
                              wire['context'] = context_uid = uuid.UUID(wire['context'])
                          except KeyError:
                              pass
                          args.insert(0, wire)
                      repo_state_uid = wire.get('repo_state_uid') if wire else None
                      # NOTE(marcink): trading complexity for slight performance
                      if log.isEnabledFor(logging.DEBUG):
-                         no_args_methods = [
+                         ]
-                         if method in no_args_methods:
+                         # also we SKIP printing out any of those methods args since they maybe excessive
+                         just_args_methods = {
+                             'commitctx': ('content', 'removed', 'updated'),
+                             'commit': ('content', 'removed', 'updated')
+                         }
+                         if method in just_args_methods:
+                             skip_args = just_args_methods[method]
                              call_args = ''
+                             call_kwargs = {}
+                             for k in kwargs:
+                                 if k in skip_args:
+                                     # replace our skip key with dummy
+                                     call_kwargs[k] = f'RemovedParam({k})'
+                                 else:
+                                     call_kwargs[k] = kwargs[k]
                          else:
                              call_args = args[1:]
+                             call_kwargs = kwargs
                          log.debug('Method requested:`%s` with args:%s kwargs:%s context_uid: %s, repo_state_uid:%s',
-                                   method, call_args, kwargs, context_uid, repo_state_uid)
+                                   method, call_args, call_kwargs, context_uid, repo_state_uid)
+                     statsd = request.registry.statsd
+                     if statsd:
+                         statsd.incr(
+                             'vcsserver_method_total', tags=[
+                                 f"method:{method}",
+                             ])
                      return payload, remote, method, args, kwargs
                  def vcs_view(self, request):
                      payload, remote, method, args, kwargs = self._vcs_view_params(request)
                      payload_id = payload.get('id')
                      try:
                          resp = getattr(remote, method)(*args, **kwargs)
                      except Exception as e:
                          exc_info = list(sys.exc_info())
                          exc_type, exc_value, exc_traceback = exc_info
                          org_exc = getattr(e, '_org_exc', None)
                          org_exc_name = None
                          org_exc_tb = ''
                          if org_exc:
                              org_exc_name = org_exc.__class__.__name__
                              org_exc_tb = getattr(e, '_org_exc_tb', '')
                              # replace our "faked" exception with our org
                              exc_info[0] = org_exc.__class__
                              exc_info[1] = org_exc
                          should_store_exc = True
                          if org_exc:
                              def get_exc_fqn(_exc_obj):
                                  module_name = getattr(org_exc.__class__, '__module__', 'UNKNOWN')
                                  return module_name + '.' + org_exc_name
                              exc_fqn = get_exc_fqn(org_exc)
                              if exc_fqn in ['mercurial.error.RepoLookupError',
                                             'vcsserver.exceptions.RefNotFoundException']:
                                  should_store_exc = False
                          if should_store_exc:
                              store_exception(id(exc_info), exc_info, request_path=request.path)
-                         tb_info = ''.join(
-                             traceback.format_exception(exc_type, exc_value, exc_traceback))
+                         tb_info = format_exc(exc_info)
                          type_ = e.__class__.__name__
                          if type_ not in self.ALLOWED_EXCEPTIONS:
                              type_ = None
                          resp = {
                              'id': payload_id,
                              'error': {
-                                 'message': e.message,
+                                 'message': str(e),
                                  'traceback': tb_info,
                                  'org_exc': org_exc_name,
                                  'org_exc_tb': org_exc_tb,
                                  'type': type_
                              }
                          }
                          try:
                              resp['error']['_vcs_kind'] = getattr(e, '_vcs_kind', None)
                          except AttributeError:
                              pass
                      else:
                          resp = {
                              'id': payload_id,
                              'result': resp
                          }
+                     log.debug('Serving data for method %s', method)
                      return resp
                  def vcs_stream_view(self, request):
                      payload, remote, method, args, kwargs = self._vcs_view_params(request)
                      # this method has a stream: marker we remove it here
                      method = method.split('stream:')[-1]
                      chunk_size = safe_int(payload.get('chunk_size')) or 4096
-                     try:
-                         resp = getattr(remote, method)(*args, **kwargs)
-                     except Exception as e:
-                         raise
                      def get_chunked_data(method_resp):
-                         stream = StringIO(method_resp)
+                         stream = io.BytesIO(method_resp)
                          while 1:
                              chunk = stream.read(chunk_size)
                              if not chunk:
                                  break
                              yield chunk
                      response = Response(app_iter=get_chunked_data(resp))
                      response.content_type = 'application/octet-stream'
                      return response
                  def status_view(self, request):
                      import vcsserver
-                     return {'status': 'OK', 'vcsserver_version': vcsserver.__version__,
-                             'pid': os.getpid()}
+                     _platform_id = platform.uname()[1] or 'instance'
+                     return {
+                         "status": "OK",
+                         "vcsserver_version": vcsserver.get_version(),
+                         "platform": _platform_id,
+                         "pid": os.getpid(),
+                     }
                  def service_view(self, request):
                      import vcsserver
                      payload = msgpack.unpackb(request.body, use_list=True)
                      server_config, app_config = {}, {}
                      try:
                          path = self.global_config['__file__']
                          config = configparser.RawConfigParser()
                          config.read(path)
                          if config.has_section('server:main'):
                              server_config = dict(config.items('server:main'))
                          if config.has_section('app:main'):
                              app_config = dict(config.items('app:main'))
                      except Exception:
                          log.exception('Failed to read .ini file for display')
-                     environ = os.environ.items()
+                     environ = list(os.environ.items())
                      resp = {
                          'id': payload.get('id'),
                          'result': dict(
-                             version=vcsserver.__version__,
+                             version=vcsserver.get_version(),
                              config=server_config,
                              app_config=app_config,
                              environ=environ,
                              payload=payload,
                          )
                      }
                      return resp
                  def _msgpack_renderer_factory(self, info):
                      def _render(value, system):
+                         bin_type = False
+                         res = value.get('result')
+                         if isinstance(res, BytesEnvelope):
+                             log.debug('Result is wrapped in BytesEnvelope type')
+                             bin_type = True
+                         elif isinstance(res, BinaryEnvelope):
+                             log.debug('Result is wrapped in BinaryEnvelope type')
+                             value['result'] = res.val
+                             bin_type = True
                          request = system.get('request')
                          if request is not None:
                              response = request.response
                              ct = response.content_type
                              if ct == response.default_content_type:
                                  response.content_type = 'application/x-msgpack'
-                         return msgpack.packb(value)
+                                 if bin_type:
+                                     response.content_type = 'application/x-msgpack-bin'
+                         return msgpack.packb(value, use_bin_type=bin_type)
                      return _render
                  def set_env_from_config(self, environ, config):
                      """
                      Copy user/ip details from the ``rhodecode`` config entry into ``environ``.

                      ``config`` is iterated as a sequence of indexable entries; the first
                      entry whose item ``0`` equals ``'rhodecode'`` has its item ``2``
                      decoded as JSON.  Any error raised while doing so is logged and the
                      method returns without touching ``environ``.
                      """
                      rc_extras = {}
                      try:
                          for entry in config:
                              if entry[0] != 'rhodecode':
                                  continue
                              rc_extras = json.loads(entry[2])
                              break
                      except Exception:
                          log.exception('Failed to fetch SCM CONFIG')
                          return

                      user = rc_extras.get('username')
                      if user:
                          environ['REMOTE_USER'] = user
                          # mercurial specific, some extension api rely on this
                          environ['HGUSER'] = user

                      remote_ip = rc_extras.get('ip')
                      if remote_ip:
                          environ['REMOTE_HOST'] = remote_ip

                      if _is_request_chunked(environ):
                          # set the compatibility flag for webob
                          environ['wsgi.input_terminated'] = True
                  def hg_proxy(self):
                      """
                      Return the WSGI callable registered as the view of the ``hg_proxy`` route.

                      On every call it wraps a newly created ``HgRemoteWsgi`` (taken from
                      ``self.remote_wsgi``) in a ``WsgiProxy`` and delegates the request to it.
                      """
                      @wsgiapp
                      def _hg_proxy(environ, start_response):
                          remote_app = self.remote_wsgi.HgRemoteWsgi()
                          proxy = WsgiProxy(remote_app)
                          return proxy(environ, start_response)

                      return _hg_proxy
                  def git_proxy(self):
                      """
                      Return the WSGI callable registered as the view of the ``git_proxy`` route.

                      On every call it wraps a newly created ``GitRemoteWsgi`` (taken from
                      ``self.remote_wsgi``) in a ``WsgiProxy`` and delegates the request to it.
                      """
                      @wsgiapp
                      def _git_proxy(environ, start_response):
                          remote_app = self.remote_wsgi.GitRemoteWsgi()
                          proxy = WsgiProxy(remote_app)
                          return proxy(environ, start_response)

                      return _git_proxy
                  def hg_stream(self):
                      if self._use_echo_app:
                          @wsgiapp
                          def _hg_stream(environ, start_response):
                              app = EchoApp('fake_path', 'fake_name', None)
                              return app(environ, start_response)
                          return _hg_stream
                      else:
                          @wsgiapp
                          def _hg_stream(environ, start_response):
                              log.debug('http-app: handling hg stream')
-                             repo_path = environ['HTTP_X_RC_REPO_PATH']
-                             repo_name = environ['HTTP_X_RC_REPO_NAME']
-                             packed_config = base64.b64decode(
-                                 environ['HTTP_X_RC_REPO_CONFIG'])
-                             config = msgpack.unpackb(packed_config)
+                             call_context = get_headers_call_context(environ)
+                             repo_path = call_context['repo_path']
+                             repo_name = call_context['repo_name']
+                             config = call_context['repo_config']
                              app = scm_app.create_hg_wsgi_app(
                                  repo_path, repo_name, config)
                              # Consistent path information for hgweb
-                             environ['PATH_INFO'] = environ['HTTP_X_RC_PATH_INFO']
+                             environ['PATH_INFO'] = call_context['path_info']
                              environ['REPO_NAME'] = repo_name
                              self.set_env_from_config(environ, config)
                              log.debug('http-app: starting app handler '
                                        'with %s and process request', app)
                              return app(environ, ResponseFilter(start_response))
                          return _hg_stream
                  def git_stream(self):
                      if self._use_echo_app:
                          @wsgiapp
                          def _git_stream(environ, start_response):
                              app = EchoApp('fake_path', 'fake_name', None)
                              return app(environ, start_response)
                          return _git_stream
                      else:
                          @wsgiapp
                          def _git_stream(environ, start_response):
                              log.debug('http-app: handling git stream')
-                             repo_path = environ['HTTP_X_RC_REPO_PATH']
-                             repo_name = environ['HTTP_X_RC_REPO_NAME']
-                             packed_config = base64.b64decode(
-                                 environ['HTTP_X_RC_REPO_CONFIG'])
-                             config = msgpack.unpackb(packed_config)
+                             call_context = get_headers_call_context(environ)
-                             environ['PATH_INFO'] = environ['HTTP_X_RC_PATH_INFO']
+                             repo_path = call_context['repo_path']
+                             repo_name = call_context['repo_name']
+                             config = call_context['repo_config']
+                             environ['PATH_INFO'] = call_context['path_info']
                              self.set_env_from_config(environ, config)
                              content_type = environ.get('CONTENT_TYPE', '')
                              path = environ['PATH_INFO']
                              is_lfs_request = GIT_LFS_CONTENT_TYPE in content_type
                              log.debug(
                                  'LFS: Detecting if request `%s` is LFS server path based '
                                  'on content type:`%s`, is_lfs:%s',
                                  path, content_type, is_lfs_request)
                              if not is_lfs_request:
                                  # fallback detection by path
                                  if GIT_LFS_PROTO_PAT.match(path):
                                      is_lfs_request = True
                                  log.debug(
                                      'LFS: fallback detection by path of: `%s`, is_lfs:%s',
                                      path, is_lfs_request)
                              if is_lfs_request:
                                  app = scm_app.create_git_lfs_wsgi_app(
                                      repo_path, repo_name, config)
                              else:
                                  app = scm_app.create_git_wsgi_app(
                                      repo_path, repo_name, config)
                              log.debug('http-app: starting app handler '
                                        'with %s and process request', app)
                              return app(environ, start_response)
                          return _git_stream
                  def handle_vcs_exception(self, exception, request):
                      _vcs_kind = getattr(exception, '_vcs_kind', '')
                      if _vcs_kind == 'repo_locked':
-                         # Get custom repo-locked status code if present.
-                         status_code = request.headers.get('X-RC-Locked-Status-Code')
+                         headers_call_context = get_headers_call_context(request.environ)
+                         status_code = safe_int(headers_call_context['locked_status_code'])
                          return HTTPRepoLocked(
-                             title=exception.message, status_code=status_code)
+                             title=str(exception), status_code=status_code, headers=[('X-Rc-Locked', '1')])
                      elif _vcs_kind == 'repo_branch_protected':
                          # Get custom repo-branch-protected status code if present.
-                         return HTTPRepoBranchProtected(title=exception.message)
+                         return HTTPRepoBranchProtected(
+                             title=str(exception), headers=[('X-Rc-Branch-Protection', '1')])
                      exc_info = request.exc_info
                      store_exception(id(exc_info), exc_info)
                      traceback_info = 'unavailable'
                      if request.exc_info:
-                         exc_type, exc_value, exc_tb = request.exc_info
-                         traceback_info = ''.join(traceback.format_exception(exc_type, exc_value, exc_tb))
+                         traceback_info = format_exc(request.exc_info)
                      log.error(
-                         'error occurred handling this request for path: %s, \n tb: %s',
+                         'error occurred handling this request for path: %s, \n%s',
                          request.path, traceback_info)
+                     statsd = request.registry.statsd
+                     if statsd:
+                         exc_type = f"{exception.__class__.__module__}.{exception.__class__.__name__}"
+                         statsd.incr('vcsserver_exception_total',
+                                     tags=[f"type:{exc_type}"])
                      raise exception
-             class ResponseFilter(object):
+             class ResponseFilter:
                  def __init__(self, start_response):
                      """Keep the ``start_response`` callable that ``__call__`` delegates to."""
                      self._start_response = start_response
                  def __call__(self, status, response_headers, exc_info=None):
                      headers = tuple(
                          (h, v) for h, v in response_headers
                          if not wsgiref.util.is_hop_by_hop(h))
                      return self._start_response(status, headers, exc_info)
+             def sanitize_settings_and_apply_defaults(global_config, settings):
+                 _global_settings_maker = SettingsMaker(global_config)
+                 settings_maker = SettingsMaker(settings)
+                 settings_maker.make_setting('logging.autoconfigure', False, parser='bool')
+                 logging_conf = os.path.join(os.path.dirname(global_config.get('__file__')), 'logging.ini')
+                 settings_maker.enable_logging(logging_conf)
+                 # Default includes, possible to change as a user
+                 pyramid_includes = settings_maker.make_setting('pyramid.includes', [], parser='list:newline')
+                 log.debug("Using the following pyramid.includes: %s", pyramid_includes)
+                 settings_maker.make_setting('__file__', global_config.get('__file__'))
+                 settings_maker.make_setting('pyramid.default_locale_name', 'en')
+                 settings_maker.make_setting('locale', 'en_US.UTF-8')
+                 settings_maker.make_setting('core.binary_dir', '')
+                 temp_store = tempfile.gettempdir()
+                 default_cache_dir = os.path.join(temp_store, 'rc_cache')
+                 # save default, cache dir, and use it for all backends later.
+                 default_cache_dir = settings_maker.make_setting(
+                     'cache_dir',
+                     default=default_cache_dir, default_when_empty=True,
+                     parser='dir:ensured')
+                 # exception store cache
+                 settings_maker.make_setting(
+                     'exception_tracker.store_path',
+                     default=os.path.join(default_cache_dir, 'exc_store'), default_when_empty=True,
+                     parser='dir:ensured'
+                 )
+                 # repo_object cache defaults
+                 settings_maker.make_setting(
+                     'rc_cache.repo_object.backend',
+                     default='dogpile.cache.rc.file_namespace',
+                     parser='string')
+                 settings_maker.make_setting(
+                     'rc_cache.repo_object.expiration_time',
+                     default=30 * 24 * 60 * 60,  # 30days
+                     parser='int')
+                 settings_maker.make_setting(
+                     'rc_cache.repo_object.arguments.filename',
+                     default=os.path.join(default_cache_dir, 'vcsserver_cache_repo_object.db'),
+                     parser='string')
+                 # statsd
+                 settings_maker.make_setting('statsd.enabled', False, parser='bool')
+                 settings_maker.make_setting('statsd.statsd_host', 'statsd-exporter', parser='string')
+                 settings_maker.make_setting('statsd.statsd_port', 9125, parser='int')
+                 settings_maker.make_setting('statsd.statsd_prefix', '')
+                 settings_maker.make_setting('statsd.statsd_ipv6', False, parser='bool')
+                 settings_maker.env_expand()
              def main(global_config, **settings):
+                 start_time = time.time()
+                 log.info('Pyramid app config starting')
                  if MercurialFactory:
                      hgpatches.patch_largefiles_capabilities()
                      hgpatches.patch_subrepo_type_mapping()
-                 app = HTTPApplication(settings=settings, global_config=global_config)
-                 return app.wsgi_app()
+                 # Fill in and sanitize the defaults & do ENV expansion
+                 sanitize_settings_and_apply_defaults(global_config, settings)
+                 # init and bootstrap StatsdClient
+                 StatsdClient.setup(settings)
+                 pyramid_app = HTTPApplication(settings=settings, global_config=global_config).wsgi_app()
+                 total_time = time.time() - start_time
+                 log.info('Pyramid app created and configured in %.2fs', total_time)
+                 return pyramid_app

vcsserver/lib/__init__.py

0 +1 -1

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA

vcsserver/lib/_vendor/__init__.py

0 +1 -1

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              # This package contains non rhodecode licensed packages that are
              # vendored for various reasons
              import os
              import sys
              # Absolute directory containing this module; computed once at import time.
              vendor_dir = os.path.dirname(os.path.abspath(__file__))
              # Appended at the end of sys.path, so it is searched after every entry
              # that was already present.
              sys.path.append(vendor_dir)

vcsserver/lib/_vendor/redis_lock/__init__.py

0 +40 -36

-             import sys
              import threading
              import weakref
              from base64 import b64encode
              from logging import getLogger
              from os import urandom
+             from typing import Union
              from redis import StrictRedis
-             __version__ = '3.7.0'
+             __version__ = '4.0.0'
              # One logger per lock life-cycle event, keyed by the event name and named
              # "vcsserver.<this module's __name__>.<event name>".
              loggers = {
                  event: getLogger(f"vcsserver.{__name__}.{event}")
                  for event in (
                      "acquire",
                      "refresh.thread.start",
                      "refresh.thread.stop",
                      "refresh.thread.exit",
                      "refresh.start",
                      "refresh.shutdown",
                      "refresh.exit",
                      "release",
                  )
              }
-             PY3 = sys.version_info[0] == 3
-             if PY3:
-                 text_type = str
-                 binary_type = bytes
-             else:
-                 text_type = unicode  # noqa
-                 binary_type = str
              # Lua script: release a lock only when the stored id matches.
              # Compares the value stored at KEYS[1] with ARGV[1] (the lock id):
              #   - mismatch: returns 1 (error code) and changes nothing;
              #   - match: replaces KEYS[2] with a single-element list that expires
              #     after ARGV[2] milliseconds (PEXPIRE), deletes KEYS[1], returns 0.
              # NOTE(review): KEYS[1]/KEYS[2] are presumably the lock key and its signal
              # list (cf. the 'lock:' / 'lock-signal:' names in RESET_ALL_SCRIPT) -
              # confirm against the caller.
              UNLOCK_SCRIPT = b"""
                  if redis.call("get", KEYS[1]) ~= ARGV[1] then
                      return 1
                  else
                      redis.call("del", KEYS[2])
                      redis.call("lpush", KEYS[2], 1)
                      redis.call("pexpire", KEYS[2], ARGV[2])
                      redis.call("del", KEYS[1])
                      return 0
                  end
              """
              # Lua script: reset the expiry of a lock whose stored id matches.
              # Return codes:
              #   1 - the value at KEYS[1] differs from ARGV[1]; this covers both a
              #       missing key and a key that holds a different lock id.
              #   2 - the id matches but TTL is negative (Redis reports -1 for a key
              #       that has no expiry); the key is left untouched.
              #   0 - the expiry of KEYS[1] was set to ARGV[2] seconds (EXPIRE).
              EXTEND_SCRIPT = b"""
                  if redis.call("get", KEYS[1]) ~= ARGV[1] then
                      return 1
                  elseif redis.call("ttl", KEYS[1]) < 0 then
                      return 2
                  else
                      redis.call("expire", KEYS[1], ARGV[2])
                      return 0
                  end
              """
              RESET_SCRIPT = b"""
                  redis.call('del', KEYS[2])
                  redis.call('lpush', KEYS[2], 1)
                  redis.call('pexpire', KEYS[2], ARGV[2])
                  return redis.call('del', KEYS[1])
              """
              RESET_ALL_SCRIPT = b"""
                  local locks = redis.call('keys', 'lock:*')
                  local signal
                  for _, lock in pairs(locks) do
                      signal = 'lock-signal:' .. string.sub(lock, 6)
                      redis.call('del', signal)
                      redis.call('lpush', signal, 1)
                      redis.call('expire', signal, 1)
                      redis.call('del', lock)
                  end
                  return #locks
              """
              class AlreadyAcquired(RuntimeError):
                  """Raised by ``acquire()`` when this lock id already owns the lock."""
              class NotAcquired(RuntimeError):
                  """Raised when extending or releasing a lock that this id does not hold."""
              class AlreadyStarted(RuntimeError):
                  """Raised when the renewal thread is started a second time."""
              class TimeoutNotUsable(RuntimeError):
                  """Raised when a timeout is combined with ``blocking=False``."""
              class InvalidTimeout(RuntimeError):
                  """Raised when a negative timeout is passed to ``acquire()``."""
              class TimeoutTooLarge(RuntimeError):
                  """Raised when the timeout exceeds the expiry of a non-renewing lock."""
              class NotExpirable(RuntimeError):
                  """Raised when extending a lock whose key has no expiration time."""
-             class Lock(object):
+             class Lock:
                  """
                  A Lock context manager implemented via redis SETNX/BLPOP.
                  """
                  unlock_script = None
                  extend_script = None
                  reset_script = None
                  reset_all_script = None
+                 _lock_renewal_interval: float
+                 _lock_renewal_thread: Union[threading.Thread, None]
                  def __init__(self, redis_client, name, expire=None, id=None, auto_renewal=False, strict=True, signal_expire=1000):
                      """
                      Validate the arguments and set up the lock state; the lock itself
                      is not acquired here.
                      :param redis_client:
                          An instance of :class:`~StrictRedis`.
                      :param name:
                          The name (redis key) the lock should have.
                      :param expire:
                          The lock expiry time in seconds. If left at the default (None)
                          the lock will not expire.
                      :param id:
                          The ID (redis value) the lock should have. A random value is
                          generated when left at the default.
                          Note that if you specify this then the lock is marked as "held". Acquires
                          won't be possible.
                      :param auto_renewal:
                          If set to ``True``, Lock will automatically renew the lock so that it
                          doesn't expire for as long as the lock is held (acquire() called
                          or running in a context manager).
                          Implementation note: Renewal will happen using a daemon thread with
                          an interval of ``expire*2/3``. If wishing to use a different renewal
                          time, subclass Lock, call ``super().__init__()`` then set
                          ``self._lock_renewal_interval`` to your desired interval.
                      :param strict:
                          If set ``True`` then the ``redis_client`` needs to be an instance of ``redis.StrictRedis``.
                      :param signal_expire:
                          Advanced option to override signal list expiration in milliseconds. Increase it for very slow clients. Default: ``1000``.
                      :raises ValueError:
                          If ``redis_client`` fails the strict type check, if ``auto_renewal``
                          is set without ``expire``, or if ``expire`` is negative.
                      :raises TypeError:
                          If ``id`` is neither bytes nor str.
                      """
                      if strict and not isinstance(redis_client, StrictRedis):
                          raise ValueError("redis_client must be instance of StrictRedis. "
                                           "Use strict=False if you know what you're doing.")
                      if auto_renewal and expire is None:
                          raise ValueError("Expire may not be None when auto_renewal is set")
                      self._client = redis_client
                      # Any falsy expire (None, 0, "") is normalised to None, i.e. "never expires".
                      if expire:
                          expire = int(expire)
                          if expire < 0:
                              raise ValueError("A negative expire is not acceptable.")
                      else:
                          expire = None
                      self._expire = expire
                      self._signal_expire = signal_expire
                      # The id is always stored as text: random ids are base64 encoded, and
                      # bytes ids are decoded as ASCII or base64 encoded when not ASCII.
                      # NOTE(review): in this diff view binary_type/text_type only appear on
                      # removed lines - confirm the module still defines them (bytes/str).
                      if id is None:
                          self._id = b64encode(urandom(18)).decode('ascii')
                      elif isinstance(id, binary_type):
                          try:
                              self._id = id.decode('ascii')
                          except UnicodeDecodeError:
                              self._id = b64encode(id).decode('ascii')
                      elif isinstance(id, text_type):
                          self._id = id
                      else:
-                         raise TypeError("Incorrect type for `id`. Must be bytes/str not %s." % type(id))
+                         raise TypeError(f"Incorrect type for `id`. Must be bytes/str not {type(id)}.")
                      # Redis key holding the owner id, and the list waiters block on (BLPOP).
                      self._name = 'lock:' + name
                      self._signal = 'lock-signal:' + name
                      # NOTE(review): with auto_renewal=True and expire=0 the value was
                      # normalised to None above, so float(expire) raises TypeError here.
                      self._lock_renewal_interval = (float(expire) * 2 / 3
                                                     if auto_renewal
                                                     else None)
                      self._lock_renewal_thread = None
                      self.register_scripts(redis_client)
                  @classmethod
                  def register_scripts(cls, redis_client):
                      """
                      Register the Lua scripts once and cache the script objects.
                      The module-level ``reset_all_script`` doubles as the "already
                      registered" flag, so only the first call does any work; the
                      scripts are stored on the class and shared by all instances.
                      :param redis_client:
                          Client whose ``register_script`` creates the script objects.
                      """
                      global reset_all_script
                      if reset_all_script is None:
-                         reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
                          cls.unlock_script = redis_client.register_script(UNLOCK_SCRIPT)
                          cls.extend_script = redis_client.register_script(EXTEND_SCRIPT)
                          cls.reset_script = redis_client.register_script(RESET_SCRIPT)
                          cls.reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
                          # The guard variable is assigned last, after the class attributes are set.
+                         reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
                  @property
                  def _held(self):
                      return self.id == self.get_owner_id()
                  def reset(self):
                      """
                      Forcibly deletes the lock. Use this with care.
                      """
                      self.reset_script(client=self._client, keys=(self._name, self._signal), args=(self.id, self._signal_expire))
                  @property
                  def id(self):
                      return self._id
                  def get_owner_id(self):
                      """
                      Return the value currently stored under the lock key.
                      Binary values are decoded as ASCII (undecodable bytes are replaced);
                      anything else the client returns is passed through unchanged.
                      """
                      raw_owner = self._client.get(self._name)
                      if not isinstance(raw_owner, binary_type):
                          return raw_owner
                      return raw_owner.decode('ascii', 'replace')
                  def acquire(self, blocking=True, timeout=None):
                      """
                      Try to take the lock, optionally waiting for it to become free.
                      :param blocking:
                          Boolean value specifying whether lock should be blocking or not.
                      :param timeout:
                          An integer value specifying the maximum number of seconds to block.
                      :returns:
                          ``True`` when the lock was taken, ``False`` when it is busy and
                          either ``blocking`` is false or the timeout elapsed.
                      :raises AlreadyAcquired: the lock key already holds this lock's id.
                      :raises TimeoutNotUsable: a timeout was given with ``blocking=False``.
                      :raises InvalidTimeout: the timeout is negative.
                      :raises TimeoutTooLarge:
                          the timeout exceeds ``expire`` and auto renewal is off.
                      """
                      logger = loggers["acquire"]
-                     logger.debug("Getting acquire on %r ...", self._name)
+                     logger.debug("Getting blocking: %s acquire on %r ...", blocking, self._name)
                      if self._held:
                          owner_id = self.get_owner_id()
                          raise AlreadyAcquired("Already acquired from this Lock instance. Lock id: {}".format(owner_id))
                      if not blocking and timeout is not None:
                          raise TimeoutNotUsable("Timeout cannot be used if blocking=False")
                      # A timeout of 0 is falsy and skips this validation; it then behaves
                      # like "no timeout" below (the message mentions "equal to 0" regardless).
                      if timeout:
                          timeout = int(timeout)
                          if timeout < 0:
-                             raise InvalidTimeout("Timeout (%d) cannot be less than or equal to 0" % timeout)
+                             raise InvalidTimeout(f"Timeout ({timeout}) cannot be less than or equal to 0")
                          if self._expire and not self._lock_renewal_interval and timeout > self._expire:
-                             raise TimeoutTooLarge("Timeout (%d) cannot be greater than expire (%d)" % (timeout, self._expire))
+                             raise TimeoutTooLarge(f"Timeout ({timeout}) cannot be greater than expire ({self._expire})")
                      busy = True
                      # Without an explicit timeout each wait on the signal list lasts at most
                      # `expire` seconds (0 is passed when neither is set) and is then retried.
                      blpop_timeout = timeout or self._expire or 0
                      timed_out = False
                      while busy:
                          # SET with nx=True only succeeds when the key does not exist yet.
                          busy = not self._client.set(self._name, self._id, nx=True, ex=self._expire)
                          if busy:
                              if timed_out:
                                  return False
                              elif blocking:
                                  # An empty BLPOP result counts as a timeout only when the caller
                                  # gave one; one more SET attempt follows before returning False.
                                  timed_out = not self._client.blpop(self._signal, blpop_timeout) and timeout
                              else:
-                                 logger.warning("Failed to get %r.", self._name)
+                                 logger.warning("Failed to acquire Lock(%r).", self._name)
                                  return False
-                     logger.info("Got lock for %r.", self._name)
+                     logger.debug("Acquired Lock(%r).", self._name)
                      if self._lock_renewal_interval is not None:
                          self._start_lock_renewer()
                      return True
                  def extend(self, expire=None):
-                     """Extends expiration time of the lock.
+                     """
+                     Extends expiration time of the lock.
                      :param expire:
                          New expiration time. If ``None`` - `expire` provided during
                          lock initialization will be taken.
                      """
                      # Raises ValueError for a negative expire, TypeError when no expire is
                      # available at all, and NotAcquired / NotExpirable / RuntimeError
                      # depending on the code returned by the EXTEND script.
                      if expire:
                          expire = int(expire)
                          if expire < 0:
                              raise ValueError("A negative expire is not acceptable.")
                      elif self._expire is not None:
                          expire = self._expire
                      else:
                          raise TypeError(
                              "To extend a lock 'expire' must be provided as an "
                              "argument to extend() method or at initialization time."
                          )
                      # Script result: 0 = extended, 1 = key missing or owned by another id,
                      # 2 = key has a negative TTL.
                      error = self.extend_script(client=self._client, keys=(self._name, self._signal), args=(self._id, expire))
                      if error == 1:
-                         raise NotAcquired("Lock %s is not acquired or it already expired." % self._name)
+                         raise NotAcquired(f"Lock {self._name} is not acquired or it already expired.")
                      elif error == 2:
-                         raise NotExpirable("Lock %s has no assigned expiration time" % self._name)
+                         raise NotExpirable(f"Lock {self._name} has no assigned expiration time")
                      elif error:
-                         raise RuntimeError("Unsupported error code %s from EXTEND script" % error)
+                         raise RuntimeError(f"Unsupported error code {error} from EXTEND script")
                  @staticmethod
-                 def _lock_renewer(lockref, interval, stop):
+                 def _lock_renewer(name, lockref, interval, stop):
                      """
                      Renew the lock key in redis every `interval` seconds until the
                      `stop` event is set or the lock object has been garbage collected.
                      :param name: redis key of the lock; only used in log messages.
                      :param lockref: weak reference to the owning lock instance.
                      :param interval: seconds to wait between two renewals.
                      :param stop: ``threading.Event`` that ends the loop once it is set.
                      """
                      # stop.wait() returns False on timeout (renew again) and True once the
                      # event is set (leave the loop).
                      while not stop.wait(timeout=interval):
-                         loggers["refresh.thread.start"].debug("Refreshing lock")
-                         lock = lockref()
+                         loggers["refresh.thread.start"].debug("Refreshing Lock(%r).", name)
+                         lock: "Lock" = lockref()
                          if lock is None:
                              loggers["refresh.thread.stop"].debug(
-                                 "The lock no longer exists, stopping lock refreshing"
+                                 "Stopping loop because Lock(%r) was garbage collected.", name
                              )
                              break
                          lock.extend(expire=lock._expire)
                          # Drop the strong reference so this thread does not keep the lock
                          # object alive while it sleeps.
                          del lock
-                     loggers["refresh.thread.exit"].debug("Exit requested, stopping lock refreshing")
+                     loggers["refresh.thread.exit"].debug("Exiting renewal thread for Lock(%r).", name)
                  def _start_lock_renewer(self):
                      """
                      Starts the lock refresher thread.
                      Raises ``AlreadyStarted`` when a renewal thread is already
                      registered on this instance.
                      """
                      if self._lock_renewal_thread is not None:
                          raise AlreadyStarted("Lock refresh thread already started")
                      loggers["refresh.start"].debug(
-                         "Starting thread to refresh lock every %s seconds",
-                         self._lock_renewal_interval
+                         "Starting renewal thread for Lock(%r). Refresh interval: %s seconds.",
+                         self._name, self._lock_renewal_interval
                      )
                      # The event lets _stop_lock_renewer() wake the thread and end its loop.
                      self._lock_renewal_stop = threading.Event()
                      # The thread only gets a weak reference to this lock.
                      # NOTE(review): _lock_renewer requires `interval`, but in this diff view
                      # the 'interval' entry only appears on a removed line - confirm that it
                      # is still passed in kwargs.
                      self._lock_renewal_thread = threading.Thread(
                          group=None,
                          target=self._lock_renewer,
-                         kwargs={'lockref': weakref.ref(self),
+                         kwargs={
+                             'name': self._name,
+                             'lockref': weakref.ref(self),
-                                 'interval': self._lock_renewal_interval,
-                                 'stop': self._lock_renewal_stop}
+                             'stop': self._lock_renewal_stop,
+                         },
                      )
-                     self._lock_renewal_thread.setDaemon(True)
+                     self._lock_renewal_thread.daemon = True
                      self._lock_renewal_thread.start()
                  def _stop_lock_renewer(self):
                      """
                      Stop the lock renewer.
                      This signals the renewal thread and waits for its exit.
                      Does nothing when no renewal thread exists or it is no longer alive.
                      """
                      # In this early-return case the thread attribute is left untouched.
                      if self._lock_renewal_thread is None or not self._lock_renewal_thread.is_alive():
                          return
-                     loggers["refresh.shutdown"].debug("Signalling the lock refresher to stop")
+                     loggers["refresh.shutdown"].debug("Signaling renewal thread for Lock(%r) to exit.", self._name)
                      self._lock_renewal_stop.set()
                      # join() has no timeout: this blocks until the renewal loop has returned.
                      self._lock_renewal_thread.join()
                      self._lock_renewal_thread = None
-                     loggers["refresh.exit"].debug("Lock refresher has stopped")
+                     loggers["refresh.exit"].debug("Renewal thread for Lock(%r) exited.", self._name)
                  def __enter__(self):
                      # Context-manager entry: block until the lock is acquired and return
                      # the lock itself as the ``with`` target.
                      acquired = self.acquire(blocking=True)
-                     assert acquired, "Lock wasn't acquired, but blocking=True"
+                     if not acquired:
+                         raise AssertionError(f"Lock({self._name}) wasn't acquired, but blocking=True was used!")
                      return self
                  def __exit__(self, exc_type=None, exc_value=None, traceback=None):
                      self.release()
                  def release(self):
                      """Releases the lock, that was acquired with the same object.
                      .. note::
                          If you want to release a lock that you acquired in a different place you have two choices:
                          * Use ``Lock("name", id=id_from_other_place).release()``
                          * Use ``Lock("name").reset()``
                      """
                      # Stop renewing first, so the key is not extended while it is released.
                      if self._lock_renewal_thread is not None:
                          self._stop_lock_renewer()
-                     loggers["release"].debug("Releasing %r.", self._name)
+                     loggers["release"].debug("Releasing Lock(%r).", self._name)
                      # Script result: 0 = released, 1 = key missing or owned by another id.
                      error = self.unlock_script(client=self._client, keys=(self._name, self._signal), args=(self._id, self._signal_expire))
                      if error == 1:
-                         raise NotAcquired("Lock %s is not acquired or it already expired." % self._name)
+                         raise NotAcquired(f"Lock({self._name}) is not acquired or it already expired.")
                      elif error:
                          # NOTE(review): the message names the EXTEND script, but the code
                          # checked here comes from the unlock script.
-                         raise RuntimeError("Unsupported error code %s from EXTEND script." % error)
+                         raise RuntimeError(f"Unsupported error code {error} from EXTEND script.")
                  def locked(self):
                      """
                      Return true if the lock is acquired.
                      Checks that lock with same name already exists. This method returns true, even if
                      lock have another id.
                      """
                      return self._client.exists(self._name) == 1
              # Registered lazily by Lock.register_scripts(); None means "not registered yet".
              reset_all_script = None
              def reset_all(redis_client):
                  """
                  Forcibly delete every lock that is still present, for example
                  leftovers of a crashed process. Use this with care.
                  :param redis_client:
                      An instance of :class:`~StrictRedis`.
                  """
                  # Make sure the script object exists before it is called.
                  Lock.register_scripts(redis_client)
                  reset_all_script(client=redis_client)  # noqa

vcsserver/lib/_vendor/statsd/__init__.py

0 +7 -3

-             from __future__ import absolute_import, division, unicode_literals
-             import logging
              from .stream import TCPStatsClient, UnixSocketStatsClient  # noqa
              from .udp import StatsClient  # noqa
              HOST = 'localhost'
              PORT = 8125
              IPV6 = False
              PREFIX = None
              MAXUDPSIZE = 512
              log = logging.getLogger('rhodecode.statsd')
              def statsd_config(config, prefix='statsd.'):
                  """
                  Extract the settings whose key starts with ``prefix``.
                  :param config: mapping of setting names to values.
                  :param prefix: key prefix to select; it is stripped from the result keys.
                  :returns: a new dict with the selected settings.
                  """
                  prefix_len = len(prefix)
                  return {
                      name[prefix_len:]: config[name]
                      for name in config.keys()
                      if name.startswith(prefix)
                  }
              def client_from_config(configuration, prefix='statsd.', **kwargs):
                  """
                  Build a UDP ``StatsClient`` from prefixed configuration settings.
                  :param configuration: mapping of settings, e.g. the application settings.
                  :param prefix: prefix of the statsd related keys.
                  :param kwargs: accepted but not used by this function.
                  :returns: the client, or ``None`` when statsd is disabled or the
                      client could not be created.
                  """
                  from pyramid.settings import asbool
                  _config = statsd_config(configuration, prefix)
                  statsd_enabled = asbool(_config.pop('enabled', False))
                  if not statsd_enabled:
                      log.debug('statsd client not enabled by statsd.enabled =  flag, skipping...')
                      return
                  # The keys read here still carry a 'statsd_' part after the prefix was
                  # stripped, i.e. they come from settings like '<prefix>statsd_host'.
                  host = _config.pop('statsd_host', HOST)
                  port = _config.pop('statsd_port', PORT)
                  # From here on `prefix` is the metric name prefix, not the config key prefix.
                  prefix = _config.pop('statsd_prefix', PREFIX)
                  maxudpsize = _config.pop('statsd_maxudpsize', MAXUDPSIZE)
                  ipv6 = asbool(_config.pop('statsd_ipv6', IPV6))
                  log.debug('configured statsd client %s:%s', host, port)
                  # Any failure while creating the client is logged with its traceback and
                  # turned into "no client" instead of being raised to the caller.
-                 return StatsClient(
+                 try:
+                     client = StatsClient(
                      host=host, port=port, prefix=prefix, maxudpsize=maxudpsize, ipv6=ipv6)
+                 except Exception:
+                     log.exception('StatsD is enabled, but failed to connect to statsd server, fallback: disable statsd')
+                     client = None
+                 return client
              def get_statsd_client(request):
                  """Create a statsd client from the settings in ``request.registry.settings``."""
                  app_settings = request.registry.settings
                  return client_from_config(app_settings)

vcsserver/lib/_vendor/statsd/base.py

0 +70 -23

-             from __future__ import absolute_import, division, unicode_literals
+             import re
              import random
              from collections import deque
              from datetime import timedelta
+             from repoze.lru import lru_cache
              from .timer import Timer
              # Matches every character that is not a word character, a digit, '_', '-',
              # ':', '/' or '.'; normalize_tags() replaces each match with
              # TAG_INVALID_CHARS_SUBS.
+             TAG_INVALID_CHARS_RE = re.compile(
+                 r"[^\w\d_\-:/\.]",
+                 #re.UNICODE
+             )
+             TAG_INVALID_CHARS_SUBS = "_"
-             class StatsClientBase(object):
+             # we save and expose methods called by statsd for discovery
+             buckets_dict = {
+             }
              # Replace every invalid character in each tag and return the tags as a list.
              # Results are memoised per distinct tag tuple (at most 500 entries).
              # NOTE(review): the cached list object is presumably handed out again on a
              # cache hit, so callers should not mutate it - confirm with repoze.lru.
+             @lru_cache(maxsize=500)
+             def _normalize_tags_with_cache(tag_list):
+                 return [TAG_INVALID_CHARS_RE.sub(TAG_INVALID_CHARS_SUBS, tag) for tag in tag_list]
              # Public entry point for tag clean-up: accepts any iterable of tag strings
              # and returns the tags with invalid characters replaced.
+             def normalize_tags(tag_list):
+                 # We have to turn our input tag list into a non-mutable tuple for it to
+                 # be hashable (and thus usable) by the @lru_cache decorator.
+                 return _normalize_tags_with_cache(tuple(tag_list))
+             class StatsClientBase:
                  """A Base class for various statsd clients."""
                  def close(self):
                      """Used to close and clean up any underlying resources."""
                      raise NotImplementedError()
                  # Transport hook for subclasses. _after() calls it as self._send(data),
                  # so client subclasses accept the formatted payload as an argument.
                  def _send(self):
                      raise NotImplementedError()
                  # Subclasses return an object that buffers stats until it is sent.
                  def pipeline(self):
                      raise NotImplementedError()
-                 def timer(self, stat, rate=1):
-                     return Timer(self, stat, rate)
+                 def timer(self, stat, rate=1, tags=None, auto_send=True):
+                     """
+                     statsd = StatsdClient.statsd
+                     with statsd.timer('bucket_name', auto_send=True) as tmr:
+                         # This block will be timed.
+                         for i in range(0, 100000):
+                             i ** 2
+                     # you can access time here...
+                     elapsed_ms = tmr.ms
+                     """
+                     return Timer(self, stat, rate, tags, auto_send=auto_send)
-                 def timing(self, stat, delta, rate=1):
+                 def timing(self, stat, delta, rate=1, tags=None, use_decimals=True):
                      """
                      Send new timing information.
                      `delta` can be either a number of milliseconds or a timedelta.
                      """
                      if isinstance(delta, timedelta):
                          # Convert timedelta to number of milliseconds.
                          delta = delta.total_seconds() * 1000.
-                     self._send_stat(stat, '%0.6f|ms' % delta, rate)
-                 def incr(self, stat, count=1, rate=1):
-                     """Increment a stat by `count`."""
-                     self._send_stat(stat, '%s|c' % count, rate)
                      # use_decimals=True formats the value with six decimals; otherwise
                      # the plain string form of `delta` is sent.
+                     if use_decimals:
+                         fmt = '%0.6f|ms'
+                     else:
+                         fmt = '%s|ms'
+                     self._send_stat(stat, fmt % delta, rate, tags)
-                 def decr(self, stat, count=1, rate=1):
+                 def incr(self, stat, count=1, rate=1, tags=None):
+                     """Increment a stat by `count`."""
+                     self._send_stat(stat, f'{count}|c', rate, tags)
+                 def decr(self, stat, count=1, rate=1, tags=None):
                      """Decrement a stat by `count`."""
-                     self.incr(stat, -count, rate)
+                     self.incr(stat, -count, rate, tags)
-                 def gauge(self, stat, value, rate=1, delta=False):
+                 def gauge(self, stat, value, rate=1, delta=False, tags=None):
                      """Set a gauge value."""
                      # A negative absolute value is sent as "0" followed by the negative
                      # number, presumably because a signed value is read as a delta by the
                      # server. Sampling is decided once here; both stats then use rate 1.
                      # NOTE(review): this branch does not forward `tags`.
                      if value < 0 and not delta:
                          if rate < 1:
                              if random.random() > rate:
                                  return
                          with self.pipeline() as pipe:
                              pipe._send_stat(stat, '0|g', 1)
-                             pipe._send_stat(stat, '%s|g' % value, 1)
+                             pipe._send_stat(stat, f'{value}|g', 1)
                      else:
                          # Non-negative deltas get an explicit '+' sign.
                          prefix = '+' if delta and value >= 0 else ''
-                         self._send_stat(stat, '%s%s|g' % (prefix, value), rate)
+                         self._send_stat(stat, f'{prefix}{value}|g', rate, tags)
                  # NOTE(review): unlike the other metric methods, set() takes no `tags`.
                  def set(self, stat, value, rate=1):
                      """Set a set value."""
-                     self._send_stat(stat, '%s|s' % value, rate)
+                     self._send_stat(stat, f'{value}|s', rate)
+                 def histogram(self, stat, value, rate=1, tags=None):
+                     """Set a histogram"""
+                     self._send_stat(stat, f'{value}|h', rate, tags)
-                 def _send_stat(self, stat, value, rate):
-                     self._after(self._prepare(stat, value, rate))
                  # Format one stat and hand the result to _after() for delivery.
+                 def _send_stat(self, stat, value, rate, tags=None):
+                     self._after(self._prepare(stat, value, rate, tags))
-                 def _prepare(self, stat, value, rate):
+                 def _prepare(self, stat, value, rate, tags=None):
                      # Build the wire format "<prefix>.<stat>:<value>[|@rate][|#tag1,tag2]".
                      # Returns None when the stat is dropped by sampling. The un-prefixed
                      # stat name is recorded in the module-level buckets_dict either way.
+                     global buckets_dict
+                     buckets_dict[stat] = 1
                      if rate < 1:
                          if random.random() > rate:
                              return
-                         value = '%s|@%s' % (value, rate)
+                         value = f'{value}|@{rate}'
                      if self._prefix:
-                         stat = '%s.%s' % (self._prefix, stat)
+                         stat = f'{self._prefix}.{stat}'
-                     return '%s:%s' % (stat, value)
+                     res = '%s:%s%s' % (
+                         stat,
+                         value,
+                         ("|#" + ",".join(normalize_tags(tags))) if tags else "",
+                     )
+                     return res
                  # Deliver prepared data right away; falsy data (e.g. a stat dropped by
                  # sampling) is skipped.
                  def _after(self, data):
                      if data:
                          self._send(data)
              class PipelineBase(StatsClientBase):
                  """Collects prepared stats in a queue and sends them in one go."""
                  def __init__(self, client):
                      """Bind the pipeline to ``client`` and reuse its stat prefix."""
                      self._client = client
                      self._prefix = client._prefix
                      self._stats = deque()
                  def _send(self):
                      """Flush the queued stats; implemented by concrete pipelines."""
                      raise NotImplementedError()
                  def _after(self, data):
                      """Queue ``data`` instead of sending it; ``None`` is ignored."""
                      if data is None:
                          return
                      self._stats.append(data)
                  def __enter__(self):
                      """Use the pipeline itself as the ``with`` target."""
                      return self
                  def __exit__(self, typ, value, tb):
                      """Send whatever was queued when the ``with`` block ends."""
                      self.send()
                  def send(self):
                      """Flush the queue; does nothing when no stats are queued."""
                      if self._stats:
                          self._send()
                  def pipeline(self):
                      """Return a nested pipeline of the same class that feeds into this one."""
                      return self.__class__(self)

vcsserver/lib/_vendor/statsd/stream.py

0 0 -2

-             from __future__ import absolute_import, division, unicode_literals
-             import socket
              from .base import StatsClientBase, PipelineBase
              class StreamPipeline(PipelineBase):
                  """Pipeline for stream clients: queued stats go out as one newline-joined payload."""
                  def _send(self):
                      """Pass all queued stats to the parent client, then empty the queue."""
                      payload = '\n'.join(self._stats)
                      self._client._after(payload)
                      self._stats.clear()
              class StreamClientBase(StatsClientBase):
                  """Shared logic for clients that send stats over a stream socket."""
                  def connect(self):
                      """Open the socket and store it in ``self._sock``; implemented by subclasses."""
                      raise NotImplementedError()
                  def close(self):
                      """Close the current socket, if there is one, and forget it."""
                      sock = self._sock
                      if sock and hasattr(sock, 'close'):
                          sock.close()
                      self._sock = None
                  def reconnect(self):
                      """Drop the current connection and open a new one."""
                      self.close()
                      self.connect()
                  def pipeline(self):
                      """Return a pipeline that batches stats for this client."""
                      return StreamPipeline(self)
                  def _send(self, data):
                      """Send data to statsd, connecting first when there is no socket yet."""
                      if not self._sock:
                          self.connect()
                      self._do_send(data)
                  def _do_send(self, data):
                      """Write ``data`` as one ASCII encoded, newline terminated message."""
                      message = data.encode('ascii') + b'\n'
                      self._sock.sendall(message)
              class TCPStatsClient(StreamClientBase):
                  """TCP version of StatsClient."""
                  def __init__(self, host='localhost', port=8125, prefix=None,
                               timeout=None, ipv6=False):
                      """
                      Create a new client. No connection is opened here.
                      :param host: host name or address of the statsd server.
                      :param port: TCP port of the statsd server.
                      :param prefix: optional prefix put in front of every stat name.
                      :param timeout: socket timeout in seconds, ``None`` for blocking mode.
                      :param ipv6: resolve ``host`` as an IPv6 address when true.
                      """
                      self._host = host
                      self._port = port
                      self._ipv6 = ipv6
                      self._timeout = timeout
                      self._prefix = prefix
                      self._sock = None
                  def connect(self):
                      """
                      Resolve the server address and open the TCP connection.
                      ``self._sock`` is only assigned once the connection is established.
                      If setting up or connecting the socket fails, the socket is closed
                      and the exception is re-raised, leaving ``self._sock`` unchanged.
                      """
                      fam = socket.AF_INET6 if self._ipv6 else socket.AF_INET
                      family, _, _, _, addr = socket.getaddrinfo(
                          self._host, self._port, fam, socket.SOCK_STREAM)[0]
                      sock = socket.socket(family, socket.SOCK_STREAM)
                      try:
                          sock.settimeout(self._timeout)
                          sock.connect(addr)
                      except Exception:
                          # Never keep a half-initialised socket: it would be truthy, so a
                          # later send would skip reconnecting and fail on it every time.
                          sock.close()
                          raise
                      self._sock = sock
              class UnixSocketStatsClient(StreamClientBase):
                  """Unix domain socket version of StatsClient."""
                  def __init__(self, socket_path, prefix=None, timeout=None):
                      """
                      Create a new client. No connection is opened here.
                      :param socket_path: file system path of the statsd unix socket.
                      :param prefix: optional prefix put in front of every stat name.
                      :param timeout: socket timeout in seconds, ``None`` for blocking mode.
                      """
                      self._socket_path = socket_path
                      self._timeout = timeout
                      self._prefix = prefix
                      self._sock = None
                  def connect(self):
                      """
                      Open the connection to the unix domain socket.
                      ``self._sock`` is only assigned once the connection is established.
                      If setting up or connecting the socket fails, the socket is closed
                      and the exception is re-raised, leaving ``self._sock`` unchanged.
                      """
                      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
                      try:
                          sock.settimeout(self._timeout)
                          sock.connect(self._socket_path)
                      except Exception:
                          # Never keep a half-initialised socket: it would be truthy, so a
                          # later send would skip reconnecting and fail on it every time.
                          sock.close()
                          raise
                      self._sock = sock

vcsserver/lib/_vendor/statsd/timer.py

0 +9 -14

-             from __future__ import absolute_import, division, unicode_literals
-             import functools
-             # Use timer that's not susceptible to time of day adjustments.
-             try:
-                 # perf_counter is only present on Py3.3+
-                 from time import perf_counter as time_now
-             except ImportError:
-                 # fall back to using time
-                 from time import time as time_now
              def safe_wraps(wrapper, *args, **kwargs):
                  """
                  Like ``functools.wraps``, but also usable with ``functools.partial`` objects.
                  Partials are unwrapped down to the underlying callable first, and that
                  callable is what gets passed to ``functools.wraps``.
                  """
                  target = wrapper
                  while isinstance(target, functools.partial):
                      target = target.func
                  return functools.wraps(target, *args, **kwargs)
-             class Timer(object):
+             class Timer:
                  """A context manager/decorator for statsd.timing()."""
-                 def __init__(self, client, stat, rate=1):
+                 def __init__(self, client, stat, rate=1, tags=None, use_decimals=True, auto_send=True):
                      self.client = client
                      self.stat = stat
                      self.rate = rate
+                     self.tags = tags
                      self.ms = None
                      self._sent = False
                      self._start_time = None
+                     self.use_decimals = use_decimals
+                     self.auto_send = auto_send
                  def __call__(self, f):
                      """Thread-safe timing function decorator."""
                      @safe_wraps(f)
                      def _wrapped(*args, **kwargs):
                          start_time = time_now()
                          try:
                              return f(*args, **kwargs)
                          finally:
                              elapsed_time_ms = 1000.0 * (time_now() - start_time)
-                             self.client.timing(self.stat, elapsed_time_ms, self.rate)
+                             self.client.timing(self.stat, elapsed_time_ms, self.rate, self.tags, self.use_decimals)
+                             self._sent = True
                      return _wrapped
                  def __enter__(self):
                      return self.start()
                  def __exit__(self, typ, value, tb):
-                     self.stop()
+                     self.stop(send=self.auto_send)
                  def start(self):
                      self.ms = None
                      self._sent = False
                      self._start_time = time_now()
                      return self
                  def stop(self, send=True):
                      if self._start_time is None:
                          raise RuntimeError('Timer has not started.')
                      dt = time_now() - self._start_time
                      self.ms = 1000.0 * dt  # Convert to milliseconds.
                      if send:
                          self.send()
                      return self
                  def send(self):
                      if self.ms is None:
                          raise RuntimeError('No data recorded.')
                      if self._sent:
                          raise RuntimeError('Already sent data.')
                      self._sent = True
-                     self.client.timing(self.stat, self.ms, self.rate)
+                     self.client.timing(self.stat, self.ms, self.rate, self.tags, self.use_decimals)

vcsserver/lib/_vendor/statsd/udp.py

0 +1 -3

-             from __future__ import absolute_import, division, unicode_literals
-             import socket
              from .base import StatsClientBase, PipelineBase
              class Pipeline(PipelineBase):
                  def __init__(self, client):
-                     super(Pipeline, self).__init__(client)
+                     super().__init__(client)
                      self._maxudpsize = client._maxudpsize
                  def _send(self):
                      """Flush queued stats, packing several into one payload where they fit.
                      Stats are taken from the left of ``self._stats`` so their order is
                      kept, and joined with newlines. When adding the next stat plus its
                      separator would reach ``self._maxudpsize``, the current payload is
                      handed to ``self._client._after`` and a new payload is started.
                      """
                      pending = self._stats
                      packet = pending.popleft()
                      while pending:
                          nxt = pending.popleft()
                          if len(nxt) + len(packet) + 1 < self._maxudpsize:
                              # Still fits: append to the payload being built.
                              packet += '\n' + nxt
                              continue
                          self._client._after(packet)
                          packet = nxt
                      self._client._after(packet)
              class StatsClient(StatsClientBase):
                  """A statsd client that sends its data as UDP datagrams."""
                  def __init__(self, host='localhost', port=8125, prefix=None,
                               maxudpsize=512, ipv6=False):
                      """Resolve the target address and create the datagram socket.
                      :param host: host name or address given to ``getaddrinfo``.
                      :param port: port given to ``getaddrinfo``.
                      :param prefix: stored as ``_prefix``; it is consumed outside this class.
                      :param maxudpsize: stored as ``_maxudpsize``; read by ``Pipeline``.
                      :param ipv6: resolve with ``AF_INET6`` instead of ``AF_INET``.
                      """
                      if ipv6:
                          wanted_family = socket.AF_INET6
                      else:
                          wanted_family = socket.AF_INET
                      candidates = socket.getaddrinfo(
                          host, port, wanted_family, socket.SOCK_DGRAM)
                      # Only the first resolution result is used.
                      family, _, _, _, addr = candidates[0]
                      self._addr = addr
                      self._sock = socket.socket(family, socket.SOCK_DGRAM)
                      self._prefix = prefix
                      self._maxudpsize = maxudpsize
                  def _send(self, data):
                      """Send ``data`` as an ASCII datagram, ignoring socket and runtime errors."""
                      try:
                          payload = data.encode('ascii')
                          self._sock.sendto(payload, self._addr)
                      except (socket.error, RuntimeError):
                          # Deliberately best-effort: a failed send is dropped.
                          return
                  def close(self):
                      """Close the socket if it has a ``close`` method, then forget it."""
                      sock = self._sock
                      if sock and hasattr(sock, 'close'):
                          sock.close()
                      self._sock = None
                  def pipeline(self):
                      """Return a new ``Pipeline`` bound to this client."""
                      return Pipeline(self)

vcsserver/lib/exc_tracking.py

0 +147 -49

-             # -*- coding: utf-8 -*-
-             # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
+             import io
              import os
              import time
+             import sys
              import datetime
              import msgpack
              import logging
              import traceback
              import tempfile
-             from pyramid import compat
+             import glob
              log = logging.getLogger(__name__)
              # NOTE: Any changes should be synced with exc_tracking at rhodecode.lib.exc_tracking
              global_prefix = 'vcsserver'
              exc_store_dir_name = 'rc_exception_store_v1'
-             def exc_serialize(exc_id, tb, exc_type):
+             def exc_serialize(exc_id, tb, exc_type, extra_data=None):
                  data = {
-                     'version': 'v1',
-                     'exc_id': exc_id,
-                     'exc_utc_date': datetime.datetime.utcnow().isoformat(),
-                     'exc_timestamp': repr(time.time()),
-                     'exc_message': tb,
-                     'exc_type': exc_type,
+                     "version": "v1",
+                     "exc_id": exc_id,
+                     "exc_utc_date": datetime.datetime.utcnow().isoformat(),
+                     "exc_timestamp": repr(time.time()),
+                     "exc_message": tb,
+                     "exc_type": exc_type,
                  }
+                 if extra_data:
+                     data.update(extra_data)
                  return msgpack.packb(data), data
              def exc_unserialize(tb):
                  """Decode a msgpack-packed payload and return the unpacked object."""
                  unpacked = msgpack.unpackb(tb)
                  return unpacked
+             _exc_store = None
              def get_exc_store():
                  """
                  Get and create exception store if it's not existing
                  """
+                 global _exc_store
+                 if _exc_store is not None:
+                     # quick global cache
+                     return _exc_store
                  import vcsserver as app
-                 exc_store_dir = app.CONFIG.get('exception_tracker.store_path', '') or tempfile.gettempdir()
+                 exc_store_dir = (
+                     app.CONFIG.get("exception_tracker.store_path", "") or tempfile.gettempdir()
+                 )
                  _exc_store_path = os.path.join(exc_store_dir, exc_store_dir_name)
                  _exc_store_path = os.path.abspath(_exc_store_path)
                  if not os.path.isdir(_exc_store_path):
                      os.makedirs(_exc_store_path)
-                     log.debug('Initializing exceptions store at %s', _exc_store_path)
+                     log.debug("Initializing exceptions store at %s", _exc_store_path)
+                     _exc_store = _exc_store_path
                  return _exc_store_path
-             def _store_exception(exc_id, exc_info, prefix, request_path=''):
-                 exc_type, exc_value, exc_traceback = exc_info
+             def get_detailed_tb(exc_info):
+                 try:
+                     from pip._vendor.rich import (
+                         traceback as rich_tb,
+                         scope as rich_scope,
+                         console as rich_console,
+                     )
+                 except ImportError:
+                     try:
+                         from rich import (
+                             traceback as rich_tb,
+                             scope as rich_scope,
+                             console as rich_console,
+                         )
+                     except ImportError:
+                         return None
+                 console = rich_console.Console(width=160, file=io.StringIO())
+                 exc = rich_tb.Traceback.extract(*exc_info, show_locals=True)
+                 tb_rich = rich_tb.Traceback(
+                     trace=exc,
+                     width=160,
+                     extra_lines=3,
+                     theme=None,
+                     word_wrap=False,
+                     show_locals=False,
+                     max_frames=100,
+                 )
-                 tb = ''.join(traceback.format_exception(
-                     exc_type, exc_value, exc_traceback, None))
+                 # last_stack = exc.stacks[-1]
+                 # last_frame = last_stack.frames[-1]
+                 # if last_frame and last_frame.locals:
+                 #     console.print(
+                 #         rich_scope.render_scope(
+                 #             last_frame.locals,
+                 #             title=f'{last_frame.filename}:{last_frame.lineno}'))
+                 console.print(tb_rich)
+                 formatted_locals = console.file.getvalue()
+                 return formatted_locals
-                 detailed_tb = getattr(exc_value, '_org_exc_tb', None)
+             def get_request_metadata(request=None) -> dict:
+                 request_metadata = {}
+                 if not request:
+                     from pyramid.threadlocal import get_current_request
+                     request = get_current_request()
+                 # NOTE(marcink): store request information into exc_data
+                 if request:
+                     request_metadata["client_address"] = getattr(request, "client_addr", "")
+                     request_metadata["user_agent"] = getattr(request, "user_agent", "")
+                     request_metadata["method"] = getattr(request, "method", "")
+                     request_metadata["url"] = getattr(request, "url", "")
+                 return request_metadata
+             def format_exc(exc_info, use_detailed_tb=True):
+                 exc_type, exc_value, exc_traceback = exc_info
+                 tb = "++ TRACEBACK ++\n\n"
+                 tb += "".join(traceback.format_exception(exc_type, exc_value, exc_traceback, None))
+                 detailed_tb = getattr(exc_value, "_org_exc_tb", None)
                  if detailed_tb:
-                     if isinstance(detailed_tb, compat.string_types):
+                     remote_tb = detailed_tb
+                     if isinstance(detailed_tb, str):
                          remote_tb = [detailed_tb]
                      tb += (
-                         '\n+++ BEG SOURCE EXCEPTION +++\n\n'
-                         '{}\n'
-                         '+++ END SOURCE EXCEPTION +++\n'
-                         ''.format('\n'.join(remote_tb))
+                         "\n+++ BEG SOURCE EXCEPTION +++\n\n"
+                         "{}\n"
+                         "+++ END SOURCE EXCEPTION +++\n"
+                         "".format("\n".join(remote_tb))
                      )
                      # Avoid that remote_tb also appears in the frame
                      del remote_tb
+                 if use_detailed_tb:
+                     locals_tb = get_detailed_tb(exc_info)
+                     if locals_tb:
+                         tb += f"\n+++ DETAILS +++\n\n{locals_tb}\n" ""
+                 return tb
+             def _store_exception(exc_id, exc_info, prefix, request_path=''):
+                 """
+                 Low level function to store exception in the exception tracker
+                 """
+                 extra_data = {}
+                 extra_data.update(get_request_metadata())
+                 exc_type, exc_value, exc_traceback = exc_info
+                 tb = format_exc(exc_info)
                  exc_type_name = exc_type.__name__
+                 exc_data, org_data = exc_serialize(exc_id, tb, exc_type_name, extra_data=extra_data)
+                 exc_pref_id = f"{exc_id}_{prefix}_{org_data['exc_timestamp']}"
                  exc_store_path = get_exc_store()
-                 exc_data, org_data = exc_serialize(exc_id, tb, exc_type_name)
-                 exc_pref_id = '{}_{}_{}'.format(exc_id, prefix, org_data['exc_timestamp'])
                  if not os.path.isdir(exc_store_path):
                      os.makedirs(exc_store_path)
                  stored_exc_path = os.path.join(exc_store_path, exc_pref_id)
-                 with open(stored_exc_path, 'wb') as f:
+                 with open(stored_exc_path, "wb") as f:
                      f.write(exc_data)
-                 log.debug('Stored generated exception %s as: %s', exc_id, stored_exc_path)
+                 log.debug("Stored generated exception %s as: %s", exc_id, stored_exc_path)
+                 if request_path:
                  log.error(
                      'error occurred handling this request.\n'
-                     'Path: `%s`, tb: %s',
+                         'Path: `%s`, %s',
                      request_path, tb)
              def store_exception(exc_id, exc_info, prefix=global_prefix, request_path=''):
                  """
                  Example usage::
                      exc_info = sys.exc_info()
                      store_exception(id(exc_info), exc_info)
                  """
                  try:
-                     _store_exception(exc_id=exc_id, exc_info=exc_info, prefix=prefix,
-                                      request_path=request_path)
+                     exc_type = exc_info[0]
+                     exc_type_name = exc_type.__name__
+                     _store_exception(
+                         exc_id=exc_id, exc_info=exc_info, prefix=prefix, request_path=request_path,
+                     )
+                     return exc_id, exc_type_name
                  except Exception:
-                     log.exception('Failed to store exception `%s` information', exc_id)
+                     log.exception("Failed to store exception `%s` information", exc_id)
                      # there's no way this can fail, it will crash server badly if it does.
                      pass
              def _find_exc_file(exc_id, prefix=global_prefix):
                  exc_store_path = get_exc_store()
                  if prefix:
-                     exc_id = '{}_{}'.format(exc_id, prefix)
+                     exc_id = f"{exc_id}_{prefix}"
                  else:
                      # search without a prefix
-                     exc_id = '{}'.format(exc_id)
+                     exc_id = f"{exc_id}"
-                 # we need to search the store for such start pattern as above
-                 for fname in os.listdir(exc_store_path):
-                     if fname.startswith(exc_id):
-                         exc_id = os.path.join(exc_store_path, fname)
-                         break
-                     continue
-                 else:
-                     exc_id = None
+                 found_exc_id = None
+                 matches = glob.glob(os.path.join(exc_store_path, exc_id) + "*")
+                 if matches:
+                     found_exc_id = matches[0]
-                 return exc_id
+                 return found_exc_id
              def _read_exception(exc_id, prefix):
                  exc_id_file_path = _find_exc_file(exc_id=exc_id, prefix=prefix)
                  if exc_id_file_path:
-                     with open(exc_id_file_path, 'rb') as f:
+                     with open(exc_id_file_path, "rb") as f:
                          return exc_unserialize(f.read())
                  else:
-                     log.debug('Exception File `%s` not found', exc_id_file_path)
+                     log.debug("Exception File `%s` not found", exc_id_file_path)
                  return None
              def read_exception(exc_id, prefix=global_prefix):
                  try:
                      return _read_exception(exc_id=exc_id, prefix=prefix)
                  except Exception:
-                     log.exception('Failed to read exception `%s` information', exc_id)
+                     log.exception("Failed to read exception `%s` information", exc_id)
                      # there's no way this can fail, it will crash server badly if it does.
                  return None
              def delete_exception(exc_id, prefix=global_prefix):
                  try:
                      exc_id_file_path = _find_exc_file(exc_id, prefix=prefix)
                      if exc_id_file_path:
                          os.remove(exc_id_file_path)
                  except Exception:
-                     log.exception('Failed to remove exception `%s` information', exc_id)
+                     log.exception("Failed to remove exception `%s` information", exc_id)
                      # there's no way this can fail, it will crash server badly if it does.
                      pass
+             def generate_id():
+                 return id(object())

vcsserver/lib/memory_lru_dict.py

0 +5 -7

-             # -*- coding: utf-8 -*-
-             # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              import logging
              from repoze.lru import LRUCache
-             from vcsserver.utils import safe_str
+             from vcsserver.str_utils import safe_str
              log = logging.getLogger(__name__)
              class LRUDict(LRUCache):
                  """
                  Wrapper to provide partial dict access
                  """
                  def __setitem__(self, key, value):
                      """Support ``cache[key] = value`` by delegating to ``put()``."""
                      outcome = self.put(key, value)
                      return outcome
                  def __getitem__(self, key):
                      """Support ``cache[key]`` by delegating to ``get()``; no ``KeyError`` is raised here."""
                      found = self.get(key)
                      return found
                  def __contains__(self, key):
                      """Return True when ``key`` has an entry, whatever value it holds.
                      A unique sentinel is passed as the ``get()`` default, so entries
                      holding falsy values (``0``, ``''``, ``None``, empty containers)
                      still count as present. Testing the truthiness of the stored value
                      itself would report such keys as missing.
                      """
                      missing = object()
                      return self.get(key, missing) is not missing
                  def __delitem__(self, key):
                      """Support ``del cache[key]`` by removing the entry from ``self.data``."""
                      storage = self.data
                      del storage[key]
                  def keys(self):
-                     return self.data.keys()
+                     return list(self.data.keys())
              class LRUDictDebug(LRUDict):
                  """
                  Wrapper to provide some debug options
                  """
                  def _report_keys(self):
-                     elems_cnt = '%s/%s' % (len(self.keys()), self.size)
+                     elems_cnt = f'{len(list(self.keys()))}/{self.size}'
                      # trick for pformat print it more nicely
                      fmt = '\n'
                      for cnt, elem in enumerate(self.keys()):
-                         fmt += '%s - %s\n' % (cnt+1, safe_str(elem))
+                         fmt += f'{cnt+1} - {safe_str(elem)}\n'
                      log.debug('current LRU keys (%s):%s', elems_cnt, fmt)
                  def __getitem__(self, key):
                      """Log the current keys via ``_report_keys()``, then return ``get(key)``."""
                      self._report_keys()
                      found = self.get(key)
                      return found

vcsserver/lib/rc_cache/__init__.py

0 +48 -13

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              import logging
+             import threading
              from dogpile.cache import register_backend
+             from . import region_meta
+             from .utils import (
+                 backend_key_generator,
+                 clear_cache_namespace,
+                 get_default_cache_settings,
+                 get_or_create_region,
+                 make_region,
+                 str2bool,
+             )
+             module_name = 'vcsserver'
              register_backend(
-                 "dogpile.cache.rc.memory_lru", "vcsserver.lib.rc_cache.backends",
+                 "dogpile.cache.rc.memory_lru", f"{module_name}.lib.rc_cache.backends",
                  "LRUMemoryBackend")
              register_backend(
-                 "dogpile.cache.rc.file_namespace", "vcsserver.lib.rc_cache.backends",
+                 "dogpile.cache.rc.file_namespace", f"{module_name}.lib.rc_cache.backends",
                  "FileNamespaceBackend")
              register_backend(
-                 "dogpile.cache.rc.redis", "vcsserver.lib.rc_cache.backends",
+                 "dogpile.cache.rc.redis", f"{module_name}.lib.rc_cache.backends",
                  "RedisPickleBackend")
              register_backend(
-                 "dogpile.cache.rc.redis_msgpack", "vcsserver.lib.rc_cache.backends",
+                 "dogpile.cache.rc.redis_msgpack", f"{module_name}.lib.rc_cache.backends",
                  "RedisMsgPackBackend")
              log = logging.getLogger(__name__)
-             from . import region_meta
-             from .utils import (
-                 get_default_cache_settings, backend_key_generator, get_or_create_region,
-                 clear_cache_namespace, make_region)
+             CACHE_OBJ_CACHE_VER = 'v2'
+             CLEAR_DELETE = 'delete'
+             CLEAR_INVALIDATE = 'invalidate'
+             def async_creation_runner(cache, cache_key, creator, mutex):
+                 def runner():
+                     try:
+                         value = creator()
+                         cache.set(cache_key, value)
+                     finally:
+                         mutex.release()
+                 thread = threading.Thread(target=runner)
+                 thread.start()
              def configure_dogpile_cache(settings):
                  cache_dir = settings.get('cache_dir')
                  if cache_dir:
                      region_meta.dogpile_config_defaults['cache_dir'] = cache_dir
                  rc_cache_data = get_default_cache_settings(settings, prefixes=['rc_cache.'])
                  # inspect available namespaces
                  avail_regions = set()
                  for key in rc_cache_data.keys():
                      namespace_name = key.split('.', 1)[0]
                      if namespace_name in avail_regions:
                          continue
                      avail_regions.add(namespace_name)
                      log.debug('dogpile: found following cache regions: %s', namespace_name)
                      new_region = make_region(
                          name=namespace_name,
-                         function_key_generator=None
+                         function_key_generator=None,
+                         async_creation_runner=None
                      )
-                     new_region.configure_from_config(settings, 'rc_cache.{}.'.format(namespace_name))
+                     new_region.configure_from_config(settings, f'rc_cache.{namespace_name}.')
                      new_region.function_key_generator = backend_key_generator(new_region.actual_backend)
+                     async_creator = str2bool(settings.pop(f'rc_cache.{namespace_name}.async_creator', 'false'))
+                     if async_creator:
+                         log.debug('configuring region %s with async creator', new_region)
+                         new_region.async_creation_runner = async_creation_runner
                      if log.isEnabledFor(logging.DEBUG):
-                         region_args = dict(backend=new_region.actual_backend.__class__,
+                         region_args = dict(backend=new_region.actual_backend,
                                             region_invalidator=new_region.region_invalidator.__class__)
-                         log.debug('dogpile: registering a new region `%s` %s', namespace_name, region_args)
+                         log.debug('dogpile: registering a new region key=`%s` args=%s', namespace_name, region_args)
                      region_meta.dogpile_cache_regions[namespace_name] = new_region
              def includeme(config):
                  """Configure the dogpile cache regions from ``config.registry.settings``."""
                  settings = config.registry.settings
                  configure_dogpile_cache(settings)

vcsserver/lib/rc_cache/backends.py

0 +123 -149

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
-             import time
-             import errno
+             #import errno
+             import fcntl
+             import functools
              import logging
+             import os
+             import pickle
+             #import time
+             #import gevent
              import msgpack
              import redis
-             from dogpile.cache.api import CachedValue
-             from dogpile.cache.backends import memory as memory_backend
+             flock_org = fcntl.flock
+             from typing import Union
+             from dogpile.cache.api import Deserializer, Serializer
              from dogpile.cache.backends import file as file_backend
+             from dogpile.cache.backends import memory as memory_backend
              from dogpile.cache.backends import redis as redis_backend
-             from dogpile.cache.backends.file import NO_VALUE, compat, FileLock
+             from dogpile.cache.backends.file import FileLock
              from dogpile.cache.util import memoized_property
-             from pyramid.settings import asbool
              from vcsserver.lib.memory_lru_dict import LRUDict, LRUDictDebug
-             from vcsserver.utils import safe_str
+             from vcsserver.str_utils import safe_bytes, safe_str
+             from vcsserver.type_utils import str2bool
              _default_max_size = 1024
              log = logging.getLogger(__name__)
              class LRUMemoryBackend(memory_backend.MemoryBackend):
                  key_prefix = 'lru_mem_backend'
                  pickle_values = False
                  def __init__(self, arguments):
-                     max_size = arguments.pop('max_size', _default_max_size)
+                     self.max_size = arguments.pop('max_size', _default_max_size)
                      LRUDictClass = LRUDict
                      if arguments.pop('log_key_count', None):
                          LRUDictClass = LRUDictDebug
-                     arguments['cache_dict'] = LRUDictClass(max_size)
-                     super(LRUMemoryBackend, self).__init__(arguments)
+                     arguments['cache_dict'] = LRUDictClass(self.max_size)
+                     super().__init__(arguments)
+                 def __repr__(self):
+                     return f'{self.__class__}(maxsize=`{self.max_size}`)'
+                 def __str__(self):
+                     return self.__repr__()
                  def delete(self, key):
                      """Remove ``key`` from the cache; a missing key is silently ignored."""
                      try:
                          del self._cache[key]
                      except KeyError:
                          # Deleting a key that is not there is not an error here.
                          return
+                 def list_keys(self, prefix):
+                     return list(self._cache.keys())
                  def delete_multi(self, keys):
                      """Call ``delete()`` for every key in ``keys``, in iteration order."""
                      for cache_key in keys:
                          self.delete(cache_key)
-             class PickleSerializer(object):
-                 def _dumps(self, value, safe=False):
-                     try:
-                         return compat.pickle.dumps(value)
-                     except Exception:
-                         if safe:
-                             return NO_VALUE
-                         else:
-                             raise
-                 def _loads(self, value, safe=True):
-                     try:
-                         return compat.pickle.loads(value)
-                     except Exception:
-                         if safe:
-                             return NO_VALUE
-                         else:
-                             raise
+                 def delete_multi_by_prefix(self, prefix):
+                     cache_keys = self.list_keys(prefix=prefix)
+                     num_affected_keys = len(cache_keys)
+                     if num_affected_keys:
+                         self.delete_multi(cache_keys)
+                     return num_affected_keys
-             class MsgPackSerializer(object):
-                 def _dumps(self, value, safe=False):
-                     try:
-                         return msgpack.packb(value)
-                     except Exception:
-                         if safe:
-                             return NO_VALUE
-                         else:
-                             raise
-                 def _loads(self, value, safe=True):
-                     """
-                     pickle maintained the `CachedValue` wrapper of the tuple
-                     msgpack does not, so it must be added back in.
-                    """
-                     try:
-                         value = msgpack.unpackb(value, use_list=False)
-                         return CachedValue(*value)
-                     except Exception:
-                         if safe:
-                             return NO_VALUE
-                         else:
-                             raise
+             class PickleSerializer:
+                 # Serializer mixin: values are dumped with pickle at HIGHEST_PROTOCOL and read back with pickle.loads.
+                 # NOTE(review): pickle.loads can execute arbitrary code on crafted input; presumably only
+                 # data written by this cache is ever loaded — confirm the store is trusted.
+                 serializer: None | Serializer = staticmethod(  # type: ignore
+                     functools.partial(pickle.dumps, protocol=pickle.HIGHEST_PROTOCOL)
+                 )
+                 deserializer: None | Deserializer = staticmethod(  # type: ignore
+                     functools.partial(pickle.loads)
+                 )
-             import fcntl
-             flock_org = fcntl.flock
+             class MsgPackSerializer:
+                 # Serializer mixin: values are packed with msgpack.packb and read back with msgpack.unpackb.
+                 serializer: None | Serializer = staticmethod(  # type: ignore
+                     msgpack.packb
+                 )
+                 # use_list=False is passed to unpackb; per the msgpack-python docs arrays then come back as tuples
+                 deserializer: None | Deserializer = staticmethod(  # type: ignore
+                     functools.partial(msgpack.unpackb, use_list=False)
+                 )
              class CustomLockFactory(FileLock):
                  # FileLock subclass with no overrides; FileNamespaceBackend passes it as its `lock_factory` argument.
                  pass
              class FileNamespaceBackend(PickleSerializer, file_backend.DBMBackend):
                  key_prefix = 'file_backend'
                  def __init__(self, arguments):
                      arguments['lock_factory'] = CustomLockFactory
                      db_file = arguments.get('filename')
-                     log.debug('initialing %s DB in %s', self.__class__.__name__, db_file)
+                     log.debug('initialing cache-backend=%s db in %s', self.__class__.__name__, db_file)
+                     db_file_dir = os.path.dirname(db_file)
+                     if not os.path.isdir(db_file_dir):
+                         os.makedirs(db_file_dir)
                      try:
-                         super(FileNamespaceBackend, self).__init__(arguments)
+                         super().__init__(arguments)
                      except Exception:
-                         log.error('Failed to initialize db at: %s', db_file)
+                         log.exception('Failed to initialize db at: %s', db_file)
                          raise
                  def __repr__(self):
-                     return '{} `{}`'.format(self.__class__, self.filename)
+                     return f'{self.__class__}(file=`{self.filename}`)'
+                 def __str__(self):
+                     return self.__repr__()
-                 def list_keys(self, prefix=''):
-                     prefix = '{}:{}'.format(self.key_prefix, prefix)
+                 def _get_keys_pattern(self, prefix: bytes = b''):
+                     # Builds the bytes value `<key_prefix>:<prefix>`; `list_keys` matches DBM keys against it
+                     # with `startswith`. Both parts are passed through `safe_bytes` first.
+                     return b'%b:%b' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
-                     def cond(v):
+                 def list_keys(self, prefix: bytes = b''):
+                     prefix = self._get_keys_pattern(prefix)
+                     def cond(dbm_key: bytes):
                          if not prefix:
                              return True
-                         if v.startswith(prefix):
+                         if dbm_key.startswith(prefix):
                              return True
                          return False
                      with self._dbm_file(True) as dbm:
                          try:
-                             return filter(cond, dbm.keys())
+                             return list(filter(cond, dbm.keys()))
                          except Exception:
                              log.error('Failed to fetch DBM keys from DB: %s', self.get_store())
                              raise
+                 def delete_multi_by_prefix(self, prefix):
+                     cache_keys = self.list_keys(prefix=prefix)
+                     num_affected_keys = len(cache_keys)
+                     if num_affected_keys:
+                         self.delete_multi(cache_keys)
+                     return num_affected_keys
                  def get_store(self):
                      # Identifies the store of this backend by its `filename` attribute
                      # (presumably the db file path given in the backend arguments — confirm).
                      return self.filename
-                 def _dbm_get(self, key):
-                     with self._dbm_file(False) as dbm:
-                         if hasattr(dbm, 'get'):
-                             value = dbm.get(key, NO_VALUE)
-                         else:
-                             # gdbm objects lack a .get method
-                             try:
-                                 value = dbm[key]
-                             except KeyError:
-                                 value = NO_VALUE
-                         if value is not NO_VALUE:
-                             value = self._loads(value)
-                         return value
-                 def get(self, key):
-                     try:
-                         return self._dbm_get(key)
-                     except Exception:
-                         log.error('Failed to fetch DBM key %s from DB: %s', key, self.get_store())
-                         raise
-                 def set(self, key, value):
-                     with self._dbm_file(True) as dbm:
-                         dbm[key] = self._dumps(value)
-                 def set_multi(self, mapping):
-                     with self._dbm_file(True) as dbm:
-                         for key, value in mapping.items():
-                             dbm[key] = self._dumps(value)
              class BaseRedisBackend(redis_backend.RedisBackend):
                  key_prefix = ''
                  def __init__(self, arguments):
-                     super(BaseRedisBackend, self).__init__(arguments)
+                     self.db_conn = arguments.get('host', '') or arguments.get('url', '') or 'redis-host'
+                     super().__init__(arguments)
                      self._lock_timeout = self.lock_timeout
-                     self._lock_auto_renewal = asbool(arguments.pop("lock_auto_renewal", True))
+                     self._lock_auto_renewal = str2bool(arguments.pop("lock_auto_renewal", True))
                      if self._lock_auto_renewal and not self._lock_timeout:
                          # set default timeout for auto_renewal
                          self._lock_timeout = 30
+                 def __repr__(self):
+                     return f'{self.__class__}(conn=`{self.db_conn}`)'
+                 def __str__(self):
+                     return self.__repr__()
                  def _create_client(self):
                      args = {}
                      if self.url is not None:
                          args.update(url=self.url)
                      else:
                          args.update(
                              host=self.host, password=self.password,
                              port=self.port, db=self.db
                          )
                      connection_pool = redis.ConnectionPool(**args)
+                     self.writer_client = redis.StrictRedis(
+                         connection_pool=connection_pool
+                     )
+                     self.reader_client = self.writer_client
-                     return redis.StrictRedis(connection_pool=connection_pool)
+                 def _get_keys_pattern(self, prefix: bytes = b''):
+                     # Builds the bytes pattern `<key_prefix>:<prefix>*` that `list_keys` hands to the
+                     # client's `keys()` call. Both parts are passed through `safe_bytes` first.
+                     return b'%b:%b*' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
+                 def list_keys(self, prefix: bytes = b''):
+                     # Expands `prefix` into the backend key pattern and returns whatever
+                     # `reader_client.keys(pattern)` yields for it.
+                     prefix = self._get_keys_pattern(prefix)
+                     return self.reader_client.keys(prefix)
-                 def list_keys(self, prefix=''):
-                     prefix = '{}:{}*'.format(self.key_prefix, prefix)
-                     return self.client.keys(prefix)
+                 def delete_multi_by_prefix(self, prefix, use_lua=False):
+                     if use_lua:
+                         # high efficient LUA script to delete ALL keys by prefix...
+                         lua = """local keys = redis.call('keys', ARGV[1])
+                                  for i=1,#keys,5000 do
+                                  redis.call('del', unpack(keys, i, math.min(i+(5000-1), #keys)))
+                                  end
+                                  return #keys"""
+                         num_affected_keys = self.writer_client.eval(
+                             lua,
+,
+                             f"{prefix}*")
+                     else:
+                         cache_keys = self.list_keys(prefix=prefix)
+                         num_affected_keys = len(cache_keys)
+                         if num_affected_keys:
+                             self.delete_multi(cache_keys)
+                     return num_affected_keys
                  def get_store(self):
-                     return self.client.connection_pool
-                 def get(self, key):
-                     value = self.client.get(key)
-                     if value is None:
-                         return NO_VALUE
-                     return self._loads(value)
-                 def get_multi(self, keys):
-                     if not keys:
-                         return []
-                     values = self.client.mget(keys)
-                     loads = self._loads
-                     return [
-                         loads(v) if v is not None else NO_VALUE
-                         for v in values]
-                 def set(self, key, value):
-                     if self.redis_expiration_time:
-                         self.client.setex(key, self.redis_expiration_time,
-                                           self._dumps(value))
-                     else:
-                         self.client.set(key, self._dumps(value))
-                 def set_multi(self, mapping):
-                     dumps = self._dumps
-                     mapping = dict(
-                         (k, dumps(v))
-                         for k, v in mapping.items()
+                     )
-                     if not self.redis_expiration_time:
-                         self.client.mset(mapping)
-                     else:
-                         pipe = self.client.pipeline()
-                         for key, value in mapping.items():
-                             pipe.setex(key, self.redis_expiration_time, value)
-                         pipe.execute()
+                     return self.reader_client.connection_pool
                  def get_mutex(self, key):
                      if self.distributed_lock:
-                         lock_key = redis_backend.u('_lock_{0}').format(safe_str(key))
-                         return get_mutex_lock(self.client, lock_key, self._lock_timeout,
-                                               auto_renewal=self._lock_auto_renewal)
+                         lock_key = f'_lock_{safe_str(key)}'
+                         return get_mutex_lock(
+                             self.writer_client, lock_key,
+                             self._lock_timeout,
+                             auto_renewal=self._lock_auto_renewal
+                         )
                      else:
                          return None
              class RedisPickleBackend(PickleSerializer, BaseRedisBackend):
                  """Redis cache backend combined with the pickle serializer mixin."""
                  key_prefix = 'redis_pickle_backend'
              class RedisMsgPackBackend(MsgPackSerializer, BaseRedisBackend):
                  """Redis cache backend combined with the msgpack serializer mixin."""
                  key_prefix = 'redis_msgpack_backend'
              def get_mutex_lock(client, lock_key, lock_timeout, auto_renewal=False):
-                 import redis_lock
+                 from vcsserver.lib._vendor import redis_lock
-                 class _RedisLockWrapper(object):
+                 class _RedisLockWrapper:
                      """LockWrapper for redis_lock"""
                      @classmethod
                      def get_lock(cls):
                          return redis_lock.Lock(
                              redis_client=client,
                              name=lock_key,
                              expire=lock_timeout,
                              auto_renewal=auto_renewal,
                              strict=True,
                          )
                      def __repr__(self):
-                         return "{}:{}".format(self.__class__.__name__, lock_key)
+                         return f"{self.__class__.__name__}:{lock_key}"
                      def __str__(self):
-                         return "{}:{}".format(self.__class__.__name__, lock_key)
+                         return f"{self.__class__.__name__}:{lock_key}"
                      def __init__(self):
                          self.lock = self.get_lock()
                          self.lock_key = lock_key
                      def acquire(self, wait=True):
                          log.debug('Trying to acquire Redis lock for key %s', self.lock_key)
                          try:
                              acquired = self.lock.acquire(wait)
                              log.debug('Got lock for key %s, %s', self.lock_key, acquired)
                              return acquired
                          except redis_lock.AlreadyAcquired:
                              return False
                          except redis_lock.AlreadyStarted:
                              # refresh thread exists, but it also means we acquired the lock
                              return True
                      def release(self):
                          try:
                              self.lock.release()
                          except redis_lock.NotAcquired:
                              pass
                  return _RedisLockWrapper()

vcsserver/lib/rc_cache/region_meta.py

0 +1 -1

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              import os
              import tempfile
              # Default dogpile settings; `cache_dir` points at `rc_cache` inside the system temp directory.
              dogpile_config_defaults = {
                  'cache_dir': os.path.join(tempfile.gettempdir(), 'rc_cache')
              }
              # GLOBAL TO STORE ALL REGISTERED REGIONS
              # (looked up by region name; file-namespace regions are also stored under their namespace)
              dogpile_cache_regions = {}

vcsserver/lib/rc_cache/utils.py

0 +94 -112

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
-             import os
-             import time
+             import functools
              import logging
-             import functools
+             import os
+             import threading
+             import time
+             import decorator
              from dogpile.cache import CacheRegion
-             from dogpile.cache.util import compat
-             from vcsserver.utils import safe_str, sha1
+             from vcsserver.utils import sha1
+             from vcsserver.str_utils import safe_bytes
+             from vcsserver.type_utils import str2bool # noqa :required by imports from .utils
-             from vcsserver.lib.rc_cache import region_meta
+             from . import region_meta
              log = logging.getLogger(__name__)
              class RhodeCodeCacheRegion(CacheRegion):
+                 def __repr__(self):
+                     return f'`{self.__class__.__name__}(name={self.name}, backend={self.backend.__class__})`'
                  def conditional_cache_on_arguments(
                          self, namespace=None,
                          expiration_time=None,
                          should_cache_fn=None,
-                         to_str=compat.string_type,
+                         to_str=str,
                          function_key_generator=None,
                          condition=True):
                      """
                      Custom conditional decorator, that will not touch any dogpile internals if
-                     condition isn't meet. This works a bit different than should_cache_fn
+                     condition isn't meet. This works a bit different from should_cache_fn
                      And it's faster in cases we don't ever want to compute cached values
                      """
-                     expiration_time_is_callable = compat.callable(expiration_time)
+                     expiration_time_is_callable = callable(expiration_time)
+                     if not namespace:
+                         namespace = getattr(self, '_default_namespace', None)
                      if function_key_generator is None:
                          function_key_generator = self.function_key_generator
-                     # workaround for py2 and cython problems, this block should be removed
-                     # once we've migrated to py3
-                     if 'cython' == 'cython':
-                         def decorator(fn):
-                             if to_str is compat.string_type:
-                                 # backwards compatible
-                                 key_generator = function_key_generator(namespace, fn)
-                             else:
-                                 key_generator = function_key_generator(namespace, fn, to_str=to_str)
-                             @functools.wraps(fn)
-                             def decorate(*arg, **kw):
-                                 key = key_generator(*arg, **kw)
-                                 @functools.wraps(fn)
-                                 def creator():
-                                     return fn(*arg, **kw)
+                     def get_or_create_for_user_func(func_key_generator, user_func, *arg, **kw):
-                                 if not condition:
-                                     return creator()
+                             log.debug('Calling un-cached method:%s', user_func.__name__)
+                             start = time.time()
+                             result = user_func(*arg, **kw)
+                             total = time.time() - start
+                             log.debug('un-cached method:%s took %.4fs', user_func.__name__, total)
+                             return result
+                         key = func_key_generator(*arg, **kw)
-                                 timeout = expiration_time() if expiration_time_is_callable \
-                                     else expiration_time
-                                 return self.get_or_create(key, creator, timeout, should_cache_fn)
-                             def invalidate(*arg, **kw):
-                                 key = key_generator(*arg, **kw)
-                                 self.delete(key)
-                             def set_(value, *arg, **kw):
-                                 key = key_generator(*arg, **kw)
-                                 self.set(key, value)
-                             def get(*arg, **kw):
-                                 key = key_generator(*arg, **kw)
-                                 return self.get(key)
-                             def refresh(*arg, **kw):
-                                 key = key_generator(*arg, **kw)
-                                 value = fn(*arg, **kw)
-                                 self.set(key, value)
-                                 return value
-                             decorate.set = set_
-                             decorate.invalidate = invalidate
-                             decorate.refresh = refresh
-                             decorate.get = get
-                             decorate.original = fn
-                             decorate.key_generator = key_generator
-                             decorate.__wrapped__ = fn
-                             return decorate
-                         return decorator
-                     def get_or_create_for_user_func(key_generator, user_func, *arg, **kw):
-                         if not condition:
-                             log.debug('Calling un-cached method:%s', user_func.func_name)
-                             start = time.time()
-                             result = user_func(*arg, **kw)
-                             total = time.time() - start
-                             log.debug('un-cached method:%s took %.4fs', user_func.func_name, total)
-                             return result
-                         key = key_generator(*arg, **kw)
-                         timeout = expiration_time() if expiration_time_is_callable \
-                             else expiration_time
-                         log.debug('Calling cached method:`%s`', user_func.func_name)
+                         log.debug('Calling cached method:`%s`', user_func.__name__)
                          return self.get_or_create(key, user_func, timeout, should_cache_fn, (arg, kw))
                      def cache_decorator(user_func):
-                         if to_str is compat.string_type:
+                         if to_str is str:
                              # backwards compatible
                              key_generator = function_key_generator(namespace, user_func)
                          else:
                              key_generator = function_key_generator(namespace, user_func, to_str=to_str)
                          def refresh(*arg, **kw):
                              """
                              Like invalidate, but regenerates the value instead
                              """
                              key = key_generator(*arg, **kw)
                              value = user_func(*arg, **kw)
                              self.set(key, value)
                              return value
                          def invalidate(*arg, **kw):
                              key = key_generator(*arg, **kw)
                              self.delete(key)
                          def set_(value, *arg, **kw):
                              key = key_generator(*arg, **kw)
                              self.set(key, value)
                          def get(*arg, **kw):
                              key = key_generator(*arg, **kw)
                              return self.get(key)
                          user_func.set = set_
                          user_func.invalidate = invalidate
                          user_func.get = get
                          user_func.refresh = refresh
                          user_func.key_generator = key_generator
                          user_func.original = user_func
                          # Use `decorate` to preserve the signature of :param:`user_func`.
                          return decorator.decorate(user_func, functools.partial(
                              get_or_create_for_user_func, key_generator))
                      return cache_decorator
              def make_region(*arg, **kw):
                  """Build a ``RhodeCodeCacheRegion``, forwarding every argument to its constructor."""
                  region = RhodeCodeCacheRegion(*arg, **kw)
                  return region
              def get_default_cache_settings(settings, prefixes=None):
                  prefixes = prefixes or []
                  cache_settings = {}
                  for key in settings.keys():
                      for prefix in prefixes:
                          if key.startswith(prefix):
                              name = key.split(prefix)[1].strip()
                              val = settings[key]
-                             if isinstance(val, compat.string_types):
+                             if isinstance(val, str):
                                  val = val.strip()
                              cache_settings[name] = val
                  return cache_settings
              def compute_key_from_params(*args):
                  """
                  Helper to compute key from given params to be used in cache manager
                  """
-                 return sha1("_".join(map(safe_str, args)))
+                 return sha1(safe_bytes("_".join(map(str, args))))
+             def custom_key_generator(backend, namespace, fn):
+                 func_name = fn.__name__
+                 def generate_key(*args):
+                     backend_pref = getattr(backend, 'key_prefix', None) or 'backend_prefix'
+                     namespace_pref = namespace or 'default_namespace'
+                     arg_key = compute_key_from_params(*args)
+                     final_key = f"{backend_pref}:{namespace_pref}:{func_name}_{arg_key}"
+                     return final_key
+                 return generate_key
              def backend_key_generator(backend):
                  """
                  Special wrapper that also sends over the backend to the key generator
                  """
                  def wrapper(namespace, fn):
-                     return key_generator(backend, namespace, fn)
+                     return custom_key_generator(backend, namespace, fn)
                  return wrapper
-             def key_generator(backend, namespace, fn):
-                 fname = fn.__name__
+             def get_or_create_region(region_name, region_namespace: str = None, use_async_runner=False):
+                 from .backends import FileNamespaceBackend
+                 from . import async_creation_runner
-                 def generate_key(*args):
-                     backend_prefix = getattr(backend, 'key_prefix', None) or 'backend_prefix'
-                     namespace_pref = namespace or 'default_namespace'
-                     arg_key = compute_key_from_params(*args)
-                     final_key = "{}:{}:{}_{}".format(backend_prefix, namespace_pref, fname, arg_key)
-                     return final_key
-                 return generate_key
-             def get_or_create_region(region_name, region_namespace=None):
-                 from vcsserver.lib.rc_cache.backends import FileNamespaceBackend
                  region_obj = region_meta.dogpile_cache_regions.get(region_name)
                  if not region_obj:
-                     raise EnvironmentError(
-                         'Region `{}` not in configured: {}.'.format(
-                             region_name, region_meta.dogpile_cache_regions.keys()))
+                     reg_keys = list(region_meta.dogpile_cache_regions.keys())
+                     raise OSError(f'Region `{region_name}` not in configured: {reg_keys}.')
+                 region_uid_name = f'{region_name}:{region_namespace}'
-                 region_uid_name = '{}:{}'.format(region_name, region_namespace)
+                 # Special case for ONLY the FileNamespaceBackend backend. We register one-file-per-region
                  if isinstance(region_obj.actual_backend, FileNamespaceBackend):
+                     if not region_namespace:
+                         raise ValueError(f'{FileNamespaceBackend} used requires to specify region_namespace param')
                      region_exist = region_meta.dogpile_cache_regions.get(region_namespace)
                      if region_exist:
                          log.debug('Using already configured region: %s', region_namespace)
                          return region_exist
-                     cache_dir = region_meta.dogpile_config_defaults['cache_dir']
                      expiration_time = region_obj.expiration_time
-                     if not os.path.isdir(cache_dir):
-                         os.makedirs(cache_dir)
+                     cache_dir = region_meta.dogpile_config_defaults['cache_dir']
+                     namespace_cache_dir = cache_dir
+                     # we default the namespace_cache_dir to our default cache dir.
+                     # however, if this backend is configured with filename= param, we prioritize that
+                     # so all caches within that particular region, even those namespaced end up in the same path
+                     if region_obj.actual_backend.filename:
+                         namespace_cache_dir = os.path.dirname(region_obj.actual_backend.filename)
+                     if not os.path.isdir(namespace_cache_dir):
+                         os.makedirs(namespace_cache_dir)
                      new_region = make_region(
                          name=region_uid_name,
                          function_key_generator=backend_key_generator(region_obj.actual_backend)
                      )
                      namespace_filename = os.path.join(
-                         cache_dir, "{}.cache.dbm".format(region_namespace))
+                         namespace_cache_dir, f"{region_name}_{region_namespace}.cache_db")
                      # special type that allows 1db per namespace
                      new_region.configure(
                          backend='dogpile.cache.rc.file_namespace',
                          expiration_time=expiration_time,
                          arguments={"filename": namespace_filename}
                      )
                      # create and save in region caches
                      log.debug('configuring new region: %s', region_uid_name)
                      region_obj = region_meta.dogpile_cache_regions[region_namespace] = new_region
+                 region_obj._default_namespace = region_namespace
+                 if use_async_runner:
+                     region_obj.async_creation_runner = async_creation_runner
                  return region_obj
-             def clear_cache_namespace(cache_region, cache_namespace_uid, invalidate=False):
-                 region = get_or_create_region(cache_region, cache_namespace_uid)
-                 cache_keys = region.backend.list_keys(prefix=cache_namespace_uid)
-                 num_delete_keys = len(cache_keys)
-                 if invalidate:
-                     region.invalidate(hard=False)
-                 else:
-                     if num_delete_keys:
-                         region.delete_multi(cache_keys)
-                 return num_delete_keys
+             def clear_cache_namespace(cache_region: str | RhodeCodeCacheRegion, cache_namespace_uid: str, method: str) -> int:
+                 from . import CLEAR_DELETE, CLEAR_INVALIDATE
+                 if not isinstance(cache_region, RhodeCodeCacheRegion):
+                     cache_region = get_or_create_region(cache_region, cache_namespace_uid)
+                 log.debug('clearing cache region: %s [prefix:%s] with method=%s',
+                           cache_region, cache_namespace_uid, method)
+                 num_affected_keys = 0
+                 if method == CLEAR_INVALIDATE:
+                     # NOTE: The CacheRegion.invalidate() method’s default mode of
+                     # operation is to set a timestamp local to this CacheRegion in this Python process only.
+                     # It does not impact other Python processes or regions as the timestamp is only stored locally in memory.
+                     cache_region.invalidate(hard=True)
+                 if method == CLEAR_DELETE:
+                     num_affected_keys = cache_region.backend.delete_multi_by_prefix(prefix=cache_namespace_uid)
+                 return num_affected_keys

vcsserver/lib/request_counter.py

0 +1 -3

-             # -*- coding: utf-8 -*-
-             # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              # Module-level running total of calls to get_request_counter(); starts at zero.
              counter = 0


              def get_request_counter(request):
                  """
                  Increment the module-level ``counter`` and return its new value.

                  :param request: accepted but not used by this function
                  :return: the value of ``counter`` after the increment
                  """
                  global counter
                  counter = counter + 1
                  return counter

vcsserver/pygrack.py

0 +116 -85

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              """Handles the Git smart protocol."""
              import os
              import socket
              import logging
-             import simplejson as json
              import dulwich.protocol
+             from dulwich.protocol import CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K
              from webob import Request, Response, exc
+             from vcsserver.lib.rc_json import json
              from vcsserver import hooks, subprocessio
+             from vcsserver.str_utils import ascii_bytes
              log = logging.getLogger(__name__)
-             class FileWrapper(object):
+             class FileWrapper:
                  """File wrapper that ensures how much data is read from it."""
                  def __init__(self, fd, content_length):
                      self.fd = fd
                      self.content_length = content_length
                      self.remain = content_length
                  def read(self, size):
                      if size <= self.remain:
                          try:
                              data = self.fd.read(size)
                          except socket.error:
                              raise IOError(self)
                          self.remain -= size
                      elif self.remain:
                          data = self.fd.read(self.remain)
                          self.remain = 0
                      else:
                          data = None
                      return data
                  def __repr__(self):
-                     return '<FileWrapper %s len: %s, read: %s>' % (
+                     return '<FileWrapper {} len: {}, read: {}>'.format(
                          self.fd, self.content_length, self.content_length - self.remain
                      )
-             class GitRepository(object):
+             class GitRepository:
                  """WSGI app for handling Git smart protocol endpoints."""
-                 git_folder_signature = frozenset(
-                     ('config', 'head', 'info', 'objects', 'refs'))
+                 git_folder_signature = frozenset(('config', 'head', 'info', 'objects', 'refs'))
                  commands = frozenset(('git-upload-pack', 'git-receive-pack'))
-                 valid_accepts = frozenset(('application/x-%s-result' %
-                                            c for c in commands))
+                 valid_accepts = frozenset(f'application/x-{c}-result' for c in commands)
                  # The last bytes are the SHA1 of the first 12 bytes.
                  EMPTY_PACK = (
-                     'PACK\x00\x00\x00\x02\x00\x00\x00\x00' +
-                     '\x02\x9d\x08\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
+                     b'PACK\x00\x00\x00\x02\x00\x00\x00\x00\x02\x9d\x08' +
+                     b'\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
                  )
-                 SIDE_BAND_CAPS = frozenset(('side-band', 'side-band-64k'))
+                 FLUSH_PACKET = b"0000"
-                 def __init__(self, repo_name, content_path, git_path, update_server_info,
-                              extras):
+                 SIDE_BAND_CAPS = frozenset((CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K))
+                 def __init__(self, repo_name, content_path, git_path, update_server_info, extras):
                      files = frozenset(f.lower() for f in os.listdir(content_path))
                      valid_dir_signature = self.git_folder_signature.issubset(files)
                      if not valid_dir_signature:
-                         raise OSError('%s missing git signature' % content_path)
+                         raise OSError(f'{content_path} missing git signature')
                      self.content_path = content_path
                      self.repo_name = repo_name
                      self.extras = extras
                      self.git_path = git_path
                      self.update_server_info = update_server_info
                  def _get_fixedpath(self, path):
                      """
                      Small fix for repo_path
                      :param path:
                      """
                      path = path.split(self.repo_name, 1)[-1]
                      if path.startswith('.git'):
                          # for bare repos we still get the .git prefix inside, we skip it
                          # here, and remove from the service command
                          path = path[4:]
                      return path.strip('/')
                  def inforefs(self, request, unused_environ):
                      """
                      WSGI Response producer for HTTP GET Git Smart
                      HTTP /info/refs request.
                      """
                      git_command = request.GET.get('service')
                      if git_command not in self.commands:
                          log.debug('command %s not allowed', git_command)
                          return exc.HTTPForbidden()
                      # please, resist the urge to add '\n' to git capture and increment
                      # line count by 1.
                      # by git docs: Documentation/technical/http-protocol.txt#L214 \n is
                      # a part of protocol.
                      # The code in Git client not only does NOT need '\n', but actually
                      # blows up if you sprinkle "flush" (0000) as "0001\n".
                      # It reads binary, per number of bytes specified.
                      # if you do add '\n' as part of data, count it.
-                     server_advert = '# service=%s\n' % git_command
-                     packet_len = str(hex(len(server_advert) + 4)[2:].rjust(4, '0')).lower()
+                     server_advert = f'# service={git_command}\n'
+                     packet_len = hex(len(server_advert) + 4)[2:].rjust(4, '0').lower()
                      try:
                          gitenv = dict(os.environ)
                          # forget all configs
                          gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
                          command = [self.git_path, git_command[4:], '--stateless-rpc',
                                     '--advertise-refs', self.content_path]
                          out = subprocessio.SubprocessIOChunker(
                              command,
                              env=gitenv,
-                             starting_values=[packet_len + server_advert + '0000'],
+                             starting_values=[ascii_bytes(packet_len + server_advert) + self.FLUSH_PACKET],
                              shell=False
                          )
-                     except EnvironmentError:
+                     except OSError:
                          log.exception('Error processing command')
                          raise exc.HTTPExpectationFailed()
                      resp = Response()
-                     resp.content_type = 'application/x-%s-advertisement' % str(git_command)
+                     resp.content_type = f'application/x-{git_command}-advertisement'
                      resp.charset = None
                      resp.app_iter = out
                      return resp
                  def _get_want_capabilities(self, request):
                      """
                      Return the capabilities announced in the first line of the request body.

                      The body position is saved before the line is read and restored
                      afterwards.

                      :param request: request object exposing ``body_file_seekable``
                      :return: frozenset built from the second element returned by
                          ``dulwich.protocol.extract_want_line_capabilities``
                      """
                      body = request.body_file_seekable
                      start_offset = body.tell()
                      want_line = body.readline()
                      # rewind so that later consumers see the body from where it was
                      body.seek(start_offset)
                      parsed = dulwich.protocol.extract_want_line_capabilities(want_line)
                      return frozenset(parsed[1])
                  def _build_failed_pre_pull_response(self, capabilities, pre_pull_messages):
                      """
                      Construct a response with an empty PACK file.
                      We use an empty PACK file, as that would trigger the failure of the pull
                      or clone command.
                      We also print in the error output a message explaining why the command
                      was aborted.
-                     If aditionally, the user is accepting messages we send them the output
+                     If additionally, the user is accepting messages we send them the output
                      of the pre-pull hook.
                      Note that for clients not supporting side-band we just send them the
                      empty PACK file.
                      """
                      if self.SIDE_BAND_CAPS.intersection(capabilities):
                          response = []
                          proto = dulwich.protocol.Protocol(None, response.append)
-                         proto.write_pkt_line('NAK\n')
-                         self._write_sideband_to_proto(pre_pull_messages, proto,
-                                                       capabilities)
+                         proto.write_pkt_line(dulwich.protocol.NAK_LINE)
+                         self._write_sideband_to_proto(proto, ascii_bytes(pre_pull_messages, allow_bytes=True), capabilities)
                          # N.B.(skreft): Do not change the sideband channel to 3, as that
                          # produces a fatal error in the client:
                          #   fatal: error in sideband demultiplexer
-                         proto.write_sideband(2, 'Pre pull hook failed: aborting\n')
-                         proto.write_sideband(1, self.EMPTY_PACK)
+                         proto.write_sideband(
+                             dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS,
+                             ascii_bytes('Pre pull hook failed: aborting\n', allow_bytes=True))
+                         proto.write_sideband(
+                             dulwich.protocol.SIDE_BAND_CHANNEL_DATA,
+                             ascii_bytes(self.EMPTY_PACK, allow_bytes=True))
-                         # writes 0000
+                         # writes b"0000" as default
                          proto.write_pkt_line(None)
                          return response
                      else:
-                         return [self.EMPTY_PACK]
+                         return [ascii_bytes(self.EMPTY_PACK, allow_bytes=True)]
+                 def _build_post_pull_response(self, response, capabilities, start_message, end_message):
+                     """
+                     Given a list response we inject the post-pull messages.
+                     We only inject the messages if the client supports sideband, and the
+                     response has the format:
+NAK\n...0000
+                     Note that we do not check the no-progress capability as by default, git
+                     sends it, which effectively would block all messages.
+                     """
+                     if not self.SIDE_BAND_CAPS.intersection(capabilities):
+                         return response
+                     if not start_message and not end_message:
+                         return response
+                     try:
+                         iter(response)
+                         # iterator probably will work, we continue
+                     except TypeError:
+                         raise TypeError(f'response must be an iterator: got {type(response)}')
+                     if isinstance(response, (list, tuple)):
+                         raise TypeError(f'response must be an iterator: got {type(response)}')
+                     def injected_response():
-                 def _write_sideband_to_proto(self, data, proto, capabilities):
+                         do_loop = 1
+                         header_injected = 0
+                         next_item = None
+                         has_item = False
+                         item = b''
+                         while do_loop:
+                             try:
+                                 next_item = next(response)
+                             except StopIteration:
+                                 do_loop = 0
+                             if has_item:
+                                 # last item ! alter it now
+                                 if do_loop == 0 and item.endswith(self.FLUSH_PACKET):
+                                     new_response = [item[:-4]]
+                                     new_response.extend(self._get_messages(end_message, capabilities))
+                                     new_response.append(self.FLUSH_PACKET)
+                                     item = b''.join(new_response)
+                                 yield item
+                             has_item = True
+                             item = next_item
+                             # alter item if it's the initial chunk
+                             if not header_injected and item.startswith(b'0008NAK\n'):
+                                 new_response = [b'0008NAK\n']
+                                 new_response.extend(self._get_messages(start_message, capabilities))
+                                 new_response.append(item[8:])
+                                 item = b''.join(new_response)
+                                 header_injected = 1
+                     return injected_response()
+                 def _write_sideband_to_proto(self, proto, data, capabilities):
                      """
-                     Write the data to the proto's sideband number 2.
+                     Write the data to the proto's sideband number 2 == SIDE_BAND_CHANNEL_PROGRESS
                      We do not use dulwich's write_sideband directly as it only supports
                      side-band-64k.
                      """
                      if not data:
                          return
                      # N.B.(skreft): The values below are explained in the pack protocol
                      # documentation, section Packfile Data.
                      # https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt
-                     if 'side-band-64k' in capabilities:
+                     if CAPABILITY_SIDE_BAND_64K in capabilities:
                          chunk_size = 65515
-                     elif 'side-band' in capabilities:
+                     elif CAPABILITY_SIDE_BAND in capabilities:
                          chunk_size = 995
                      else:
                          return
-                     chunker = (
-                         data[i:i + chunk_size] for i in xrange(0, len(data), chunk_size))
+                     chunker = (data[i:i + chunk_size] for i in range(0, len(data), chunk_size))
                      for chunk in chunker:
-                         proto.write_sideband(2, chunk)
+                         proto.write_sideband(dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS, ascii_bytes(chunk, allow_bytes=True))
                  def _get_messages(self, data, capabilities):
                      """Return a list with packets for sending data in sideband number 2."""
                      response = []
                      proto = dulwich.protocol.Protocol(None, response.append)
-                     self._write_sideband_to_proto(data, proto, capabilities)
+                     self._write_sideband_to_proto(proto, data, capabilities)
                      return response
-                 def _inject_messages_to_response(self, response, capabilities,
-                                                  start_messages, end_messages):
-                     """
-                     Given a list response we inject the pre/post-pull messages.
-                     We only inject the messages if the client supports sideband, and the
-                     response has the format:
-NAK\n...0000
-                     Note that we do not check the no-progress capability as by default, git
-                     sends it, which effectively would block all messages.
-                     """
-                     if not self.SIDE_BAND_CAPS.intersection(capabilities):
-                         return response
-                     if not start_messages and not end_messages:
-                         return response
-                     # make a list out of response if it's an iterator
-                     # so we can investigate it for message injection.
-                     if hasattr(response, '__iter__'):
-                         response = list(response)
-                     if (not response[0].startswith('0008NAK\n') or
-                             not response[-1].endswith('0000')):
-                         return response
-                     new_response = ['0008NAK\n']
-                     new_response.extend(self._get_messages(start_messages, capabilities))
-                     if len(response) == 1:
-                         new_response.append(response[0][8:-4])
-                     else:
-                         new_response.append(response[0][8:])
-                         new_response.extend(response[1:-1])
-                         new_response.append(response[-1][:-4])
-                     new_response.extend(self._get_messages(end_messages, capabilities))
-                     new_response.append('0000')
-                     return new_response
                  def backend(self, request, environ):
                      """
                      WSGI Response producer for HTTP POST Git Smart HTTP requests.
                      Reads commands and data from HTTP POST's body.
                      returns an iterator obj with contents of git command's
                      response to stdout
                      """
                      # TODO(skreft): think how we could detect an HTTPLockedException, as
                      # we probably want to have the same mechanism used by mercurial and
                      # simplevcs.
                      # For that we would need to parse the output of the command looking for
                      # some signs of the HTTPLockedError, parse the data and reraise it in
                      # pygrack. However, that would interfere with the streaming.
                      #
                      # Now the output of a blocked push is:
                      # Pushing to http://test_regular:test12@127.0.0.1:5001/vcs_test_git
                      # POST git-receive-pack (1047 bytes)
                      # remote: ERROR: Repository `vcs_test_git` locked by user `test_admin`. Reason:`lock_auto`
                      # To http://test_regular:test12@127.0.0.1:5001/vcs_test_git
                      # ! [remote rejected] master -> master (pre-receive hook declined)
                      # error: failed to push some refs to 'http://test_regular:test12@127.0.0.1:5001/vcs_test_git'
                      git_command = self._get_fixedpath(request.path_info)
                      if git_command not in self.commands:
                          log.debug('command %s not allowed', git_command)
                          return exc.HTTPForbidden()
                      capabilities = None
                      if git_command == 'git-upload-pack':
                          capabilities = self._get_want_capabilities(request)
                      if 'CONTENT_LENGTH' in environ:
                          inputstream = FileWrapper(request.body_file_seekable,
                                                    request.content_length)
                      else:
                          inputstream = request.body_file_seekable
                      resp = Response()
-                     resp.content_type = ('application/x-%s-result' %
-                                          git_command.encode('utf8'))
+                     resp.content_type = f'application/x-{git_command}-result'
                      resp.charset = None
                      pre_pull_messages = ''
+                     # Upload-pack == clone
                      if git_command == 'git-upload-pack':
-                         status, pre_pull_messages = hooks.git_pre_pull(self.extras)
-                         if status != 0:
+                         hook_response = hooks.git_pre_pull(self.extras)
+                         if hook_response.status != 0:
+                             pre_pull_messages = hook_response.output
                              resp.app_iter = self._build_failed_pre_pull_response(
                                  capabilities, pre_pull_messages)
                              return resp
                      gitenv = dict(os.environ)
                      # forget all configs
                      gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
                      gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
                      cmd = [self.git_path, git_command[4:], '--stateless-rpc',
                             self.content_path]
                      log.debug('handling cmd %s', cmd)
                      out = subprocessio.SubprocessIOChunker(
                          cmd,
-                         inputstream=inputstream,
+                         input_stream=inputstream,
                          env=gitenv,
                          cwd=self.content_path,
                          shell=False,
                          fail_on_stderr=False,
                          fail_on_return_code=False
                      )
                      if self.update_server_info and git_command == 'git-receive-pack':
                          # We need to fully consume the iterator here, as the
                          # update-server-info command needs to be run after the push.
                          out = list(out)
                          # Updating refs manually after each push.
                          # This is required as some clients are exposing Git repos internally
                          # with the dumb protocol.
                          cmd = [self.git_path, 'update-server-info']
                          log.debug('handling cmd %s', cmd)
                          output = subprocessio.SubprocessIOChunker(
                              cmd,
-                             inputstream=inputstream,
+                             input_stream=inputstream,
                              env=gitenv,
                              cwd=self.content_path,
                              shell=False,
                              fail_on_stderr=False,
                              fail_on_return_code=False
                          )
                          # Consume all the output so the subprocess finishes
                          for _ in output:
                              pass
+                     # Upload-pack == clone
                      if git_command == 'git-upload-pack':
-                         unused_status, post_pull_messages = hooks.git_post_pull(self.extras)
-                         resp.app_iter = self._inject_messages_to_response(
-                             out, capabilities, pre_pull_messages, post_pull_messages)
+                         hook_response = hooks.git_post_pull(self.extras)
+                         post_pull_messages = hook_response.output
+                         resp.app_iter = self._build_post_pull_response(out, capabilities, pre_pull_messages, post_pull_messages)
                      else:
                          resp.app_iter = out
                      return resp
                  def __call__(self, environ, start_response):
                      """
                      WSGI entry point: route the request to ``inforefs`` or ``backend``.

                      Paths that start with ``info/refs`` (after ``_get_fixedpath``) go to
                      ``inforefs``; everything else goes to ``backend``. An
                      ``exc.HTTPException`` raised by the handler is logged and used as
                      the response; any other exception is logged and answered with
                      ``exc.HTTPInternalServerError``.

                      :param environ: WSGI environment
                      :param start_response: WSGI start_response callable
                      :return: result of calling the chosen response as a WSGI app
                      """
                      request = Request(environ)
                      fixed_path = self._get_fixedpath(request.path_info)
                      handler = self.inforefs if fixed_path.startswith('info/refs') else self.backend
                      try:
                          response = handler(request, environ)
                      except exc.HTTPException as http_error:
                          log.exception('HTTP Error')
                          response = http_error
                      except Exception:
                          log.exception('Unknown error')
                          response = exc.HTTPInternalServerError()
                      return response(environ, start_response)

vcsserver/remote/git_remote.py ~~vcsserver/git.py~~

0 renamed +502 -265

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation; either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              import collections
              import logging
              import os
-             import posixpath as vcspath
              import re
              import stat
              import traceback
-             import urllib
-             import urllib2
+             import urllib.request
+             import urllib.parse
+             import urllib.error
              from functools import wraps
              import more_itertools
              import pygit2
              from pygit2 import Repository as LibGit2Repo
              from pygit2 import index as LibGit2Index
              from dulwich import index, objects
-             from dulwich.client import HttpGitClient, LocalGitClient
+             from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
              from dulwich.errors import (
                  NotGitRepository, ChecksumMismatch, WrongObjectException,
                  MissingCommitError, ObjectMissing, HangupException,
                  UnexpectedCommandError)
              from dulwich.repo import Repo as DulwichRepo
-             from dulwich.server import update_server_info
+             import rhodecode
              from vcsserver import exceptions, settings, subprocessio
-             from vcsserver.utils import safe_str, safe_int, safe_unicode
-             from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo
+             from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str
+             from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
              from vcsserver.hgcompat import (
                  hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
              from vcsserver.git_lfs.lib import LFSOidStore
              from vcsserver.vcs_base import RemoteBase
              DIR_STAT = stat.S_IFDIR
              FILE_MODE = stat.S_IFMT
              GIT_LINK = objects.S_IFGITLINK
-             PEELED_REF_MARKER = '^{}'
+             PEELED_REF_MARKER = b'^{}'
+             HEAD_MARKER = b'HEAD'
              log = logging.getLogger(__name__)
-             def str_to_dulwich(value):
-                 """
-                 Dulwich 0.10.1a requires `unicode` objects to be passed in.
-                 """
-                 return value.decode(settings.WIRE_ENCODING)
              def reraise_safe_exceptions(func):
                  """Converts Dulwich exceptions to something neutral."""
                  @wraps(func)
                  def wrapper(*args, **kwargs):
                      try:
                          return func(*args, **kwargs)
                      except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as e:
                          exc = exceptions.LookupException(org_exc=e)
                          raise exc(safe_str(e))
                      except (HangupException, UnexpectedCommandError) as e:
                          exc = exceptions.VcsException(org_exc=e)
                          raise exc(safe_str(e))
-                     except Exception as e:
-                         # NOTE(marcink): becuase of how dulwich handles some exceptions
+                     except Exception:
+                         # NOTE(marcink): because of how dulwich handles some exceptions
                          # (KeyError on empty repos), we cannot track this and catch all
                          # exceptions, it's an exceptions from other handlers
                          #if not hasattr(e, '_vcs_kind'):
                              #log.exception("Unhandled exception in git remote call")
                              #raise_from_original(exceptions.UnhandledException)
                          raise
                  return wrapper
              class Repo(DulwichRepo):
                  """
                  A wrapper for dulwich Repo class.
                  Since dulwich is sometimes keeping .idx file descriptors open, it leads to
                  "Too many open files" error. We need to close all opened file descriptors
                  once the repo object is destroyed.
                  """
                  def __del__(self):
                      if hasattr(self, 'object_store'):
                          self.close()
              class Repository(LibGit2Repo):
                  def __enter__(self):
                      return self
                  def __exit__(self, exc_type, exc_val, exc_tb):
                      self.free()
              class GitFactory(RepoFactory):
                  repo_type = 'git'
                  def _create_repo(self, wire, create, use_libgit2=False):
                      if use_libgit2:
-                         return Repository(wire['path'])
+                         repo = Repository(safe_bytes(wire['path']))
                      else:
-                         repo_path = str_to_dulwich(wire['path'])
-                         return Repo(repo_path)
+                         # dulwich mode
+                         repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
+                         repo = Repo(repo_path)
+                     log.debug('repository created: got GIT object: %s', repo)
+                     return repo
                  def repo(self, wire, create=False, use_libgit2=False):
                      """
                      Get a repository instance for the given path.
                      """
                      return self._create_repo(wire, create, use_libgit2)
                  def repo_libgit2(self, wire):
                      return self.repo(wire, use_libgit2=True)
+             def create_signature_from_string(author_str, **kwargs):
+                 """
+                 Creates a pygit2.Signature object from a string of the format 'Name <email>'.
+                 :param author_str: String of the format 'Name <email>'
+                 :return: pygit2.Signature object
+                 """
+                 match = re.match(r'^(.+) <(.+)>$', author_str)
+                 if match is None:
+                     raise ValueError(f"Invalid format: {author_str}")
+                 name, email = match.groups()
+                 return pygit2.Signature(name, email, **kwargs)
+             def get_obfuscated_url(url_obj):
+                 url_obj.passwd = b'*****' if url_obj.passwd else url_obj.passwd
+                 url_obj.query = obfuscate_qs(url_obj.query)
+                 obfuscated_uri = str(url_obj)
+                 return obfuscated_uri
              class GitRemote(RemoteBase):
                  def __init__(self, factory):
                      self._factory = factory
                      self._bulk_methods = {
                          "date": self.date,
                          "author": self.author,
                          "branch": self.branch,
                          "message": self.message,
                          "parents": self.parents,
                          "_commit": self.revision,
                      }
+                     self._bulk_file_methods = {
+                         "size": self.get_node_size,
+                         "data": self.get_node_data,
+                         "flags": self.get_node_flags,
+                         "is_binary": self.get_node_is_binary,
+                         "md5": self.md5_hash
+                     }
                  def _wire_to_config(self, wire):
                      if 'config' in wire:
-                         return dict([(x[0] + '_' + x[1], x[2]) for x in wire['config']])
+                         return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
                      return {}
                  def _remote_conf(self, config):
                      params = [
                          '-c', 'core.askpass=""',
                      ]
-                     ssl_cert_dir = config.get('vcs_ssl_dir')
-                     if ssl_cert_dir:
-                         params.extend(['-c', 'http.sslCAinfo={}'.format(ssl_cert_dir)])
+                     config_attrs = {
+                         'vcs_ssl_dir': 'http.sslCAinfo={}',
+                         'vcs_git_lfs_store_location': 'lfs.storage={}'
+                     }
+                     for key, param in config_attrs.items():
+                         if value := config.get(key):
+                             params.extend(['-c', param.format(value)])
                      return params
                  @reraise_safe_exceptions
                  def discover_git_version(self):
                      stdout, _ = self.run_git_command(
                          {}, ['--version'], _bare=True, _safe=True)
-                     prefix = 'git version'
+                     prefix = b'git version'
                      if stdout.startswith(prefix):
                          stdout = stdout[len(prefix):]
-                     return stdout.strip()
+                     return safe_str(stdout.strip())
                  @reraise_safe_exceptions
                  def is_empty(self, wire):
                      repo_init = self._factory.repo_libgit2(wire)
                      with repo_init as repo:
                          try:
                              has_head = repo.head.name
                              if has_head:
                                  return False
                              # NOTE(marcink): check again using more expensive method
                              return repo.is_empty
                          except Exception:
                              pass
                          return True
                  @reraise_safe_exceptions
                  def assert_correct_path(self, wire):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
-                     def _assert_correct_path(_context_uid, _repo_id):
+                     def _assert_correct_path(_context_uid, _repo_id, fast_check):
+                         if fast_check:
+                             path = safe_str(wire['path'])
+                             if pygit2.discover_repository(path):
+                                 return True
+                             return False
+                         else:
                          try:
                              repo_init = self._factory.repo_libgit2(wire)
-                             with repo_init as repo:
+                                 with repo_init:
                                  pass
                          except pygit2.GitError:
                              path = wire.get('path')
                              tb = traceback.format_exc()
                              log.debug("Invalid Git path `%s`, tb: %s", path, tb)
                              return False
+                             return True
-                         return True
-                     return _assert_correct_path(context_uid, repo_id)
+                     return _assert_correct_path(context_uid, repo_id, True)
                  @reraise_safe_exceptions
                  def bare(self, wire):
                      repo_init = self._factory.repo_libgit2(wire)
                      with repo_init as repo:
                          return repo.is_bare
                  @reraise_safe_exceptions
+                 def get_node_data(self, wire, commit_id, path):
+                     repo_init = self._factory.repo_libgit2(wire)
+                     with repo_init as repo:
+                         commit = repo[commit_id]
+                         blob_obj = commit.tree[path]
+                         if blob_obj.type != pygit2.GIT_OBJ_BLOB:
+                             raise exceptions.LookupException()(
+                                 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
+                         return BytesEnvelope(blob_obj.data)
+                 @reraise_safe_exceptions
+                 def get_node_size(self, wire, commit_id, path):
+                     repo_init = self._factory.repo_libgit2(wire)
+                     with repo_init as repo:
+                         commit = repo[commit_id]
+                         blob_obj = commit.tree[path]
+                         if blob_obj.type != pygit2.GIT_OBJ_BLOB:
+                             raise exceptions.LookupException()(
+                                 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
+                         return blob_obj.size
+                 @reraise_safe_exceptions
+                 def get_node_flags(self, wire, commit_id, path):
+                     repo_init = self._factory.repo_libgit2(wire)
+                     with repo_init as repo:
+                         commit = repo[commit_id]
+                         blob_obj = commit.tree[path]
+                         if blob_obj.type != pygit2.GIT_OBJ_BLOB:
+                             raise exceptions.LookupException()(
+                                 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
+                         return blob_obj.filemode
+                 @reraise_safe_exceptions
+                 def get_node_is_binary(self, wire, commit_id, path):
+                     repo_init = self._factory.repo_libgit2(wire)
+                     with repo_init as repo:
+                         commit = repo[commit_id]
+                         blob_obj = commit.tree[path]
+                         if blob_obj.type != pygit2.GIT_OBJ_BLOB:
+                             raise exceptions.LookupException()(
+                                 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
+                         return blob_obj.is_binary
+                 @reraise_safe_exceptions
                  def blob_as_pretty_string(self, wire, sha):
                      repo_init = self._factory.repo_libgit2(wire)
                      with repo_init as repo:
                          blob_obj = repo[sha]
-                         blob = blob_obj.data
-                         return blob
+                         return BytesEnvelope(blob_obj.data)
                  @reraise_safe_exceptions
                  def blob_raw_length(self, wire, sha):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _blob_raw_length(_repo_id, _sha):
                          repo_init = self._factory.repo_libgit2(wire)
                          with repo_init as repo:
                              blob = repo[sha]
                              return blob.size
                      return _blob_raw_length(repo_id, sha)
                  def _parse_lfs_pointer(self, raw_content):
+                     spec_string = b'version https://git-lfs.github.com/spec'
+                     if raw_content and raw_content.startswith(spec_string):
-                     spec_string = 'version https://git-lfs.github.com/spec'
-                     if raw_content and raw_content.startswith(spec_string):
-                         pattern = re.compile(r"""
+                         pattern = re.compile(rb"""
                          (?:\n)?
                          ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
                          ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
                          ^size[ ](?P<oid_size>[0-9]+)\n
                          (?:\n)?
                          """, re.VERBOSE | re.MULTILINE)
                          match = pattern.match(raw_content)
                          if match:
                              return match.groupdict()
                      return {}
                  @reraise_safe_exceptions
                  def is_large_file(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
+                     region = self._region(wire)
-                     region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _is_large_file(_repo_id, _sha):
                          repo_init = self._factory.repo_libgit2(wire)
                          with repo_init as repo:
                              blob = repo[commit_id]
                              if blob.is_binary:
                                  return {}
                              return self._parse_lfs_pointer(blob.data)
                      return _is_large_file(repo_id, commit_id)
                  @reraise_safe_exceptions
                  def is_binary(self, wire, tree_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
+                     region = self._region(wire)
-                     region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _is_binary(_repo_id, _tree_id):
                          repo_init = self._factory.repo_libgit2(wire)
                          with repo_init as repo:
                              blob_obj = repo[tree_id]
                              return blob_obj.is_binary
                      return _is_binary(repo_id, tree_id)
                  @reraise_safe_exceptions
+                 def md5_hash(self, wire, commit_id, path):
+                     cache_on, context_uid, repo_id = self._cache_on(wire)
+                     region = self._region(wire)
+                     @region.conditional_cache_on_arguments(condition=cache_on)
+                     def _md5_hash(_repo_id, _commit_id, _path):
+                         repo_init = self._factory.repo_libgit2(wire)
+                         with repo_init as repo:
+                             commit = repo[_commit_id]
+                             blob_obj = commit.tree[_path]
+                             if blob_obj.type != pygit2.GIT_OBJ_BLOB:
+                                 raise exceptions.LookupException()(
+                                     f'Tree for commit_id:{_commit_id} is not a blob: {blob_obj.type_str}')
+                             return ''
+                     return _md5_hash(repo_id, commit_id, path)
+                 @reraise_safe_exceptions
                  def in_largefiles_store(self, wire, oid):
                      conf = self._wire_to_config(wire)
                      repo_init = self._factory.repo_libgit2(wire)
                      with repo_init as repo:
                          repo_name = repo.path
                      store_location = conf.get('vcs_git_lfs_store_location')
                      if store_location:
                          store = LFSOidStore(
                              oid=oid, repo=repo_name, store_location=store_location)
                          return store.has_oid()
                      return False
                  @reraise_safe_exceptions
                  def store_path(self, wire, oid):
                      conf = self._wire_to_config(wire)
                      repo_init = self._factory.repo_libgit2(wire)
                      with repo_init as repo:
                          repo_name = repo.path
                      store_location = conf.get('vcs_git_lfs_store_location')
                      if store_location:
                          store = LFSOidStore(
                              oid=oid, repo=repo_name, store_location=store_location)
                          return store.oid_path
-                     raise ValueError('Unable to fetch oid with path {}'.format(oid))
+                     raise ValueError(f'Unable to fetch oid with path {oid}')
                  @reraise_safe_exceptions
                  def bulk_request(self, wire, rev, pre_load):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _bulk_request(_repo_id, _rev, _pre_load):
                          result = {}
                          for attr in pre_load:
                              try:
                                  method = self._bulk_methods[attr]
+                                 wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
                                  args = [wire, rev]
                                  result[attr] = method(*args)
                              except KeyError as e:
-                                 raise exceptions.VcsException(e)(
-                                     "Unknown bulk attribute: %s" % attr)
+                                 raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
                          return result
                      return _bulk_request(repo_id, rev, sorted(pre_load))
-                 def _build_opener(self, url):
+                 @reraise_safe_exceptions
+                 def bulk_file_request(self, wire, commit_id, path, pre_load):
+                     cache_on, context_uid, repo_id = self._cache_on(wire)
+                     region = self._region(wire)
+                     @region.conditional_cache_on_arguments(condition=cache_on)
+                     def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
+                         result = {}
+                         for attr in pre_load:
+                             try:
+                                 method = self._bulk_file_methods[attr]
+                                 wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
+                                 result[attr] = method(wire, _commit_id, _path)
+                             except KeyError as e:
+                                 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
+                         return result
+                     return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load)))
+                 def _build_opener(self, url: str):
                      handlers = []
-                     url_obj = url_parser(url)
-                     _, authinfo = url_obj.authinfo()
+                     url_obj = url_parser(safe_bytes(url))
+                     authinfo = url_obj.authinfo()[1]
                      if authinfo:
                          # create a password manager
-                         passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
-                         passmgr.add_password(*authinfo)
+                         passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
+                         passmgr.add_password(*convert_to_str(authinfo))
                          handlers.extend((httpbasicauthhandler(passmgr),
                                           httpdigestauthhandler(passmgr)))
-                     return urllib2.build_opener(*handlers)
-                 def _type_id_to_name(self, type_id):
-                     return {
-                         1: b'commit',
-                         2: b'tree',
-                         3: b'blob',
-                         4: b'tag'
-                     }[type_id]
+                     return urllib.request.build_opener(*handlers)
                  @reraise_safe_exceptions
                  def check_url(self, url, config):
-                     url_obj = url_parser(url)
-                     test_uri, _ = url_obj.authinfo()
-                     url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
-                     url_obj.query = obfuscate_qs(url_obj.query)
-                     cleaned_uri = str(url_obj)
-                     log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
+                     url_obj = url_parser(safe_bytes(url))
+                     test_uri = safe_str(url_obj.authinfo()[0])
+                     obfuscated_uri = get_obfuscated_url(url_obj)
+                     log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
                      if not test_uri.endswith('info/refs'):
                          test_uri = test_uri.rstrip('/') + '/info/refs'
-                     o = self._build_opener(url)
+                     o = self._build_opener(url=url)
                      o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git
                      q = {"service": 'git-upload-pack'}
-                     qs = '?%s' % urllib.urlencode(q)
-                     cu = "%s%s" % (test_uri, qs)
-                     req = urllib2.Request(cu, None, {})
+                     qs = f'?{urllib.parse.urlencode(q)}'
+                     cu = f"{test_uri}{qs}"
                      try:
-                         log.debug("Trying to open URL %s", cleaned_uri)
+                         req = urllib.request.Request(cu, None, {})
+                         log.debug("Trying to open URL %s", obfuscated_uri)
                          resp = o.open(req)
                          if resp.code != 200:
                              raise exceptions.URLError()('Return Code is not 200')
                      except Exception as e:
-                         log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
+                         log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
                          # means it cannot be cloned
-                         raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))
+                         raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")
                      # now detect if it's proper git repo
-                     gitdata = resp.read()
-                     if 'service=git-upload-pack' in gitdata:
+                     gitdata: bytes = resp.read()
+                     if b'service=git-upload-pack' in gitdata:
                          pass
-                     elif re.findall(r'[0-9a-fA-F]{40}\s+refs', gitdata):
+                     elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
-                         # old style git can return some other format !
                          pass
                      else:
-                         raise exceptions.URLError()(
-                             "url [%s] does not look like an git" % (cleaned_uri,))
+                         e = None
+                         raise exceptions.URLError(e)(
+                             f"url [{obfuscated_uri}] does not look like an hg repo org_exc: {e}")
                      return True
                  @reraise_safe_exceptions
                  def clone(self, wire, url, deferred, valid_refs, update_after_clone):
                      # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
                      remote_refs = self.pull(wire, url, apply_refs=False)
                      repo = self._factory.repo(wire)
                      if isinstance(valid_refs, list):
                          valid_refs = tuple(valid_refs)
                      for k in remote_refs:
                          # only parse heads/tags and skip so called deferred tags
                          if k.startswith(valid_refs) and not k.endswith(deferred):
                              repo[k] = remote_refs[k]
                      if update_after_clone:
                          # we want to checkout HEAD
                          repo["HEAD"] = remote_refs["HEAD"]
                          index.build_index_from_tree(repo.path, repo.index_path(),
                                                      repo.object_store, repo["HEAD"].tree)
                  @reraise_safe_exceptions
                  def branch(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _branch(_context_uid, _repo_id, _commit_id):
                          regex = re.compile('^refs/heads')
                          def filter_with(ref):
                              return regex.match(ref[0]) and ref[1] == _commit_id
-                         branches = filter(filter_with, self.get_refs(wire).items())
+                         branches = list(filter(filter_with, list(self.get_refs(wire).items())))
                          return [x[0].split('refs/heads/')[-1] for x in branches]
                      return _branch(context_uid, repo_id, commit_id)
                  @reraise_safe_exceptions
                  def commit_branches(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _commit_branches(_context_uid, _repo_id, _commit_id):
                          repo_init = self._factory.repo_libgit2(wire)
                          with repo_init as repo:
                              branches = [x for x in repo.branches.with_commit(_commit_id)]
                              return branches
                      return _commit_branches(context_uid, repo_id, commit_id)
                  @reraise_safe_exceptions
                  def add_object(self, wire, content):
                      repo_init = self._factory.repo_libgit2(wire)
                      with repo_init as repo:
                          blob = objects.Blob()
                          blob.set_raw_string(content)
                          repo.object_store.add_object(blob)
                          return blob.id
-                 # TODO: this is quite complex, check if that can be simplified
+                 @reraise_safe_exceptions
+                 def create_commit(self, wire, author, committer, message, branch, new_tree_id,
+                                   date_args: list[int, int] = None,
+                                   parents: list | None = None):
+                     repo_init = self._factory.repo_libgit2(wire)
+                     with repo_init as repo:
+                         if date_args:
+                             current_time, offset = date_args
+                             kw = {
+                                 'time': current_time,
+                                 'offset': offset
+                             }
+                         author = create_signature_from_string(author, **kw)
+                         committer = create_signature_from_string(committer, **kw)
+                         tree = new_tree_id
+                         if isinstance(tree, (bytes, str)):
+                             # validate this tree is in the repo...
+                             tree = repo[safe_str(tree)].id
+                         if parents:
+                             # run via sha's and validate them in repo
+                             parents = [repo[c].id for c in parents]
+                         else:
+                             parents = []
+                             # ensure we COMMIT on top of given branch head
+                             # check if this repo has ANY branches, otherwise it's a new branch case we need to make
+                             if branch in repo.branches.local:
+                                 parents += [repo.branches[branch].target]
+                             elif [x for x in repo.branches.local]:
+                                 parents += [repo.head.target]
+                             #else:
+                                 # in case we want to commit on new branch we create it on top of HEAD
+                                 #repo.branches.local.create(branch, repo.revparse_single('HEAD'))
+                         # # Create a new commit
+                         commit_oid = repo.create_commit(
+                             f'refs/heads/{branch}',  # the name of the reference to update
+                             author,  # the author of the commit
+                             committer,  # the committer of the commit
+                             message,  # the commit message
+                             tree,  # the tree produced by the index
+                             parents  # list of parents for the new commit, usually just one,
+                         )
+                         new_commit_id = safe_str(commit_oid)
+                         return new_commit_id
                  @reraise_safe_exceptions
                  def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
                      # Summary of the added (`+`) implementation:
                      #  * when `commit_tree` and `commit_data['parents']` are both truthy, the
                      #    tree of the first parent is loaded into the repository index;
                      #  * every entry of `updated` (keys 'content', 'path', 'mode') becomes a
                      #    blob plus an index entry, every entry of `removed` is dropped from
                      #    the index;
                      #  * the index is written, turned into a tree, and a commit is created
                      #    via `self.create_commit`; `branch` is then created if needed and
                      #    `refs/heads/<branch>` is pointed at the new commit.
                      # Returns the id of the newly created commit.
-                     # Defines the root tree
-                     class _Root(object):
-                         def __repr__(self):
-                             return 'ROOT TREE'
-                     ROOT = _Root()
-                     repo = self._factory.repo(wire)
-                     object_store = repo.object_store
-                     # Create tree and populates it with blobs
-                     if commit_tree and repo[commit_tree]:
-                         git_commit = repo[commit_data['parents'][0]]
-                         commit_tree = repo[git_commit.tree]  # root tree
-                     else:
-                         commit_tree = objects.Tree()
-                     for node in updated:
-                         # Compute subdirs if needed
-                         dirpath, nodename = vcspath.split(node['path'])
-                         dirnames = map(safe_str, dirpath and dirpath.split('/') or [])
-                         parent = commit_tree
-                         ancestors = [('', parent)]
+                     def mode2pygit(mode):
+                         """
+                         git only supports two filemode 644 and 755
-                         # Tries to dig for the deepest existing tree
-                         while dirnames:
-                             curdir = dirnames.pop(0)
-                             try:
-                                 dir_id = parent[curdir][1]
-                             except KeyError:
-                                 # put curdir back into dirnames and stops
-                                 dirnames.insert(0, curdir)
-                                 break
-                             else:
-                                 # If found, updates parent
-                                 parent = repo[dir_id]
-                                 ancestors.append((curdir, parent))
-                         # Now parent is deepest existing tree and we need to create
-                         # subtrees for dirnames (in reverse order)
-                         # [this only applies for nodes from added]
-                         new_trees = []
+                         0o100755 -> 33261
+                         0o100644 -> 33188
+                         """
+                         return {
+                             0o100644: pygit2.GIT_FILEMODE_BLOB,
+                             0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
+                             0o120000: pygit2.GIT_FILEMODE_LINK
+                         }.get(mode) or pygit2.GIT_FILEMODE_BLOB
-                         blob = objects.Blob.from_string(node['content'])
+                     repo_init = self._factory.repo_libgit2(wire)
+                     with repo_init as repo:
+                         repo_index = repo.index
-                         if dirnames:
-                             # If there are trees which should be created we need to build
-                             # them now (in reverse order)
-                             reversed_dirnames = list(reversed(dirnames))
-                             curtree = objects.Tree()
-                             curtree[node['node_path']] = node['mode'], blob.id
-                             new_trees.append(curtree)
-                             for dirname in reversed_dirnames[:-1]:
-                                 newtree = objects.Tree()
-                                 newtree[dirname] = (DIR_STAT, curtree.id)
-                                 new_trees.append(newtree)
-                                 curtree = newtree
-                             parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
-                         else:
-                             parent.add(name=node['node_path'], mode=node['mode'], hexsha=blob.id)
+                         commit_parents = None
+                         if commit_tree and commit_data['parents']:
+                             commit_parents = commit_data['parents']
+                             parent_commit = repo[commit_parents[0]]
+                             repo_index.read_tree(parent_commit.tree)
-                         new_trees.append(parent)
-                         # Update ancestors
-                         reversed_ancestors = reversed(
-                             [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
-                         for parent, tree, path in reversed_ancestors:
-                             parent[path] = (DIR_STAT, tree.id)
-                             object_store.add_object(tree)
+                         for pathspec in updated:
+                             blob_id = repo.create_blob(pathspec['content'])
+                             ie = pygit2.IndexEntry(pathspec['path'], blob_id, mode2pygit(pathspec['mode']))
+                             repo_index.add(ie)
+                         for pathspec in removed:
+                             repo_index.remove(pathspec)
-                         object_store.add_object(blob)
-                         for tree in new_trees:
-                             object_store.add_object(tree)
+                         # Write changes to the index
+                         repo_index.write()
+                         # Create a tree from the updated index
+                         written_commit_tree = repo_index.write_tree()
+                     new_tree_id = written_commit_tree
-                     for node_path in removed:
-                         paths = node_path.split('/')
-                         tree = commit_tree  # start with top-level
-                         trees = [{'tree': tree, 'path': ROOT}]
-                         # Traverse deep into the forest...
-                         # resolve final tree by iterating the path.
-                         # e.g a/b/c.txt will get
-                         # - root as tree then
-                         # - 'a' as tree,
-                         # - 'b' as tree,
-                         # - stop at c as blob.
-                         for path in paths:
-                             try:
-                                 obj = repo[tree[path][1]]
-                                 if isinstance(obj, objects.Tree):
-                                     trees.append({'tree': obj, 'path': path})
-                                     tree = obj
-                             except KeyError:
-                                 break
-                         #PROBLEM:
-                         """
-                         We're not editing same reference tree object
-                         """
-                         # Cut down the blob and all rotten trees on the way back...
-                         for path, tree_data in reversed(zip(paths, trees)):
-                             tree = tree_data['tree']
-                             tree.__delitem__(path)
-                             # This operation edits the tree, we need to mark new commit back
+                     author = commit_data['author']
+                     committer = commit_data['committer']
+                     message = commit_data['message']
+                     date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]
-                             if len(tree) > 0:
-                                 # This tree still has elements - don't remove it or any
-                                 # of it's parents
-                                 break
-                     object_store.add_object(commit_tree)
+                     new_commit_id = self.create_commit(wire, author, committer, message, branch,
+                                                        new_tree_id, date_args=date_args, parents=commit_parents)
-                     # Create commit
-                     commit = objects.Commit()
-                     commit.tree = commit_tree.id
-                     for k, v in commit_data.items():
-                         setattr(commit, k, v)
-                     object_store.add_object(commit)
+                     # libgit2, ensure the branch is there and exists
+                     self.create_branch(wire, branch, new_commit_id)
-                     self.create_branch(wire, branch, commit.id)
+                     # libgit2, set new ref to this created commit
+                     self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)
-                     # dulwich set-ref
-                     ref = 'refs/heads/%s' % branch
-                     repo.refs[ref] = commit.id
-                     return commit.id
+                     return new_commit_id
                  @reraise_safe_exceptions
                  def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
                      # Fetch from `url` into the repository addressed by `wire` and return the
                      # remote refs. A `url` other than 'default' that contains no '://' is
                      # fetched with LocalGitClient, anything else with HttpGitClient.
                      #  * refs: optional ref names; limits both what is fetched and what is
                      #    returned.
                      #  * apply_refs: copy the fetched refs into the local repository.
                      #  * update_after: point HEAD at the remote HEAD and rebuild the index.
                      # Raises the wrapped AbortException when the remote is not a Git repository.
                      if url != 'default' and '://' not in url:
                          client = LocalGitClient(url)
                      else:
-                         url_obj = url_parser(url)
+                         url_obj = url_parser(safe_bytes(url))
                          o = self._build_opener(url)
-                         url, _ = url_obj.authinfo()
+                         url = url_obj.authinfo()[0]
                          client = HttpGitClient(base_url=url, opener=o)
                      repo = self._factory.repo(wire)
                      determine_wants = repo.object_store.determine_wants_all
                      if refs:
-                         def determine_wants_requested(references):
-                             return [references[r] for r in references if r in refs]
+                         refs: list[bytes] = [ascii_bytes(x) for x in refs]
+                         def determine_wants_requested(_remote_refs):
+                             determined = []
+                             for ref_name, ref_hash in _remote_refs.items():
+                                 bytes_ref_name = safe_bytes(ref_name)
+                                 if bytes_ref_name in refs:
+                                     bytes_ref_hash = safe_bytes(ref_hash)
+                                     determined.append(bytes_ref_hash)
+                             return determined
+                         # swap with our custom requested wants
                          determine_wants = determine_wants_requested
                      try:
                          remote_refs = client.fetch(
                              path=url, target=repo, determine_wants=determine_wants)
                      except NotGitRepository as e:
                          log.warning(
                              'Trying to fetch from "%s" failed, not a Git repository.', url)
                          # Exception can contain unicode which we convert
                          raise exceptions.AbortException(e)(repr(e))
                      # mikhail: client.fetch() returns all the remote refs, but fetches only
                      # refs filtered by `determine_wants` function. We need to filter result
                      # as well
                      if refs:
                          remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}
                      if apply_refs:
                          # TODO: johbo: Needs proper test coverage with a git repository
                          # that contains a tag object, so that we would end up with
                          # a peeled ref at this point.
                          for k in remote_refs:
                              if k.endswith(PEELED_REF_MARKER):
                                  log.debug("Skipping peeled reference %s", k)
                                  continue
                              repo[k] = remote_refs[k]
                          if refs and not update_after:
+                             # update to ref
                              # mikhail: explicitly set the head to the last ref.
-                             repo["HEAD"] = remote_refs[refs[-1]]
+                             update_to_ref = refs[-1]
                              # NOTE(review): this code only runs when `update_after` is falsy,
                              # so the isinstance check below can only succeed for an empty
                              # string, which would then be used as the ref key. Confirm the
                              # intended condition with the author.
+                             if isinstance(update_after, str):
+                                 update_to_ref = update_after
+                             repo[HEAD_MARKER] = remote_refs[update_to_ref]
                      if update_after:
                          # we want to checkout HEAD
                          # NOTE(review): when `refs` was given, `remote_refs` has already been
                          # reduced to those refs above; confirm HEAD_MARKER is still expected
                          # to be present in that case.
-                         repo["HEAD"] = remote_refs["HEAD"]
+                         repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
                          index.build_index_from_tree(repo.path, repo.index_path(),
-                                                     repo.object_store, repo["HEAD"].tree)
+                                                     repo.object_store, repo[HEAD_MARKER].tree)
                      # Unwrap the fetch result object so callers always receive the refs mapping.
+                     if isinstance(remote_refs, FetchPackResult):
+                         return remote_refs.refs
                      return remote_refs
                  @reraise_safe_exceptions
-                 def sync_fetch(self, wire, url, refs=None, all_refs=False):
-                     repo = self._factory.repo(wire)
+                 def sync_fetch(self, wire, url, refs=None, all_refs=False, **kwargs):
                      # List the refs of `url` with `git ls-remote`, then `git fetch` them in
                      # chunks into the repository addressed by `wire`.
                      #  * refs: optional value or list of values; only remote refs whose sha
                      #    is contained in it are fetched.
                      #  * all_refs: when false, ls-remote is limited to heads and tags.
                      #  * kwargs['sync_large_objects']: additionally run `git lfs fetch --all`.
                      # Returns an OrderedDict of remote ref -> sha (peeled refs and HEAD excluded).
+                     self._factory.repo(wire)
                      if refs and not isinstance(refs, (list, tuple)):
                          refs = [refs]
                      config = self._wire_to_config(wire)
                      # get all remote refs we'll use to fetch later
                      cmd = ['ls-remote']
                      if not all_refs:
                          cmd += ['--heads', '--tags']
                      cmd += [url]
                      output, __ = self.run_git_command(
                          wire, cmd, fail_on_stderr=False,
                          _copts=self._remote_conf(config),
                          extra_env={'GIT_TERMINAL_PROMPT': '0'})
                      remote_refs = collections.OrderedDict()
                      fetch_refs = []
                      for ref_line in output.splitlines():
-                         sha, ref = ref_line.split('\t')
+                         sha, ref = ref_line.split(b'\t')
                          sha = sha.strip()
                          if ref in remote_refs:
                              # duplicate, skip
                              continue
                          if ref.endswith(PEELED_REF_MARKER):
                              log.debug("Skipping peeled reference %s", ref)
                              continue
                          # don't sync HEAD
-                         if ref in ['HEAD']:
+                         if ref in [HEAD_MARKER]:
                              continue
                          remote_refs[ref] = sha
                          # NOTE(review): `sha` comes from splitting on b'\t', so this
                          # membership test presumably only matches when `refs` holds bytes
                          # values - verify against the callers.
                          if refs and sha in refs:
                              # we filter fetch using our specified refs
-                             fetch_refs.append('{}:{}'.format(ref, ref))
+                             fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
                          elif not refs:
-                             fetch_refs.append('{}:{}'.format(ref, ref))
+                             fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
                      log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
                      if fetch_refs:
-                         for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
+                         for chunk in more_itertools.chunked(fetch_refs, 128):
                              fetch_refs_chunks = list(chunk)
                              log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
-                             _out, _err = self.run_git_command(
+                             self.run_git_command(
                                  wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
                                  fail_on_stderr=False,
                                  _copts=self._remote_conf(config),
                                  extra_env={'GIT_TERMINAL_PROMPT': '0'})
                          # NOTE(review): the LFS fetch sits inside `if fetch_refs:` and, unlike
                          # the commands above, is run without the GIT_TERMINAL_PROMPT
                          # override - confirm both are intended.
+                         if kwargs.get('sync_large_objects'):
+                             self.run_git_command(
+                                 wire, ['lfs', 'fetch', url, '--all'],
+                                 fail_on_stderr=False,
+                                 _copts=self._remote_conf(config),
+                             )
                      return remote_refs
                  @reraise_safe_exceptions
-                 def sync_push(self, wire, url, refs=None):
+                 def sync_push(self, wire, url, refs=None, **kwargs):
                      # Mirror-push the repository addressed by `wire` to `url`.
                      # Returns None without pushing when `self.check_url` rejects the url.
                      # `refs` is not used by the code below; kwargs['sync_large_objects']
                      # additionally runs `git lfs push --all`.
                      if not self.check_url(url, wire):
                          return
                      config = self._wire_to_config(wire)
                      self._factory.repo(wire)
                      self.run_git_command(
                          wire, ['push', url, '--mirror'], fail_on_stderr=False,
                          _copts=self._remote_conf(config),
                          extra_env={'GIT_TERMINAL_PROMPT': '0'})
                      # NOTE(review): unlike the push above, the LFS push is run without the
                      # GIT_TERMINAL_PROMPT override - confirm this is intended.
+                     if kwargs.get('sync_large_objects'):
+                         self.run_git_command(
+                             wire, ['lfs', 'push', url, '--all'],
+                             fail_on_stderr=False,
+                             _copts=self._remote_conf(config),
+                         )
                  @reraise_safe_exceptions
                  def get_remote_refs(self, wire, url):
                      """Open the repository located at ``url`` and return its refs.

                      ``wire`` is accepted but not consulted by this method.
                      """
                      return Repo(url).get_refs()
                  @reraise_safe_exceptions
                  def get_description(self, wire):
                      """Return the description of the repository addressed by ``wire``."""
                      return self._factory.repo(wire).get_description()
                  @reraise_safe_exceptions
-                 def get_missing_revs(self, wire, rev1, rev2, path2):
+                 def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
                      # Exchange objects between the repository at wire['path'] and the one at
                      # `other_repo_path` (a local fetch in each direction), then walk the
                      # other repository including `rev2` and excluding `rev1`.
                      # Returns the list of commit ids produced by that walk.
+                     origin_repo_path = wire['path']
                      repo = self._factory.repo(wire)
-                     LocalGitClient(thin_packs=False).fetch(path2, repo)
+                     # fetch from other_repo_path to our origin repo
+                     LocalGitClient(thin_packs=False).fetch(other_repo_path, repo)
                      # a shallow copy of wire with only the path swapped addresses the other repo
                      wire_remote = wire.copy()
-                     wire_remote['path'] = path2
+                     wire_remote['path'] = other_repo_path
                      repo_remote = self._factory.repo(wire_remote)
-                     LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)
+                     # fetch from origin_repo_path to our remote repo
+                     LocalGitClient(thin_packs=False).fetch(origin_repo_path, repo_remote)
                      revs = [
                          x.commit.id
-                         for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
+                         for x in repo_remote.get_walker(include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])]
                      return revs
                  @reraise_safe_exceptions
                  def get_object(self, wire, sha, maybe_unreachable=False):
                      # Resolve `sha` (a commit id or any revision expression accepted by
                      # `revparse_single`) and return a dict with keys 'id', 'type',
                      # 'commit_id' and 'idx'. Tag objects are replaced by their target.
                      # Raises the wrapped LookupException when the revision cannot be parsed
                      # or when a plain commit id is not contained in any branch (the
                      # "dangling" check, skipped for tags, for references and when
                      # `maybe_unreachable` is true).
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      # NOTE(review): `maybe_unreachable` is read from the enclosing scope and
                      # is not an argument of the cached helper; if the cache key is built
                      # from the helper's arguments, results are shared across both values -
                      # confirm this is acceptable.
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _get_object(_context_uid, _repo_id, _sha):
                          repo_init = self._factory.repo_libgit2(wire)
                          with repo_init as repo:
                              missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
                              try:
                                  commit = repo.revparse_single(sha)
                              except KeyError:
                                  # NOTE(marcink): KeyError doesn't give us any meaningful information
                                  # here, we instead give something more explicit
                                  e = exceptions.RefNotFoundException('SHA: %s not found', sha)
                                  raise exceptions.LookupException(e)(missing_commit_err)
                              except ValueError as e:
                                  raise exceptions.LookupException(e)(missing_commit_err)
                              is_tag = False
                              if isinstance(commit, pygit2.Tag):
                                  commit = repo.get(commit.target)
                                  is_tag = True
                              check_dangling = True
                              if is_tag:
                                  check_dangling = False
                              if check_dangling and maybe_unreachable:
                                  check_dangling = False
                              # we used a reference and it parsed means we're not having a dangling commit
                              if sha != commit.hex:
                                  check_dangling = False
                              if check_dangling:
                                  # check for dangling commit
                                  # for/else: the else branch runs only when no truthy branch
                                  # name was yielded for this commit
                                  for branch in repo.branches.with_commit(commit.hex):
                                      if branch:
                                          break
                                  else:
                                      # NOTE(marcink): Empty error doesn't give us any meaningful information
                                      # here, we instead give something more explicit
                                      e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
                                      raise exceptions.LookupException(e)(missing_commit_err)
                              commit_id = commit.hex
-                             type_id = commit.type
+                             type_str = commit.type_str
                              return {
                                  'id': commit_id,
-                                 'type': self._type_id_to_name(type_id),
+                                 'type': type_str,
                                  'commit_id': commit_id,
                                  'idx': 0
                              }
                      return _get_object(context_uid, repo_id, sha)
                  @reraise_safe_exceptions
                  def get_refs(self, wire):
                      # Return a dict mapping reference name -> hex id of its target, limited to
                      # references whose name starts with refs/heads/ or refs/tags/.
                      # The lookup goes through the cache region when `cache_on` is set.
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _get_refs(_context_uid, _repo_id):
                          repo_init = self._factory.repo_libgit2(wire)
                          with repo_init as repo:
                              regex = re.compile('^refs/(heads|tags)/')
                              return {x.name: x.target.hex for x in
-                                     filter(lambda ref: regex.match(ref.name) ,repo.listall_reference_objects())}
+                                     [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]}
                      return _get_refs(context_uid, repo_id)
                  @reraise_safe_exceptions
                  def get_branch_pointers(self, wire):
                      # Return a dict mapping target hex id -> reference shorthand for every
                      # reference whose name starts with refs/heads. Because the hex id is the
                      # key, references sharing a target collapse to a single entry.
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _get_branch_pointers(_context_uid, _repo_id):
                          repo_init = self._factory.repo_libgit2(wire)
                          regex = re.compile('^refs/heads')
                          with repo_init as repo:
-                             branches = filter(lambda ref: regex.match(ref.name), repo.listall_reference_objects())
+                             branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]
                              return {x.target.hex: x.shorthand for x in branches}
                      return _get_branch_pointers(context_uid, repo_id)
                  @reraise_safe_exceptions
                  def head(self, wire, show_exc=True):
                      """Return the hex id of the object that HEAD peels to.

                      Any error raised while resolving HEAD is re-raised when ``show_exc``
                      is true; otherwise it is swallowed and ``None`` is returned.
                      """
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      cache_region = self._region(wire)
                      @cache_region.conditional_cache_on_arguments(condition=cache_on)
                      def _head(_context_uid, _repo_id, _show_exc):
                          with self._factory.repo_libgit2(wire) as repo:
                              try:
                                  peeled = repo.head.peel()
                                  return peeled.hex
                              except Exception:
                                  if not show_exc:
                                      return None
                                  raise
                      return _head(context_uid, repo_id, show_exc)
                  @reraise_safe_exceptions
                  def init(self, wire):
                      # Create the directory wire['path'] and initialise a non-bare repository
                      # in it. os.makedirs is called without exist_ok, so an already existing
                      # directory raises FileExistsError.
-                     repo_path = str_to_dulwich(wire['path'])
-                     self.repo = Repo.init(repo_path)
+                     repo_path = safe_str(wire['path'])
+                     os.makedirs(repo_path, mode=0o755)
+                     pygit2.init_repository(repo_path, bare=False)
                  @reraise_safe_exceptions
                  def init_bare(self, wire):
                      # Create the directory wire['path'] and initialise a bare repository in
                      # it. os.makedirs is called without exist_ok, so an already existing
                      # directory raises FileExistsError.
-                     repo_path = str_to_dulwich(wire['path'])
-                     self.repo = Repo.init_bare(repo_path)
+                     repo_path = safe_str(wire['path'])
+                     os.makedirs(repo_path, mode=0o755)
+                     pygit2.init_repository(repo_path, bare=True)
                  @reraise_safe_exceptions
                  def revision(self, wire, rev):
                      """Look up ``rev`` and describe the object found.

                      Returns a dict with the object's hex ``id`` and, when the object
                      exposes a ``tree_id`` attribute, the hex id of that tree under ``tree``.
                      """
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      cache_region = self._region(wire)
                      @cache_region.conditional_cache_on_arguments(condition=cache_on)
                      def _revision(_context_uid, _repo_id, _rev):
                          with self._factory.repo_libgit2(wire) as repo:
                              found = repo[rev]
                              described = {'id': found.id.hex}
                              # objects without a tree_id attribute (e.g. trees themselves)
                              # only get the 'id' key
                              if not hasattr(found, 'tree_id'):
                                  return described
                              described['tree'] = found.tree_id.hex
                              return described
                      return _revision(context_uid, repo_id, rev)
                  @reraise_safe_exceptions
                  def date(self, wire, commit_id):
                      """Return ``[commit_time, commit_time_offset]`` for ``commit_id``.

                      When the looked-up object has no ``commit_time`` attribute, the values
                      are read from the object returned by its ``get_object()`` instead.
                      """
                      cache_on, _context_uid, repo_id = self._cache_on(wire)
                      cache_region = self._region(wire)
                      @cache_region.conditional_cache_on_arguments(condition=cache_on)
                      def _date(_repo_id, _commit_id):
                          with self._factory.repo_libgit2(wire) as repo:
                              target = repo[commit_id]
                              if not hasattr(target, 'commit_time'):
                                  target = target.get_object()
                              # TODO(marcink): check dulwich difference of offset vs timezone
                              return [target.commit_time, target.commit_time_offset]
                      return _date(repo_id, commit_id)
                  @reraise_safe_exceptions
                  def author(self, wire, commit_id):
                      # Return the author of `commit_id` as "name <email>", or just the name
                      # when the author has no email. Objects without an `author` attribute
                      # are resolved through `get_object()` first.
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _author(_repo_id, _commit_id):
                          repo_init = self._factory.repo_libgit2(wire)
                          with repo_init as repo:
                              commit = repo[commit_id]
                              if hasattr(commit, 'author'):
                                  author = commit.author
                              else:
                                  author = commit.get_object().author
                              if author.email:
-                                 return u"{} <{}>".format(author.name, author.email)
+                                 return f"{author.name} <{author.email}>"
                              # fall back to the raw name if reading `author.name` fails
                              try:
-                                 return u"{}".format(author.name)
+                                 return f"{author.name}"
                              except Exception:
-                                 return u"{}".format(safe_unicode(author.raw_name))
+                                 return f"{safe_str(author.raw_name)}"
                      return _author(repo_id, commit_id)
                  @reraise_safe_exceptions
                  def message(self, wire, commit_id):
                      """Return the ``message`` attribute of the object stored under ``commit_id``."""
                      cache_on, _context_uid, repo_id = self._cache_on(wire)
                      cache_region = self._region(wire)
                      @cache_region.conditional_cache_on_arguments(condition=cache_on)
                      def _message(_repo_id, _commit_id):
                          with self._factory.repo_libgit2(wire) as repo:
                              return repo[commit_id].message
                      return _message(repo_id, commit_id)
                  @reraise_safe_exceptions
                  def parents(self, wire, commit_id):
                      """Return the hex ids of the parents of ``commit_id`` as a list.

                      When the looked-up object has no ``parent_ids`` attribute, the ids are
                      taken from the object returned by its ``get_object()`` instead.
                      """
                      cache_on, _context_uid, repo_id = self._cache_on(wire)
                      cache_region = self._region(wire)
                      @cache_region.conditional_cache_on_arguments(condition=cache_on)
                      def _parents(_repo_id, _commit_id):
                          with self._factory.repo_libgit2(wire) as repo:
                              found = repo[commit_id]
                              holder = found if hasattr(found, 'parent_ids') else found.get_object()
                              return [parent_id.hex for parent_id in holder.parent_ids]
                      return _parents(repo_id, commit_id)
                  @reraise_safe_exceptions
                  def children(self, wire, commit_id):
                      # Return the ids of the direct children of `commit_id`, taken from the
                      # first `git rev-list --children` output line that starts with it.
                      # Returns an empty list when no such line exists.
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      # NOTE(review): `head` is resolved outside the cached helper and is not
                      # one of its arguments; if the cache key is built from the helper's
                      # arguments, a cached answer may predate a newer HEAD - confirm.
+                     head = self.head(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _children(_repo_id, _commit_id):
                          # NOTE(review): the added range uses `<commit_id>^`; presumably this
                          # needs the commit to have a parent - verify behaviour for root
                          # commits.
                          output, __ = self.run_git_command(
-                             wire, ['rev-list', '--all', '--children'])
+                             wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])
                          child_ids = []
-                         pat = re.compile(r'^%s' % commit_id)
-                         for l in output.splitlines():
-                             if pat.match(l):
-                                 found_ids = l.split(' ')[1:]
+                         pat = re.compile(fr'^{commit_id}')
+                         for line in output.splitlines():
+                             line = safe_str(line)
+                             if pat.match(line):
+                                 found_ids = line.split(' ')[1:]
                                  child_ids.extend(found_ids)
+                                 break
                          return child_ids
                      return _children(repo_id, commit_id)
                  @reraise_safe_exceptions
                  def set_refs(self, wire, key, value):
                      """Create reference ``key`` pointing at ``value``, overwriting an existing one."""
                      with self._factory.repo_libgit2(wire) as repo:
                          repo.references.create(key, value, force=True)
                  @reraise_safe_exceptions
+                 def update_refs(self, wire, key, value):
                      # Point the existing reference `key` at `value`.
                      # Raises ValueError when `key` is not among the repository's references,
                      # so no new reference is ever created here.
+                     repo_init = self._factory.repo_libgit2(wire)
+                     with repo_init as repo:
+                         if key not in repo.references:
+                             raise ValueError(f'Reference {key} not found in the repository')
+                         repo.references.create(key, value, force=True)
+                 @reraise_safe_exceptions
                  def create_branch(self, wire, branch_name, commit_id, force=False):
                      # Create local branch `branch_name` at `commit_id`; when `commit_id` is
                      # falsy, the object HEAD peels to is used instead.
                      # An existing branch is only replaced when `force` is true; otherwise
                      # the branch is created only if it does not exist yet.
                      repo_init = self._factory.repo_libgit2(wire)
                      with repo_init as repo:
+                         if commit_id:
                          commit = repo[commit_id]
+                         else:
+                             # if commit is not given  just use the HEAD
+                             # `repo.head` is an attribute, not a callable; peel it to get
+                             # the object the branch should point at
+                             commit = repo.head.peel()
                          if force:
                              repo.branches.local.create(branch_name, commit, force=force)
                          elif not repo.branches.get(branch_name):
                              # create only if that branch isn't existing
                              repo.branches.local.create(branch_name, commit, force=force)
                  @reraise_safe_exceptions
                  def remove_ref(self, wire, key):
                      """Delete reference ``key`` from the repository addressed by ``wire``."""
                      with self._factory.repo_libgit2(wire) as repo:
                          repo.references.delete(key)
                  @reraise_safe_exceptions
                  def tag_remove(self, wire, tag_name):
                      # Delete the tag ``tag_name`` by removing its ``refs/tags/<tag_name>`` reference.
                      repo_init = self._factory.repo_libgit2(wire)
                      with repo_init as repo:
-                         key = 'refs/tags/{}'.format(tag_name)
+                         key = f'refs/tags/{tag_name}'
                          repo.references.delete(key)
                  @reraise_safe_exceptions
                  def tree_changes(self, wire, source_id, target_id):
                      # Compare the trees of two commits and classify the changed paths.
                      # Returns a tuple of three lists: (added, modified, deleted).
-                     # TODO(marcink): remove this seems it's only used by tests
                      repo = self._factory.repo(wire)
+                     # source can be empty
+                     source_id = safe_bytes(source_id if source_id else b'')
+                     target_id = safe_bytes(target_id)
                      # an empty source means the target tree is compared against ``None``
                      source = repo[source_id].tree if source_id else None
                      target = repo[target_id].tree
                      result = repo.object_store.tree_changes(source, target)
-                     return list(result)
+                     added = set()
+                     modified = set()
+                     deleted = set()
                      # each entry starts with an (old_path, new_path) pair; the other two pairs are ignored
+                     for (old_path, new_path), (_, _), (_, _) in list(result):
                          # both paths present -> modified; only new -> added; only old -> deleted
+                         if new_path and old_path:
+                             modified.add(new_path)
+                         elif new_path and not old_path:
+                             added.add(new_path)
+                         elif not new_path and old_path:
+                             deleted.add(old_path)
+                     return list(added), list(modified), list(deleted)
                  @reraise_safe_exceptions
                  def tree_and_type_for_path(self, wire, commit_id, path):
                      # Look up ``path`` in the tree of ``commit_id`` and return a tuple
                      # ``(hex id, type string, filemode)`` for the entry, or
                      # ``(None, None, None)`` when the path does not exist in that tree.
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
                          # NOTE(review): the underscore arguments are unused in the body and
                          # presumably only form the cache key; the lookup reads the enclosing
                          # ``commit_id`` / ``path`` instead
                          repo_init = self._factory.repo_libgit2(wire)
                          with repo_init as repo:
                              commit = repo[commit_id]
                              try:
                                  tree = commit.tree[path]
                              except KeyError:
                                  return None, None, None
-                             return tree.id.hex, tree.type, tree.filemode
+                             return tree.id.hex, tree.type_str, tree.filemode
                      return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
                  @reraise_safe_exceptions
                  def tree_items(self, wire, tree_id):
                      # List the entries of the tree object ``tree_id`` as
                      # ``(name, filemode, hex sha, type string)`` tuples.
                      # Raises ObjectMissing when the repository has no object with that id.
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _tree_items(_repo_id, _tree_id):
                          repo_init = self._factory.repo_libgit2(wire)
                          with repo_init as repo:
                              try:
                                  tree = repo[tree_id]
                              except KeyError:
-                                 raise ObjectMissing('No tree with id: {}'.format(tree_id))
+                                 raise ObjectMissing(f'No tree with id: {tree_id}')
                              result = []
                              for item in tree:
                                  item_sha = item.hex
                                  item_mode = item.filemode
-                                 item_type = item.type
+                                 item_type = item.type_str
                                  if item_type == 'commit':
                                      # NOTE(marcink): submodules we translate to 'link' for backward compat
                                      item_type = 'link'
                                  result.append((item.name, item_mode, item_sha, item_type))
                              return result
                      return _tree_items(repo_id, tree_id)
                  @reraise_safe_exceptions
                  def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
                      """
                      Old version that uses subprocess to call diff

                      Runs ``git diff`` between the two commits, or ``git show`` for
                      ``commit_id_2`` when ``commit_id_1`` equals ``self.EMPTY_COMMIT``,
                      optionally limited to ``file_filter``. ``context`` is the number of
                      context lines and ``opt_ignorews`` adds ``--ignore-all-space``.
                      Returns the output of ``run_git_command`` as bytes.
                      """
                      flags = [
                          f'-U{context}', '--patch',
                          '--binary',
                          '--find-renames',
                          '--no-indent-heuristic',
                          # '--indent-heuristic',
                          #'--full-index',
                          #'--abbrev=40'
                      ]
                      if opt_ignorews:
                          flags.append('--ignore-all-space')

                      against_empty = commit_id_1 == self.EMPTY_COMMIT
                      if against_empty:
                          cmd = ['show'] + flags + [commit_id_2]
                      else:
                          cmd = ['diff'] + flags + [commit_id_1, commit_id_2]
                      if file_filter:
                          cmd.extend(['--', file_filter])

                      diff, __ = self.run_git_command(wire, cmd)
                      if against_empty:
                          # If we used 'show' command, strip first few lines (until actual diff
                          # starts)
                          lines = diff.splitlines()
                          start = next(
                              (idx for idx, line in enumerate(lines) if line.startswith(b'diff')),
                              len(lines))
                          # the command output is bytes, so the separators must be bytes too;
                          # append new line just like 'diff' command does
                          diff = b'\n'.join(lines[start:]) + b'\n'
                      return diff
                  @reraise_safe_exceptions
                  def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
                      """
                      Diff two commits through libgit2 and return the patch in a ``BytesEnvelope``.

                      When ``commit_id_1`` equals ``self.EMPTY_COMMIT`` the tree of
                      ``commit_id_2`` is diffed against an empty tree. Rename detection is
                      always applied. With ``file_filter`` only the patch whose old file
                      path equals the filter is returned; an empty envelope is returned
                      when nothing matches.
                      """
                      repo_init = self._factory.repo_libgit2(wire)
                      with repo_init as repo:
                          swap = True
                          flags = 0
                          flags |= pygit2.GIT_DIFF_SHOW_BINARY
                          if opt_ignorews:
                              flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE

                          if commit_id_1 == self.EMPTY_COMMIT:
                              comm1 = repo[commit_id_2]
                              diff_obj = comm1.tree.diff_to_tree(
                                  flags=flags, context_lines=context, swap=swap)
                          else:
                              comm1 = repo[commit_id_2]
                              comm2 = repo[commit_id_1]
                              diff_obj = comm1.tree.diff_to_tree(
                                  comm2.tree, flags=flags, context_lines=context, swap=swap)

                          similar_flags = 0
                          similar_flags |= pygit2.GIT_DIFF_FIND_RENAMES
                          diff_obj.find_similar(flags=similar_flags)

                          if file_filter:
                              for p in diff_obj:
                                  if p.delta.old_file.path == file_filter:
                                      # fall back on the payload, not on the envelope's truthiness
                                      return BytesEnvelope(p.data or b'')
                              # no matching path == no diff
                              return BytesEnvelope(b'')

                          # NOTE(review): pygit2 documents ``Diff.patch`` as possibly ``None``
                          # (e.g. for an empty diff); guard it so ``safe_bytes`` only ever
                          # receives text
                          patch = diff_obj.patch
                          return BytesEnvelope(safe_bytes(patch) if patch else b'')
                  @reraise_safe_exceptions
                  def node_history(self, wire, commit_id, path, limit):
                      # Return the ids of commits reachable from ``commit_id`` that touched ``path``,
                      # as the list of 40-hex-digit matches found in the git command output.
                      # A truthy ``limit`` caps the number of commits requested from git.
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
                          # optimize for n==1, rev-list is much faster for that use-case
                          if limit == 1:
                              cmd = ['rev-list', '-1', commit_id, '--', path]
                          else:
                              cmd = ['log']
                              if limit:
                                  cmd.extend(['-n', str(safe_int(limit, 0))])
                              cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path])
                          output, __ = self.run_git_command(wire, cmd)
                          # the command output is bytes, hence the bytes pattern in the new revision
-                         commit_ids = re.findall(r'[0-9a-fA-F]{40}', output)
+                         commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output)
                          return [x for x in commit_ids]
                      return _node_history(context_uid, repo_id, commit_id, path, limit)
                  @reraise_safe_exceptions
-                 def node_annotate(self, wire, commit_id, path):
+                 def node_annotate_legacy(self, wire, commit_id, path):
+                     # note: replaced by pygit2 implementation
                      cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
                      # -l     ==> outputs long shas (and we need all 40 characters)
                      # --root ==> doesn't put '^' character for boundaries
                      # -r commit_id ==> blames for the given commit
                      output, __ = self.run_git_command(wire, cmd)
                      result = []
-                     for i, blame_line in enumerate(output.split('\n')[:-1]):
+                     for i, blame_line in enumerate(output.splitlines()[:-1]):
                          line_no = i + 1
-                         commit_id, line = re.split(r' ', blame_line, 1)
-                         result.append((line_no, commit_id, line))
+                         blame_commit_id, line = re.split(rb' ', blame_line, 1)
+                         result.append((line_no, blame_commit_id, line))
                      return result
                  @reraise_safe_exceptions
-                 def update_server_info(self, wire):
-                     repo = self._factory.repo(wire)
-                     update_server_info(repo)
+                 def node_annotate(self, wire, commit_id, path):
                      # Blame ``path`` at ``commit_id`` through libgit2. Returns a ``BinaryEnvelope``
                      # wrapping a list of ``(line_no, blame_commit_id, line)`` tuples with 1-based
                      # line numbers, where ``line`` comes from ``data.splitlines()`` of the tree entry.
+                     result_libgit = []
+                     repo_init = self._factory.repo_libgit2(wire)
+                     with repo_init as repo:
+                         commit = repo[commit_id]
+                         blame_obj = repo.blame(path, newest_commit=commit_id)
+                         for i, line in enumerate(commit.tree[path].data.splitlines()):
+                             line_no = i + 1
                              # look up the blame hunk covering this line
+                             hunk = blame_obj.for_line(line_no)
+                             blame_commit_id = hunk.final_commit_id.hex
+                             result_libgit.append((line_no, blame_commit_id, line))
+                     return BinaryEnvelope(result_libgit)
+                 @reraise_safe_exceptions
+                 def update_server_info(self, wire, force=False):
                      # Run ``git update-server-info`` (with ``--force`` when ``force`` is set)
                      # and return the command output split into lines.
+                     cmd = ['update-server-info']
+                     if force:
+                         cmd += ['--force']
+                     output, __ = self.run_git_command(wire, cmd)
+                     return output.splitlines()
                  @reraise_safe_exceptions
                  def get_all_commit_ids(self, wire):
                      """
                      Return the ids of all commits reachable from branches and tags.

                      The ids come from ``git rev-list --reverse --date-order --branches --tags``
                      split into lines (bytes). Any failure of the command yields an empty
                      list, which covers empty repositories.
                      """
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)

                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _get_all_commit_ids(_context_uid, _repo_id):
                          cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
                          try:
                              output, __ = self.run_git_command(wire, cmd)
                              return output.splitlines()
                          except Exception:
                              # Can be raised for empty repositories
                              return []

                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
                          # NOTE(review): not wired in yet. It only walks the history of HEAD,
                          # while the rev-list variant covers all branches and tags.
                          repo_init = self._factory.repo_libgit2(wire)
                          from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_SORT_TOPOLOGICAL
                          results = []
                          with repo_init as repo:
                              # sort mode must be built from GIT_SORT_* flags only;
                              # GIT_BRANCH_ALL is a branch-type constant, not a sort flag
                              sort_mode = GIT_SORT_TOPOLOGICAL | GIT_SORT_TIME | GIT_SORT_REVERSE
                              for commit in repo.walk(repo.head.target, sort_mode):
                                  results.append(commit.id.hex)
                          return results

                      return _get_all_commit_ids(context_uid, repo_id)
                  @reraise_safe_exceptions
                  def run_git_command(self, wire, cmd, **opts):
                      """
                      Run the git executable with ``cmd`` and return ``(stdout, stderr)`` as bytes.

                      Special ``opts`` consumed here; the remaining ones are passed on to
                      ``subprocessio.SubprocessIOChunker``:

                      * ``_bare``: do not prepend the default ``-c`` config options
                      * ``_safe``: on ``OSError`` return ``(b'', err)`` instead of raising
                      * ``_copts``: extra options placed before the git sub-command
                      * ``extra_env``: mapping merged into the process environment

                      When ``wire['path']`` is an existing directory it is used as ``cwd``.
                      Without ``_safe`` an ``OSError`` is re-raised through
                      ``exceptions.VcsException``.
                      """
                      path = wire.get('path', None)
                      debug_mode = rhodecode.ConfigGet().get_bool('debug')

                      if path and os.path.isdir(path):
                          opts['cwd'] = path

                      if '_bare' in opts:
                          _copts = []
                          del opts['_bare']
                      else:
                          _copts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']
                      safe_call = False
                      if '_safe' in opts:
                          # no exc on failure
                          del opts['_safe']
                          safe_call = True

                      if '_copts' in opts:
                          _copts.extend(opts['_copts'] or [])
                          del opts['_copts']

                      gitenv = os.environ.copy()
                      gitenv.update(opts.pop('extra_env', {}))
                      # drop GIT_DIR, whether inherited from the environment or set via extra_env
                      gitenv.pop('GIT_DIR', None)
                      gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
                      gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'

                      cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
                      _opts = {'env': gitenv, 'shell': False}

                      proc = None
                      try:
                          _opts.update(opts)
                          proc = subprocessio.SubprocessIOChunker(cmd, **_opts)

                          return b''.join(proc), b''.join(proc.stderr)
                      except OSError as err:
                          cmd = ' '.join(map(safe_str, cmd))  # human friendly CMD
                          # the call options contain the whole environment; log them only in debug mode
                          call_opts = _opts if debug_mode else {}

                          tb_err = ("Couldn't run git command ({}).\n"
                                    "Original error was:{}\n"
                                    "Call options:{}\n"
                                    .format(cmd, err, call_opts))
                          log.exception(tb_err)
                          if safe_call:
                              # keep stdout as bytes, the same type the success path returns
                              return b'', err
                          raise exceptions.VcsException()(tb_err)
                      finally:
                          if proc:
                              proc.close()
                  @reraise_safe_exceptions
                  def install_hooks(self, wire, force=False):
                      """
                      Install the git hooks into the repository at ``wire['path']``.

                      ``force`` is forwarded to ``install_git_hooks`` as ``force_create``;
                      the return value of that helper is returned unchanged.
                      """
                      from vcsserver.hook_utils import install_git_hooks
                      bare = self.bare(wire)
                      path = wire['path']
                      return install_git_hooks(path, bare, force_create=force)
                  @reraise_safe_exceptions
                  def get_hooks_info(self, wire):
                      """
                      Report the versions of the installed git hooks.

                      Returns a dict with the keys ``pre_version`` and ``post_version`` for
                      the repository at ``wire['path']``.
                      """
                      from vcsserver.hook_utils import (
                          get_git_pre_hook_version, get_git_post_hook_version)

                      is_bare = self.bare(wire)
                      repo_path = wire['path']
                      pre_version = get_git_pre_hook_version(repo_path, is_bare)
                      post_version = get_git_post_hook_version(repo_path, is_bare)
                      return {'pre_version': pre_version, 'post_version': post_version}
                  @reraise_safe_exceptions
                  def set_head_ref(self, wire, head_name):
                      # Point HEAD at the branch ``refs/heads/<head_name>``.
                      # Returns a two-element list: ``head_name`` followed by a status message.
                      log.debug('Setting refs/head to `%s`', head_name)
-                     cmd = ['symbolic-ref', 'HEAD', 'refs/heads/%s' % head_name]
-                     output, __ = self.run_git_command(wire, cmd)
-                     return [head_name] + output.splitlines()
+                     repo_init = self._factory.repo_libgit2(wire)
+                     with repo_init as repo:
+                         repo.set_head(f'refs/heads/{head_name}')
                      # the libgit2 call produces no output, so a fixed message takes its place
+                     return [head_name] + [f'set HEAD to refs/heads/{head_name}']
                  @reraise_safe_exceptions
-                 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
-                                  archive_dir_name, commit_id):
+                 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
+                                  archive_dir_name, commit_id, cache_config):
                      # Build an archive of ``commit_id``. This method only supplies the
                      # ``file_walker`` generator; all other arguments are handed on unchanged
                      # to the archiving helper called at the end.
                      def file_walker(_commit_id, path):
                          # Yield an ArchiveNode for every file below ``path`` in the commit's tree.
                          # NOTE(review): the body reads the enclosing ``commit_id``, not ``_commit_id``.
                          repo_init = self._factory.repo_libgit2(wire)
                          with repo_init as repo:
                              commit = repo[commit_id]
                              if path in ['', '/']:
                                  tree = commit.tree
                              else:
                                  tree = commit.tree[path.rstrip('/')]
                                  tree_id = tree.id.hex
                                  try:
                                      tree = repo[tree_id]
                                  except KeyError:
-                                     raise ObjectMissing('No tree with id: {}'.format(tree_id))
+                                     raise ObjectMissing(f'No tree with id: {tree_id}')
                              # read the tree into an index and iterate its entries
                              index = LibGit2Index.Index()
                              index.read_tree(tree)
                              file_iter = index
-                             for fn in file_iter:
-                                 file_path = fn.path
-                                 mode = fn.mode
+                             for file_node in file_iter:
+                                 file_path = file_node.path
+                                 mode = file_node.mode
                                  is_link = stat.S_ISLNK(mode)
                                  # entries with the commit filemode are skipped
                                  if mode == pygit2.GIT_FILEMODE_COMMIT:
                                      log.debug('Skipping path %s as a commit node', file_path)
                                      continue
-                                 yield ArchiveNode(file_path, mode, is_link, repo[fn.hex].read_raw)
+                                 yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)
-                     return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
-                                         archive_dir_name, commit_id)
+                     return store_archive_in_cache(
+                         file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)

vcsserver/remote/hg_remote.py ~~vcsserver/hg.py~~

0 renamed 0 0

	1		NO CONTENT: file renamed from vcsserver/hg.py to vcsserver/remote/hg_remote.py
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/remote/svn_remote.py ~~vcsserver/svn.py~~

0 renamed 0 0

	1		NO CONTENT: file renamed from vcsserver/svn.py to vcsserver/remote/svn_remote.py
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/remote_wsgi.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/scm_app.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/server.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/settings.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/subprocessio.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/svn_diff.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/__init__.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/fixture.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_git.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_hg.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_hgpatches.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_hooks.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_http_performance.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_install_hooks.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_main_http.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_pygrack.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_scm_app.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_server.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_subprocessio.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_svn.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_wsgi_app_caller.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tweens/__init__.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tweens/request_wrapper.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/utils.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/vcs_base.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/wsgi_app_caller.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

default.nix

0 removed 0 -197

NO CONTENT: file was removed

pip2nix.ini

0 removed 0 -3

NO CONTENT: file was removed

pkgs/README.rst

0 removed 0 -28

NO CONTENT: file was removed

pkgs/nix-common/pip2nix.nix

0 removed 0 -17

NO CONTENT: file was removed

pkgs/overlays.nix

0 removed 0 -83

NO CONTENT: file was removed

pkgs/patches/configparser/pyproject.patch

0 removed 0 -10

NO CONTENT: file was removed

pkgs/patches/dulwich/handle-dir-refs.patch

0 removed 0 -15

NO CONTENT: file was removed

pkgs/patches/git/docbook2texi.patch

0 removed 0 -38

NO CONTENT: file was removed

pkgs/patches/git/git-send-email-honor-PATH.patch

0 removed 0 -28

NO CONTENT: file was removed

pkgs/patches/git/git-sh-i18n.patch

0 removed 0 -23

NO CONTENT: file was removed

pkgs/patches/git/installCheck-path.patch

0 removed 0 -13

NO CONTENT: file was removed

pkgs/patches/git/ssh-path.patch

0 removed 0 -26

NO CONTENT: file was removed

pkgs/patches/importlib_metadata/pyproject.patch

0 removed 0 -7

NO CONTENT: file was removed

pkgs/patches/pytest/setuptools.patch

0 removed 0 -12

NO CONTENT: file was removed

pkgs/patches/zipp/pyproject.patch

0 removed 0 -10

NO CONTENT: file was removed

pkgs/python-packages-overrides.nix

0 removed 0 -126

NO CONTENT: file was removed

pkgs/python-packages.nix

0 removed 0 -1103

	1		NO CONTENT: file was removed
This diff has been collapsed as it changes many lines, (1103 lines changed) Show them Hide them

pkgs/shell-generate.nix

0 removed 0 -42

NO CONTENT: file was removed

release.nix

0 removed 0 -22

NO CONTENT: file was removed

requirements_pinned.txt

0 removed 0 -18

NO CONTENT: file was removed

setup.cfg

0 removed 0 -2

NO CONTENT: file was removed

setup.py

0 removed 0 -136

NO CONTENT: file was removed

shell.nix

0 removed 0 -66

NO CONTENT: file was removed

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages