rhodecode-vcsserver Commit - r1219:e9ee7632

merged default branch into stable

super-admin -

r1219:e9ee7632 stable

parent child

The requested changes are too big and content was truncated. Show full diff

configs/logging.ini

0 created 644 +53 0

			@@ -0,0 +1,53 b''
		1	; #####################
		2	; LOGGING CONFIGURATION
		3	; #####################
		4	; Logging template, used to configure logging
		5	; some variables here are replaced by RhodeCode to default values
		6
		7	[loggers]
		8	keys = root, vcsserver
		9
		10	[handlers]
		11	keys = console
		12
		13	[formatters]
		14	keys = generic, json
		15
		16	; #######
		17	; LOGGERS
		18	; #######
		19	[logger_root]
		20	level = NOTSET
		21	handlers = console
		22
		23	[logger_vcsserver]
		24	level = $RC_LOGGING_LEVEL
		25	handlers =
		26	qualname = vcsserver
		27	propagate = 1
		28
		29	; ########
		30	; HANDLERS
		31	; ########
		32
		33	[handler_console]
		34	class = StreamHandler
		35	args = (sys.stderr, )
		36	level = $RC_LOGGING_LEVEL
		37	; To enable JSON formatted logs replace generic with json
		38	; This allows sending properly formatted logs to grafana loki or elasticsearch
		39	#formatter = json
		40	#formatter = generic
		41	formatter = $RC_LOGGING_FORMATTER
		42
		43	; ##########
		44	; FORMATTERS
		45	; ##########
		46
		47	[formatter_generic]
		48	format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
		49	datefmt = %Y-%m-%d %H:%M:%S
		50
		51	[formatter_json]
		52	format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
		53	class = vcsserver.lib._vendor.jsonlogger.JsonFormatter

pyproject.toml

0 created 644 +73 0

			@@ -0,0 +1,73 b''
		1	[build-system]
		2	requires = ["setuptools>=61.0.0", "wheel"]
		3	build-backend = "setuptools.build_meta"
		4
		5	[project]
		6	name = "rhodecode-vcsserver"
		7	description = "Version Control System Server for RhodeCode"
		8	authors = [
		9	{name = "RhodeCode GmbH", email = "support@rhodecode.com"},
		10	]
		11
		12	license = {text = "GPL V3"}
		13	requires-python = ">=3.10"
		14	dynamic = ["version", "readme", "dependencies", "optional-dependencies"]
		15	classifiers = [
		16	'Development Status :: 6 - Mature',
		17	'Intended Audience :: Developers',
		18	'Operating System :: OS Independent',
		19	'Topic :: Software Development :: Version Control',
		20	'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)',
		21	'Programming Language :: Python :: 3.10',
		22	]
		23
		24	[project.entry-points."paste.app_factory"]
		25	main = "vcsserver.http_main:main"
		26
		27
		28	[tool.setuptools]
		29	packages = ["vcsserver"]
		30
		31	[tool.setuptools.dynamic]
		32	readme = {file = ["README.rst"], content-type = "text/rst"}
		33	version = {file = "vcsserver/VERSION"}
		34	dependencies = {file = ["requirements.txt"]}
		35	optional-dependencies.tests = {file = ["requirements_test.txt"]}
		36
		37	[tool.ruff]
		38
		39	select = [
		40	# Pyflakes
		41	"F",
		42	# Pycodestyle
		43	"E",
		44	"W",
		45	# isort
		46	"I001"
		47	]
		48
		49	ignore = [
		50	"E501", # line too long, handled by black
		51	]
		52
		53	# Same as Black.
		54	line-length = 120
		55
		56	[tool.ruff.isort]
		57
		58	known-first-party = ["vcsserver"]
		59
		60	[tool.ruff.format]
		61
		62	# Like Black, use double quotes for strings.
		63	quote-style = "double"
		64
		65	# Like Black, indent with spaces, rather than tabs.
		66	indent-style = "space"
		67
		68	# Like Black, respect magic trailing commas.
		69	skip-magic-trailing-comma = false
		70
		71	# Like Black, automatically detect the appropriate line ending.
		72	line-ending = "auto"
		73

vcsserver/config/__init__.py

0 created 644 +1 0

			@@ -0,0 +1,1 b''
		1	# Copyright (C) 2014-2023 RhodeCode GmbH

vcsserver/config/hooks.py

0 created 644 +27 0

			@@ -0,0 +1,27 b''
		1	# Copyright (C) 2010-2023 RhodeCode GmbH
		2	#
		3	# This program is free software: you can redistribute it and/or modify
		4	# it under the terms of the GNU Affero General Public License, version 3
		5	# (only), as published by the Free Software Foundation.
		6	#
		7	# This program is distributed in the hope that it will be useful,
		8	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		9	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		10	# GNU General Public License for more details.
		11	#
		12	# You should have received a copy of the GNU Affero General Public License
		13	# along with this program. If not, see <http://www.gnu.org/licenses/>.
		14	#
		15	# This program is dual-licensed. If you wish to learn more about the
		16	# RhodeCode Enterprise Edition, including its added features, Support services,
		17	# and proprietary license terms, please see https://rhodecode.com/licenses/
		18
		19	HOOK_REPO_SIZE = 'changegroup.repo_size'
		20
		21	# HG
		22	HOOK_PRE_PULL = 'preoutgoing.pre_pull'
		23	HOOK_PULL = 'outgoing.pull_logger'
		24	HOOK_PRE_PUSH = 'prechangegroup.pre_push'
		25	HOOK_PRETX_PUSH = 'pretxnchangegroup.pre_push'
		26	HOOK_PUSH = 'changegroup.push_logger'
		27	HOOK_PUSH_KEY = 'pushkey.key_push'

vcsserver/config/settings_maker.py

0 created 644 +168 0

			@@ -0,0 +1,168 b''
		1	# Copyright (C) 2010-2023 RhodeCode GmbH
		2	#
		3	# This program is free software: you can redistribute it and/or modify
		4	# it under the terms of the GNU Affero General Public License, version 3
		5	# (only), as published by the Free Software Foundation.
		6	#
		7	# This program is distributed in the hope that it will be useful,
		8	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		9	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		10	# GNU General Public License for more details.
		11	#
		12	# You should have received a copy of the GNU Affero General Public License
		13	# along with this program. If not, see <http://www.gnu.org/licenses/>.
		14	#
		15	# This program is dual-licensed. If you wish to learn more about the
		16	# RhodeCode Enterprise Edition, including its added features, Support services,
		17	# and proprietary license terms, please see https://rhodecode.com/licenses/
		18
		19	import os
		20	import textwrap
		21	import string
		22	import functools
		23	import logging
		24	import tempfile
		25	import logging.config
		26
		27	from vcsserver.type_utils import str2bool, aslist
		28
		29	log = logging.getLogger(__name__)
		30
		31	# skip keys, that are set here, so we don't double process those
		32	set_keys = {
		33	'__file__': ''
		34	}
		35
		36
		37	class SettingsMaker:
		38
		39	def __init__(self, app_settings):
		40	self.settings = app_settings
		41
		42	@classmethod
		43	def _bool_func(cls, input_val):
		44	if isinstance(input_val, bytes):
		45	# decode to str
		46	input_val = input_val.decode('utf8')
		47	return str2bool(input_val)
		48
		49	@classmethod
		50	def _int_func(cls, input_val):
		51	return int(input_val)
		52
		53	@classmethod
		54	def _list_func(cls, input_val, sep=','):
		55	return aslist(input_val, sep=sep)
		56
		57	@classmethod
		58	def _string_func(cls, input_val, lower=True):
		59	if lower:
		60	input_val = input_val.lower()
		61	return input_val
		62
		63	@classmethod
		64	def _float_func(cls, input_val):
		65	return float(input_val)
		66
		67	@classmethod
		68	def _dir_func(cls, input_val, ensure_dir=False, mode=0o755):
		69
		70	# ensure we have our dir created
		71	if not os.path.isdir(input_val) and ensure_dir:
		72	os.makedirs(input_val, mode=mode, exist_ok=True)
		73
		74	if not os.path.isdir(input_val):
		75	raise Exception(f'Dir at {input_val} does not exist')
		76	return input_val
		77
		78	@classmethod
		79	def _file_path_func(cls, input_val, ensure_dir=False, mode=0o755):
		80	dirname = os.path.dirname(input_val)
		81	cls._dir_func(dirname, ensure_dir=ensure_dir)
		82	return input_val
		83
		84	@classmethod
		85	def _key_transformator(cls, key):
		86	return "{}_{}".format('RC'.upper(), key.upper().replace('.', '_').replace('-', '_'))
		87
		88	def maybe_env_key(self, key):
		89	# now maybe we have this KEY in env, search and use the value with higher priority.
		90	transformed_key = self._key_transformator(key)
		91	envvar_value = os.environ.get(transformed_key)
		92	if envvar_value:
		93	log.debug('using `%s` key instead of `%s` key for config', transformed_key, key)
		94
		95	return envvar_value
		96
		97	def env_expand(self):
		98	replaced = {}
		99	for k, v in self.settings.items():
		100	if k not in set_keys:
		101	envvar_value = self.maybe_env_key(k)
		102	if envvar_value:
		103	replaced[k] = envvar_value
		104	set_keys[k] = envvar_value
		105
		106	# replace ALL keys updated
		107	self.settings.update(replaced)
		108
		109	def enable_logging(self, logging_conf=None, level='INFO', formatter='generic'):
		110	"""
		111	Helper to enable debug on running instance
		112	:return:
		113	"""
		114
		115	if not str2bool(self.settings.get('logging.autoconfigure')):
		116	log.info('logging configuration based on main .ini file')
		117	return
		118
		119	if logging_conf is None:
		120	logging_conf = self.settings.get('logging.logging_conf_file') or ''
		121
		122	if not os.path.isfile(logging_conf):
		123	log.error('Unable to setup logging based on %s, '
		124	'file does not exist.... specify path using logging.logging_conf_file= config setting. ', logging_conf)
		125	return
		126
		127	with open(logging_conf, 'rt') as f:
		128	ini_template = textwrap.dedent(f.read())
		129	ini_template = string.Template(ini_template).safe_substitute(
		130	RC_LOGGING_LEVEL=os.environ.get('RC_LOGGING_LEVEL', '') or level,
		131	RC_LOGGING_FORMATTER=os.environ.get('RC_LOGGING_FORMATTER', '') or formatter
		132	)
		133
		134	with tempfile.NamedTemporaryFile(prefix='rc_logging_', suffix='.ini', delete=False) as f:
		135	log.info('Saved Temporary LOGGING config at %s', f.name)
		136	f.write(ini_template)
		137
		138	logging.config.fileConfig(f.name)
		139	os.remove(f.name)
		140
		141	def make_setting(self, key, default, lower=False, default_when_empty=False, parser=None):
		142	input_val = self.settings.get(key, default)
		143
		144	if default_when_empty and not input_val:
		145	# use default value when value is set in the config but it is empty
		146	input_val = default
		147
		148	parser_func = {
		149	'bool': self._bool_func,
		150	'int': self._int_func,
		151	'list': self._list_func,
		152	'list:newline': functools.partial(self._list_func, sep='/n'),
		153	'list:spacesep': functools.partial(self._list_func, sep=' '),
		154	'string': functools.partial(self._string_func, lower=lower),
		155	'dir': self._dir_func,
		156	'dir:ensured': functools.partial(self._dir_func, ensure_dir=True),
		157	'file': self._file_path_func,
		158	'file:ensured': functools.partial(self._file_path_func, ensure_dir=True),
		159	None: lambda i: i
		160	}[parser]
		161
		162	envvar_value = self.maybe_env_key(key)
		163	if envvar_value:
		164	input_val = envvar_value
		165	set_keys[key] = input_val
		166
		167	self.settings[key] = parser_func(input_val)
		168	return self.settings[key]

vcsserver/lib/_vendor/jsonlogger/__init__.py

0 created 644 +243 0

			@@ -0,0 +1,243 b''
		1	'''
		2	This library is provided to allow standard python logging
		3	to output log data as JSON formatted strings
		4	'''
		5	import logging
		6	import json
		7	import re
		8	from datetime import date, datetime, time, tzinfo, timedelta
		9	import traceback
		10	import importlib
		11
		12	from inspect import istraceback
		13
		14	from collections import OrderedDict
		15
		16
		17	def _inject_req_id(record, args, *kwargs):
		18	return record
		19
		20
		21	ExceptionAwareFormatter = logging.Formatter
		22
		23
		24	ZERO = timedelta(0)
		25	HOUR = timedelta(hours=1)
		26
		27
		28	class UTC(tzinfo):
		29	"""UTC"""
		30
		31	def utcoffset(self, dt):
		32	return ZERO
		33
		34	def tzname(self, dt):
		35	return "UTC"
		36
		37	def dst(self, dt):
		38	return ZERO
		39
		40	utc = UTC()
		41
		42
		43	# skip natural LogRecord attributes
		44	# http://docs.python.org/library/logging.html#logrecord-attributes
		45	RESERVED_ATTRS = (
		46	'args', 'asctime', 'created', 'exc_info', 'exc_text', 'filename',
		47	'funcName', 'levelname', 'levelno', 'lineno', 'module',
		48	'msecs', 'message', 'msg', 'name', 'pathname', 'process',
		49	'processName', 'relativeCreated', 'stack_info', 'thread', 'threadName')
		50
		51
		52	def merge_record_extra(record, target, reserved):
		53	"""
		54	Merges extra attributes from LogRecord object into target dictionary
		55
		56	:param record: logging.LogRecord
		57	:param target: dict to update
		58	:param reserved: dict or list with reserved keys to skip
		59	"""
		60	for key, value in record.__dict__.items():
		61	# this allows to have numeric keys
		62	if (key not in reserved
		63	and not (hasattr(key, "startswith")
		64	and key.startswith('_'))):
		65	target[key] = value
		66	return target
		67
		68
		69	class JsonEncoder(json.JSONEncoder):
		70	"""
		71	A custom encoder extending the default JSONEncoder
		72	"""
		73
		74	def default(self, obj):
		75	if isinstance(obj, (date, datetime, time)):
		76	return self.format_datetime_obj(obj)
		77
		78	elif istraceback(obj):
		79	return ''.join(traceback.format_tb(obj)).strip()
		80
		81	elif type(obj) == Exception \
		82	or isinstance(obj, Exception) \
		83	or type(obj) == type:
		84	return str(obj)
		85
		86	try:
		87	return super().default(obj)
		88
		89	except TypeError:
		90	try:
		91	return str(obj)
		92
		93	except Exception:
		94	return None
		95
		96	def format_datetime_obj(self, obj):
		97	return obj.isoformat()
		98
		99
		100	class JsonFormatter(ExceptionAwareFormatter):
		101	"""
		102	A custom formatter to format logging records as json strings.
		103	Extra values will be formatted as str() if not supported by
		104	json default encoder
		105	"""
		106
		107	def __init__(self, args, *kwargs):
		108	"""
		109	:param json_default: a function for encoding non-standard objects
		110	as outlined in http://docs.python.org/2/library/json.html
		111	:param json_encoder: optional custom encoder
		112	:param json_serializer: a :meth:`json.dumps`-compatible callable
		113	that will be used to serialize the log record.
		114	:param json_indent: an optional :meth:`json.dumps`-compatible numeric value
		115	that will be used to customize the indent of the output json.
		116	:param prefix: an optional string prefix added at the beginning of
		117	the formatted string
		118	:param json_indent: indent parameter for json.dumps
		119	:param json_ensure_ascii: ensure_ascii parameter for json.dumps
		120	:param reserved_attrs: an optional list of fields that will be skipped when
		121	outputting json log record. Defaults to all log record attributes:
		122	http://docs.python.org/library/logging.html#logrecord-attributes
		123	:param timestamp: an optional string/boolean field to add a timestamp when
		124	outputting the json log record. If string is passed, timestamp will be added
		125	to log record using string as key. If True boolean is passed, timestamp key
		126	will be "timestamp". Defaults to False/off.
		127	"""
		128	self.json_default = self._str_to_fn(kwargs.pop("json_default", None))
		129	self.json_encoder = self._str_to_fn(kwargs.pop("json_encoder", None))
		130	self.json_serializer = self._str_to_fn(kwargs.pop("json_serializer", json.dumps))
		131	self.json_indent = kwargs.pop("json_indent", None)
		132	self.json_ensure_ascii = kwargs.pop("json_ensure_ascii", True)
		133	self.prefix = kwargs.pop("prefix", "")
		134	reserved_attrs = kwargs.pop("reserved_attrs", RESERVED_ATTRS)
		135	self.reserved_attrs = dict(list(zip(reserved_attrs, reserved_attrs)))
		136	self.timestamp = kwargs.pop("timestamp", True)
		137
		138	# super(JsonFormatter, self).__init__(args, *kwargs)
		139	logging.Formatter.__init__(self, args, *kwargs)
		140	if not self.json_encoder and not self.json_default:
		141	self.json_encoder = JsonEncoder
		142
		143	self._required_fields = self.parse()
		144	self._skip_fields = dict(list(zip(self._required_fields,
		145	self._required_fields)))
		146	self._skip_fields.update(self.reserved_attrs)
		147
		148	def _str_to_fn(self, fn_as_str):
		149	"""
		150	If the argument is not a string, return whatever was passed in.
		151	Parses a string such as package.module.function, imports the module
		152	and returns the function.
		153
		154	:param fn_as_str: The string to parse. If not a string, return it.
		155	"""
		156	if not isinstance(fn_as_str, str):
		157	return fn_as_str
		158
		159	path, _, function = fn_as_str.rpartition('.')
		160	module = importlib.import_module(path)
		161	return getattr(module, function)
		162
		163	def parse(self):
		164	"""
		165	Parses format string looking for substitutions
		166
		167	This method is responsible for returning a list of fields (as strings)
		168	to include in all log messages.
		169	"""
		170	standard_formatters = re.compile(r'$(.+?)$', re.IGNORECASE)
		171	return standard_formatters.findall(self._fmt)
		172
		173	def add_fields(self, log_record, record, message_dict):
		174	"""
		175	Override this method to implement custom logic for adding fields.
		176	"""
		177	for field in self._required_fields:
		178	log_record[field] = record.__dict__.get(field)
		179	log_record.update(message_dict)
		180	merge_record_extra(record, log_record, reserved=self._skip_fields)
		181
		182	if self.timestamp:
		183	key = self.timestamp if type(self.timestamp) == str else 'timestamp'
		184	log_record[key] = datetime.fromtimestamp(record.created, tz=utc)
		185
		186	def process_log_record(self, log_record):
		187	"""
		188	Override this method to implement custom logic
		189	on the possibly ordered dictionary.
		190	"""
		191	return log_record
		192
		193	def jsonify_log_record(self, log_record):
		194	"""Returns a json string of the log record."""
		195	return self.json_serializer(log_record,
		196	default=self.json_default,
		197	cls=self.json_encoder,
		198	indent=self.json_indent,
		199	ensure_ascii=self.json_ensure_ascii)
		200
		201	def serialize_log_record(self, log_record):
		202	"""Returns the final representation of the log record."""
		203	return "{}{}".format(self.prefix, self.jsonify_log_record(log_record))
		204
		205	def format(self, record):
		206	"""Formats a log record and serializes to json"""
		207	message_dict = {}
		208	# FIXME: logging.LogRecord.msg and logging.LogRecord.message in typeshed
		209	# are always type of str. We shouldn't need to override that.
		210	if isinstance(record.msg, dict):
		211	message_dict = record.msg
		212	record.message = None
		213	else:
		214	record.message = record.getMessage()
		215	# only format time if needed
		216	if "asctime" in self._required_fields:
		217	record.asctime = self.formatTime(record, self.datefmt)
		218
		219	# Display formatted exception, but allow overriding it in the
		220	# user-supplied dict.
		221	if record.exc_info and not message_dict.get('exc_info'):
		222	message_dict['exc_info'] = self.formatException(record.exc_info)
		223	if not message_dict.get('exc_info') and record.exc_text:
		224	message_dict['exc_info'] = record.exc_text
		225	# Display formatted record of stack frames
		226	# default format is a string returned from :func:`traceback.print_stack`
		227	try:
		228	if record.stack_info and not message_dict.get('stack_info'):
		229	message_dict['stack_info'] = self.formatStack(record.stack_info)
		230	except AttributeError:
		231	# Python2.7 doesn't have stack_info.
		232	pass
		233
		234	try:
		235	log_record = OrderedDict()
		236	except NameError:
		237	log_record = {}
		238
		239	_inject_req_id(record, with_prefix=False)
		240	self.add_fields(log_record, record, message_dict)
		241	log_record = self.process_log_record(log_record)
		242
		243	return self.serialize_log_record(log_record)

vcsserver/lib/logging_formatter.py

0 created 644 +53 0

			@@ -0,0 +1,53 b''
		1	# Copyright (C) 2010-2023 RhodeCode GmbH
		2	#
		3	# This program is free software: you can redistribute it and/or modify
		4	# it under the terms of the GNU Affero General Public License, version 3
		5	# (only), as published by the Free Software Foundation.
		6	#
		7	# This program is distributed in the hope that it will be useful,
		8	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		9	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		10	# GNU General Public License for more details.
		11	#
		12	# You should have received a copy of the GNU Affero General Public License
		13	# along with this program. If not, see <http://www.gnu.org/licenses/>.
		14	#
		15	# This program is dual-licensed. If you wish to learn more about the
		16	# RhodeCode Enterprise Edition, including its added features, Support services,
		17	# and proprietary license terms, please see https://rhodecode.com/licenses/
		18
		19	import sys
		20	import logging
		21
		22
		23	BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = list(range(30, 38))
		24
		25	# Sequences
		26	RESET_SEQ = "\033[0m"
		27	COLOR_SEQ = "\033[0;%dm"
		28	BOLD_SEQ = "\033[1m"
		29
		30	COLORS = {
		31	'CRITICAL': MAGENTA,
		32	'ERROR': RED,
		33	'WARNING': CYAN,
		34	'INFO': GREEN,
		35	'DEBUG': BLUE,
		36	'SQL': YELLOW
		37	}
		38
		39
		40	class ColorFormatter(logging.Formatter):
		41
		42	def format(self, record):
		43	"""
		44	Change record's levelname to use with COLORS enum
		45	"""
		46	def_record = super().format(record)
		47
		48	levelname = record.levelname
		49	start = COLOR_SEQ % (COLORS[levelname])
		50	end = RESET_SEQ
		51
		52	colored_record = ''.join([start, def_record, end])
		53	return colored_record

vcsserver/lib/rc_cache/archive_cache.py

0 created 644 +87 0

			@@ -0,0 +1,87 b''
		1	# RhodeCode VCSServer provides access to different vcs backends via network.
		2	# Copyright (C) 2014-2023 RhodeCode GmbH
		3	#
		4	# This program is free software; you can redistribute it and/or modify
		5	# it under the terms of the GNU General Public License as published by
		6	# the Free Software Foundation; either version 3 of the License, or
		7	# (at your option) any later version.
		8	#
		9	# This program is distributed in the hope that it will be useful,
		10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		12	# GNU General Public License for more details.
		13	#
		14	# You should have received a copy of the GNU General Public License
		15	# along with this program; if not, write to the Free Software Foundation,
		16	# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		17
		18	import logging
		19	import os
		20	import diskcache
		21	from diskcache import RLock
		22
		23	log = logging.getLogger(__name__)
		24
		25	cache_meta = None
		26
		27
		28	class ReentrantLock(RLock):
		29	def __enter__(self):
		30	reentrant_lock_key = self._key
		31
		32	log.debug('Acquire ReentrantLock(key=%s) for archive cache generation...', reentrant_lock_key)
		33	#self.acquire()
		34	log.debug('Lock for key=%s acquired', reentrant_lock_key)
		35
		36	def __exit__(self, *exc_info):
		37	#self.release()
		38	pass
		39
		40
		41	def get_archival_config(config):
		42
		43	final_config = {
		44	'archive_cache.eviction_policy': 'least-frequently-used'
		45	}
		46
		47	for k, v in config.items():
		48	if k.startswith('archive_cache'):
		49	final_config[k] = v
		50
		51	return final_config
		52
		53
		54	def get_archival_cache_store(config):
		55
		56	global cache_meta
		57	if cache_meta is not None:
		58	return cache_meta
		59
		60	config = get_archival_config(config)
		61
		62	archive_cache_dir = config['archive_cache.store_dir']
		63	archive_cache_size_gb = config['archive_cache.cache_size_gb']
		64	archive_cache_shards = config['archive_cache.cache_shards']
		65	archive_cache_eviction_policy = config['archive_cache.eviction_policy']
		66
		67	log.debug('Initializing archival cache instance under %s', archive_cache_dir)
		68
		69	# check if it's ok to write, and re-create the archive cache
		70	if not os.path.isdir(archive_cache_dir):
		71	os.makedirs(archive_cache_dir, exist_ok=True)
		72
		73	d_cache = diskcache.FanoutCache(
		74	archive_cache_dir, shards=archive_cache_shards,
		75	cull_limit=0, # manual eviction required
		76	size_limit=archive_cache_size_gb * 1024 * 1024 * 1024,
		77	eviction_policy=archive_cache_eviction_policy,
		78	timeout=30
		79	)
		80	cache_meta = d_cache
		81	return cache_meta
		82
		83
		84	def includeme(config):
		85	# init our cache at start, for vcsserver we don't init at runtime
		86	# because our cache config is sent via wire on make archive call, this call just lazy-enables the client
		87	return

vcsserver/lib/rc_json.py

0 created 644 +2 0

			@@ -0,0 +1,2 b''
		1	# use orjson by default
		2	import orjson as json

vcsserver/lib/statsd_client.py

0 created 644 +70 0

			@@ -0,0 +1,70 b''
		1	# RhodeCode VCSServer provides access to different vcs backends via network.
		2	# Copyright (C) 2014-2023 RhodeCode GmbH
		3	#
		4	# This program is free software; you can redistribute it and/or modify
		5	# it under the terms of the GNU General Public License as published by
		6	# the Free Software Foundation; either version 3 of the License, or
		7	# (at your option) any later version.
		8	#
		9	# This program is distributed in the hope that it will be useful,
		10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		12	# GNU General Public License for more details.
		13	#
		14	# You should have received a copy of the GNU General Public License
		15	# along with this program; if not, write to the Free Software Foundation,
		16	# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		17
		18	from vcsserver.lib._vendor.statsd import client_from_config
		19
		20
		21	class StatsdClientNotInitialised(Exception):
		22	pass
		23
		24
		25	class _Singleton(type):
		26	"""A metaclass that creates a Singleton base class when called."""
		27
		28	_instances = {}
		29
		30	def __call__(cls, args, *kwargs):
		31	if cls not in cls._instances:
		32	cls._instances[cls] = super().__call__(args, *kwargs)
		33	return cls._instances[cls]
		34
		35
		36	class Singleton(_Singleton("SingletonMeta", (object,), {})):
		37	pass
		38
		39
		40	class StatsdClientClass(Singleton):
		41	setup_run = False
		42	statsd_client = None
		43	statsd = None
		44	strict_mode_init = False
		45
		46	def __getattribute__(self, name):
		47
		48	if name.startswith("statsd"):
		49	if self.setup_run:
		50	return super().__getattribute__(name)
		51	else:
		52	if self.strict_mode_init:
		53	raise StatsdClientNotInitialised(f"requested key was {name}")
		54	return None
		55
		56	return super().__getattribute__(name)
		57
		58	def setup(self, settings):
		59	"""
		60	Initialize the client
		61	"""
		62	strict_init_mode = settings.pop('statsd_strict_init', False)
		63
		64	statsd = client_from_config(settings)
		65	self.statsd = statsd
		66	self.statsd_client = statsd
		67	self.setup_run = True
		68
		69
		70	StatsdClient = StatsdClientClass()

vcsserver/lib/svnremoterepo.py

0 created 644 +160 0

			@@ -0,0 +1,160 b''
		1	# RhodeCode VCSServer provides access to different vcs backends via network.
		2	# Copyright (C) 2014-2023 RhodeCode GmbH
		3	#
		4	# This program is free software; you can redistribute it and/or modify
		5	# it under the terms of the GNU General Public License as published by
		6	# the Free Software Foundation; either version 3 of the License, or
		7	# (at your option) any later version.
		8	#
		9	# This program is distributed in the hope that it will be useful,
		10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		12	# GNU General Public License for more details.
		13	#
		14	# You should have received a copy of the GNU General Public License
		15	# along with this program; if not, write to the Free Software Foundation,
		16	# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		17
		18	import os
		19	import tempfile
		20
		21	from svn import client
		22	from svn import core
		23	from svn import ra
		24
		25	from mercurial import error
		26
		27	from vcsserver.str_utils import safe_bytes
		28
		29	core.svn_config_ensure(None)
		30	svn_config = core.svn_config_get_config(None)
		31
		32
		33	class RaCallbacks(ra.Callbacks):
		34	@staticmethod
		35	def open_tmp_file(pool): # pragma: no cover
		36	(fd, fn) = tempfile.mkstemp()
		37	os.close(fd)
		38	return fn
		39
		40	@staticmethod
		41	def get_client_string(pool):
		42	return b'RhodeCode-subversion-url-checker'
		43
		44
		45	class SubversionException(Exception):
		46	pass
		47
		48
		49	class SubversionConnectionException(SubversionException):
		50	"""Exception raised when a generic error occurs when connecting to a repository."""
		51
		52
		53	def normalize_url(url):
		54	if not url:
		55	return url
		56	if url.startswith(b'svn+http://') or url.startswith(b'svn+https://'):
		57	url = url[4:]
		58	url = url.rstrip(b'/')
		59	return url
		60
		61
		62	def _create_auth_baton(pool):
		63	"""Create a Subversion authentication baton. """
		64	# Give the client context baton a suite of authentication
		65	# providers.h
		66	platform_specific = [
		67	'svn_auth_get_gnome_keyring_simple_provider',
		68	'svn_auth_get_gnome_keyring_ssl_client_cert_pw_provider',
		69	'svn_auth_get_keychain_simple_provider',
		70	'svn_auth_get_keychain_ssl_client_cert_pw_provider',
		71	'svn_auth_get_kwallet_simple_provider',
		72	'svn_auth_get_kwallet_ssl_client_cert_pw_provider',
		73	'svn_auth_get_ssl_client_cert_file_provider',
		74	'svn_auth_get_windows_simple_provider',
		75	'svn_auth_get_windows_ssl_server_trust_provider',
		76	]
		77
		78	providers = []
		79
		80	for p in platform_specific:
		81	if getattr(core, p, None) is not None:
		82	try:
		83	providers.append(getattr(core, p)())
		84	except RuntimeError:
		85	pass
		86
		87	providers += [
		88	client.get_simple_provider(),
		89	client.get_username_provider(),
		90	client.get_ssl_client_cert_file_provider(),
		91	client.get_ssl_client_cert_pw_file_provider(),
		92	client.get_ssl_server_trust_file_provider(),
		93	]
		94
		95	return core.svn_auth_open(providers, pool)
		96
		97
		98	class SubversionRepo:
		99	"""Wrapper for a Subversion repository.
		100
		101	It uses the SWIG Python bindings, see above for requirements.
		102	"""
		103	def __init__(self, svn_url: bytes = b'', username: bytes = b'', password: bytes = b''):
		104
		105	self.username = username
		106	self.password = password
		107	self.svn_url = core.svn_path_canonicalize(svn_url)
		108
		109	self.auth_baton_pool = core.Pool()
		110	self.auth_baton = _create_auth_baton(self.auth_baton_pool)
		111	# self.init_ra_and_client() assumes that a pool already exists
		112	self.pool = core.Pool()
		113
		114	self.ra = self.init_ra_and_client()
		115	self.uuid = ra.get_uuid(self.ra, self.pool)
		116
		117	def init_ra_and_client(self):
		118	"""Initializes the RA and client layers, because sometimes getting
		119	unified diffs runs the remote server out of open files.
		120	"""
		121
		122	if self.username:
		123	core.svn_auth_set_parameter(self.auth_baton,
		124	core.SVN_AUTH_PARAM_DEFAULT_USERNAME,
		125	self.username)
		126	if self.password:
		127	core.svn_auth_set_parameter(self.auth_baton,
		128	core.SVN_AUTH_PARAM_DEFAULT_PASSWORD,
		129	self.password)
		130
		131	callbacks = RaCallbacks()
		132	callbacks.auth_baton = self.auth_baton
		133
		134	try:
		135	return ra.open2(self.svn_url, callbacks, svn_config, self.pool)
		136	except SubversionException as e:
		137	# e.child contains a detailed error messages
		138	msglist = []
		139	svn_exc = e
		140	while svn_exc:
		141	if svn_exc.args[0]:
		142	msglist.append(svn_exc.args[0])
		143	svn_exc = svn_exc.child
		144	msg = '\n'.join(msglist)
		145	raise SubversionConnectionException(msg)
		146
		147
		148	class svnremoterepo:
		149	""" the dumb wrapper for actual Subversion repositories """
		150
		151	def __init__(self, username: bytes = b'', password: bytes = b'', svn_url: bytes = b''):
		152	self.username = username or b''
		153	self.password = password or b''
		154	self.path = normalize_url(svn_url)
		155
		156	def svn(self):
		157	try:
		158	return SubversionRepo(self.path, self.username, self.password)
		159	except SubversionConnectionException as e:
		160	raise error.Abort(safe_bytes(e))

vcsserver/remote/__init__.py

0 created 644 +17 0

			@@ -0,0 +1,17 b''
		1	# RhodeCode VCSServer provides access to different vcs backends via network.
		2	# Copyright (C) 2014-2023 RhodeCode GmbH
		3	#
		4	# This program is free software; you can redistribute it and/or modify
		5	# it under the terms of the GNU General Public License as published by
		6	# the Free Software Foundation; either version 3 of the License, or
		7	# (at your option) any later version.
		8	#
		9	# This program is distributed in the hope that it will be useful,
		10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		12	# GNU General Public License for more details.
		13	#
		14	# You should have received a copy of the GNU General Public License
		15	# along with this program; if not, write to the Free Software Foundation,
		16	# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		17

vcsserver/str_utils.py

0 created 644 +144 0

			@@ -0,0 +1,144 b''
		1	# RhodeCode VCSServer provides access to different vcs backends via network.
		2	# Copyright (C) 2014-2023 RhodeCode GmbH
		3	#
		4	# This program is free software; you can redistribute it and/or modify
		5	# it under the terms of the GNU General Public License as published by
		6	# the Free Software Foundation; either version 3 of the License, or
		7	# (at your option) any later version.
		8	#
		9	# This program is distributed in the hope that it will be useful,
		10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		12	# GNU General Public License for more details.
		13	#
		14	# You should have received a copy of the GNU General Public License
		15	# along with this program; if not, write to the Free Software Foundation,
		16	# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		17
		18	import typing
		19	import base64
		20	import logging
		21
		22
		23	log = logging.getLogger(__name__)
		24
		25
		26	def safe_int(val, default=None) -> int:
		27	"""
		28	Returns int() of val if val is not convertable to int use default
		29	instead
		30
		31	:param val:
		32	:param default:
		33	"""
		34
		35	try:
		36	val = int(val)
		37	except (ValueError, TypeError):
		38	val = default
		39
		40	return val
		41
		42
		43	def base64_to_str(text) -> str:
		44	return safe_str(base64.encodebytes(safe_bytes(text))).strip()
		45
		46
		47	def get_default_encodings() -> list[str]:
		48	return ['utf8']
		49
		50
		51	def safe_str(str_, to_encoding=None) -> str:
		52	"""
		53	safe str function. Does few trick to turn unicode_ into string
		54
		55	:param str_: str to encode
		56	:param to_encoding: encode to this type UTF8 default
		57	"""
		58	if isinstance(str_, str):
		59	return str_
		60
		61	# if it's bytes cast to str
		62	if not isinstance(str_, bytes):
		63	return str(str_)
		64
		65	to_encoding = to_encoding or get_default_encodings()
		66	if not isinstance(to_encoding, (list, tuple)):
		67	to_encoding = [to_encoding]
		68
		69	for enc in to_encoding:
		70	try:
		71	return str(str_, enc)
		72	except UnicodeDecodeError:
		73	pass
		74
		75	return str(str_, to_encoding[0], 'replace')
		76
		77
		78	def safe_bytes(str_, from_encoding=None) -> bytes:
		79	"""
		80	safe bytes function. Does few trick to turn str_ into bytes string:
		81
		82	:param str_: string to decode
		83	:param from_encoding: encode from this type UTF8 default
		84	"""
		85	if isinstance(str_, bytes):
		86	return str_
		87
		88	if not isinstance(str_, str):
		89	raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')
		90
		91	from_encoding = from_encoding or get_default_encodings()
		92	if not isinstance(from_encoding, (list, tuple)):
		93	from_encoding = [from_encoding]
		94
		95	for enc in from_encoding:
		96	try:
		97	return str_.encode(enc)
		98	except UnicodeDecodeError:
		99	pass
		100
		101	return str_.encode(from_encoding[0], 'replace')
		102
		103
		104	def ascii_bytes(str_, allow_bytes=False) -> bytes:
		105	"""
		106	Simple conversion from str to bytes, with assumption that str_ is pure ASCII.
		107	Fails with UnicodeError on invalid input.
		108	This should be used where encoding and "safe" ambiguity should be avoided.
		109	Where strings already have been encoded in other ways but still are unicode
		110	string - for example to hex, base64, json, urlencoding, or are known to be
		111	identifiers.
		112	"""
		113	if allow_bytes and isinstance(str_, bytes):
		114	return str_
		115
		116	if not isinstance(str_, str):
		117	raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')
		118	return str_.encode('ascii')
		119
		120
		121	def ascii_str(str_) -> str:
		122	"""
		123	Simple conversion from bytes to str, with assumption that str_ is pure ASCII.
		124	Fails with UnicodeError on invalid input.
		125	This should be used where encoding and "safe" ambiguity should be avoided.
		126	Where strings are encoded but also in other ways are known to be ASCII, and
		127	where a unicode string is wanted without caring about encoding. For example
		128	to hex, base64, urlencoding, or are known to be identifiers.
		129	"""
		130
		131	if not isinstance(str_, bytes):
		132	raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')
		133	return str_.decode('ascii')
		134
		135
		136	def convert_to_str(data):
		137	if isinstance(data, bytes):
		138	return safe_str(data)
		139	elif isinstance(data, tuple):
		140	return tuple(convert_to_str(item) for item in data)
		141	elif isinstance(data, list):
		142	return list(convert_to_str(item) for item in data)
		143	else:
		144	return data

vcsserver/tests/test_utils.py

0 created 644 0 0

	1		NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/type_utils.py

0 created 644 0 0

	1		NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff

.bumpversion.cfg

0 +1 -2

              [bumpversion]
-             current_version = 4.27.1
+             current_version = 5.0.0
              message = release: Bump version {current_version} to {new_version}
              [bumpversion:file:vcsserver/VERSION]

.hgignore

0 +4 0

              syntax: glob
              *.orig
              *.pyc
              *.swp
              ^\.pydevproject$
              ^\.coverage$
              ^\.cache.*$
+             ^\.venv.*$
+             ^\.ruff_cache.*$
              ^\.rhodecode$
              ^.dev
              ^build/
              ^coverage\.xml$

MANIFEST.in

0 +7 0

              # package extras
              include vcsserver/VERSION
+             # all python files inside vcsserver
+             graft vcsserver
              # all config files
              recursive-include configs *
              # skip any tests files
              recursive-exclude vcsserver/tests *
+             recursive-exclude docs/_build *
+             recursive-exclude * __pycache__
+             recursive-exclude * *.py[co]
+             recursive-exclude * .*.sw[a-z]

Makefile

0 +109 -15

		@@ -1,45 +1,139 b''
1		.DEFAULT_GOAL := help
	1	# required for pushd to work..
	2	SHELL = /bin/bash
	3
2	4
3	5	# set by: PATH_TO_OUTDATED_PACKAGES=/some/path/outdated_packages.py
4	6	OUTDATED_PACKAGES = ${PATH_TO_OUTDATED_PACKAGES}
5	7
6	8	.PHONY: clean
7		clean: ## full clean
	9	## Cleanup compiled and cache py files
	10	clean:
8	11	make test-clean
9	12	find . -type f $ -iname '.c' -o -iname '.pyc' -o -iname '.so' -o -iname '.orig' $ -exec rm '{}' ';'
	13	find . -type d -name "build" -prune -exec rm -rf '{}' ';'
10	14
11	15
12	16	.PHONY: test
13		~~test~~: ## run test-clean and tests
	17	## run test-clean and tests
	18	test:
14	19	make test-clean
15	20	make test-only
16	21
17	22
18		.PHONY:test-clean
19		~~test-clean~~: ## run test-clean and tests
	23	.PHONY: test-clean
	24	## Remove test artifacts and caches (coverage, junit, pylint output, __pycache__)
	25	test-clean:
20	26	rm -rf coverage.xml htmlcov junit.xml pylint.log result
21	27	find . -type d -name "__pycache__" -prune -exec rm -rf '{}' ';'
22	28	find . -type f $ -iname '.coverage.*' $ -exec rm '{}' ';'
23	29
24	30
25	31	.PHONY: test-only
26		test-only: ## run tests
	32	## Run tests only without cleanup
	33	test-only:
27	34	PYTHONHASHSEED=random \
28	35	py.test -x -vv -r xw -p no:sugar \
29		~~--cov~~=~~vcsserver~~ --cov-report=term-missing --cov-report=html \
30		vcsserver
	36	--cov-report=term-missing --cov-report=html \
	37	--cov=vcsserver vcsserver
31	38
32	39
33		.PHONY: generate-pkgs
34		generate-pkgs: ## generate new python packages
35		nix-shell pkgs/shell-generate.nix --command "pip2nix generate --licenses"
	40	.PHONY: ruff-check
	41	## run a ruff analysis
	42	ruff-check:
	43	ruff check --ignore F401 --ignore I001 --ignore E402 --ignore E501 --ignore F841 --exclude rhodecode/lib/dbmigrate --exclude .eggs --exclude .dev .
36	44
37	45
38	46	.PHONY: pip-packages
39		~~pip-packages~~: ~~## s~~how outdated packages
	47	## Show outdated packages
	48	pip-packages:
40	49	python ${OUTDATED_PACKAGES}
41	50
42	51
43		.PHONY: ~~help~~
44		help:
45		@grep -E '^[a-zA-Z_-]+:.?## .$$' $(MAKEFILE_LIST) \| sort \| awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-24s\033[0m %s\n", $$1, $$2}'
	52	.PHONY: build
	53	## Build sdist/egg
	54	build:
	55	python -m build
	56
	57
	58	.PHONY: dev-sh
	59	## make dev-sh
	60	dev-sh:
	61	sudo echo "deb [trusted=yes] https://apt.fury.io/rsteube/ /" \| sudo tee -a "/etc/apt/sources.list.d/fury.list"
	62	sudo apt-get update
	63	sudo apt-get install -y zsh carapace-bin
	64	rm -rf /home/rhodecode/.oh-my-zsh
	65	curl https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh \| sh
	66	echo "source <(carapace _carapace)" > /home/rhodecode/.zsrc
	67	PROMPT='%(?.%F{green}√.%F{red}?%?)%f %B%F{240}%1~%f%b %# ' zsh
	68
	69
	70	.PHONY: dev-env
	71	## make dev-env based on the requirements files and install develop of packages
	72	## Cleanup: pip freeze \| grep -v "^-e" \| grep -v "@" \| xargs pip uninstall -y
	73	dev-env:
	74	pip install build virtualenv
	75	pip wheel --wheel-dir=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
	76	pip install --no-index --find-links=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
	77	pip install -e .
	78
	79
	80	.PHONY: sh
	81	## shortcut for make dev-sh dev-env
	82	sh:
	83	make dev-env
	84	make dev-sh
	85
	86
	87	.PHONY: dev-srv
	88	## run develop server instance, docker exec -it $(docker ps -q --filter 'name=dev-enterprise-ce') /bin/bash
	89	dev-srv:
	90	pserve --reload .dev/dev.ini
	91
	92
	93	.PHONY: dev-srv-g
	94	## run gunicorn multi process workers
	95	dev-srv-g:
	96	gunicorn --workers=4 --paste .dev/dev.ini --bind=0.0.0.0:10010 --config=.dev/gunicorn_config.py
	97
	98
	99	# Default command on calling make
	100	.DEFAULT_GOAL := show-help
	101
	102	.PHONY: show-help
	103	show-help:
	104	@echo "$$(tput bold)Available rules:$$(tput sgr0)"
	105	@echo
	106	@sed -n -e "/^## / { \
	107	h; \
	108	s/.*//; \
	109	:doc" \
	110	-e "H; \
	111	n; \
	112	s/^## //; \
	113	t doc" \
	114	-e "s/:.*//; \
	115	G; \
	116	s/\\n## /---/; \
	117	s/\\n/ /g; \
	118	p; \
	119	}" ${MAKEFILE_LIST} \
	120	\| LC_ALL='C' sort --ignore-case \
	121	\| awk -F '---' \
	122	-v ncol=$$(tput cols) \
	123	-v indent=19 \
	124	-v col_on="$$(tput setaf 6)" \
	125	-v col_off="$$(tput sgr0)" \
	126	'{ \
	127	printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
	128	n = split($$2, words, " "); \
	129	line_length = ncol - indent; \
	130	for (i = 1; i <= n; i++) { \
	131	line_length -= length(words[i]) + 1; \
	132	if (line_length <= 0) { \
	133	line_length = ncol - indent - length(words[i]) - 1; \
	134	printf "\n%*s ", -indent, " "; \
	135	} \
	136	printf "%s ", words[i]; \
	137	} \
	138	printf "\n"; \
	139	}'

configs/development.ini

0 +42 -84

-             ## -*- coding: utf-8 -*-
+             #
              ; #################################
              ; RHODECODE VCSSERVER CONFIGURATION
              [server:main]
              ; COMMON HOST/IP CONFIG
              host = 0.0.0.0
-             port = 9900
+             port = 10010
              ; ##################################################
              ; WAITRESS WSGI SERVER - Recommended for Development
              ; GUNICORN APPLICATION SERVER
              ; ###########################
-             ; run with gunicorn --log-config rhodecode.ini --paste rhodecode.ini
+             ; run with gunicorn --paste rhodecode.ini
              ; Module to use, this setting shouldn't be changed
              #use = egg:gunicorn#main
-             ; Sets the number of process workers. More workers means more concurrent connections
-             ; RhodeCode can handle at the same time. Each additional worker also it increases
-             ; memory usage as each has it's own set of caches.
-             ; Recommended value is (2 * NUMBER_OF_CPUS + 1), eg 2CPU = 5 workers, but no more
-             ; than 8-10 unless for really big deployments .e.g 700-1000 users.
-             ; `instance_id = *` must be set in the [app:main] section below (which is the default)
-             ; when using more than 1 worker.
-             #workers = 2
-             ; Gunicorn access log level
-             #loglevel = info
-             ; Process name visible in process list
-             #proc_name = rhodecode_vcsserver
-             ; Type of worker class, one of `sync`, `gevent`
-             ; currently `sync` is the only option allowed.
-             #worker_class = sync
-             ; The maximum number of simultaneous clients. Valid only for gevent
-             #worker_connections = 10
-             ; Max number of requests that worker will handle before being gracefully restarted.
-             ; Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
-             #max_requests = 1000
-             #max_requests_jitter = 30
-             ; Amount of time a worker can spend with handling a request before it
-             ; gets killed and restarted. By default set to 21600 (6hrs)
-             ; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
-             #timeout = 21600
-             ; The maximum size of HTTP request line in bytes.
-             ; 0 for unlimited
-             #limit_request_line = 0
-             ; Limit the number of HTTP headers fields in a request.
-             ; By default this value is 100 and can't be larger than 32768.
-             #limit_request_fields = 32768
-             ; Limit the allowed size of an HTTP request header field.
-             ; Value is a positive number or 0.
-             ; Setting it to 0 will allow unlimited header field sizes.
-             #limit_request_field_size = 0
-             ; Timeout for graceful workers restart.
-             ; After receiving a restart signal, workers have this much time to finish
-             ; serving requests. Workers still alive after the timeout (starting from the
-             ; receipt of the restart signal) are force killed.
-             ; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
-             #graceful_timeout = 3600
-             # The number of seconds to wait for requests on a Keep-Alive connection.
-             # Generally set in the 1-5 seconds range.
-             #keepalive = 2
-             ; Maximum memory usage that each worker can use before it will receive a
-             ; graceful restart signal 0 = memory monitoring is disabled
-             ; Examples: 268435456 (256MB), 536870912 (512MB)
-             ; 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
-             #memory_max_usage = 0
-             ; How often in seconds to check for memory usage for each gunicorn worker
-             #memory_usage_check_interval = 60
-             ; Threshold value for which we don't recycle worker if GarbageCollection
-             ; frees up enough resources. Before each restart we try to run GC on worker
-             ; in case we get enough free memory after that, restart will not happen.
-             #memory_usage_recovery_threshold = 0.8
              [app:main]
              ; The %(here)s variable will be replaced with the absolute path of parent directory
              ; of this file
+             ; Each option in app:main can be overridden by an environment variable
+             ;
+             ;To override an option:
+             ;
+             ;RC_<KeyName>
+             ;Everything should be uppercase, . and - should be replaced by _.
+             ;For example, if you have these configuration settings:
+             ;rc_cache.repo_object.backend = foo
+             ;can be overridden by
+             ;export RC_CACHE_REPO_OBJECT_BACKEND=foo
              use = egg:rhodecode-vcsserver
              ; #################
              ; Pyramid default locales, we need this to be set
-             pyramid.default_locale_name = en
+             #pyramid.default_locale_name = en
              ; default locale used by VCS systems
-             locale = en_US.UTF-8
+             #locale = en_US.UTF-8
              ; path to binaries for vcsserver, it should be set by the installer
-             ; at installation time, e.g /home/user/vcsserver-1/profile/bin
+             ; at installation time, e.g /home/user/.rccontrol/vcsserver-1/profile/bin
              ; it can also be a path to nix-build output in case of development
              core.binary_dir = ""
              ; Default cache dir for caches. Putting this into a ramdisk can boost performance.
              ; eg. /tmpfs/data_ramdisk, however this directory might require large amount of space
-             cache_dir = %(here)s/data
+             #cache_dir = %(here)s/data
              ; ***************************************
              ; `repo_object` cache, default file based
              ; ***************************************
              ; `repo_object` cache settings for vcs methods for repositories
-             rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
+             #rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
              ; cache auto-expires after N seconds
              ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
-             rc_cache.repo_object.expiration_time = 2592000
+             #rc_cache.repo_object.expiration_time = 2592000
              ; file cache store path. Defaults to `cache_dir =` value or tempdir if both values are not set
-             #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache.db
+             #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache_repo_object.db
              ; ***********************************************************
              ; `repo_object` cache with redis backend
              ; auto-renew lock to prevent stale locks, slower but safer. Use only if problems happen
              #rc_cache.repo_object.arguments.lock_auto_renewal = true
-             ; Statsd client config
+             ; Statsd client config, this is used to send metrics to statsd
+             ; We recommend setting up statsd_exporter and scraping the metrics with Prometheus
              #statsd.enabled = false
              #statsd.statsd_host = 0.0.0.0
              #statsd.statsd_port = 8125
              #statsd.statsd_prefix =
              #statsd.statsd_ipv6 = false
+             ; Configure logging automatically at server startup. Set to false
+             ; to use the custom logging config below.
+             ; RC_LOGGING_FORMATTER
+             ; RC_LOGGING_LEVEL
+             ; env variables can control the settings for logging in case of autoconfigure
+             #logging.autoconfigure = true
+             ; specify your own custom logging config file to configure logging
+             #logging.logging_conf_file = /path/to/custom_logging.ini
              ; #####################
              ; LOGGING CONFIGURATION
              ; #####################
              [loggers]
              keys = root, vcsserver
              keys = console
              [formatters]
-             keys = generic
+             keys = generic, json
              ; #######
              ; LOGGERS
              qualname = vcsserver
              propagate = 1
              ; ########
              ; HANDLERS
              ; ########
              class = StreamHandler
              args = (sys.stderr, )
              level = DEBUG
+             ; To enable JSON formatted logs replace 'generic' with 'json'
+             ; This allows sending properly formatted logs to grafana loki or elasticsearch
              formatter = generic
              ; ##########
              [formatter_generic]
              format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
              datefmt = %Y-%m-%d %H:%M:%S
+             [formatter_json]
+             format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
+             class = vcsserver.lib._vendor.jsonlogger.JsonFormatter

configs/gunicorn_config.py

0 +307 -52

              import threading
              import traceback
              import random
+             import socket
+             import dataclasses
              from gunicorn.glogging import Logger
                  import multiprocessing
                  return multiprocessing.cpu_count() * 2 + 1
-             # GLOBAL
+             bind = "127.0.0.1:10010"
+             # Error logging output for gunicorn (-) is stdout
              errorlog = '-'
+             # Access logging output for gunicorn (-) is stdout
              accesslog = '-'
              worker_tmp_dir = None
              tmp_upload_dir = None
+             # use re-use port logic
+             #reuse_port = True
              # Custom log format
+             #access_log_format = (
+             #    '%(t)s %(p)s INFO  [GNCRN] %(h)-15s rqt:%(L)s %(s)s %(b)-6s "%(m)s:%(U)s %(q)s" usr:%(u)s "%(f)s" "%(a)s"')
+             # loki format for easier parsing in grafana
              access_log_format = (
-                 '%(t)s %(p)s INFO  [GNCRN] %(h)-15s rqt:%(L)s %(s)s %(b)-6s "%(m)s:%(U)s %(q)s" usr:%(u)s "%(f)s" "%(a)s"')
+                 'time="%(t)s" pid=%(p)s level="INFO" type="[GNCRN]" ip="%(h)-15s" rqt="%(L)s" response_code="%(s)s" response_bytes="%(b)-6s" uri="%(m)s:%(U)s %(q)s" user=":%(u)s" user_agent="%(a)s"')
+             # self adjust workers based on CPU count, to use maximum of CPU and not overquota the resources
+             # workers = get_workers()
+             # Gunicorn access log level
+             loglevel = 'info'
+             # Process name visible in a process list
+             proc_name = "rhodecode_vcsserver"
+             # Type of worker class, one of `sync`, `gevent` or `gthread`
+             # currently `sync` is the only option allowed for vcsserver and for rhodecode all of 3 are allowed
+             # gevent:
+             # In this case, the maximum number of concurrent requests is (N workers * X worker_connections)
+             # e.g. workers =3 worker_connections=10 = 3*10, 30 concurrent requests can be handled
+             # gthread:
+             # In this case, the maximum number of concurrent requests is (N workers * X threads)
+             # e.g. workers = 3 threads=3 = 3*3, 9 concurrent requests can be handled
+             worker_class = 'sync'
+             # Sets the number of process workers. More workers means more concurrent connections
+             # RhodeCode can handle at the same time. Each additional worker also increases
+             # memory usage as each has its own set of caches.
+             # The recommended value is (2 * NUMBER_OF_CPUS + 1), e.g. 2 CPUs = 5 workers, but no more
+             # than 8-10 except for huge deployments, e.g. 700-1000 users.
+             # `instance_id = *` must be set in the [app:main] section below (which is the default)
+             # when using more than 1 worker.
+             workers = 2
+             # Threads numbers for worker class gthread
+             threads = 1
+             # The maximum number of simultaneous clients. Valid only for gevent
+             # In this case, the maximum number of concurrent requests is (N workers * X worker_connections)
+             # e.g workers =3 worker_connections=10 = 3*10, 30 concurrent requests can be handled
+             worker_connections = 10
+             # Max number of requests that worker will handle before being gracefully restarted.
+             # Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
+             max_requests = 2000
+             max_requests_jitter = int(max_requests * 0.2)  # 20% of max_requests
+             # The maximum number of pending connections.
+             # Exceeding this number results in the client getting an error when attempting to connect.
+             backlog = 64
-             # self adjust workers based on CPU count
-             # workers = get_workers()
+             # The Amount of time a worker can spend with handling a request before it
+             # gets killed and restarted. By default, set to 21600 (6hrs)
+             # Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
+             timeout = 21600
+             # The maximum size of HTTP request line in bytes.
+             # 0 for unlimited
+             limit_request_line = 0
+             # Limit the number of HTTP headers fields in a request.
+             # By default this value is 100 and can't be larger than 32768.
+             limit_request_fields = 32768
+             # Limit the allowed size of an HTTP request header field.
+             # Value is a positive number or 0.
+             # Setting it to 0 will allow unlimited header field sizes.
+             limit_request_field_size = 0
+             # Timeout for graceful workers restart.
+             # After receiving a restart signal, workers have this much time to finish
+             # serving requests. Workers still alive after the timeout (starting from the
+             # receipt of the restart signal) are force killed.
+             # Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
+             graceful_timeout = 21600
+             # The number of seconds to wait for requests on a Keep-Alive connection.
+             # Generally set in the 1-5 seconds range.
+             keepalive = 2
+             # Maximum memory usage that each worker can use before it will receive a
+             # graceful restart signal 0 = memory monitoring is disabled
+             # Examples: 268435456 (256MB), 536870912 (512MB)
+             # 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
+             # Dynamic formula 1024 * 1024 * 256 == 256MBs
+             memory_max_usage = 0
+             # How often in seconds to check for memory usage for each gunicorn worker
+             memory_usage_check_interval = 60
+             # Threshold value for which we don't recycle worker if GarbageCollection
+             # frees up enough resources. Before each restart, we try to run GC on worker
+             # in case we get enough free memory after that; restart will not happen.
+             memory_usage_recovery_threshold = 0.8
+             @dataclasses.dataclass
+             class MemoryCheckConfig:
+                 max_usage: int
+                 check_interval: int
+                 recovery_threshold: float
              def _get_process_rss(pid=None):
              def _get_config(ini_path):
+                 import configparser
                  try:
-                     import configparser
-                 except ImportError:
-                     import ConfigParser as configparser
-                 try:
                      config = configparser.RawConfigParser()
                      config.read(ini_path)
                      return config
                      return None
-             def _time_with_offset(memory_usage_check_interval):
-                 return time.time() - random.randint(0, memory_usage_check_interval/2.0)
+             def get_memory_usage_params(config=None):
+                 # memory spec defaults
+                 _memory_max_usage = memory_max_usage
+                 _memory_usage_check_interval = memory_usage_check_interval
+                 _memory_usage_recovery_threshold = memory_usage_recovery_threshold
+                 if config:
+                     ini_path = os.path.abspath(config)
+                     conf = _get_config(ini_path)
+                     section = 'server:main'
+                     if conf and conf.has_section(section):
+                         if conf.has_option(section, 'memory_max_usage'):
+                             _memory_max_usage = conf.getint(section, 'memory_max_usage')
+                         if conf.has_option(section, 'memory_usage_check_interval'):
+                             _memory_usage_check_interval = conf.getint(section, 'memory_usage_check_interval')
+                         if conf.has_option(section, 'memory_usage_recovery_threshold'):
+                             _memory_usage_recovery_threshold = conf.getfloat(section, 'memory_usage_recovery_threshold')
+                 _memory_max_usage = int(os.environ.get('RC_GUNICORN_MEMORY_MAX_USAGE', '')
+                                         or _memory_max_usage)
+                 _memory_usage_check_interval = int(os.environ.get('RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL', '')
+                                                    or _memory_usage_check_interval)
+                 _memory_usage_recovery_threshold = float(os.environ.get('RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD', '')
+                                                          or _memory_usage_recovery_threshold)
+                 return MemoryCheckConfig(_memory_max_usage, _memory_usage_check_interval, _memory_usage_recovery_threshold)
+             def _time_with_offset(check_interval):
+                 return time.time() - random.randint(0, check_interval/2.0)
              def pre_fork(server, worker):
              def post_fork(server, worker):
-                 # memory spec defaults
-                 _memory_max_usage = 0
-                 _memory_usage_check_interval = 60
-                 _memory_usage_recovery_threshold = 0.8
-                 ini_path = os.path.abspath(server.cfg.paste)
-                 conf = _get_config(ini_path)
-                 section = 'server:main'
-                 if conf and conf.has_section(section):
+                 memory_conf = get_memory_usage_params()
+                 _memory_max_usage = memory_conf.max_usage
+                 _memory_usage_check_interval = memory_conf.check_interval
+                 _memory_usage_recovery_threshold = memory_conf.recovery_threshold
-                     if conf.has_option(section, 'memory_max_usage'):
-                         _memory_max_usage = conf.getint(section, 'memory_max_usage')
-                     if conf.has_option(section, 'memory_usage_check_interval'):
-                         _memory_usage_check_interval = conf.getint(section, 'memory_usage_check_interval')
-                     if conf.has_option(section, 'memory_usage_recovery_threshold'):
-                         _memory_usage_recovery_threshold = conf.getfloat(section, 'memory_usage_recovery_threshold')
-                 worker._memory_max_usage = _memory_max_usage
-                 worker._memory_usage_check_interval = _memory_usage_check_interval
-                 worker._memory_usage_recovery_threshold = _memory_usage_recovery_threshold
+                 worker._memory_max_usage = int(os.environ.get('RC_GUNICORN_MEMORY_MAX_USAGE', '')
+                                                or _memory_max_usage)
+                 worker._memory_usage_check_interval = int(os.environ.get('RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL', '')
+                                                           or _memory_usage_check_interval)
+                 worker._memory_usage_recovery_threshold = float(os.environ.get('RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD', '')
+                                                                 or _memory_usage_recovery_threshold)
                  # register memory last check time, with some random offset so we don't recycle all
                  # at once
                  worker._last_memory_check_time = _time_with_offset(_memory_usage_check_interval)
                  if _memory_max_usage:
-                     server.log.info("[%-10s] WORKER spawned with max memory set at %s", worker.pid,
+                     server.log.info("pid=[%-10s] WORKER spawned with max memory set at %s", worker.pid,
                                      _format_data_size(_memory_max_usage))
                  else:
-                     server.log.info("[%-10s] WORKER spawned", worker.pid)
+                     server.log.info("pid=[%-10s] WORKER spawned", worker.pid)
              def pre_exec(server):
              def on_starting(server):
                  server_lbl = '{} {}'.format(server.proc_name, server.address)
                  server.log.info("Server %s is starting.", server_lbl)
+                 server.log.info('Config:')
+                 server.log.info(f"\n{server.cfg}")
+                 server.log.info(get_memory_usage_params())
              def when_ready(server):
              def _check_memory_usage(worker):
-                 memory_max_usage = worker._memory_max_usage
-                 if not memory_max_usage:
+                 _memory_max_usage = worker._memory_max_usage
+                 if not _memory_max_usage:
                      return
-                 memory_usage_check_interval = worker._memory_usage_check_interval
-                 memory_usage_recovery_threshold = memory_max_usage * worker._memory_usage_recovery_threshold
+                 _memory_usage_check_interval = worker._memory_usage_check_interval
+                 _memory_usage_recovery_threshold = memory_max_usage * worker._memory_usage_recovery_threshold
                  elapsed = time.time() - worker._last_memory_check_time
-                 if elapsed > memory_usage_check_interval:
+                 if elapsed > _memory_usage_check_interval:
                      mem_usage = _get_process_rss()
-                     if mem_usage and mem_usage > memory_max_usage:
+                     if mem_usage and mem_usage > _memory_max_usage:
                          worker.log.info(
                              "memory usage %s > %s, forcing gc",
-                             _format_data_size(mem_usage), _format_data_size(memory_max_usage))
+                             _format_data_size(mem_usage), _format_data_size(_memory_max_usage))
                          # Try to clean it up by forcing a full collection.
                          gc.collect()
                          mem_usage = _get_process_rss()
-                         if mem_usage > memory_usage_recovery_threshold:
+                         if mem_usage > _memory_usage_recovery_threshold:
                              # Didn't clean up enough, we'll have to terminate.
                              worker.log.warning(
                                  "memory usage %s > %s after gc, quitting",
-                                 _format_data_size(mem_usage), _format_data_size(memory_max_usage))
+                                 _format_data_size(mem_usage), _format_data_size(_memory_max_usage))
                              # This will cause worker to auto-restart itself
                              worker.alive = False
                      worker._last_memory_check_time = time.time()
              def worker_int(worker):
-                 worker.log.info("[%-10s] worker received INT or QUIT signal", worker.pid)
+                 worker.log.info("pid=[%-10s] worker received INT or QUIT signal", worker.pid)
-                 # get traceback info, on worker crash
-                 id2name = dict([(th.ident, th.name) for th in threading.enumerate()])
+                 # get traceback info, when a worker crashes
+                 def get_thread_id(t_id):
+                     id2name = dict([(th.ident, th.name) for th in threading.enumerate()])
+                     return id2name.get(t_id, "unknown_thread_id")
                  code = []
-                 for thread_id, stack in sys._current_frames().items():
+                 for thread_id, stack in sys._current_frames().items():  # noqa
                      code.append(
-                         "\n# Thread: %s(%d)" % (id2name.get(thread_id, ""), thread_id))
+                         "\n# Thread: %s(%d)" % (get_thread_id(thread_id), thread_id))
                      for fname, lineno, name, line in traceback.extract_stack(stack):
                          code.append('File: "%s", line %d, in %s' % (fname, lineno, name))
                          if line:
              def worker_abort(worker):
-                 worker.log.info("[%-10s] worker received SIGABRT signal", worker.pid)
+                 worker.log.info("pid=[%-10s] worker received SIGABRT signal", worker.pid)
              def worker_exit(server, worker):
-                 worker.log.info("[%-10s] worker exit", worker.pid)
+                 worker.log.info("pid=[%-10s] worker exit", worker.pid)
              def child_exit(server, worker):
-                 worker.log.info("[%-10s] worker child exit", worker.pid)
+                 worker.log.info("pid=[%-10s] worker child exit", worker.pid)
              def pre_request(worker, req):
                  _check_memory_usage(worker)
+             def _filter_proxy(ip):
+                 """
+                 Passed in IP addresses in HEADERS can be in a special format of multiple
+                 ips. Those comma separated IPs are passed from various proxies in the
+                 chain of request processing. The left-most being the original client.
+                 We only care about the first IP which came from the org. client.
+                 :param ip: ip string from headers
+                 """
+                 if ',' in ip:
+                     _ips = ip.split(',')
+                     _first_ip = _ips[0].strip()
+                     return _first_ip
+                 return ip
+             def _filter_port(ip):
+                 """
+                 Removes a port from ip, there are 4 main cases to handle here.
+                 - ipv4 eg. 127.0.0.1
+                 - ipv6 eg. ::1
+                 - ipv4+port eg. 127.0.0.1:8080
+                 - ipv6+port eg. [::1]:8080
+                 :param ip:
+                 """
+                 def is_ipv6(ip_addr):
+                     if hasattr(socket, 'inet_pton'):
+                         try:
+                             socket.inet_pton(socket.AF_INET6, ip_addr)
+                         except socket.error:
+                             return False
+                     else:
+                         return False
+                     return True
+                 if ':' not in ip:  # must be ipv4 pure ip
+                     return ip
+                 if '[' in ip and ']' in ip:  # ipv6 with port
+                     return ip.split(']')[0][1:].lower()
+                 # must be ipv6 or ipv4 with port
+                 if is_ipv6(ip):
+                     return ip
+                 else:
+                     ip, _port = ip.split(':')[:2]  # means ipv4+port
+                     return ip
+             def get_ip_addr(environ):
+                 proxy_key = 'HTTP_X_REAL_IP'
+                 proxy_key2 = 'HTTP_X_FORWARDED_FOR'
+                 def_key = 'REMOTE_ADDR'
+                 def _filters(x):
+                     return _filter_port(_filter_proxy(x))
+                 ip = environ.get(proxy_key)
+                 if ip:
+                     return _filters(ip)
+                 ip = environ.get(proxy_key2)
+                 if ip:
+                     return _filters(ip)
+                 ip = environ.get(def_key, '0.0.0.0')
+                 return _filters(ip)
              class RhodeCodeLogger(Logger):
                  """
                  Custom Logger that allows some customization that gunicorn doesn't allow
                  def now(self):
                      """ return date in RhodeCode Log format """
                      now = time.time()
-                     msecs = int((now - long(now)) * 1000)
+                     msecs = int((now - int(now)) * 1000)
                      return time.strftime(self.datefmt, time.localtime(now)) + '.{0:03d}'.format(msecs)
+                 def atoms(self, resp, req, environ, request_time):
+                     """ Gets atoms for log formatting.
+                     Builds the dict of substitution values for one access-log entry. The
+                     client address ('h') is resolved with get_ip_addr(), i.e. from the
+                     proxy headers in `environ` before falling back to REMOTE_ADDR.
+                     :param resp: response; `status`, `headers` and (optionally) `sent` are read
+                     :param req: request exposing `headers`, or the headers themselves
+                     :param environ: WSGI environ of the request
+                     :param request_time: elapsed time, read via `.seconds` and `.microseconds`
+                         (presumably a datetime.timedelta -- confirm against the caller)
+                     :return: dict mapping atom names to their values
+                     """
+                     status = resp.status
+                     if isinstance(status, str):
+                         # keep only the first whitespace separated token of the status string
+                         status = status.split(None, 1)[0]
+                     atoms = {
+                         'h': get_ip_addr(environ),
+                         'l': '-',
+                         'u': self._get_user(environ) or '-',
+                         't': self.now(),
+                         'r': "%s %s %s" % (environ['REQUEST_METHOD'],
+                                            environ['RAW_URI'],
+                                            environ["SERVER_PROTOCOL"]),
+                         's': status,
+                         'm': environ.get('REQUEST_METHOD'),
+                         'U': environ.get('PATH_INFO'),
+                         'q': environ.get('QUERY_STRING'),
+                         'H': environ.get('SERVER_PROTOCOL'),
+                         'b': getattr(resp, 'sent', None) is not None and str(resp.sent) or '-',
+                         'B': getattr(resp, 'sent', None),
+                         'f': environ.get('HTTP_REFERER', '-'),
+                         'a': environ.get('HTTP_USER_AGENT', '-'),
+                         # request time as: whole seconds, microseconds, milliseconds, decimal seconds
+                         'T': request_time.seconds,
+                         'D': (request_time.seconds * 1000000) + request_time.microseconds,
+                         'M': (request_time.seconds * 1000) + int(request_time.microseconds/1000),
+                         'L': "%d.%06d" % (request_time.seconds, request_time.microseconds),
+                         'p': "<%s>" % os.getpid()
+                     }
+                     # add request headers
+                     if hasattr(req, 'headers'):
+                         req_headers = req.headers
+                     else:
+                         req_headers = req
+                     # headers may be a mapping or already an iterable of (name, value) pairs
+                     if hasattr(req_headers, "items"):
+                         req_headers = req_headers.items()
+                     atoms.update({"{%s}i" % k.lower(): v for k, v in req_headers})
+                     resp_headers = resp.headers
+                     if hasattr(resp_headers, "items"):
+                         resp_headers = resp_headers.items()
+                     # add response headers
+                     atoms.update({"{%s}o" % k.lower(): v for k, v in resp_headers})
+                     # add environ variables
+                     environ_variables = environ.items()
+                     atoms.update({"{%s}e" % k.lower(): v for k, v in environ_variables})
+                     return atoms
              logger_class = RhodeCodeLogger

configs/production.ini

0 +43 -85

-             ## -*- coding: utf-8 -*-
+             #
              ; #################################
              ; RHODECODE VCSSERVER CONFIGURATION
              [server:main]
              ; COMMON HOST/IP CONFIG
              host = 127.0.0.1
-             port = 9900
+             port = 10010
              ; ###########################
              ; GUNICORN APPLICATION SERVER
              ; ###########################
-             ; run with gunicorn --log-config rhodecode.ini --paste rhodecode.ini
+             ; run with gunicorn --paste rhodecode.ini
              ; Module to use, this setting shouldn't be changed
              use = egg:gunicorn#main
-             ; Sets the number of process workers. More workers means more concurrent connections
-             ; RhodeCode can handle at the same time. Each additional worker also it increases
-             ; memory usage as each has it's own set of caches.
-             ; Recommended value is (2 * NUMBER_OF_CPUS + 1), eg 2CPU = 5 workers, but no more
-             ; than 8-10 unless for really big deployments .e.g 700-1000 users.
-             ; `instance_id = *` must be set in the [app:main] section below (which is the default)
-             ; when using more than 1 worker.
-             workers = 2
-             ; Gunicorn access log level
-             loglevel = info
-             ; Process name visible in process list
-             proc_name = rhodecode_vcsserver
-             ; Type of worker class, one of `sync`, `gevent`
-             ; currently `sync` is the only option allowed.
-             worker_class = sync
-             ; The maximum number of simultaneous clients. Valid only for gevent
-             worker_connections = 10
-             ; Max number of requests that worker will handle before being gracefully restarted.
-             ; Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
-             max_requests = 1000
-             max_requests_jitter = 30
-             ; Amount of time a worker can spend with handling a request before it
-             ; gets killed and restarted. By default set to 21600 (6hrs)
-             ; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
-             timeout = 21600
-             ; The maximum size of HTTP request line in bytes.
-             ; 0 for unlimited
-             limit_request_line = 0
-             ; Limit the number of HTTP headers fields in a request.
-             ; By default this value is 100 and can't be larger than 32768.
-             limit_request_fields = 32768
-             ; Limit the allowed size of an HTTP request header field.
-             ; Value is a positive number or 0.
-             ; Setting it to 0 will allow unlimited header field sizes.
-             limit_request_field_size = 0
-             ; Timeout for graceful workers restart.
-             ; After receiving a restart signal, workers have this much time to finish
-             ; serving requests. Workers still alive after the timeout (starting from the
-             ; receipt of the restart signal) are force killed.
-             ; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
-             graceful_timeout = 3600
-             # The number of seconds to wait for requests on a Keep-Alive connection.
-             # Generally set in the 1-5 seconds range.
-             keepalive = 2
-             ; Maximum memory usage that each worker can use before it will receive a
-             ; graceful restart signal 0 = memory monitoring is disabled
-             ; Examples: 268435456 (256MB), 536870912 (512MB)
-             ; 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
-             memory_max_usage = 0
-             ; How often in seconds to check for memory usage for each gunicorn worker
-             memory_usage_check_interval = 60
-             ; Threshold value for which we don't recycle worker if GarbageCollection
-             ; frees up enough resources. Before each restart we try to run GC on worker
-             ; in case we get enough free memory after that, restart will not happen.
-             memory_usage_recovery_threshold = 0.8
              [app:main]
              ; The %(here)s variable will be replaced with the absolute path of parent directory
              ; of this file
+             ; Each option in the app:main section can be overridden by an environment variable
+             ;
+             ;To override an option:
+             ;
+             ;RC_<KeyName>
+             ;Everything should be uppercase, . and - should be replaced by _.
+             ;For example, if you have these configuration settings:
+             ;rc_cache.repo_object.backend = foo
+             ;can be overridden by
+             ;export RC_CACHE_REPO_OBJECT_BACKEND=foo
              use = egg:rhodecode-vcsserver
              ; Pyramid default locales, we need this to be set
-             pyramid.default_locale_name = en
+             #pyramid.default_locale_name = en
              ; default locale used by VCS systems
-             locale = en_US.UTF-8
+             #locale = en_US.UTF-8
              ; path to binaries for vcsserver, it should be set by the installer
-             ; at installation time, e.g /home/user/vcsserver-1/profile/bin
+             ; at installation time, e.g /home/user/.rccontrol/vcsserver-1/profile/bin
              ; it can also be a path to nix-build output in case of development
              core.binary_dir = ""
              ; Default cache dir for caches. Putting this into a ramdisk can boost performance.
              ; eg. /tmpfs/data_ramdisk, however this directory might require large amount of space
-             cache_dir = %(here)s/data
+             #cache_dir = %(here)s/data
              ; ***************************************
              ; `repo_object` cache, default file based
              ; ***************************************
              ; `repo_object` cache settings for vcs methods for repositories
-             rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
+             #rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
              ; cache auto-expires after N seconds
              ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
-             rc_cache.repo_object.expiration_time = 2592000
+             #rc_cache.repo_object.expiration_time = 2592000
              ; file cache store path. Defaults to `cache_dir =` value or tempdir if both values are not set
-             #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache.db
+             #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache_repo_object.db
              ; ***********************************************************
              ; `repo_object` cache with redis backend
              ; auto-renew lock to prevent stale locks, slower but safer. Use only if problems happen
              #rc_cache.repo_object.arguments.lock_auto_renewal = true
-             ; Statsd client config
+             ; Statsd client config, this is used to send metrics to statsd
+             ; We recommend setting up statsd_exporter and scraping the metrics using Prometheus
              #statsd.enabled = false
              #statsd.statsd_host = 0.0.0.0
              #statsd.statsd_port = 8125
              #statsd.statsd_prefix =
              #statsd.statsd_ipv6 = false
+             ; configure logging automatically at server startup; set to false
+             ; to use the custom logging config below.
+             ; RC_LOGGING_FORMATTER
+             ; RC_LOGGING_LEVEL
+             ; the env variables above control the logging settings when autoconfigure is enabled
+             #logging.autoconfigure = true
+             ; specify your own custom logging config file to configure logging
+             #logging.logging_conf_file = /path/to/custom_logging.ini
              ; #####################
              ; LOGGING CONFIGURATION
              ; #####################
              [loggers]
              keys = root, vcsserver
              keys = console
              [formatters]
-             keys = generic
+             keys = generic, json
              ; #######
              ; LOGGERS
              handlers = console
              [logger_vcsserver]
-             level = DEBUG
+             level = INFO
              handlers =
              qualname = vcsserver
              propagate = 1
              ; ########
              ; HANDLERS
              ; ########
              class = StreamHandler
              args = (sys.stderr, )
              level = INFO
+             ; To enable JSON formatted logs replace 'generic' with 'json'
+             ; This allows sending properly formatted logs to grafana loki or elasticsearch
              formatter = generic
              ; ##########
              [formatter_generic]
              format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
              datefmt = %Y-%m-%d %H:%M:%S
+             [formatter_json]
+             format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
+             class = vcsserver.lib._vendor.jsonlogger.JsonFormatter

conftest.py ~~vcsserver/tests/conftest.py~~

0 renamed +4 -5

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              import socket
              import pytest
              def pytest_addoption(parser):
                  parser.addoption(
-                     '--repeat', type=int, default=100,
+                     '--perf-repeat-vcs', type=int, default=100,
                      help="Number of repetitions in performance tests.")
                  Slower calls may divide it by 10 or 100. It is chosen in a way so that the
                  tests are not too slow in our default test suite.
                  """
-                 return request.config.getoption('--repeat')
+                 return request.config.getoption('--perf-repeat-vcs')
              @pytest.fixture(scope='session')
              def vcsserver_port(request):
                  port = get_available_port()
-                 print('Using vcsserver port %s' % (port, ))
+                 print(f'Using vcsserver port {port}')
                  return port

requirements.txt

0 +69 -40

		@@ -1,48 +1,77 b''
1		## dependencies
2
3		# our custom configobj
4		https://code.rhodecode.com/upstream/configobj/artifacts/download/0-012de99a-b1e1-4f64-a5c0-07a98a41b324.tar.gz?md5=6a513f51fe04b2c18cf84c1395a7c626#egg=configobj==5.0.6
5
6		dogpile.cache==0.9.0
7		dogpile.core==0.4.1
8		decorator==4.1.2
9		dulwich==0.13.0
10		hgsubversion==1.9.3
11		hg-evolve==9.1.0
12		mako==1.1.0
13		markupsafe==1.1.1
14		mercurial==5.1.1
15		msgpack-python==0.5.6
16
17		pastedeploy==2.1.0
18		pyramid==1.10.4
19		pyramid-mako==1.1.0
20		pygit2==0.28.2
	1	# deps, generated via pipdeptree --exclude setuptools,wheel,pipdeptree,pip -f | tr '[:upper:]' '[:lower:]'
21	2
	3	async-timeout==4.0.3
	4	atomicwrites==1.4.1
	5	celery==5.3.6
	6	billiard==4.2.0
	7	click==8.1.3
	8	click-didyoumean==0.3.0
	9	click==8.1.3
	10	click-plugins==1.1.1
	11	click==8.1.3
	12	click-repl==0.2.0
	13	click==8.1.3
	14	prompt-toolkit==3.0.38
	15	wcwidth==0.2.6
	16	six==1.16.0
	17	kombu==5.3.5
	18	amqp==5.2.0
	19	vine==5.1.0
	20	vine==5.1.0
	21	python-dateutil==2.8.2
	22	six==1.16.0
	23	tzdata==2023.4
	24	vine==5.1.0
	25	contextlib2==21.6.0
	26	cov-core==1.15.0
	27	coverage==7.2.3
	28	diskcache==5.6.3
	29	dogpile.cache==1.3.0
	30	decorator==5.1.1
	31	stevedore==5.1.0
	32	pbr==5.11.1
	33	dulwich==0.21.6
	34	urllib3==1.26.14
	35	gunicorn==21.2.0
	36	packaging==23.1
	37	hg-evolve==11.0.2
	38	importlib-metadata==6.0.0
	39	zipp==3.15.0
	40	mercurial==6.3.3
	41	mock==5.0.2
	42	more-itertools==9.1.0
	43	msgpack==1.0.7
	44	orjson==3.9.13
	45	psutil==5.9.8
	46	py==1.11.0
	47	pygit2==1.13.3
	48	cffi==1.16.0
	49	pycparser==2.21
	50	pygments==2.15.1
	51	pyparsing==3.1.1
	52	pyramid==2.0.2
	53	hupper==1.12
	54	plaster==1.1.2
	55	plaster-pastedeploy==1.0.1
	56	pastedeploy==3.1.0
	57	plaster==1.1.2
	58	translationstring==1.4
	59	venusian==3.0.0
	60	webob==1.8.7
	61	zope.deprecation==5.0.0
	62	zope.interface==6.1.0
	63	redis==5.0.1
	64	async-timeout==4.0.3
22	65	repoze.lru==0.7
23		redis==3.5.3
24		simplejson==3.16.0
25		subprocess32==3.5.4
26		subvertpy==0.10.1
	66	scandir==1.10.0
	67	setproctitle==1.3.3
	68	subvertpy==0.11.0
	69	waitress==3.0.0
	70	wcwidth==0.2.6
27	71
28		six==1.11.0
29		translationstring==1.3
30		webob==1.8.5
31		zope.deprecation==4.4.0
32		zope.interface==4.6.0
33
34		## http servers
35		gevent==1.5.0
36		greenlet==0.4.15
37		gunicorn==19.9.0
38		waitress==1.3.1
39
40		## debug
41		ipdb==0.13.2
42		ipython==5.1.0
43	72
44	73	## test related requirements
45		-r requirements_test.txt
	74	#-r requirements_test.txt
46	75
47	76	## uncomment to add the debug libraries
48	77	#-r requirements_debug.txt

requirements_debug.txt

0 +23 -3

		@@ -1,8 +1,28 b''
1	1	## special libraries we could extend the requirements.txt file with to add some
2		## custom libraries useful for debug and memory tracing
3
4		## uncomment inclusion of this file in requirements.txt run make generate-pkgs and nix-shell
	2	## custom libraries useful for debug and memory tracing
5	3
6	4	objgraph
7	5	memory-profiler
8	6	pympler
	7
	8	## debug
	9	ipdb
	10	ipython
	11	rich
	12
	13	# format
	14	flake8
	15	ruff
	16
	17	pipdeptree==2.7.1
	18	invoke==2.0.0
	19	bumpversion==0.6.0
	20	bump2version==1.0.1
	21
	22	docutils-stubs
	23	types-redis
	24	types-requests==2.31.0.6
	25	types-sqlalchemy
	26	types-psutil
	27	types-pycurl
	28	types-ujson

requirements_test.txt

0 +42 -13

		@@ -1,16 +1,45 b''
1	1	# test related requirements
2		pytest==4.6.5
3		py==1.8.0
4		pytest-cov==2.7.1
5		pytest-sugar==0.9.2
6		pytest-runner==5.1.0
	2
	3	cov-core==1.15.0
	4	coverage==7.2.3
	5	mock==5.0.2
	6	py==1.11.0
	7	pytest-cov==4.0.0
	8	coverage==7.2.3
	9	pytest==7.3.1
	10	attrs==22.2.0
	11	iniconfig==2.0.0
	12	packaging==23.1
	13	pluggy==1.0.0
7	14	pytest-profiling==1.7.0
8		pytest-timeout==1.3.3
9		gprof2dot==2017.9.19
	15	gprof2dot==2022.7.29
	16	pytest==7.3.1
	17	attrs==22.2.0
	18	iniconfig==2.0.0
	19	packaging==23.1
	20	pluggy==1.0.0
	21	six==1.16.0
	22	pytest-runner==6.0.0
	23	pytest-sugar==0.9.7
	24	packaging==23.1
	25	pytest==7.3.1
	26	attrs==22.2.0
	27	iniconfig==2.0.0
	28	packaging==23.1
	29	pluggy==1.0.0
	30	termcolor==2.3.0
	31	pytest-timeout==2.1.0
	32	pytest==7.3.1
	33	attrs==22.2.0
	34	iniconfig==2.0.0
	35	packaging==23.1
	36	pluggy==1.0.0
	37	webtest==3.0.0
	38	beautifulsoup4==4.11.2
	39	soupsieve==2.4
	40	waitress==3.0.0
	41	webob==1.8.7
10	42
11		mock==3.0.5
12		cov-core==1.15.0
13		coverage==4.5.4
14
15		webtest==2.0.34
16		beautifulsoup4==4.6.3
	43	# RhodeCode test-data
	44	rc_testdata @ https://code.rhodecode.com/upstream/rc-testdata-dist/raw/77378e9097f700b4c1b9391b56199fe63566b5c9/rc_testdata-0.11.0.tar.gz#egg=rc_testdata
	45	rc_testdata==0.11.0

vcsserver/VERSION

0 +1 -1

		@@ -1,1 +1,1 b''
1		4.27.1 No newline at end of file
	1	5.0.0 No newline at end of file

vcsserver/__init__.py

0 +16 -3

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
-             import pkgutil
+             import os
+             __version__ = ''
-             __version__ = pkgutil.get_data('vcsserver', 'VERSION').strip()
+             def get_version():
+                 global __version__
+                 if __version__:
+                     return __version__
+                 here = os.path.abspath(os.path.dirname(__file__))
+                 ver_file = os.path.join(here, "VERSION")
+                 with open(ver_file, "rt") as f:
+                     version = f.read().strip()
+                 __version__ = version
+                 return version
              # link to config for pyramid
              CONFIG = {}

vcsserver/base.py

0 +92 -29

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              import os
              import sys
-             import traceback
+             import tempfile
              import logging
-             import urlparse
+             import urllib.parse
+             from vcsserver.lib.rc_cache.archive_cache import get_archival_cache_store
              from vcsserver import exceptions
              from vcsserver.exceptions import NoContentException
-             from vcsserver.hgcompat import (archival)
+             from vcsserver.hgcompat import archival
+             from vcsserver.str_utils import safe_bytes
+             from vcsserver.lib.exc_tracking import format_exc
              log = logging.getLogger(__name__)
-             class RepoFactory(object):
+             class RepoFactory:
                  """
                  Utility to create instances of repository
                      return None
                  parsed = []
-                 for k, v in urlparse.parse_qsl(query_string, keep_blank_values=True):
+                 for k, v in urllib.parse.parse_qsl(query_string, keep_blank_values=True):
                      if k in ['auth_token', 'api_key']:
                          v = "*****"
                      parsed.append((k, v))
                  return '&'.join('{}{}'.format(
-                     k, '={}'.format(v) if v else '') for k, v in parsed)
+                     k, f'={v}' if v else '') for k, v in parsed)
-             def raise_from_original(new_type):
+             def raise_from_original(new_type, org_exc: Exception):
                  """
                  Raise a new exception type with original args and traceback.
                  """
-                 exc_type, exc_value, exc_traceback = sys.exc_info()
+                 exc_info = sys.exc_info()
+                 exc_type, exc_value, exc_traceback = exc_info
                  new_exc = new_type(*exc_value.args)
                  # store the original traceback into the new exc
-                 new_exc._org_exc_tb = traceback.format_exc(exc_traceback)
+                 new_exc._org_exc_tb = format_exc(exc_info)
                  try:
-                     raise new_exc, None, exc_traceback
+                     raise new_exc.with_traceback(exc_traceback)
                  finally:
                      del exc_traceback
-             class ArchiveNode(object):
+             class ArchiveNode:
                  def __init__(self, path, mode, is_link, raw_bytes):
                      self.path = path
                      self.mode = mode
                      self.raw_bytes = raw_bytes
-             def archive_repo(walker, archive_dest_path, kind, mtime, archive_at_path,
-                              archive_dir_name, commit_id, write_metadata=True, extra_metadata=None):
+             def store_archive_in_cache(node_walker, archive_key, kind, mtime, archive_at_path, archive_dir_name,
+                                        commit_id, write_metadata=True, extra_metadata=None, cache_config=None):
                  """
-                 walker should be a file walker, for example:
-                     def walker():
+                 Function that generates an archive and stores it in a dedicated backend store.
+                 Here we use diskcache.
+                 :param node_walker: a generator returning nodes to add to archive
+                 :param archive_key: key used to store the path
+                 :param kind: archive kind
+                 :param mtime: time of creation
+                 :param archive_at_path: default '/' the path at which the archive was started.
+                     If this is not '/' it means it's a partial archive
+                 :param archive_dir_name: inside dir name when creating an archive
+                 :param commit_id: commit sha of revision archive was created at
+                 :param write_metadata:
+                 :param extra_metadata:
+                 :param cache_config:
+                 walker should be a file walker, for example,
+                     def node_walker():
                          for file_info in files:
                              yield ArchiveNode(fn, mode, is_link, ctx[fn].data)
                  """
                  extra_metadata = extra_metadata or {}
+                 d_cache = get_archival_cache_store(config=cache_config)
+                 if archive_key in d_cache:
+                     with d_cache as d_cache_reader:
+                         reader, tag = d_cache_reader.get(archive_key, read=True, tag=True, retry=True)
+                         return reader.name
+                 archive_tmp_path = safe_bytes(tempfile.mkstemp()[1])
+                 log.debug('Creating new temp archive in %s', archive_tmp_path)
                  if kind == "tgz":
-                     archiver = archival.tarit(archive_dest_path, mtime, "gz")
+                     archiver = archival.tarit(archive_tmp_path, mtime, b"gz")
                  elif kind == "tbz2":
-                     archiver = archival.tarit(archive_dest_path, mtime, "bz2")
+                     archiver = archival.tarit(archive_tmp_path, mtime, b"bz2")
                  elif kind == 'zip':
-                     archiver = archival.zipit(archive_dest_path, mtime)
+                     archiver = archival.zipit(archive_tmp_path, mtime)
                  else:
                      raise exceptions.ArchiveException()(
-                         'Remote does not support: "%s" archive type.' % kind)
+                         f'Remote does not support: "{kind}" archive type.')
-                 for f in walker(commit_id, archive_at_path):
-                     f_path = os.path.join(archive_dir_name, f.path.lstrip('/'))
+                 for f in node_walker(commit_id, archive_at_path):
+                     f_path = os.path.join(safe_bytes(archive_dir_name), safe_bytes(f.path).lstrip(b'/'))
                      try:
                          archiver.addfile(f_path, f.mode, f.is_link, f.raw_bytes())
                      except NoContentException:
                          # NOTE(marcink): this is a special case for SVN so we can create "empty"
-                         # directories which arent supported by archiver
-                         archiver.addfile(os.path.join(f_path, '.dir'), f.mode, f.is_link, '')
+                         # directories which are not supported by archiver
+                         archiver.addfile(os.path.join(f_path, b'.dir'), f.mode, f.is_link, b'')
                  if write_metadata:
                      metadata = dict([
                      ])
                      metadata.update(extra_metadata)
-                     meta = ["%s:%s" % (f_name, value) for f_name, value in metadata.items()]
-                     f_path = os.path.join(archive_dir_name, '.archival.txt')
-                     archiver.addfile(f_path, 0o644, False, '\n'.join(meta))
+                     meta = [safe_bytes(f"{f_name}:{value}") for f_name, value in metadata.items()]
+                     f_path = os.path.join(safe_bytes(archive_dir_name), b'.archival.txt')
+                     archiver.addfile(f_path, 0o644, False, b'\n'.join(meta))
+                 archiver.done()
+                 # ensure set & get are atomic
+                 with d_cache.transact():
+                     with open(archive_tmp_path, 'rb') as archive_file:
+                         add_result = d_cache.set(archive_key, archive_file, read=True, tag='db-name', retry=True)
+                         if not add_result:
+                             log.error('Failed to store cache for key=%s', archive_key)
+                     os.remove(archive_tmp_path)
-                 return archiver.done()
+                     reader, tag = d_cache.get(archive_key, read=True, tag=True, retry=True)
+                     if not reader:
+                         raise AssertionError(f'empty reader on key={archive_key} added={add_result}')
+                     return reader.name
+             class BinaryEnvelope:
+                 def __init__(self, val):
+                     self.val = val
+             class BytesEnvelope(bytes):
+                 def __new__(cls, content):
+                     if isinstance(content, bytes):
+                         return super().__new__(cls, content)
+                     else:
+                         raise TypeError('BytesEnvelope content= param must be bytes. Use BinaryEnvelope to wrap other types')
+             class BinaryBytesEnvelope(BytesEnvelope):
+                 pass

vcsserver/echo_stub/__init__.py

0 +2 0

+             # Copyright (C) 2014-2023 RhodeCode GmbH
              """
              Provides a stub implementation for VCS operations.

vcsserver/echo_stub/echo_app.py

0 +7 -5

+             # Copyright (C) 2014-2023 RhodeCode GmbH
              """
              Implementation of :class:`EchoApp`.
              log = logging.getLogger(__name__)
-             class EchoApp(object):
+             class EchoApp:
                  def __init__(self, repo_path, repo_name, config):
                      self._repo_path = repo_path
                      status = '200 OK'
                      headers = [('Content-Type', 'text/plain')]
                      start_response(status, headers)
-                     return ["ECHO"]
+                     return [b"ECHO"]
-             class EchoAppStream(object):
+             class EchoAppStream:
                  def __init__(self, repo_path, repo_name, config):
                      self._repo_path = repo_path
                      start_response(status, headers)
                      def generator():
-                         for _ in xrange(1000000):
-                             yield "ECHO"
+                         for _ in range(1000000):
+                             yield b"ECHO_STREAM"
                      return generator()

vcsserver/echo_stub/remote_wsgi.py

0 +4 -2

+             # Copyright (C) 2014-2023 RhodeCode GmbH
              """
              Provides the same API as :mod:`remote_wsgi`.
              log = logging.getLogger(__name__)
-             class GitRemoteWsgi(object):
+             class GitRemoteWsgi:
                  def handle(self, environ, input_data, *args, **kwargs):
                      app = wsgi_app_caller.WSGIAppCaller(
                          create_echo_wsgi_app(*args, **kwargs))
                      return app.handle(environ, input_data)
-             class HgRemoteWsgi(object):
+             class HgRemoteWsgi:
                  def handle(self, environ, input_data, *args, **kwargs):
                      app = wsgi_app_caller.WSGIAppCaller(
                          create_echo_wsgi_app(*args, **kwargs))

vcsserver/exceptions.py

0 +2 -2

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
                  def __init__(self, title, status_code=None, **kwargs):
                      self.code = status_code or HTTPLocked.code
                      self.title = title
-                     super(HTTPRepoLocked, self).__init__(**kwargs)
+                     super().__init__(**kwargs)
              class HTTPRepoBranchProtected(HTTPForbidden):

vcsserver/git_lfs/__init__.py

0 +2 -2

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
-             from app import create_app
+             from .app import create_app  # noqa

vcsserver/git_lfs/app.py

0 +15 -11

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              import re
              import logging
-             from wsgiref.util import FileWrapper
-             import simplejson as json
              from pyramid.config import Configurator
              from pyramid.response import Response, FileIter
              from pyramid.httpexceptions import (
                  HTTPBadRequest, HTTPNotImplemented, HTTPNotFound, HTTPForbidden,
                  HTTPUnprocessableEntity)
+             from vcsserver.lib.rc_json import json
              from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
              from vcsserver.git_lfs.utils import safe_result, get_cython_compat_decorator
-             from vcsserver.utils import safe_int
+             from vcsserver.str_utils import safe_int
              log = logging.getLogger(__name__)
-             GIT_LFS_CONTENT_TYPE = 'application/vnd.git-lfs' #+json ?
+             GIT_LFS_CONTENT_TYPE = 'application/vnd.git-lfs'  # +json ?
              GIT_LFS_PROTO_PAT = re.compile(r'^/(.+)/(info/lfs/(.+))')
                  return _exception
-             class AuthHeaderRequired(object):
+             class AuthHeaderRequired:
                  """
                  Decorator to check if request has proper auth-header
                  """
                  if operation not in ('download', 'upload'):
                      log.debug('LFS: unsupported operation:%s', operation)
                      return write_response_error(
-                         HTTPBadRequest, 'unsupported operation mode: `%s`' % operation)
+                         HTTPBadRequest, f'unsupported operation mode: `{operation}`')
                  if 'objects' not in data:
                      log.debug('LFS: missing objects data')
                              HTTPBadRequest, 'unsupported data in objects')
                      obj_data = {'oid': oid}
+                     if http_scheme == 'http':
+                         # Note(marcink): when using http, we might have a custom port
+                         # so we skip setting it to http, url dispatch then won't generate a port in URL
+                         # for development we need this
+                         http_scheme = None
-                     obj_href = request.route_url('lfs_objects_oid', repo=repo, oid=oid,
+                     obj_href = request.route_url('lfs_objects_oid', repo=repo, oid=oid,
                                                   _scheme=http_scheme)
                      obj_verify_href = request.route_url('lfs_objects_verify', repo=repo,
                                                          _scheme=http_scheme)
                  if not store.has_oid():
                      log.debug('LFS: oid %s does not exists in store', oid)
                      return write_response_error(
-                         HTTPNotFound, 'requested file with oid `%s` not found in store' % oid)
+                         HTTPNotFound, f'requested file with oid `{oid}` not found in store')
                  # TODO(marcink): support range header ?
                  # Range: bytes=0-, `bytes=(\d+)\-.*`
                  if not store.has_oid():
                      log.debug('LFS: oid %s does not exists in store', oid)
                      return write_response_error(
-                         HTTPNotFound, 'oid `%s` does not exists in store' % oid)
+                         HTTPNotFound, f'oid `{oid}` does not exists in store')
                  store_size = store.size_oid()
                  if store_size != size:
-                     msg = 'requested file size mismatch store size:%s requested:%s' % (
+                     msg = 'requested file size mismatch store size:{} requested:{}'.format(
                          store_size, size)
                      return write_response_error(
                          HTTPUnprocessableEntity, msg)

vcsserver/git_lfs/lib.py

0 +8 -6

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              log = logging.getLogger(__name__)
-             class OidHandler(object):
+             class OidHandler:
                  def __init__(self, store, repo_name, auth, oid, obj_size, obj_data, obj_href,
                               obj_verify_href=None):
                      if not store.has_oid():
                          # error reply back to client that something is wrong with dl
-                         err_msg = 'object: {} does not exist in store'.format(store.oid)
+                         err_msg = f'object: {store.oid} does not exist in store'
                          has_errors = OrderedDict(
                              error=OrderedDict(
                                  code=404,
                      return handler(*args, **kwargs)
-             class LFSOidStore(object):
+             class LFSOidStore:
                  def __init__(self, oid, repo, store_location=None):
                      self.oid = oid
                      self.repo = repo
-                     self.store_path = store_location or self.get_default_store()
+                     defined_store_path = store_location or self.get_default_store()
+                     self.store_suffix = f"/objects/{oid[:2]}/{oid[2:4]}"
+                     self.store_path = f"{defined_store_path.rstrip('/')}{self.store_suffix}"
                      self.tmp_oid_path = os.path.join(self.store_path, oid + '.tmp')
                      self.oid_path = os.path.join(self.store_path, oid)
                      self.fd = None
                          f.write('...')
                      """
-                     class StoreEngine(object):
+                     class StoreEngine:
                          def __init__(self, mode, store_path, oid_path, tmp_oid_path):
                              self.mode = mode
                              self.store_path = store_path

vcsserver/git_lfs/tests/__init__.py

0 +1 -1

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by

vcsserver/git_lfs/tests/test_lfs_app.py

0 +62 -60

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              import os
              import pytest
              from webtest.app import TestApp as WebObTestApp
-             import simplejson as json
+             from vcsserver.lib.rc_json import json
+             from vcsserver.str_utils import safe_bytes
              from vcsserver.git_lfs.app import create_app
+             from vcsserver.git_lfs.lib import LFSOidStore
              @pytest.fixture(scope='function')
                  return {'HTTP_AUTHORIZATION': "Basic XXXXX"}
-             class TestLFSApplication(object):
+             class TestLFSApplication:
                  def test_app_wrong_path(self, git_lfs_app):
                      git_lfs_app.get('/repo/info/lfs/xxx', status=404)
                  def test_app_deprecated_endpoint(self, git_lfs_app):
                      response = git_lfs_app.post('/repo/info/lfs/objects', status=501)
                      assert response.status_code == 501
-                     assert json.loads(response.text) == {u'message': u'LFS: v1 api not supported'}
+                     assert json.loads(response.text) == {'message': 'LFS: v1 api not supported'}
                  def test_app_lock_verify_api_not_available(self, git_lfs_app):
                      response = git_lfs_app.post('/repo/info/lfs/locks/verify', status=501)
                      assert response.status_code == 501
                      assert json.loads(response.text) == {
-                         u'message': u'GIT LFS locking api not supported'}
+                         'message': 'GIT LFS locking api not supported'}
                  def test_app_lock_api_not_available(self, git_lfs_app):
                      response = git_lfs_app.post('/repo/info/lfs/locks', status=501)
                      assert response.status_code == 501
                      assert json.loads(response.text) == {
-                         u'message': u'GIT LFS locking api not supported'}
+                         'message': 'GIT LFS locking api not supported'}
                  def test_app_batch_api_missing_auth(self, git_lfs_app):
                      git_lfs_app.post_json(
                          '/repo/info/lfs/objects/batch', params={}, status=400,
                          extra_environ=http_auth)
                      assert json.loads(response.text) == {
-                         u'message': u'unsupported operation mode: `None`'}
+                         'message': 'unsupported operation mode: `None`'}
                  def test_app_batch_api_missing_objects(self, git_lfs_app, http_auth):
                      response = git_lfs_app.post_json(
                          '/repo/info/lfs/objects/batch', params={'operation': 'download'},
                          status=400, extra_environ=http_auth)
                      assert json.loads(response.text) == {
-                         u'message': u'missing objects data'}
+                         'message': 'missing objects data'}
                  def test_app_batch_api_unsupported_data_in_objects(
                          self, git_lfs_app, http_auth):
                          '/repo/info/lfs/objects/batch', params=params, status=400,
                          extra_environ=http_auth)
                      assert json.loads(response.text) == {
-                         u'message': u'unsupported data in objects'}
+                         'message': 'unsupported data in objects'}
                  def test_app_batch_api_download_missing_object(
                          self, git_lfs_app, http_auth):
                          extra_environ=http_auth)
                      expected_objects = [
-                         {u'authenticated': True,
-                          u'errors': {u'error': {
-                              u'code': 404,
-                              u'message': u'object: 123 does not exist in store'}},
-                          u'oid': u'123',
-                          u'size': u'1024'}
+                         {'authenticated': True,
+                          'errors': {'error': {
+                              'code': 404,
+                              'message': 'object: 123 does not exist in store'}},
+                          'oid': '123',
+                          'size': '1024'}
                      ]
                      assert json.loads(response.text) == {
                          'objects': expected_objects, 'transfer': 'basic'}
                  def test_app_batch_api_download(self, git_lfs_app, http_auth):
                      oid = '456'
-                     oid_path = os.path.join(git_lfs_app._store, oid)
+                     oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
                      if not os.path.isdir(os.path.dirname(oid_path)):
                          os.makedirs(os.path.dirname(oid_path))
                      with open(oid_path, 'wb') as f:
-                         f.write('OID_CONTENT')
+                         f.write(safe_bytes('OID_CONTENT'))
                      params = {'operation': 'download',
                                'objects': [{'oid': oid, 'size': '1024'}]}
                          extra_environ=http_auth)
                      expected_objects = [
-                         {u'authenticated': True,
-                          u'actions': {
-                              u'download': {
-                                  u'header': {u'Authorization': u'Basic XXXXX'},
-                                  u'href': u'http://localhost/repo/info/lfs/objects/456'},
+                         {'authenticated': True,
+                          'actions': {
+                              'download': {
+                                  'header': {'Authorization': 'Basic XXXXX'},
+                                  'href': 'http://localhost/repo/info/lfs/objects/456'},
                           },
-                          u'oid': u'456',
-                          u'size': u'1024'}
+                          'oid': '456',
+                          'size': '1024'}
                      ]
                      assert json.loads(response.text) == {
                          'objects': expected_objects, 'transfer': 'basic'}
                          '/repo/info/lfs/objects/batch', params=params,
                          extra_environ=http_auth)
                      expected_objects = [
-                         {u'authenticated': True,
-                          u'actions': {
-                              u'upload': {
-                                  u'header': {u'Authorization': u'Basic XXXXX',
-                                              u'Transfer-Encoding': u'chunked'},
-                                  u'href': u'http://localhost/repo/info/lfs/objects/123'},
-                              u'verify': {
-                                  u'header': {u'Authorization': u'Basic XXXXX'},
-                                  u'href': u'http://localhost/repo/info/lfs/verify'}
+                         {'authenticated': True,
+                          'actions': {
+                              'upload': {
+                                  'header': {'Authorization': 'Basic XXXXX',
+                                              'Transfer-Encoding': 'chunked'},
+                                  'href': 'http://localhost/repo/info/lfs/objects/123'},
+                              'verify': {
+                                  'header': {'Authorization': 'Basic XXXXX'},
+                                  'href': 'http://localhost/repo/info/lfs/verify'}
                           },
-                          u'oid': u'123',
-                          u'size': u'1024'}
+                          'oid': '123',
+                          'size': '1024'}
                      ]
                      assert json.loads(response.text) == {
                          'objects': expected_objects, 'transfer': 'basic'}
                          '/repo/info/lfs/objects/batch', params=params,
                          extra_environ=http_auth)
                      expected_objects = [
-                         {u'authenticated': True,
-                          u'actions': {
-                              u'upload': {
-                                  u'header': {u'Authorization': u'Basic XXXXX',
-                                              u'Transfer-Encoding': u'chunked'},
-                                  u'href': u'https://localhost/repo/info/lfs/objects/123'},
-                              u'verify': {
-                                  u'header': {u'Authorization': u'Basic XXXXX'},
-                                  u'href': u'https://localhost/repo/info/lfs/verify'}
+                         {'authenticated': True,
+                          'actions': {
+                              'upload': {
+                                  'header': {'Authorization': 'Basic XXXXX',
+                                              'Transfer-Encoding': 'chunked'},
+                                  'href': 'https://localhost/repo/info/lfs/objects/123'},
+                              'verify': {
+                                  'header': {'Authorization': 'Basic XXXXX'},
+                                  'href': 'https://localhost/repo/info/lfs/verify'}
                           },
-                          u'oid': u'123',
-                          u'size': u'1024'}
+                          'oid': '123',
+                          'size': '1024'}
                      ]
                      assert json.loads(response.text) == {
                          'objects': expected_objects, 'transfer': 'basic'}
                          status=400)
                      assert json.loads(response.text) == {
-                         u'message': u'missing oid and size in request data'}
+                         'message': 'missing oid and size in request data'}
                  def test_app_verify_api_missing_obj(self, git_lfs_app):
                      params = {'oid': 'missing', 'size': '1024'}
                          status=404)
                      assert json.loads(response.text) == {
-                         u'message': u'oid `missing` does not exists in store'}
+                         'message': 'oid `missing` does not exists in store'}
                  def test_app_verify_api_size_mismatch(self, git_lfs_app):
                      oid = 'existing'
-                     oid_path = os.path.join(git_lfs_app._store, oid)
+                     oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
                      if not os.path.isdir(os.path.dirname(oid_path)):
                          os.makedirs(os.path.dirname(oid_path))
                      with open(oid_path, 'wb') as f:
-                         f.write('OID_CONTENT')
+                         f.write(safe_bytes('OID_CONTENT'))
                      params = {'oid': oid, 'size': '1024'}
                      response = git_lfs_app.post_json(
                          '/repo/info/lfs/verify', params=params, status=422)
                      assert json.loads(response.text) == {
-                         u'message': u'requested file size mismatch '
-                                     u'store size:11 requested:1024'}
+                         'message': 'requested file size mismatch '
+                                     'store size:11 requested:1024'}
                  def test_app_verify_api(self, git_lfs_app):
                      oid = 'existing'
-                     oid_path = os.path.join(git_lfs_app._store, oid)
+                     oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
                      if not os.path.isdir(os.path.dirname(oid_path)):
                          os.makedirs(os.path.dirname(oid_path))
                      with open(oid_path, 'wb') as f:
-                         f.write('OID_CONTENT')
+                         f.write(safe_bytes('OID_CONTENT'))
                      params = {'oid': oid, 'size': 11}
                      response = git_lfs_app.post_json(
                          '/repo/info/lfs/verify', params=params)
                      assert json.loads(response.text) == {
-                         u'message': {u'size': u'ok', u'in_store': u'ok'}}
+                         'message': {'size': 'ok', 'in_store': 'ok'}}
                  def test_app_download_api_oid_not_existing(self, git_lfs_app):
                      oid = 'missing'
                          '/repo/info/lfs/objects/{oid}'.format(oid=oid), status=404)
                      assert json.loads(response.text) == {
-                         u'message': u'requested file with oid `missing` not found in store'}
+                         'message': 'requested file with oid `missing` not found in store'}
                  def test_app_download_api(self, git_lfs_app):
                      oid = 'existing'
-                     oid_path = os.path.join(git_lfs_app._store, oid)
+                     oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
                      if not os.path.isdir(os.path.dirname(oid_path)):
                          os.makedirs(os.path.dirname(oid_path))
                      with open(oid_path, 'wb') as f:
-                         f.write('OID_CONTENT')
+                         f.write(safe_bytes('OID_CONTENT'))
                      response = git_lfs_app.get(
                          '/repo/info/lfs/objects/{oid}'.format(oid=oid))
                      response = git_lfs_app.put(
                          '/repo/info/lfs/objects/{oid}'.format(oid=oid), params='CONTENT')
-                     assert json.loads(response.text) == {u'upload': u'ok'}
+                     assert json.loads(response.text) == {'upload': 'ok'}
                      # verify that we actually wrote that OID
-                     oid_path = os.path.join(git_lfs_app._store, oid)
+                     oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
                      assert os.path.isfile(oid_path)
                      assert 'CONTENT' == open(oid_path).read()

vcsserver/git_lfs/tests/test_lib.py

0 +10 -9

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              import os
              import pytest
+             from vcsserver.str_utils import safe_bytes
              from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
                  return oid_handler
-             class TestOidHandler(object):
+             class TestOidHandler:
                  @pytest.mark.parametrize('exec_action', [
                      'download',
                          os.makedirs(os.path.dirname(store.oid_path))
                      with open(store.oid_path, 'wb') as f:
-                         f.write('CONTENT')
+                         f.write(safe_bytes('CONTENT'))
                      response, has_errors = oid_handler.exec_operation('download')
                          os.makedirs(os.path.dirname(store.oid_path))
                      with open(store.oid_path, 'wb') as f:
-                         f.write('CONTENT')
+                         f.write(safe_bytes('CONTENT'))
                      oid_handler.obj_size = 7
                      response, has_errors = oid_handler.exec_operation('upload')
                      assert has_errors is None
                          os.makedirs(os.path.dirname(store.oid_path))
                      with open(store.oid_path, 'wb') as f:
-                         f.write('CONTENT')
+                         f.write(safe_bytes('CONTENT'))
                      oid_handler.obj_size = 10240
                      response, has_errors = oid_handler.exec_operation('upload')
                      }
-             class TestLFSStore(object):
+             class TestLFSStore:
                  def test_write_oid(self, lfs_store):
                      oid_location = lfs_store.oid_path
                      engine = lfs_store.get_engine(mode='wb')
                      with engine as f:
-                         f.write('CONTENT')
+                         f.write(safe_bytes('CONTENT'))
                      assert os.path.isfile(oid_location)
                      assert lfs_store.has_oid() is False
                      engine = lfs_store.get_engine(mode='wb')
                      with engine as f:
-                         f.write('CONTENT')
+                         f.write(safe_bytes('CONTENT'))
-                     assert lfs_store.has_oid() is True
  No newline at end of file
+                     assert lfs_store.has_oid() is True

vcsserver/git_lfs/utils.py

0 +1 -1

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by

vcsserver/hgcompat.py

0 +19 -6

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              import mercurial
              from mercurial import demandimport
              # patch demandimport, due to bug in mercurial when it always triggers
              # demandimport.enable()
+             from vcsserver.str_utils import safe_bytes
              demandimport.enable = lambda *args, **kwargs: 1
              from mercurial import ui
              from mercurial import subrepoutil
              from mercurial import tags as hg_tag
              from mercurial import util as hgutil
-             from mercurial.commands import clone, nullid, pull
+             from mercurial.commands import clone, pull
+             from mercurial.node import nullid
              from mercurial.context import memctx, memfilectx
              from mercurial.error import (
                  LookupError, RepoError, RepoLookupError, Abort, InterventionRequired,
              from mercurial.discovery import findcommonoutgoing
              from mercurial.hg import peer
              from mercurial.httppeer import makepeer
-             from mercurial.util import url as hg_url
+             from mercurial.utils.urlutil import url as hg_url
              from mercurial.scmutil import revrange, revsymbol
              from mercurial.node import nullrev
              from mercurial import exchange
              # infinite looping when given invalid resources
              from mercurial.url import httpbasicauthhandler, httpdigestauthhandler
+             # hg strip is in core now
+             from mercurial import strip as hgext_strip
              def get_ctx(repo, ref):
+                 if not isinstance(ref, int):
+                     ref = safe_bytes(ref)
                  try:
                      ctx = repo[ref]
+                     return ctx
                  except (ProgrammingError, TypeError):
                      # we're unable to find the rev using a regular lookup, so we fall back
                      # to the slower, but backward-compatible, revsymbol usage
-                     ctx = revsymbol(repo, ref)
+                     pass
                  except (LookupError, RepoLookupError):
                      # Similar case as above but only for refs that are not numeric
-                     if isinstance(ref, (int, long)):
+                     if isinstance(ref, int):
                          raise
-                     ctx = revsymbol(repo, ref)
+                 ctx = revsymbol(repo, ref)
                  return ctx

vcsserver/hgpatches.py

0 +2 -2

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              def patch_subrepo_type_mapping():
                  from collections import defaultdict
-                 from hgcompat import subrepo, subrepoutil
+                 from .hgcompat import subrepo, subrepoutil
                  from vcsserver.exceptions import SubrepoMergeException
                  class NoOpSubrepo(subrepo.abstractsubrepo):

vcsserver/hook_utils/__init__.py

0 +42 -27

-             # -*- coding: utf-8 -*-
-             # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              import pkg_resources
              import vcsserver
+             from vcsserver.str_utils import safe_bytes
              log = logging.getLogger(__name__)
+             HOOKS_DIR_MODE = 0o755
+             HOOKS_FILE_MODE = 0o755
+             def set_permissions_if_needed(path_to_check, perms: oct):
+                 # Get current permissions
+                 current_permissions = os.stat(path_to_check).st_mode & 0o777  # Extract permission bits
+                 # Check if current permissions are lower than required
+                 if current_permissions < int(perms):
+                     # Change the permissions if they are lower than required
+                     os.chmod(path_to_check, perms)
              def get_git_hooks_path(repo_path, bare):
                  hooks_path = os.path.join(repo_path, 'hooks')
                  Creates a RhodeCode hook inside a git repository
                  :param repo_path: path to repository
+                 :param bare: defines if repository is considered a bare git repo
                  :param executable: binary executable to put in the hooks
-                 :param force_create: Create even if same name hook exists
+                 :param force_create: Creates even if the same name hook exists
                  """
                  executable = executable or sys.executable
                  hooks_path = get_git_hooks_path(repo_path, bare)
-                 if not os.path.isdir(hooks_path):
-                     os.makedirs(hooks_path, mode=0o777)
+                 # we always call it to ensure dir exists and it has a proper mode
+                 if not os.path.exists(hooks_path):
+                     # If it doesn't exist, create a new directory with the specified mode
+                     os.makedirs(hooks_path, mode=HOOKS_DIR_MODE, exist_ok=True)
+                 # If it exists, change the directory's mode to the specified mode
+                 set_permissions_if_needed(hooks_path, perms=HOOKS_DIR_MODE)
                  tmpl_post = pkg_resources.resource_string(
                      'vcsserver', '/'.join(
                  for h_type, template in [('pre', tmpl_pre), ('post', tmpl_post)]:
                      log.debug('Installing git hook in repo %s', repo_path)
-                     _hook_file = os.path.join(hooks_path, '%s-receive' % h_type)
+                     _hook_file = os.path.join(hooks_path, f'{h_type}-receive')
                      _rhodecode_hook = check_rhodecode_hook(_hook_file)
                      if _rhodecode_hook or force_create:
                          log.debug('writing git %s hook file at %s !', h_type, _hook_file)
                          try:
                              with open(_hook_file, 'wb') as f:
-                                 template = template.replace(
-                                     '_TMPL_', vcsserver.__version__)
-                                 template = template.replace('_DATE_', timestamp)
-                                 template = template.replace('_ENV_', executable)
-                                 template = template.replace('_PATH_', path)
+                                 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
+                                 template = template.replace(b'_DATE_', safe_bytes(timestamp))
+                                 template = template.replace(b'_ENV_', safe_bytes(executable))
+                                 template = template.replace(b'_PATH_', safe_bytes(path))
                                  f.write(template)
-                             os.chmod(_hook_file, 0o755)
-                         except IOError:
+                             set_permissions_if_needed(_hook_file, perms=HOOKS_FILE_MODE)
+                         except OSError:
                              log.exception('error writing hook file %s', _hook_file)
                      else:
                          log.debug('skipping writing hook file')
                  executable = executable or sys.executable
                  hooks_path = get_svn_hooks_path(repo_path)
                  if not os.path.isdir(hooks_path):
-                     os.makedirs(hooks_path, mode=0o777)
+                     os.makedirs(hooks_path, mode=0o777, exist_ok=True)
                  tmpl_post = pkg_resources.resource_string(
                      'vcsserver', '/'.join(
                  for h_type, template in [('pre', tmpl_pre), ('post', tmpl_post)]:
                      log.debug('Installing svn hook in repo %s', repo_path)
-                     _hook_file = os.path.join(hooks_path, '%s-commit' % h_type)
+                     _hook_file = os.path.join(hooks_path, f'{h_type}-commit')
                      _rhodecode_hook = check_rhodecode_hook(_hook_file)
                      if _rhodecode_hook or force_create:
                          try:
                              with open(_hook_file, 'wb') as f:
-                                 template = template.replace(
-                                     '_TMPL_', vcsserver.__version__)
-                                 template = template.replace('_DATE_', timestamp)
-                                 template = template.replace('_ENV_', executable)
-                                 template = template.replace('_PATH_', path)
+                                 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
+                                 template = template.replace(b'_DATE_', safe_bytes(timestamp))
+                                 template = template.replace(b'_ENV_', safe_bytes(executable))
+                                 template = template.replace(b'_PATH_', safe_bytes(path))
                                  f.write(template)
                              os.chmod(_hook_file, 0o755)
-                         except IOError:
+                         except OSError:
                              log.exception('error writing hook file %s', _hook_file)
                      else:
                          log.debug('skipping writing hook file')
              def get_version_from_hook(hook_path):
-                 version = ''
+                 version = b''
                  hook_content = read_hook_content(hook_path)
-                 matches = re.search(r'(?:RC_HOOK_VER)\s*=\s*(.*)', hook_content)
+                 matches = re.search(rb'RC_HOOK_VER\s*=\s*(.*)', hook_content)
                  if matches:
                      try:
                          version = matches.groups()[0]
                          log.debug('got version %s from hooks.', version)
                      except Exception:
                          log.exception("Exception while reading the hook version.")
-                 return version.replace("'", "")
+                 return version.replace(b"'", b"")
              def check_rhodecode_hook(hook_path):
                  return False
-             def read_hook_content(hook_path):
-                 content = ''
+             def read_hook_content(hook_path) -> bytes:
+                 content = b''
                  if os.path.isfile(hook_path):
                      with open(hook_path, 'rb') as f:
                          content = f.read()

vcsserver/hook_utils/hook_templates/git_post_receive.py.tmpl

0 +2 -2

              except ImportError:
                  if os.environ.get('RC_DEBUG_GIT_HOOK'):
                      import traceback
-                     print traceback.format_exc()
+                     print(traceback.format_exc())
                  hooks = None
                      # TODO: johbo: Improve handling of this special case
                      if not getattr(error, '_vcs_kind', None) == 'repo_locked':
                          raise
-                     print 'ERROR:', error
+                     print(f'ERROR: {error}')
                      sys.exit(1)
                  sys.exit(0)

vcsserver/hook_utils/hook_templates/git_pre_receive.py.tmpl

0 +2 -2

              except ImportError:
                  if os.environ.get('RC_DEBUG_GIT_HOOK'):
                      import traceback
-                     print traceback.format_exc()
+                     print(traceback.format_exc())
                  hooks = None
                      # TODO: johbo: Improve handling of this special case
                      if not getattr(error, '_vcs_kind', None) == 'repo_locked':
                          raise
-                     print 'ERROR:', error
+                     print(f'ERROR: {error}')
                      sys.exit(1)
                  sys.exit(0)

vcsserver/hook_utils/hook_templates/svn_post_commit_hook.py.tmpl

0 +2 -2

              except ImportError:
                  if os.environ.get('RC_DEBUG_SVN_HOOK'):
                      import traceback
-                     print traceback.format_exc()
+                     print(traceback.format_exc())
                  hooks = None
                      # TODO: johbo: Improve handling of this special case
                      if not getattr(error, '_vcs_kind', None) == 'repo_locked':
                          raise
-                     print 'ERROR:', error
+                     print(f'ERROR: {error}')
                      sys.exit(1)
                  sys.exit(0)

vcsserver/hook_utils/hook_templates/svn_pre_commit_hook.py.tmpl

0 +2 -2

              except ImportError:
                  if os.environ.get('RC_DEBUG_SVN_HOOK'):
                      import traceback
-                     print traceback.format_exc()
+                     print(traceback.format_exc())
                  hooks = None
                      # TODO: johbo: Improve handling of this special case
                      if not getattr(error, '_vcs_kind', None) == 'repo_locked':
                          raise
-                     print 'ERROR:', error
+                     print(f'ERROR: {error}')
                      sys.exit(1)
                  sys.exit(0)

vcsserver/hooks.py

0 +169 -103

-             # -*- coding: utf-8 -*-
-             # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              import sys
              import logging
              import collections
-             import importlib
              import base64
+             import msgpack
+             import dataclasses
+             import pygit2
-             from httplib import HTTPConnection
+             import http.client
+             from celery import Celery
              import mercurial.scmutil
              import mercurial.node
-             import simplejson as json
+             from vcsserver.lib.rc_json import json
              from vcsserver import exceptions, subprocessio, settings
+             from vcsserver.str_utils import ascii_str, safe_str
+             from vcsserver.remote.git_remote import Repository
+             celery_app = Celery('__vcsserver__')
              log = logging.getLogger(__name__)
-             class HooksHttpClient(object):
+             class HooksHttpClient:
+                 proto = 'msgpack.v1'
                  connection = None
                  def __init__(self, hooks_uri):
                      self.hooks_uri = hooks_uri
+                 def __repr__(self):
+                     return f'{self.__class__}(hook_uri={self.hooks_uri}, proto={self.proto})'
                  def __call__(self, method, extras):
-                     connection = HTTPConnection(self.hooks_uri)
-                     body = self._serialize(method, extras)
-                     try:
-                         connection.request('POST', '/', body)
-                     except Exception:
-                         log.error('Hooks calling Connection failed on %s', connection.__dict__)
-                         raise
-                     response = connection.getresponse()
-                     response_data = response.read()
+                     connection = http.client.HTTPConnection(self.hooks_uri)
+                     # binary msgpack body
+                     headers, body = self._serialize(method, extras)
+                     log.debug('Doing a new hooks call using HTTPConnection to %s', self.hooks_uri)
                      try:
-                         return json.loads(response_data)
-                     except Exception:
-                         log.exception('Failed to decode hook response json data. '
-                                       'response_code:%s, raw_data:%s',
-                                       response.status, response_data)
-                         raise
+                         try:
+                             connection.request('POST', '/', body, headers)
+                         except Exception as error:
+                             log.error('Hooks calling Connection failed on %s, org error: %s', connection.__dict__, error)
+                             raise
-                 def _serialize(self, hook_name, extras):
+                         response = connection.getresponse()
+                         try:
+                             return msgpack.load(response)
+                         except Exception:
+                             response_data = response.read()
+                             log.exception('Failed to decode hook response json data. '
+                                           'response_code:%s, raw_data:%s',
+                                           response.status, response_data)
+                             raise
+                     finally:
+                         connection.close()
+                 @classmethod
+                 def _serialize(cls, hook_name, extras):
                      data = {
                          'method': hook_name,
                          'extras': extras
                      }
-                     return json.dumps(data)
+                     headers = {
+                         "rc-hooks-protocol": cls.proto,
+                         "Connection": "keep-alive"
+                     }
+                     return headers, msgpack.packb(data)
-             class HooksDummyClient(object):
-                 def __init__(self, hooks_module):
-                     self._hooks_module = importlib.import_module(hooks_module)
+             class HooksCeleryClient:
+                 TASK_TIMEOUT = 60  # time in seconds
-                 def __call__(self, hook_name, extras):
-                     with self._hooks_module.Hooks() as hooks:
-                         return getattr(hooks, hook_name)(extras)
+                 def __init__(self, queue, backend):
+                     celery_app.config_from_object({
+                         'broker_url': queue, 'result_backend': backend,
+                         'broker_connection_retry_on_startup': True,
+                         'task_serializer': 'msgpack',
+                         'accept_content': ['json', 'msgpack'],
+                         'result_serializer': 'msgpack',
+                         'result_accept_content': ['json', 'msgpack']
+                     })
+                     self.celery_app = celery_app
+                 def __call__(self, method, extras):
+                     inquired_task = self.celery_app.signature(
+                         f'rhodecode.lib.celerylib.tasks.{method}'
+                     )
+                     return inquired_task.delay(extras).get(timeout=self.TASK_TIMEOUT)
-             class HooksShadowRepoClient(object):
+             class HooksShadowRepoClient:
                  def __call__(self, hook_name, extras):
                      return {'output': '', 'status': 0}
-             class RemoteMessageWriter(object):
+             class RemoteMessageWriter:
                  """Writer base class."""
                  def write(self, message):
                      raise NotImplementedError()
                  def __init__(self, ui):
                      self.ui = ui
-                 def write(self, message):
+                 def write(self, message: str):
                      # TODO: Check why the quiet flag is set by default.
                      old = self.ui.quiet
                      self.ui.quiet = False
                  def __init__(self, stdout=None):
                      self.stdout = stdout or sys.stdout
-                 def write(self, message):
-                     self.stdout.write(message.encode('utf-8'))
+                 def write(self, message: str):
+                     self.stdout.write(message)
              class SvnMessageWriter(RemoteMessageWriter):
              def _handle_exception(result):
                  exception_class = result.get('exception')
                  exception_traceback = result.get('exception_traceback')
+                 log.debug('Handling hook-call exception: %s', exception_class)
                  if exception_traceback:
                      log.error('Got traceback from remote call:%s', exception_traceback)
                  elif exception_class == 'RepositoryError':
                      raise exceptions.VcsException()(*result['exception_args'])
                  elif exception_class:
-                     raise Exception('Got remote exception "%s" with args "%s"' %
-                                     (exception_class, result['exception_args']))
+                     raise Exception(
+                         f"""Got remote exception "{exception_class}" with args "{result['exception_args']}" """
+                     )
              def _get_hooks_client(extras):
                  hooks_uri = extras.get('hooks_uri')
+                 task_queue = extras.get('task_queue')
+                 task_backend = extras.get('task_backend')
                  is_shadow_repo = extras.get('is_shadow_repo')
                  if hooks_uri:
-                     return HooksHttpClient(extras['hooks_uri'])
+                     return HooksHttpClient(hooks_uri)
+                 elif task_queue and task_backend:
+                     return HooksCeleryClient(task_queue, task_backend)
                  elif is_shadow_repo:
                      return HooksShadowRepoClient()
                  else:
-                     return HooksDummyClient(extras['hooks_module'])
+                     raise Exception("Hooks client not found!")
              def _call_hook(hook_name, extras, writer):
                  log.debug('Hooks, using client:%s', hooks_client)
                  result = hooks_client(hook_name, extras)
                  log.debug('Hooks got result: %s', result)
                  _handle_exception(result)
                  writer.write(result['output'])
              def _extras_from_ui(ui):
-                 hook_data = ui.config('rhodecode', 'RC_SCM_DATA')
+                 hook_data = ui.config(b'rhodecode', b'RC_SCM_DATA')
                  if not hook_data:
                      # maybe it's inside environ ?
                      env_hook_data = os.environ.get('RC_SCM_DATA')
                  for rev in range(start, end):
                      revs.append(rev)
                      ctx = get_ctx(repo, rev)
-                     commit_id = mercurial.node.hex(ctx.node())
-                     branch = ctx.branch()
+                     commit_id = ascii_str(mercurial.node.hex(ctx.node()))
+                     branch = safe_str(ctx.branch())
                      commits.append((commit_id, branch))
                  parent_heads = []
                  for p in parents:
                      branch = get_ctx(repo, p).branch()
                      # The heads descending from that parent, on the same branch
-                     parent_heads = set([p])
-                     reachable = set([p])
-                     for x in xrange(p + 1, end):
+                     parent_heads = {p}
+                     reachable = {p}
+                     for x in range(p + 1, end):
                          if get_ctx(repo, x).branch() != branch:
                              continue
                          for pp in changelog.parentrevs(x):
                  detect_force_push = extras.get('detect_force_push')
                  rev_data = []
-                 if node and kwargs.get('hooktype') == 'pretxnchangegroup':
+                 hook_type: str = safe_str(kwargs.get('hooktype'))
+                 if node and hook_type == 'pretxnchangegroup':
                      branches = collections.defaultdict(list)
                      commits, _heads = _rev_range_hash(repo, node, check_heads=detect_force_push)
                      for commit_id, branch in commits:
                          branches[branch].append(commit_id)
                      for branch, commits in branches.items():
-                         old_rev = kwargs.get('node_last') or commits[0]
+                         old_rev = ascii_str(kwargs.get('node_last')) or commits[0]
                          rev_data.append({
                              'total_commits': len(commits),
                              'old_rev': old_rev,
                              extras.get('repo_store', ''), extras.get('repository', ''))
                          push_ref['hg_env'] = _get_hg_env(
                              old_rev=push_ref['old_rev'],
-                             new_rev=push_ref['new_rev'], txnid=kwargs.get('txnid'),
+                             new_rev=push_ref['new_rev'], txnid=ascii_str(kwargs.get('txnid')),
                              repo_path=repo_path)
-                 extras['hook_type'] = kwargs.get('hooktype', 'pre_push')
+                 extras['hook_type'] = hook_type or 'pre_push'
                  extras['commit_ids'] = rev_data
                  return _call_hook('pre_push', extras, HgMessageWriter(ui))
                  branches = []
                  bookmarks = []
                  tags = []
+                 hook_type: str = safe_str(kwargs.get('hooktype'))
                  commits, _heads = _rev_range_hash(repo, node)
                  for commit_id, branch in commits:
                      if branch not in branches:
                          branches.append(branch)
-                 if hasattr(ui, '_rc_pushkey_branches'):
-                     bookmarks = ui._rc_pushkey_branches
+                 if hasattr(ui, '_rc_pushkey_bookmarks'):
+                     bookmarks = ui._rc_pushkey_bookmarks
-                 extras['hook_type'] = kwargs.get('hooktype', 'post_push')
+                 extras['hook_type'] = hook_type or 'post_push'
                  extras['commit_ids'] = commit_ids
                  extras['new_refs'] = {
                      'branches': branches,
                      'bookmarks': bookmarks,
              def key_push(ui, repo, **kwargs):
                  from vcsserver.hgcompat import get_ctx
-                 if kwargs['new'] != '0' and kwargs['namespace'] == 'bookmarks':
+                 if kwargs['new'] != b'0' and kwargs['namespace'] == b'bookmarks':
                      # store new bookmarks in our UI object propagated later to post_push
-                     ui._rc_pushkey_branches = get_ctx(repo, kwargs['key']).bookmarks()
+                     ui._rc_pushkey_bookmarks = get_ctx(repo, kwargs['key']).bookmarks()
                  return
                  pass
-             HookResponse = collections.namedtuple('HookResponse', ('status', 'output'))
+             @dataclasses.dataclass
+             class HookResponse:
+                 status: int
+                 output: str
-             def git_pre_pull(extras):
+             def git_pre_pull(extras) -> HookResponse:
                  """
                  Pre pull hook.
                  :return: status code of the hook. 0 for success.
                  :rtype: int
                  """
                  if 'pull' not in extras['hooks']:
                      return HookResponse(0, '')
-                 stdout = io.BytesIO()
+                 stdout = io.StringIO()
                  try:
-                     status = _call_hook('pre_pull', extras, GitMessageWriter(stdout))
+                     status_code = _call_hook('pre_pull', extras, GitMessageWriter(stdout))
                  except Exception as error:
-                     status = 128
-                     stdout.write('ERROR: %s\n' % str(error))
+                     log.exception('Failed to call pre_pull hook')
+                     status_code = 128
+                     stdout.write(f'ERROR: {error}\n')
-                 return HookResponse(status, stdout.getvalue())
+                 return HookResponse(status_code, stdout.getvalue())
-             def git_post_pull(extras):
+             def git_post_pull(extras) -> HookResponse:
                  """
                  Post pull hook.
                  if 'pull' not in extras['hooks']:
                      return HookResponse(0, '')
-                 stdout = io.BytesIO()
+                 stdout = io.StringIO()
                  try:
                      status = _call_hook('post_pull', extras, GitMessageWriter(stdout))
                  except Exception as error:
                      status = 128
-                     stdout.write('ERROR: %s\n' % error)
+                     stdout.write(f'ERROR: {error}\n')
                  return HookResponse(status, stdout.getvalue())
                  return rev_data
-             def git_pre_receive(unused_repo_path, revision_lines, env):
+             def git_pre_receive(unused_repo_path, revision_lines, env) -> int:
                  """
                  Pre push hook.
-                 :param extras: dictionary containing the keys defined in simplevcs
-                 :type extras: dict
                  :return: status code of the hook. 0 for success.
-                 :rtype: int
                  """
                  extras = json.loads(env['RC_SCM_DATA'])
                  rev_data = _parse_git_ref_lines(revision_lines)
                      if type_ == 'heads' and not (new_branch or delete_branch):
                          old_rev = push_ref['old_rev']
                          new_rev = push_ref['new_rev']
-                         cmd = [settings.GIT_EXECUTABLE, 'rev-list', old_rev, '^{}'.format(new_rev)]
+                         cmd = [settings.GIT_EXECUTABLE, 'rev-list', old_rev, f'^{new_rev}']
                          stdout, stderr = subprocessio.run_command(
                              cmd, env=os.environ.copy())
                          # means we're having some non-reachable objects, this forced push was used
                  extras['hook_type'] = 'pre_receive'
                  extras['commit_ids'] = rev_data
-                 return _call_hook('pre_push', extras, GitMessageWriter())
+                 stdout = sys.stdout
+                 status_code = _call_hook('pre_push', extras, GitMessageWriter(stdout))
+                 return status_code
-             def git_post_receive(unused_repo_path, revision_lines, env):
+             def git_post_receive(unused_repo_path, revision_lines, env) -> int:
                  """
                  Post push hook.
-                 :param extras: dictionary containing the keys defined in simplevcs
-                 :type extras: dict
                  :return: status code of the hook. 0 for success.
-                 :rtype: int
                  """
                  extras = json.loads(env['RC_SCM_DATA'])
                  if 'push' not in extras['hooks']:
                      type_ = push_ref['type']
                      if type_ == 'heads':
+                         # starting new branch case
                          if push_ref['old_rev'] == empty_commit_id:
-                             # starting new branch case
-                             if push_ref['name'] not in branches:
-                                 branches.append(push_ref['name'])
+                             push_ref_name = push_ref['name']
+                             if push_ref_name not in branches:
+                                 branches.append(push_ref_name)
-                             # Fix up head revision if needed
-                             cmd = [settings.GIT_EXECUTABLE, 'show', 'HEAD']
-                             try:
-                                 subprocessio.run_command(cmd, env=os.environ.copy())
-                             except Exception:
-                                 cmd = [settings.GIT_EXECUTABLE, 'symbolic-ref', 'HEAD',
-                                        'refs/heads/%s' % push_ref['name']]
-                                 print("Setting default branch to %s" % push_ref['name'])
-                                 subprocessio.run_command(cmd, env=os.environ.copy())
+                             need_head_set = ''
+                             with Repository(os.getcwd()) as repo:
+                                 try:
+                                     repo.head
+                                 except pygit2.GitError:
+                                     need_head_set = f'refs/heads/{push_ref_name}'
-                             cmd = [settings.GIT_EXECUTABLE, 'for-each-ref',
-                                    '--format=%(refname)', 'refs/heads/*']
+                                 if need_head_set:
+                                     repo.set_head(need_head_set)
+                                     print(f"Setting default branch to {push_ref_name}")
+                             cmd = [settings.GIT_EXECUTABLE, 'for-each-ref', '--format=%(refname)', 'refs/heads/*']
                              stdout, stderr = subprocessio.run_command(
                                  cmd, env=os.environ.copy())
-                             heads = stdout
+                             heads = safe_str(stdout)
                              heads = heads.replace(push_ref['ref'], '')
                              heads = ' '.join(head for head
                                               in heads.splitlines() if head) or '.'
                                     '--not', heads]
                              stdout, stderr = subprocessio.run_command(
                                  cmd, env=os.environ.copy())
-                             git_revs.extend(stdout.splitlines())
+                             git_revs.extend(list(map(ascii_str, stdout.splitlines())))
+                         # delete branch case
                          elif push_ref['new_rev'] == empty_commit_id:
-                             # delete branch case
-                             git_revs.append('delete_branch=>%s' % push_ref['name'])
+                             git_revs.append(f'delete_branch=>{push_ref["name"]}')
                          else:
                              if push_ref['name'] not in branches:
                                  branches.append(push_ref['name'])
                              cmd = [settings.GIT_EXECUTABLE, 'log',
-                                    '{old_rev}..{new_rev}'.format(**push_ref),
+                                    f'{push_ref["old_rev"]}..{push_ref["new_rev"]}',
                                     '--reverse', '--pretty=format:%H']
                              stdout, stderr = subprocessio.run_command(
                                  cmd, env=os.environ.copy())
-                             git_revs.extend(stdout.splitlines())
+                             # we get bytes from stdout, we need str to be consistent
+                             log_revs = list(map(ascii_str, stdout.splitlines()))
+                             git_revs.extend(log_revs)
+                             # Pure pygit2 impl. but still 2-3x slower :/
+                             # results = []
+                             #
+                             # with Repository(os.getcwd()) as repo:
+                             #     repo_new_rev = repo[push_ref['new_rev']]
+                             #     repo_old_rev = repo[push_ref['old_rev']]
+                             #     walker = repo.walk(repo_new_rev.id, pygit2.GIT_SORT_TOPOLOGICAL)
+                             #
+                             #     for commit in walker:
+                             #         if commit.id == repo_old_rev.id:
+                             #             break
+                             #         results.append(commit.id.hex)
+                             #     # reverse the order, can't use GIT_SORT_REVERSE
+                             #     log_revs = results[::-1]
                      elif type_ == 'tags':
                          if push_ref['name'] not in tags:
                              tags.append(push_ref['name'])
-                         git_revs.append('tag=>%s' % push_ref['name'])
+                         git_revs.append(f'tag=>{push_ref["name"]}')
                  extras['hook_type'] = 'post_receive'
                  extras['commit_ids'] = git_revs
                      'tags': tags,
                  }
+                 stdout = sys.stdout
                  if 'repo_size' in extras['hooks']:
                      try:
-                         _call_hook('repo_size', extras, GitMessageWriter())
-                     except:
+                         _call_hook('repo_size', extras, GitMessageWriter(stdout))
+                     except Exception:
                          pass
-                 return _call_hook('post_push', extras, GitMessageWriter())
+                 status_code = _call_hook('post_push', extras, GitMessageWriter(stdout))
+                 return status_code
              def _get_extras_from_txn_id(path, txn_id):

vcsserver/http_main.py

0 +231 -161

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
+             import io
              import os
+             import platform
              import sys
-             import base64
              import locale
              import logging
              import uuid
+             import time
              import wsgiref.util
-             import traceback
              import tempfile
              import psutil
              from itertools import chain
-             from cStringIO import StringIO
-             import simplejson as json
              import msgpack
+             import configparser
              from pyramid.config import Configurator
-             from pyramid.settings import asbool, aslist
              from pyramid.wsgi import wsgiapp
-             from pyramid.compat import configparser
              from pyramid.response import Response
-             from vcsserver.utils import safe_int
+             from vcsserver.base import BytesEnvelope, BinaryEnvelope
+             from vcsserver.lib.rc_json import json
+             from vcsserver.config.settings_maker import SettingsMaker
+             from vcsserver.str_utils import safe_int
+             from vcsserver.lib.statsd_client import StatsdClient
+             from vcsserver.tweens.request_wrapper import get_headers_call_context
+             import vcsserver
+             from vcsserver import remote_wsgi, scm_app, settings, hgpatches
+             from vcsserver.git_lfs.app import GIT_LFS_CONTENT_TYPE, GIT_LFS_PROTO_PAT
+             from vcsserver.echo_stub import remote_wsgi as remote_wsgi_stub
+             from vcsserver.echo_stub.echo_app import EchoApp
+             from vcsserver.exceptions import HTTPRepoLocked, HTTPRepoBranchProtected
+             from vcsserver.lib.exc_tracking import store_exception, format_exc
+             from vcsserver.server import VcsServer
+             strict_vcs = True
+             git_import_err = None
+             try:
+                 from vcsserver.remote.git_remote import GitFactory, GitRemote
+             except ImportError as e:
+                 GitFactory = None
+                 GitRemote = None
+                 git_import_err = e
+                 if strict_vcs:
+                     raise
+             hg_import_err = None
+             try:
+                 from vcsserver.remote.hg_remote import MercurialFactory, HgRemote
+             except ImportError as e:
+                 MercurialFactory = None
+                 HgRemote = None
+                 hg_import_err = e
+                 if strict_vcs:
+                     raise
+             svn_import_err = None
+             try:
+                 from vcsserver.remote.svn_remote import SubversionFactory, SvnRemote
+             except ImportError as e:
+                 SubversionFactory = None
+                 SvnRemote = None
+                 svn_import_err = e
+                 if strict_vcs:
+                     raise
              log = logging.getLogger(__name__)
                      'LOCALE ERROR: failed to set LC_ALL, fallback to LC_ALL=C, org error: %s', e)
                  os.environ['LC_ALL'] = 'C'
-             import vcsserver
-             from vcsserver import remote_wsgi, scm_app, settings, hgpatches
-             from vcsserver.git_lfs.app import GIT_LFS_CONTENT_TYPE, GIT_LFS_PROTO_PAT
-             from vcsserver.echo_stub import remote_wsgi as remote_wsgi_stub
-             from vcsserver.echo_stub.echo_app import EchoApp
-             from vcsserver.exceptions import HTTPRepoLocked, HTTPRepoBranchProtected
-             from vcsserver.lib.exc_tracking import store_exception
-             from vcsserver.server import VcsServer
-             try:
-                 from vcsserver.git import GitFactory, GitRemote
-             except ImportError:
-                 GitFactory = None
-                 GitRemote = None
-             try:
-                 from vcsserver.hg import MercurialFactory, HgRemote
-             except ImportError:
-                 MercurialFactory = None
-                 HgRemote = None
-             try:
-                 from vcsserver.svn import SubversionFactory, SvnRemote
-             except ImportError:
-                 SubversionFactory = None
-                 SvnRemote = None
              def _is_request_chunked(environ):
                  stream = environ.get('HTTP_TRANSFER_ENCODING', '') == 'chunked'
                  return stream
-             def _int_setting(settings, name, default):
-                 settings[name] = int(settings.get(name, default))
-                 return settings[name]
-             def _bool_setting(settings, name, default):
-                 input_val = settings.get(name, default)
-                 if isinstance(input_val, unicode):
-                     input_val = input_val.encode('utf8')
-                 settings[name] = asbool(input_val)
-                 return settings[name]
-             def _list_setting(settings, name, default):
-                 raw_value = settings.get(name, default)
-                 # Otherwise we assume it uses pyramids space/newline separation.
-                 settings[name] = aslist(raw_value)
-                 return settings[name]
-             def _string_setting(settings, name, default, lower=True, default_when_empty=False):
-                 value = settings.get(name, default)
-                 if default_when_empty and not value:
-                     # use default value when value is empty
-                     value = default
-                 if lower:
-                     value = value.lower()
-                 settings[name] = value
-                 return settings[name]
              def log_max_fd():
                  try:
                      maxfd = psutil.Process().rlimit(psutil.RLIMIT_NOFILE)[1]
                      pass
-             class VCS(object):
+             class VCS:
                  def __init__(self, locale_conf=None, cache_config=None):
                      self.locale = locale_conf
                      self.cache_config = cache_config
                          git_factory = GitFactory()
                          self._git_remote = GitRemote(git_factory)
                      else:
-                         log.info("Git client import failed")
+                         log.error("Git client import failed: %s", git_import_err)
                      if MercurialFactory and HgRemote:
                          hg_factory = MercurialFactory()
                          self._hg_remote = HgRemote(hg_factory)
                      else:
-                         log.info("Mercurial client import failed")
+                         log.error("Mercurial client import failed: %s", hg_import_err)
                      if SubversionFactory and SvnRemote:
                          svn_factory = SubversionFactory()
                          hg_factory = MercurialFactory()
                          self._svn_remote = SvnRemote(svn_factory, hg_factory=hg_factory)
                      else:
-                         log.info("Subversion client import failed")
+                         log.error("Subversion client import failed: %s", svn_import_err)
                      self._vcsserver = VcsServer()
                      if self.locale:
                          log.info('Settings locale: `LC_ALL` to %s', self.locale)
                      else:
-                         log.info(
-                             'Configuring locale subsystem based on environment variables')
+                         log.info('Configuring locale subsystem based on environment variables')
                      try:
                          # If self.locale is the empty string, then the locale
                          # module will use the environment variables. See the
                              'Locale set to language code "%s" with encoding "%s".',
                              language_code, encoding)
                      except locale.Error:
-                         log.exception(
-                             'Cannot set locale, not configuring the locale system')
+                         log.exception('Cannot set locale, not configuring the locale system')
-             class WsgiProxy(object):
+             class WsgiProxy:
                  def __init__(self, wsgi):
                      self.wsgi = wsgi
                  return {'status': '404 NOT FOUND'}
-             class VCSViewPredicate(object):
+             class VCSViewPredicate:
                  def __init__(self, val, config):
                      self.remotes = val
                  def text(self):
-                     return 'vcs view method = %s' % (self.remotes.keys(),)
+                     return f'vcs view method = {list(self.remotes.keys())}'
                  phash = text
                      return backend in self.remotes
-             class HTTPApplication(object):
+             class HTTPApplication:
                  ALLOWED_EXCEPTIONS = ('KeyError', 'URLError')
                  remote_wsgi = remote_wsgi
                  _use_echo_app = False
                  def __init__(self, settings=None, global_config=None):
-                     self._sanitize_settings_and_apply_defaults(settings)
                      self.config = Configurator(settings=settings)
+                     # Init our statsd at very start
+                     self.config.registry.statsd = StatsdClient.statsd
+                     self.config.registry.vcs_call_context = {}
                      self.global_config = global_config
                      self.config.include('vcsserver.lib.rc_cache')
+                     self.config.include('vcsserver.lib.rc_cache.archive_cache')
                      settings_locale = settings.get('locale', '') or 'en_US.UTF-8'
                      vcs = VCS(locale_conf=settings_locale, cache_config=settings)
                      vcsserver.PYRAMID_SETTINGS = settings_merged
                      vcsserver.CONFIG = settings_merged
-                 def _sanitize_settings_and_apply_defaults(self, settings):
-                     temp_store = tempfile.gettempdir()
-                     default_cache_dir = os.path.join(temp_store, 'rc_cache')
-                     # save default, cache dir, and use it for all backends later.
-                     default_cache_dir = _string_setting(
-                         settings,
-                         'cache_dir',
-                         default_cache_dir, lower=False, default_when_empty=True)
-                     # ensure we have our dir created
-                     if not os.path.isdir(default_cache_dir):
-                         os.makedirs(default_cache_dir, mode=0o755)
-                     # exception store cache
-                     _string_setting(
-                         settings,
-                         'exception_tracker.store_path',
-                         temp_store, lower=False, default_when_empty=True)
-                     # repo_object cache
-                     _string_setting(
-                         settings,
-                         'rc_cache.repo_object.backend',
-                         'dogpile.cache.rc.file_namespace', lower=False)
-                     _int_setting(
-                         settings,
-                         'rc_cache.repo_object.expiration_time',
-* 24 * 60 * 60)
-                     _string_setting(
-                         settings,
-                         'rc_cache.repo_object.arguments.filename',
-                         os.path.join(default_cache_dir, 'vcsserver_cache_1'), lower=False)
                  def _configure(self):
                      self.config.add_renderer(name='msgpack', factory=self._msgpack_renderer_factory)
                          'vcsserver.lib.request_counter.get_request_counter',
                          'request_count')
-                     self.config.add_request_method(
-                         'vcsserver.lib._vendor.statsd.get_statsd_client',
-                         'statsd', reify=True)
                  def wsgi_app(self):
                      return self.config.make_wsgi_app()
                  def _vcs_view_params(self, request):
                      remote = self._remotes[request.matchdict['backend']]
                      payload = msgpack.unpackb(request.body, use_list=True)
                      method = payload.get('method')
                      params = payload['params']
                      wire = params.get('wire')
                      kwargs = params.get('kwargs')
                      context_uid = None
+                     request.registry.vcs_call_context = {
+                         'method': method,
+                         'repo_name': payload.get('_repo_name'),
+                     }
                      if wire:
                          try:
                              wire['context'] = context_uid = uuid.UUID(wire['context'])
                      # NOTE(marcink): trading complexity for slight performance
                      if log.isEnabledFor(logging.DEBUG):
-                         no_args_methods = [
+                         ]
-                         if method in no_args_methods:
+                         # also we SKIP printing out any of those methods args since they maybe excessive
+                         just_args_methods = {
+                             'commitctx': ('content', 'removed', 'updated'),
+                             'commit': ('content', 'removed', 'updated')
+                         }
+                         if method in just_args_methods:
+                             skip_args = just_args_methods[method]
                              call_args = ''
+                             call_kwargs = {}
+                             for k in kwargs:
+                                 if k in skip_args:
+                                     # replace our skip key with dummy
+                                     call_kwargs[k] = f'RemovedParam({k})'
+                                 else:
+                                     call_kwargs[k] = kwargs[k]
                          else:
                              call_args = args[1:]
+                             call_kwargs = kwargs
                          log.debug('Method requested:`%s` with args:%s kwargs:%s context_uid: %s, repo_state_uid:%s',
-                                   method, call_args, kwargs, context_uid, repo_state_uid)
+                                   method, call_args, call_kwargs, context_uid, repo_state_uid)
+                     statsd = request.registry.statsd
+                     if statsd:
+                         statsd.incr(
+                             'vcsserver_method_total', tags=[
+                                 f"method:{method}",
+                             ])
                      return payload, remote, method, args, kwargs
                  def vcs_view(self, request):
                          if should_store_exc:
                              store_exception(id(exc_info), exc_info, request_path=request.path)
-                         tb_info = ''.join(
-                             traceback.format_exception(exc_type, exc_value, exc_traceback))
+                         tb_info = format_exc(exc_info)
                          type_ = e.__class__.__name__
                          if type_ not in self.ALLOWED_EXCEPTIONS:
                          resp = {
                              'id': payload_id,
                              'error': {
-                                 'message': e.message,
+                                 'message': str(e),
                                  'traceback': tb_info,
                                  'org_exc': org_exc_name,
                                  'org_exc_tb': org_exc_tb,
                              'id': payload_id,
                              'result': resp
                          }
+                     log.debug('Serving data for method %s', method)
                      return resp
                  def vcs_stream_view(self, request):
                      method = method.split('stream:')[-1]
                      chunk_size = safe_int(payload.get('chunk_size')) or 4096
-                     try:
-                         resp = getattr(remote, method)(*args, **kwargs)
-                     except Exception as e:
-                         raise
+                     resp = getattr(remote, method)(*args, **kwargs)
                      def get_chunked_data(method_resp):
-                         stream = StringIO(method_resp)
+                         stream = io.BytesIO(method_resp)
                          while 1:
                              chunk = stream.read(chunk_size)
                              if not chunk:
                  def status_view(self, request):
                      import vcsserver
-                     return {'status': 'OK', 'vcsserver_version': vcsserver.__version__,
-                             'pid': os.getpid()}
+                     _platform_id = platform.uname()[1] or 'instance'
+                     return {
+                         "status": "OK",
+                         "vcsserver_version": vcsserver.get_version(),
+                         "platform": _platform_id,
+                         "pid": os.getpid(),
+                     }
                  def service_view(self, request):
                      import vcsserver
                      except Exception:
                          log.exception('Failed to read .ini file for display')
-                     environ = os.environ.items()
+                     environ = list(os.environ.items())
                      resp = {
                          'id': payload.get('id'),
                          'result': dict(
-                             version=vcsserver.__version__,
+                             version=vcsserver.get_version(),
                              config=server_config,
                              app_config=app_config,
                              environ=environ,
                      return resp
                  def _msgpack_renderer_factory(self, info):
                      def _render(value, system):
+                         bin_type = False
+                         res = value.get('result')
+                         if isinstance(res, BytesEnvelope):
+                             log.debug('Result is wrapped in BytesEnvelope type')
+                             bin_type = True
+                         elif isinstance(res, BinaryEnvelope):
+                             log.debug('Result is wrapped in BinaryEnvelope type')
+                             value['result'] = res.val
+                             bin_type = True
                          request = system.get('request')
                          if request is not None:
                              response = request.response
                              ct = response.content_type
                              if ct == response.default_content_type:
                                  response.content_type = 'application/x-msgpack'
-                         return msgpack.packb(value)
+                                 if bin_type:
+                                     response.content_type = 'application/x-msgpack-bin'
+                         return msgpack.packb(value, use_bin_type=bin_type)
                      return _render
                  def set_env_from_config(self, environ, config):
                          @wsgiapp
                          def _hg_stream(environ, start_response):
                              log.debug('http-app: handling hg stream')
-                             repo_path = environ['HTTP_X_RC_REPO_PATH']
-                             repo_name = environ['HTTP_X_RC_REPO_NAME']
-                             packed_config = base64.b64decode(
-                                 environ['HTTP_X_RC_REPO_CONFIG'])
-                             config = msgpack.unpackb(packed_config)
+                             call_context = get_headers_call_context(environ)
+                             repo_path = call_context['repo_path']
+                             repo_name = call_context['repo_name']
+                             config = call_context['repo_config']
                              app = scm_app.create_hg_wsgi_app(
                                  repo_path, repo_name, config)
                              # Consistent path information for hgweb
-                             environ['PATH_INFO'] = environ['HTTP_X_RC_PATH_INFO']
+                             environ['PATH_INFO'] = call_context['path_info']
                              environ['REPO_NAME'] = repo_name
                              self.set_env_from_config(environ, config)
                          @wsgiapp
                          def _git_stream(environ, start_response):
                              log.debug('http-app: handling git stream')
-                             repo_path = environ['HTTP_X_RC_REPO_PATH']
-                             repo_name = environ['HTTP_X_RC_REPO_NAME']
-                             packed_config = base64.b64decode(
-                                 environ['HTTP_X_RC_REPO_CONFIG'])
-                             config = msgpack.unpackb(packed_config)
+                             call_context = get_headers_call_context(environ)
-                             environ['PATH_INFO'] = environ['HTTP_X_RC_PATH_INFO']
+                             repo_path = call_context['repo_path']
+                             repo_name = call_context['repo_name']
+                             config = call_context['repo_config']
+                             environ['PATH_INFO'] = call_context['path_info']
                              self.set_env_from_config(environ, config)
                              content_type = environ.get('CONTENT_TYPE', '')
                  def handle_vcs_exception(self, exception, request):
                      # Map an exception raised while handling `request` to a response, or
                      # record it and raise it again.
                      # Exceptions whose `_vcs_kind` is 'repo_locked' or 'repo_branch_protected'
                      # are returned as HTTPRepoLocked / HTTPRepoBranchProtected responses.
                      # Any other exception is stored, logged, counted in statsd (only when
                      # `request.registry.statsd` is truthy) and re-raised.
                      _vcs_kind = getattr(exception, '_vcs_kind', '')
                      if _vcs_kind == 'repo_locked':
-                         # Get custom repo-locked status code if present.
-                         status_code = request.headers.get('X-RC-Locked-Status-Code')
+                         headers_call_context = get_headers_call_context(request.environ)
+                         status_code = safe_int(headers_call_context['locked_status_code'])
                          return HTTPRepoLocked(
-                             title=exception.message, status_code=status_code)
+                             title=str(exception), status_code=status_code, headers=[('X-Rc-Locked', '1')])
                      elif _vcs_kind == 'repo_branch_protected':
                          # No custom status code is read on this path; the response only
                          # carries the exception text and the branch-protection marker header.
-                         return HTTPRepoBranchProtected(title=exception.message)
+                         return HTTPRepoBranchProtected(
+                             title=str(exception), headers=[('X-Rc-Branch-Protection', '1')])
                      # Everything below handles exceptions without a special `_vcs_kind`.
                      exc_info = request.exc_info
                      store_exception(id(exc_info), exc_info)
                      traceback_info = 'unavailable'
                      if request.exc_info:
-                         exc_type, exc_value, exc_tb = request.exc_info
-                         traceback_info = ''.join(traceback.format_exception(exc_type, exc_value, exc_tb))
+                         traceback_info = format_exc(request.exc_info)
                      log.error(
-                         'error occurred handling this request for path: %s, \n tb: %s',
+                         'error occurred handling this request for path: %s, \n%s',
                          request.path, traceback_info)
+                     statsd = request.registry.statsd
+                     if statsd:
                          # tag the counter with the fully qualified exception class name
+                         exc_type = f"{exception.__class__.__module__}.{exception.__class__.__name__}"
+                         statsd.incr('vcsserver_exception_total',
+                                     tags=[f"type:{exc_type}"])
                      raise exception
-             class ResponseFilter(object):
+             class ResponseFilter:
                  def __init__(self, start_response):
                      self._start_response = start_response
                      return self._start_response(status, headers, exc_info)
+             def sanitize_settings_and_apply_defaults(global_config, settings):
                  # Declare every vcsserver setting with its default value and parser through
                  # SettingsMaker, then run env_expand(). Nothing is returned.
                  # NOTE(review): main() keeps using `settings` after this call, so
                  # SettingsMaker presumably writes the defaults into that mapping - confirm.
                  # NOTE(review): _global_settings_maker is not used again in this function.
+                 _global_settings_maker = SettingsMaker(global_config)
+                 settings_maker = SettingsMaker(settings)
+                 settings_maker.make_setting('logging.autoconfigure', False, parser='bool')
                  # path of a logging.ini located next to the file named by global_config['__file__']
+                 logging_conf = os.path.join(os.path.dirname(global_config.get('__file__')), 'logging.ini')
+                 settings_maker.enable_logging(logging_conf)
+                 # Default includes, possible to change as a user
+                 pyramid_includes = settings_maker.make_setting('pyramid.includes', [], parser='list:newline')
+                 log.debug("Using the following pyramid.includes: %s", pyramid_includes)
+                 settings_maker.make_setting('__file__', global_config.get('__file__'))
+                 settings_maker.make_setting('pyramid.default_locale_name', 'en')
+                 settings_maker.make_setting('locale', 'en_US.UTF-8')
+                 settings_maker.make_setting('core.binary_dir', '')
                  # the fallback cache location is <system temp dir>/rc_cache
+                 temp_store = tempfile.gettempdir()
+                 default_cache_dir = os.path.join(temp_store, 'rc_cache')
+                 # save default, cache dir, and use it for all backends later.
+                 default_cache_dir = settings_maker.make_setting(
+                     'cache_dir',
+                     default=default_cache_dir, default_when_empty=True,
+                     parser='dir:ensured')
+                 # exception store cache
+                 settings_maker.make_setting(
+                     'exception_tracker.store_path',
+                     default=os.path.join(default_cache_dir, 'exc_store'), default_when_empty=True,
+                     parser='dir:ensured'
+                 )
+                 # repo_object cache defaults
+                 settings_maker.make_setting(
+                     'rc_cache.repo_object.backend',
+                     default='dogpile.cache.rc.file_namespace',
+                     parser='string')
+                 settings_maker.make_setting(
+                     'rc_cache.repo_object.expiration_time',
+                     default=30 * 24 * 60 * 60,  # 30days
+                     parser='int')
+                 settings_maker.make_setting(
+                     'rc_cache.repo_object.arguments.filename',
+                     default=os.path.join(default_cache_dir, 'vcsserver_cache_repo_object.db'),
+                     parser='string')
+                 # statsd
+                 settings_maker.make_setting('statsd.enabled', False, parser='bool')
+                 settings_maker.make_setting('statsd.statsd_host', 'statsd-exporter', parser='string')
+                 settings_maker.make_setting('statsd.statsd_port', 9125, parser='int')
+                 settings_maker.make_setting('statsd.statsd_prefix', '')
+                 settings_maker.make_setting('statsd.statsd_ipv6', False, parser='bool')
                  # called last, after all settings above have been declared
+                 settings_maker.env_expand()
              def main(global_config, **settings):
                  # Build and return the WSGI application: apply the setting defaults, set up
                  # StatsdClient from `settings`, create HTTPApplication(...).wsgi_app() and log
                  # how long the whole configuration took.
+                 start_time = time.time()
+                 log.info('Pyramid app config starting')
                  if MercurialFactory:
                      hgpatches.patch_largefiles_capabilities()
                      hgpatches.patch_subrepo_type_mapping()
-                 app = HTTPApplication(settings=settings, global_config=global_config)
-                 return app.wsgi_app()
+                 # Fill in and sanitize the defaults & do ENV expansion
+                 sanitize_settings_and_apply_defaults(global_config, settings)
+                 # init and bootstrap StatsdClient
+                 StatsdClient.setup(settings)
+                 pyramid_app = HTTPApplication(settings=settings, global_config=global_config).wsgi_app()
+                 total_time = time.time() - start_time
+                 log.info('Pyramid app created and configured in %.2fs', total_time)
+                 return pyramid_app

vcsserver/lib/__init__.py

0 +1 -1

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by

vcsserver/lib/_vendor/__init__.py

0 +1 -1

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by

vcsserver/lib/_vendor/redis_lock/__init__.py

0 +43 -39

-             import sys
              import threading
              import weakref
              from base64 import b64encode
              from logging import getLogger
              from os import urandom
+             from typing import Union
              from redis import StrictRedis
-             __version__ = '3.7.0'
+             __version__ = '4.0.0'
              loggers = {
                  k: getLogger("vcsserver." + ".".join((__name__, k)))
                  ]
              }
-             PY3 = sys.version_info[0] == 3
-             if PY3:
-                 text_type = str
-                 binary_type = bytes
-             else:
-                 text_type = unicode  # noqa
-                 binary_type = str
+             text_type = str
+             binary_type = bytes
              # Check if the id match. If not, return an error code.
                  pass
-             class Lock(object):
+             class Lock:
                  """
                  A Lock context manager implemented via redis SETNX/BLPOP.
                  """
                  unlock_script = None
                  extend_script = None
                  reset_script = None
                  reset_all_script = None
+                 _lock_renewal_interval: float
+                 _lock_renewal_thread: Union[threading.Thread, None]
                  def __init__(self, redis_client, name, expire=None, id=None, auto_renewal=False, strict=True, signal_expire=1000):
                      """
                      :param redis_client:
                      elif isinstance(id, text_type):
                          self._id = id
                      else:
-                         raise TypeError("Incorrect type for `id`. Must be bytes/str not %s." % type(id))
+                         raise TypeError(f"Incorrect type for `id`. Must be bytes/str not {type(id)}.")
                      self._name = 'lock:' + name
                      self._signal = 'lock-signal:' + name
                      self._lock_renewal_interval = (float(expire) * 2 / 3
                  def register_scripts(cls, redis_client):
                      global reset_all_script
                      if reset_all_script is None:
-                         reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
                          cls.unlock_script = redis_client.register_script(UNLOCK_SCRIPT)
                          cls.extend_script = redis_client.register_script(EXTEND_SCRIPT)
                          cls.reset_script = redis_client.register_script(RESET_SCRIPT)
                          cls.reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
+                         reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
                  @property
                  def _held(self):
                      """
                      logger = loggers["acquire"]
-                     logger.debug("Getting acquire on %r ...", self._name)
+                     logger.debug("Getting blocking: %s acquire on %r ...", blocking, self._name)
                      if self._held:
                          owner_id = self.get_owner_id()
                      if timeout:
                          timeout = int(timeout)
                          if timeout < 0:
-                             raise InvalidTimeout("Timeout (%d) cannot be less than or equal to 0" % timeout)
+                             raise InvalidTimeout(f"Timeout ({timeout}) cannot be less than or equal to 0")
                          if self._expire and not self._lock_renewal_interval and timeout > self._expire:
-                             raise TimeoutTooLarge("Timeout (%d) cannot be greater than expire (%d)" % (timeout, self._expire))
+                             raise TimeoutTooLarge(f"Timeout ({timeout}) cannot be greater than expire ({self._expire})")
                      busy = True
                      blpop_timeout = timeout or self._expire or 0
                              elif blocking:
                                  timed_out = not self._client.blpop(self._signal, blpop_timeout) and timeout
                              else:
-                                 logger.warning("Failed to get %r.", self._name)
+                                 logger.warning("Failed to acquire Lock(%r).", self._name)
                                  return False
-                     logger.info("Got lock for %r.", self._name)
+                     logger.debug("Acquired Lock(%r).", self._name)
                      if self._lock_renewal_interval is not None:
                          self._start_lock_renewer()
                      return True
                  def extend(self, expire=None):
-                     """Extends expiration time of the lock.
+                     """
+                     Extends expiration time of the lock.
                      :param expire:
                          New expiration time. If ``None`` - `expire` provided during
                      error = self.extend_script(client=self._client, keys=(self._name, self._signal), args=(self._id, expire))
                      if error == 1:
-                         raise NotAcquired("Lock %s is not acquired or it already expired." % self._name)
+                         raise NotAcquired(f"Lock {self._name} is not acquired or it already expired.")
                      elif error == 2:
-                         raise NotExpirable("Lock %s has no assigned expiration time" % self._name)
+                         raise NotExpirable(f"Lock {self._name} has no assigned expiration time")
                      elif error:
-                         raise RuntimeError("Unsupported error code %s from EXTEND script" % error)
+                         raise RuntimeError(f"Unsupported error code {error} from EXTEND script")
                  @staticmethod
-                 def _lock_renewer(lockref, interval, stop):
+                 def _lock_renewer(name, lockref, interval, stop):
                      """
                      Extend the lock every `interval` seconds until the `stop` event is
                      set or the lock behind `lockref` no longer exists.

                      `lockref` is called on each round to obtain the lock; `name` is only
                      used in the log messages.
                      """
                      # stop.wait() returns True once the event is set, which ends the loop
                      while not stop.wait(timeout=interval):
-                         loggers["refresh.thread.start"].debug("Refreshing lock")
-                         lock = lockref()
+                         loggers["refresh.thread.start"].debug("Refreshing Lock(%r).", name)
+                         lock: "Lock" = lockref()
                          if lock is None:
                              loggers["refresh.thread.stop"].debug(
-                                 "The lock no longer exists, stopping lock refreshing"
+                                 "Stopping loop because Lock(%r) was garbage collected.", name
                              )
                              break
                          lock.extend(expire=lock._expire)
                          # drop the strong reference again before waiting for the next round
                          del lock
-                     loggers["refresh.thread.exit"].debug("Exit requested, stopping lock refreshing")
+                     loggers["refresh.thread.exit"].debug("Exiting renewal thread for Lock(%r).", name)
                  def _start_lock_renewer(self):
                      """
                          raise AlreadyStarted("Lock refresh thread already started")
                      loggers["refresh.start"].debug(
-                         "Starting thread to refresh lock every %s seconds",
-                         self._lock_renewal_interval
+                         "Starting renewal thread for Lock(%r). Refresh interval: %s seconds.",
+                         self._name, self._lock_renewal_interval
                      )
                      self._lock_renewal_stop = threading.Event()
                      self._lock_renewal_thread = threading.Thread(
                          group=None,
                          target=self._lock_renewer,
-                         kwargs={'lockref': weakref.ref(self),
-                                 'interval': self._lock_renewal_interval,
-                                 'stop': self._lock_renewal_stop}
+                         kwargs={
+                             'name': self._name,
+                             'lockref': weakref.ref(self),
+                             'interval': self._lock_renewal_interval,
+                             'stop': self._lock_renewal_stop,
+                         },
                      )
-                     self._lock_renewal_thread.setDaemon(True)
+                     self._lock_renewal_thread.daemon = True
                      self._lock_renewal_thread.start()
                  def _stop_lock_renewer(self):
                      """
                      if self._lock_renewal_thread is None or not self._lock_renewal_thread.is_alive():
                          return
-                     loggers["refresh.shutdown"].debug("Signalling the lock refresher to stop")
+                     loggers["refresh.shutdown"].debug("Signaling renewal thread for Lock(%r) to exit.", self._name)
                      self._lock_renewal_stop.set()
                      self._lock_renewal_thread.join()
                      self._lock_renewal_thread = None
-                     loggers["refresh.exit"].debug("Lock refresher has stopped")
+                     loggers["refresh.exit"].debug("Renewal thread for Lock(%r) exited.", self._name)
                  def __enter__(self):
                      # Context-manager entry: acquire the lock in blocking mode and return
                      # the lock object itself.
                      acquired = self.acquire(blocking=True)
-                     assert acquired, "Lock wasn't acquired, but blocking=True"
                      # explicit raise instead of `assert`, which is removed under `python -O`
+                     if not acquired:
+                         raise AssertionError(f"Lock({self._name}) wasn't acquired, but blocking=True was used!")
                      return self
                  def __exit__(self, exc_type=None, exc_value=None, traceback=None):
                      """
                      if self._lock_renewal_thread is not None:
                          self._stop_lock_renewer()
-                     loggers["release"].debug("Releasing %r.", self._name)
+                     loggers["release"].debug("Releasing Lock(%r).", self._name)
                      error = self.unlock_script(client=self._client, keys=(self._name, self._signal), args=(self._id, self._signal_expire))
                      if error == 1:
-                         raise NotAcquired("Lock %s is not acquired or it already expired." % self._name)
+                         raise NotAcquired(f"Lock({self._name}) is not acquired or it already expired.")
                      elif error:
-                         raise RuntimeError("Unsupported error code %s from EXTEND script." % error)
+                         raise RuntimeError(f"Unsupported error code {error} from EXTEND script.")
                  def locked(self):
                      """

vcsserver/lib/_vendor/statsd/__init__.py

0 +8 -4

-             from __future__ import absolute_import, division, unicode_literals
-             import logging
              from .stream import TCPStatsClient, UnixSocketStatsClient  # noqa
                  ipv6 = asbool(_config.pop('statsd_ipv6', IPV6))
                  log.debug('configured statsd client %s:%s', host, port)
-                 return StatsClient(
-                     host=host, port=port, prefix=prefix, maxudpsize=maxudpsize, ipv6=ipv6)
+                 try:
+                     client = StatsClient(
+                         host=host, port=port, prefix=prefix, maxudpsize=maxudpsize, ipv6=ipv6)
+                 except Exception:
+                     log.exception('StatsD is enabled, but failed to connect to statsd server, fallback: disable statsd')
+                     client = None
+                 return client
              def get_statsd_client(request):

vcsserver/lib/_vendor/statsd/base.py

0 +70 -23

-             from __future__ import absolute_import, division, unicode_literals
+             import re
              import random
              from collections import deque
              from datetime import timedelta
+             from repoze.lru import lru_cache
              from .timer import Timer
+             TAG_INVALID_CHARS_RE = re.compile(
+                 r"[^\w\d_\-:/\.]",
+                 #re.UNICODE
+             )
+             TAG_INVALID_CHARS_SUBS = "_"
-             class StatsClientBase(object):
+             # we save and expose methods called by statsd for discovery
+             buckets_dict = {
+             }
+             @lru_cache(maxsize=500)
+             def _normalize_tags_with_cache(tag_list):
                  # Return a list in which every character matched by TAG_INVALID_CHARS_RE
                  # is replaced with TAG_INVALID_CHARS_SUBS in each tag of `tag_list`.
                  # NOTE(review): the result is memoized by lru_cache, so the same list
                  # object is presumably handed out on a cache hit - callers should not
                  # mutate it; confirm against repoze.lru.
+                 return [TAG_INVALID_CHARS_RE.sub(TAG_INVALID_CHARS_SUBS, tag) for tag in tag_list]
+             def normalize_tags(tag_list):
+                 # Return the tags of `tag_list` with invalid characters replaced.
+                 # The input is converted to an immutable tuple first so that it is
+                 # hashable (and thus usable) by the @lru_cache decorator.
+                 return _normalize_tags_with_cache(tuple(tag_list))
+             class StatsClientBase:
                  """A Base class for various statsd clients."""
                  def close(self):
                  def pipeline(self):
                      raise NotImplementedError()
-                 def timer(self, stat, rate=1):
-                     return Timer(self, stat, rate)
+                 def timer(self, stat, rate=1, tags=None, auto_send=True):
+                     """
+                     Return a ``Timer`` for ``stat`` bound to this client; ``rate``,
+                     ``tags`` and ``auto_send`` are passed on to it unchanged.
+                     Usage example:
+                     statsd = StatsdClient.statsd
+                     with statsd.timer('bucket_name', auto_send=True) as tmr:
+                         # This block will be timed.
+                         for i in range(0, 100000):
+                             i ** 2
+                     # you can access time here...
+                     elapsed_ms = tmr.ms
+                     """
+                     return Timer(self, stat, rate, tags, auto_send=auto_send)
-                 def timing(self, stat, delta, rate=1):
+                 def timing(self, stat, delta, rate=1, tags=None, use_decimals=True):
                      """
                      Send new timing information.
                      if isinstance(delta, timedelta):
                          # Convert timedelta to number of milliseconds.
                          delta = delta.total_seconds() * 1000.
-                     self._send_stat(stat, '%0.6f|ms' % delta, rate)
-                 def incr(self, stat, count=1, rate=1):
-                     """Increment a stat by `count`."""
-                     self._send_stat(stat, '%s|c' % count, rate)
+                     if use_decimals:
+                         fmt = '%0.6f|ms'
+                     else:
+                         fmt = '%s|ms'
+                     self._send_stat(stat, fmt % delta, rate, tags)
-                 def decr(self, stat, count=1, rate=1):
+                 def incr(self, stat, count=1, rate=1, tags=None):
+                     """Increment a stat by `count`."""
+                     self._send_stat(stat, f'{count}|c', rate, tags)
+                 def decr(self, stat, count=1, rate=1, tags=None):
                      """Decrement a stat by `count`."""
-                     self.incr(stat, -count, rate)
+                     self.incr(stat, -count, rate, tags)
-                 def gauge(self, stat, value, rate=1, delta=False):
+                 def gauge(self, stat, value, rate=1, delta=False, tags=None):
                      """Set a gauge value."""
                      if value < 0 and not delta:
                          if rate < 1:
                                  return
                          with self.pipeline() as pipe:
                              pipe._send_stat(stat, '0|g', 1)
-                             pipe._send_stat(stat, '%s|g' % value, 1)
+                             pipe._send_stat(stat, f'{value}|g', 1)
                      else:
                          prefix = '+' if delta and value >= 0 else ''
-                         self._send_stat(stat, '%s%s|g' % (prefix, value), rate)
+                         self._send_stat(stat, f'{prefix}{value}|g', rate, tags)
                  def set(self, stat, value, rate=1):
                      """Set a set value."""
-                     self._send_stat(stat, '%s|s' % value, rate)
+                     self._send_stat(stat, f'{value}|s', rate)
+                 def histogram(self, stat, value, rate=1, tags=None):
+                     """Send `value` for `stat` as a histogram sample (type suffix `|h`)."""
+                     self._send_stat(stat, f'{value}|h', rate, tags)
-                 def _send_stat(self, stat, value, rate):
-                     self._after(self._prepare(stat, value, rate))
+                 def _send_stat(self, stat, value, rate, tags=None):
+                     self._after(self._prepare(stat, value, rate, tags))
-                 def _prepare(self, stat, value, rate):
+                 def _prepare(self, stat, value, rate, tags=None):
                      # Build the line for one sample: `[prefix.]stat:value[|@rate][|#tag1,tag2]`.
                      # Returns None when the sample is dropped by rate sampling.
+                     global buckets_dict
                      # record the stat name in the module-level registry; this happens
                      # before sampling, so dropped samples are recorded as well
+                     buckets_dict[stat] = 1
                      if rate < 1:
                          # a sample is dropped when the random draw exceeds `rate`
                          if random.random() > rate:
                              return
-                         value = '%s|@%s' % (value, rate)
+                         value = f'{value}|@{rate}'
                      if self._prefix:
-                         stat = '%s.%s' % (self._prefix, stat)
+                         stat = f'{self._prefix}.{stat}'
-                     return '%s:%s' % (stat, value)
                      # tags, when given, are normalized and appended as `|#t1,t2,...`
+                     res = '%s:%s%s' % (
+                         stat,
+                         value,
+                         ("|#" + ",".join(normalize_tags(tags))) if tags else "",
+                     )
+                     return res
                  def _after(self, data):
                      if data:

vcsserver/lib/_vendor/statsd/stream.py

0 0 -2

-             from __future__ import absolute_import, division, unicode_literals
-             import socket
              from .base import StatsClientBase, PipelineBase

vcsserver/lib/_vendor/statsd/timer.py

0 +10 -15

-             from __future__ import absolute_import, division, unicode_literals
-             import functools
-             # Use timer that's not susceptible to time of day adjustments.
-             try:
-                 # perf_counter is only present on Py3.3+
-                 from time import perf_counter as time_now
-             except ImportError:
-                 # fall back to using time
-                 from time import time as time_now
+             from time import perf_counter as time_now
              def safe_wraps(wrapper, *args, **kwargs):
                  return functools.wraps(wrapper, *args, **kwargs)
-             class Timer(object):
+             class Timer:
                  """A context manager/decorator for statsd.timing()."""
-                 def __init__(self, client, stat, rate=1):
+                 def __init__(self, client, stat, rate=1, tags=None, use_decimals=True, auto_send=True):
                      self.client = client
                      self.stat = stat
                      self.rate = rate
+                     self.tags = tags
                      self.ms = None
                      self._sent = False
                      self._start_time = None
+                     self.use_decimals = use_decimals
+                     self.auto_send = auto_send
                  def __call__(self, f):
                      """Thread-safe timing function decorator."""
                              return f(*args, **kwargs)
                          finally:
                              elapsed_time_ms = 1000.0 * (time_now() - start_time)
-                             self.client.timing(self.stat, elapsed_time_ms, self.rate)
+                             self.client.timing(self.stat, elapsed_time_ms, self.rate, self.tags, self.use_decimals)
+                             self._sent = True
                      return _wrapped
                  def __enter__(self):
                      return self.start()
                  def __exit__(self, typ, value, tb):
-                     self.stop()
+                     self.stop(send=self.auto_send)
                  def start(self):
                      self.ms = None
                      if self._sent:
                          raise RuntimeError('Already sent data.')
                      self._sent = True
-                     self.client.timing(self.stat, self.ms, self.rate)
+                     self.client.timing(self.stat, self.ms, self.rate, self.tags, self.use_decimals)

vcsserver/lib/_vendor/statsd/udp.py

0 +1 -3

-             from __future__ import absolute_import, division, unicode_literals
-             import socket
              from .base import StatsClientBase, PipelineBase
              class Pipeline(PipelineBase):
                  def __init__(self, client):
-                     super(Pipeline, self).__init__(client)
+                     super().__init__(client)
                      self._maxudpsize = client._maxudpsize
                  def _send(self):

vcsserver/lib/exc_tracking.py

0 +150 -52

-             # -*- coding: utf-8 -*-
-             # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
+             import io
              import os
              import time
+             import sys
              import datetime
              import msgpack
              import logging
              import traceback
              import tempfile
-             from pyramid import compat
+             import glob
              log = logging.getLogger(__name__)
              exc_store_dir_name = 'rc_exception_store_v1'
-             def exc_serialize(exc_id, tb, exc_type):
+             def exc_serialize(exc_id, tb, exc_type, extra_data=None):
                  # Build the record for one stored exception and serialize it with msgpack.
                  # Returns a tuple `(packed, data)`: the msgpack payload and the plain dict
                  # it was built from. `tb` is stored under the "exc_message" key.
                  data = {
-                     'version': 'v1',
-                     'exc_id': exc_id,
-                     'exc_utc_date': datetime.datetime.utcnow().isoformat(),
-                     'exc_timestamp': repr(time.time()),
-                     'exc_message': tb,
-                     'exc_type': exc_type,
+                     "version": "v1",
+                     "exc_id": exc_id,
+                     "exc_utc_date": datetime.datetime.utcnow().isoformat(),
+                     "exc_timestamp": repr(time.time()),
+                     "exc_message": tb,
+                     "exc_type": exc_type,
                  }
                  # merged last, so keys in extra_data override the base fields above
+                 if extra_data:
+                     data.update(extra_data)
                  return msgpack.packb(data), data
                  return msgpack.unpackb(tb)
+             _exc_store = None
              def get_exc_store():
                  """
                  Get and create exception store if it's not existing
                  """
                  # Returns the absolute path of the exception store directory. The path
                  # is resolved once and then served from the module-level `_exc_store`.
+                 global _exc_store
+                 if _exc_store is not None:
+                     # quick global cache
+                     return _exc_store
                  import vcsserver as app
                  # fall back to the system temp dir when no store path is configured
-                 exc_store_dir = app.CONFIG.get('exception_tracker.store_path', '') or tempfile.gettempdir()
+                 exc_store_dir = (
+                     app.CONFIG.get("exception_tracker.store_path", "") or tempfile.gettempdir()
+                 )
                  _exc_store_path = os.path.join(exc_store_dir, exc_store_dir_name)
                  _exc_store_path = os.path.abspath(_exc_store_path)
                  if not os.path.isdir(_exc_store_path):
                      os.makedirs(_exc_store_path)
-                     log.debug('Initializing exceptions store at %s', _exc_store_path)
+                     log.debug("Initializing exceptions store at %s", _exc_store_path)
+                 # Fill the cache on every successful resolution, not only when the
+                 # directory had to be created; otherwise the cache stays empty for an
+                 # already existing store and each call repeats the lookup.
+                 _exc_store = _exc_store_path
                  return _exc_store_path
-             def _store_exception(exc_id, exc_info, prefix, request_path=''):
-                 exc_type, exc_value, exc_traceback = exc_info
+             def get_detailed_tb(exc_info):
                  # Render `exc_info` with the `rich` traceback formatter and return the text.
                  # Returns None when `rich` can be imported neither from pip's vendored copy
                  # nor as a standalone package.
+                 try:
+                     from pip._vendor.rich import (
+                         traceback as rich_tb,
+                         scope as rich_scope,
+                         console as rich_console,
+                     )
+                 except ImportError:
+                     try:
+                         from rich import (
+                             traceback as rich_tb,
+                             scope as rich_scope,
+                             console as rich_console,
+                         )
+                     except ImportError:
+                         return None
                  # the console writes into an in-memory buffer that is read back below
+                 console = rich_console.Console(width=160, file=io.StringIO())
                  # NOTE(review): the trace is extracted with show_locals=True while the
                  # renderer below is created with show_locals=False - confirm which is intended.
+                 exc = rich_tb.Traceback.extract(*exc_info, show_locals=True)
+                 tb_rich = rich_tb.Traceback(
+                     trace=exc,
+                     width=160,
+                     extra_lines=3,
+                     theme=None,
+                     word_wrap=False,
+                     show_locals=False,
+                     max_frames=100,
+                 )
-                 tb = ''.join(traceback.format_exception(
-                     exc_type, exc_value, exc_traceback, None))
+                 # last_stack = exc.stacks[-1]
+                 # last_frame = last_stack.frames[-1]
+                 # if last_frame and last_frame.locals:
+                 #     console.print(
+                 #         rich_scope.render_scope(
+                 #             last_frame.locals,
+                 #             title=f'{last_frame.filename}:{last_frame.lineno}'))
+                 console.print(tb_rich)
                  # despite its name, `formatted_locals` holds everything printed to the
                  # console, i.e. the rendered traceback
+                 formatted_locals = console.file.getvalue()
+                 return formatted_locals
-                 detailed_tb = getattr(exc_value, '_org_exc_tb', None)
+             def get_request_metadata(request=None) -> dict:
+                 request_metadata = {}
+                 if not request:
+                     from pyramid.threadlocal import get_current_request
+                     request = get_current_request()
+                 # NOTE(marcink): store request information into exc_data
+                 if request:
+                     request_metadata["client_address"] = getattr(request, "client_addr", "")
+                     request_metadata["user_agent"] = getattr(request, "user_agent", "")
+                     request_metadata["method"] = getattr(request, "method", "")
+                     request_metadata["url"] = getattr(request, "url", "")
+                 return request_metadata
+             def format_exc(exc_info, use_detailed_tb=True):
+                 exc_type, exc_value, exc_traceback = exc_info
+                 tb = "++ TRACEBACK ++\n\n"
+                 tb += "".join(traceback.format_exception(exc_type, exc_value, exc_traceback, None))
+                 detailed_tb = getattr(exc_value, "_org_exc_tb", None)
                  if detailed_tb:
-                     if isinstance(detailed_tb, compat.string_types):
+                     remote_tb = detailed_tb
+                     if isinstance(detailed_tb, str):
                          remote_tb = [detailed_tb]
                      tb += (
-                         '\n+++ BEG SOURCE EXCEPTION +++\n\n'
-                         '{}\n'
-                         '+++ END SOURCE EXCEPTION +++\n'
-                         ''.format('\n'.join(remote_tb))
+                         "\n+++ BEG SOURCE EXCEPTION +++\n\n"
+                         "{}\n"
+                         "+++ END SOURCE EXCEPTION +++\n"
+                         "".format("\n".join(remote_tb))
                      )
                      # Avoid that remote_tb also appears in the frame
                      del remote_tb
+                 if use_detailed_tb:
+                     locals_tb = get_detailed_tb(exc_info)
+                     if locals_tb:
+                         tb += f"\n+++ DETAILS +++\n\n{locals_tb}\n" ""
+                 return tb
+             def _store_exception(exc_id, exc_info, prefix, request_path=''):
+                 """
+                 Low level function to store exception in the exception tracker
+                 """
+                 extra_data = {}
+                 extra_data.update(get_request_metadata())
+                 exc_type, exc_value, exc_traceback = exc_info
+                 tb = format_exc(exc_info)
                  exc_type_name = exc_type.__name__
+                 exc_data, org_data = exc_serialize(exc_id, tb, exc_type_name, extra_data=extra_data)
+                 exc_pref_id = f"{exc_id}_{prefix}_{org_data['exc_timestamp']}"
                  exc_store_path = get_exc_store()
-                 exc_data, org_data = exc_serialize(exc_id, tb, exc_type_name)
-                 exc_pref_id = '{}_{}_{}'.format(exc_id, prefix, org_data['exc_timestamp'])
                  if not os.path.isdir(exc_store_path):
                      os.makedirs(exc_store_path)
                  stored_exc_path = os.path.join(exc_store_path, exc_pref_id)
-                 with open(stored_exc_path, 'wb') as f:
+                 with open(stored_exc_path, "wb") as f:
                      f.write(exc_data)
-                 log.debug('Stored generated exception %s as: %s', exc_id, stored_exc_path)
+                 log.debug("Stored generated exception %s as: %s", exc_id, stored_exc_path)
-                 log.error(
-                     'error occurred handling this request.\n'
-                     'Path: `%s`, tb: %s',
-                     request_path, tb)
+                 if request_path:
+                     log.error(
+                         'error occurred handling this request.\n'
+                         'Path: `%s`, %s',
+                         request_path, tb)
              def store_exception(exc_id, exc_info, prefix=global_prefix, request_path=''):
                  """
                  try:
-                     _store_exception(exc_id=exc_id, exc_info=exc_info, prefix=prefix,
-                                      request_path=request_path)
+                     exc_type = exc_info[0]
+                     exc_type_name = exc_type.__name__
+                     _store_exception(
+                         exc_id=exc_id, exc_info=exc_info, prefix=prefix, request_path=request_path,
+                     )
+                     return exc_id, exc_type_name
                  except Exception:
-                     log.exception('Failed to store exception `%s` information', exc_id)
+                     log.exception("Failed to store exception `%s` information", exc_id)
                      # there's no way this can fail, it will crash server badly if it does.
                      pass
              def _find_exc_file(exc_id, prefix=global_prefix):
                  exc_store_path = get_exc_store()
                  if prefix:
-                     exc_id = '{}_{}'.format(exc_id, prefix)
+                     exc_id = f"{exc_id}_{prefix}"
                  else:
                      # search without a prefix
-                     exc_id = '{}'.format(exc_id)
+                     exc_id = f"{exc_id}"
-                 # we need to search the store for such start pattern as above
-                 for fname in os.listdir(exc_store_path):
-                     if fname.startswith(exc_id):
-                         exc_id = os.path.join(exc_store_path, fname)
-                         break
-                     continue
-                 else:
-                     exc_id = None
+                 found_exc_id = None
+                 matches = glob.glob(os.path.join(exc_store_path, exc_id) + "*")
+                 if matches:
+                     found_exc_id = matches[0]
-                 return exc_id
+                 return found_exc_id
              def _read_exception(exc_id, prefix):
                  exc_id_file_path = _find_exc_file(exc_id=exc_id, prefix=prefix)
                  if exc_id_file_path:
-                     with open(exc_id_file_path, 'rb') as f:
+                     with open(exc_id_file_path, "rb") as f:
                          return exc_unserialize(f.read())
                  else:
-                     log.debug('Exception File `%s` not found', exc_id_file_path)
+                     log.debug("Exception File `%s` not found", exc_id_file_path)
                  return None
                  try:
                      return _read_exception(exc_id=exc_id, prefix=prefix)
                  except Exception:
-                     log.exception('Failed to read exception `%s` information', exc_id)
+                     log.exception("Failed to read exception `%s` information", exc_id)
                      # there's no way this can fail, it will crash server badly if it does.
                  return None
                          os.remove(exc_id_file_path)
                  except Exception:
-                     log.exception('Failed to remove exception `%s` information', exc_id)
+                     log.exception("Failed to remove exception `%s` information", exc_id)
                      # there's no way this can fail, it will crash server badly if it does.
                      pass
+             def generate_id():
+                 return id(object())

vcsserver/lib/memory_lru_dict.py

0 +5 -7

-             # -*- coding: utf-8 -*-
-             # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              from repoze.lru import LRUCache
-             from vcsserver.utils import safe_str
+             from vcsserver.str_utils import safe_str
              log = logging.getLogger(__name__)
                      del self.data[key]
                  def keys(self):
-                     return self.data.keys()
+                     return list(self.data.keys())
              class LRUDictDebug(LRUDict):
                  Wrapper to provide some debug options
                  """
                  def _report_keys(self):
-                     elems_cnt = '%s/%s' % (len(self.keys()), self.size)
+                     elems_cnt = f'{len(list(self.keys()))}/{self.size}'
                      # trick for pformat print it more nicely
                      fmt = '\n'
                      for cnt, elem in enumerate(self.keys()):
-                         fmt += '%s - %s\n' % (cnt+1, safe_str(elem))
+                         fmt += f'{cnt+1} - {safe_str(elem)}\n'
                      log.debug('current LRU keys (%s):%s', elems_cnt, fmt)
                  def __getitem__(self, key):

vcsserver/lib/rc_cache/__init__.py

0 +48 -13

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
              import logging
+             import threading
              from dogpile.cache import register_backend
+             from . import region_meta
+             from .utils import (
+                 backend_key_generator,
+                 clear_cache_namespace,
+                 get_default_cache_settings,
+                 get_or_create_region,
+                 make_region,
+                 str2bool,
+             )
+             module_name = 'vcsserver'
              register_backend(
-                 "dogpile.cache.rc.memory_lru", "vcsserver.lib.rc_cache.backends",
+                 "dogpile.cache.rc.memory_lru", f"{module_name}.lib.rc_cache.backends",
                  "LRUMemoryBackend")
              register_backend(
-                 "dogpile.cache.rc.file_namespace", "vcsserver.lib.rc_cache.backends",
+                 "dogpile.cache.rc.file_namespace", f"{module_name}.lib.rc_cache.backends",
                  "FileNamespaceBackend")
              register_backend(
-                 "dogpile.cache.rc.redis", "vcsserver.lib.rc_cache.backends",
+                 "dogpile.cache.rc.redis", f"{module_name}.lib.rc_cache.backends",
                  "RedisPickleBackend")
              register_backend(
-                 "dogpile.cache.rc.redis_msgpack", "vcsserver.lib.rc_cache.backends",
+                 "dogpile.cache.rc.redis_msgpack", f"{module_name}.lib.rc_cache.backends",
                  "RedisMsgPackBackend")
              log = logging.getLogger(__name__)
-             from . import region_meta
-             from .utils import (
-                 get_default_cache_settings, backend_key_generator, get_or_create_region,
-                 clear_cache_namespace, make_region)
+             CACHE_OBJ_CACHE_VER = 'v2'
+             CLEAR_DELETE = 'delete'
+             CLEAR_INVALIDATE = 'invalidate'
+             def async_creation_runner(cache, cache_key, creator, mutex):
+                 def runner():
+                     try:
+                         value = creator()
+                         cache.set(cache_key, value)
+                     finally:
+                         mutex.release()
+                 thread = threading.Thread(target=runner)
+                 thread.start()
              def configure_dogpile_cache(settings):
                      new_region = make_region(
                          name=namespace_name,
-                         function_key_generator=None
+                         function_key_generator=None,
+                         async_creation_runner=None
                      )
-                     new_region.configure_from_config(settings, 'rc_cache.{}.'.format(namespace_name))
+                     new_region.configure_from_config(settings, f'rc_cache.{namespace_name}.')
                      new_region.function_key_generator = backend_key_generator(new_region.actual_backend)
+                     async_creator = str2bool(settings.pop(f'rc_cache.{namespace_name}.async_creator', 'false'))
+                     if async_creator:
+                         log.debug('configuring region %s with async creator', new_region)
+                         new_region.async_creation_runner = async_creation_runner
                      if log.isEnabledFor(logging.DEBUG):
-                         region_args = dict(backend=new_region.actual_backend.__class__,
+                         region_args = dict(backend=new_region.actual_backend,
                                             region_invalidator=new_region.region_invalidator.__class__)
-                         log.debug('dogpile: registering a new region `%s` %s', namespace_name, region_args)
+                         log.debug('dogpile: registering a new region key=`%s` args=%s', namespace_name, region_args)
                      region_meta.dogpile_cache_regions[namespace_name] = new_region

vcsserver/lib/rc_cache/backends.py

0 +123 -149

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
-             import time
-             import errno
+             #import errno
+             import fcntl
+             import functools
              import logging
+             import os
+             import pickle
+             #import time
+             #import gevent
              import msgpack
              import redis
-             from dogpile.cache.api import CachedValue
-             from dogpile.cache.backends import memory as memory_backend
+             flock_org = fcntl.flock
+             from typing import Union
+             from dogpile.cache.api import Deserializer, Serializer
              from dogpile.cache.backends import file as file_backend
+             from dogpile.cache.backends import memory as memory_backend
              from dogpile.cache.backends import redis as redis_backend
-             from dogpile.cache.backends.file import NO_VALUE, compat, FileLock
+             from dogpile.cache.backends.file import FileLock
              from dogpile.cache.util import memoized_property
-             from pyramid.settings import asbool
              from vcsserver.lib.memory_lru_dict import LRUDict, LRUDictDebug
-             from vcsserver.utils import safe_str
+             from vcsserver.str_utils import safe_bytes, safe_str
+             from vcsserver.type_utils import str2bool
              _default_max_size = 1024
                  pickle_values = False
                  def __init__(self, arguments):
-                     max_size = arguments.pop('max_size', _default_max_size)
+                     self.max_size = arguments.pop('max_size', _default_max_size)
                      LRUDictClass = LRUDict
                      if arguments.pop('log_key_count', None):
                          LRUDictClass = LRUDictDebug
-                     arguments['cache_dict'] = LRUDictClass(max_size)
-                     super(LRUMemoryBackend, self).__init__(arguments)
+                     arguments['cache_dict'] = LRUDictClass(self.max_size)
+                     super().__init__(arguments)
+                 def __repr__(self):
+                     return f'{self.__class__}(maxsize=`{self.max_size}`)'
+                 def __str__(self):
+                     return self.__repr__()
                  def delete(self, key):
                      try:
                          # we don't care if key isn't there at deletion
                          pass
+                 def list_keys(self, prefix):
+                     return list(self._cache.keys())
                  def delete_multi(self, keys):
                      for key in keys:
                          self.delete(key)
-             class PickleSerializer(object):
-                 def _dumps(self, value, safe=False):
-                     try:
-                         return compat.pickle.dumps(value)
-                     except Exception:
-                         if safe:
-                             return NO_VALUE
-                         else:
-                             raise
-                 def _loads(self, value, safe=True):
-                     try:
-                         return compat.pickle.loads(value)
-                     except Exception:
-                         if safe:
-                             return NO_VALUE
-                         else:
-                             raise
+                 def delete_multi_by_prefix(self, prefix):
+                     cache_keys = self.list_keys(prefix=prefix)
+                     num_affected_keys = len(cache_keys)
+                     if num_affected_keys:
+                         self.delete_multi(cache_keys)
+                     return num_affected_keys
-             class MsgPackSerializer(object):
-                 def _dumps(self, value, safe=False):
-                     try:
-                         return msgpack.packb(value)
-                     except Exception:
-                         if safe:
-                             return NO_VALUE
-                         else:
-                             raise
-                 def _loads(self, value, safe=True):
-                     """
-                     pickle maintained the `CachedValue` wrapper of the tuple
-                     msgpack does not, so it must be added back in.
-                    """
-                     try:
-                         value = msgpack.unpackb(value, use_list=False)
-                         return CachedValue(*value)
-                     except Exception:
-                         if safe:
-                             return NO_VALUE
-                         else:
-                             raise
+             class PickleSerializer:
+                 serializer: None | Serializer = staticmethod(  # type: ignore
+                     functools.partial(pickle.dumps, protocol=pickle.HIGHEST_PROTOCOL)
+                 )
+                 deserializer: None | Deserializer = staticmethod(  # type: ignore
+                     functools.partial(pickle.loads)
+                 )
-             import fcntl
-             flock_org = fcntl.flock
+             class MsgPackSerializer:
+                 serializer: None | Serializer = staticmethod(  # type: ignore
+                     msgpack.packb
+                 )
+                 deserializer: None | Deserializer = staticmethod(  # type: ignore
+                     functools.partial(msgpack.unpackb, use_list=False)
+                 )
              class CustomLockFactory(FileLock):
                      arguments['lock_factory'] = CustomLockFactory
                      db_file = arguments.get('filename')
-                     log.debug('initialing %s DB in %s', self.__class__.__name__, db_file)
+                     log.debug('initialing cache-backend=%s db in %s', self.__class__.__name__, db_file)
+                     db_file_dir = os.path.dirname(db_file)
+                     if not os.path.isdir(db_file_dir):
+                         os.makedirs(db_file_dir)
                      try:
-                         super(FileNamespaceBackend, self).__init__(arguments)
+                         super().__init__(arguments)
                      except Exception:
-                         log.error('Failed to initialize db at: %s', db_file)
+                         log.exception('Failed to initialize db at: %s', db_file)
                          raise
                  def __repr__(self):
-                     return '{} `{}`'.format(self.__class__, self.filename)
+                     return f'{self.__class__}(file=`{self.filename}`)'
+                 def __str__(self):
+                     return self.__repr__()
-                 def list_keys(self, prefix=''):
-                     prefix = '{}:{}'.format(self.key_prefix, prefix)
+                 def _get_keys_pattern(self, prefix: bytes = b''):
+                     return b'%b:%b' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
-                     def cond(v):
+                 def list_keys(self, prefix: bytes = b''):
+                     prefix = self._get_keys_pattern(prefix)
+                     def cond(dbm_key: bytes):
                          if not prefix:
                              return True
-                         if v.startswith(prefix):
+                         if dbm_key.startswith(prefix):
                              return True
                          return False
                      with self._dbm_file(True) as dbm:
                          try:
-                             return filter(cond, dbm.keys())
+                             return list(filter(cond, dbm.keys()))
                          except Exception:
                              log.error('Failed to fetch DBM keys from DB: %s', self.get_store())
                              raise
+                 def delete_multi_by_prefix(self, prefix):
+                     cache_keys = self.list_keys(prefix=prefix)
+                     num_affected_keys = len(cache_keys)
+                     if num_affected_keys:
+                         self.delete_multi(cache_keys)
+                     return num_affected_keys
                  def get_store(self):
                      return self.filename
-                 def _dbm_get(self, key):
-                     with self._dbm_file(False) as dbm:
-                         if hasattr(dbm, 'get'):
-                             value = dbm.get(key, NO_VALUE)
-                         else:
-                             # gdbm objects lack a .get method
-                             try:
-                                 value = dbm[key]
-                             except KeyError:
-                                 value = NO_VALUE
-                         if value is not NO_VALUE:
-                             value = self._loads(value)
-                         return value
-                 def get(self, key):
-                     try:
-                         return self._dbm_get(key)
-                     except Exception:
-                         log.error('Failed to fetch DBM key %s from DB: %s', key, self.get_store())
-                         raise
-                 def set(self, key, value):
-                     with self._dbm_file(True) as dbm:
-                         dbm[key] = self._dumps(value)
-                 def set_multi(self, mapping):
-                     with self._dbm_file(True) as dbm:
-                         for key, value in mapping.items():
-                             dbm[key] = self._dumps(value)
              class BaseRedisBackend(redis_backend.RedisBackend):
                  key_prefix = ''
                  def __init__(self, arguments):
-                     super(BaseRedisBackend, self).__init__(arguments)
+                     self.db_conn = arguments.get('host', '') or arguments.get('url', '') or 'redis-host'
+                     super().__init__(arguments)
                      self._lock_timeout = self.lock_timeout
-                     self._lock_auto_renewal = asbool(arguments.pop("lock_auto_renewal", True))
+                     self._lock_auto_renewal = str2bool(arguments.pop("lock_auto_renewal", True))
                      if self._lock_auto_renewal and not self._lock_timeout:
                          # set default timeout for auto_renewal
                          self._lock_timeout = 30
+                 def __repr__(self):
+                     return f'{self.__class__}(conn=`{self.db_conn}`)'
+                 def __str__(self):
+                     return self.__repr__()
                  def _create_client(self):
                      args = {}
                          )
                      connection_pool = redis.ConnectionPool(**args)
+                     self.writer_client = redis.StrictRedis(
+                         connection_pool=connection_pool
+                     )
+                     self.reader_client = self.writer_client
-                     return redis.StrictRedis(connection_pool=connection_pool)
+                 def _get_keys_pattern(self, prefix: bytes = b''):
+                     return b'%b:%b*' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
+                 def list_keys(self, prefix: bytes = b''):
+                     prefix = self._get_keys_pattern(prefix)
+                     return self.reader_client.keys(prefix)
-                 def list_keys(self, prefix=''):
-                     prefix = '{}:{}*'.format(self.key_prefix, prefix)
-                     return self.client.keys(prefix)
+                 def delete_multi_by_prefix(self, prefix, use_lua=False):
+                     if use_lua:
+                         # high efficient LUA script to delete ALL keys by prefix...
+                         lua = """local keys = redis.call('keys', ARGV[1])
+                                  for i=1,#keys,5000 do
+                                  redis.call('del', unpack(keys, i, math.min(i+(5000-1), #keys)))
+                                  end
+                                  return #keys"""
+                         num_affected_keys = self.writer_client.eval(
+                             lua,
+,
+                             f"{prefix}*")
+                     else:
+                         cache_keys = self.list_keys(prefix=prefix)
+                         num_affected_keys = len(cache_keys)
+                         if num_affected_keys:
+                             self.delete_multi(cache_keys)
+                     return num_affected_keys
                  def get_store(self):
-                     return self.client.connection_pool
-                 def get(self, key):
-                     value = self.client.get(key)
-                     if value is None:
-                         return NO_VALUE
-                     return self._loads(value)
-                 def get_multi(self, keys):
-                     if not keys:
-                         return []
-                     values = self.client.mget(keys)
-                     loads = self._loads
-                     return [
-                         loads(v) if v is not None else NO_VALUE
-                         for v in values]
-                 def set(self, key, value):
-                     if self.redis_expiration_time:
-                         self.client.setex(key, self.redis_expiration_time,
-                                           self._dumps(value))
-                     else:
-                         self.client.set(key, self._dumps(value))
-                 def set_multi(self, mapping):
-                     dumps = self._dumps
-                     mapping = dict(
-                         (k, dumps(v))
-                         for k, v in mapping.items()
+                     )
-                     if not self.redis_expiration_time:
-                         self.client.mset(mapping)
-                     else:
-                         pipe = self.client.pipeline()
-                         for key, value in mapping.items():
-                             pipe.setex(key, self.redis_expiration_time, value)
-                         pipe.execute()
+                     return self.reader_client.connection_pool
                  def get_mutex(self, key):
                      if self.distributed_lock:
-                         lock_key = redis_backend.u('_lock_{0}').format(safe_str(key))
-                         return get_mutex_lock(self.client, lock_key, self._lock_timeout,
-                                               auto_renewal=self._lock_auto_renewal)
+                         lock_key = f'_lock_{safe_str(key)}'
+                         return get_mutex_lock(
+                             self.writer_client, lock_key,
+                             self._lock_timeout,
+                             auto_renewal=self._lock_auto_renewal
+                         )
                      else:
                          return None
              def get_mutex_lock(client, lock_key, lock_timeout, auto_renewal=False):
-                 import redis_lock
+                 from vcsserver.lib._vendor import redis_lock
-                 class _RedisLockWrapper(object):
+                 class _RedisLockWrapper:
                      """LockWrapper for redis_lock"""
                      @classmethod
                          )
                      def __repr__(self):
-                         return "{}:{}".format(self.__class__.__name__, lock_key)
+                         return f"{self.__class__.__name__}:{lock_key}"
                      def __str__(self):
-                         return "{}:{}".format(self.__class__.__name__, lock_key)
+                         return f"{self.__class__.__name__}:{lock_key}"
                      def __init__(self):
                          self.lock = self.get_lock()

vcsserver/lib/rc_cache/region_meta.py

0 +1 -1

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by

vcsserver/lib/rc_cache/utils.py

0 +89 -107

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
-             import os
-             import time
+             import functools
              import logging
-             import functools
+             import os
+             import threading
+             import time
+             import decorator
              from dogpile.cache import CacheRegion
-             from dogpile.cache.util import compat
-             from vcsserver.utils import safe_str, sha1
+             from vcsserver.utils import sha1
+             from vcsserver.str_utils import safe_bytes
+             from vcsserver.type_utils import str2bool # noqa :required by imports from .utils
-             from vcsserver.lib.rc_cache import region_meta
+             from . import region_meta
              log = logging.getLogger(__name__)
              class RhodeCodeCacheRegion(CacheRegion):
+                 def __repr__(self):
+                     return f'`{self.__class__.__name__}(name={self.name}, backend={self.backend.__class__})`'
                  def conditional_cache_on_arguments(
                          self, namespace=None,
                          expiration_time=None,
                          should_cache_fn=None,
-                         to_str=compat.string_type,
+                         to_str=str,
                          function_key_generator=None,
                          condition=True):
                      """
                      Custom conditional decorator, that will not touch any dogpile internals if
-                     condition isn't meet. This works a bit different than should_cache_fn
+                     condition isn't meet. This works a bit different from should_cache_fn
                      And it's faster in cases we don't ever want to compute cached values
                      """
-                     expiration_time_is_callable = compat.callable(expiration_time)
+                     expiration_time_is_callable = callable(expiration_time)
+                     if not namespace:
+                         namespace = getattr(self, '_default_namespace', None)
                      if function_key_generator is None:
                          function_key_generator = self.function_key_generator
-                     # workaround for py2 and cython problems, this block should be removed
-                     # once we've migrated to py3
-                     if 'cython' == 'cython':
-                         def decorator(fn):
-                             if to_str is compat.string_type:
-                                 # backwards compatible
-                                 key_generator = function_key_generator(namespace, fn)
-                             else:
-                                 key_generator = function_key_generator(namespace, fn, to_str=to_str)
-                             @functools.wraps(fn)
-                             def decorate(*arg, **kw):
-                                 key = key_generator(*arg, **kw)
-                                 @functools.wraps(fn)
-                                 def creator():
-                                     return fn(*arg, **kw)
-                                 if not condition:
-                                     return creator()
-                                 timeout = expiration_time() if expiration_time_is_callable \
-                                     else expiration_time
-                                 return self.get_or_create(key, creator, timeout, should_cache_fn)
-                             def invalidate(*arg, **kw):
-                                 key = key_generator(*arg, **kw)
-                                 self.delete(key)
-                             def set_(value, *arg, **kw):
-                                 key = key_generator(*arg, **kw)
-                                 self.set(key, value)
-                             def get(*arg, **kw):
-                                 key = key_generator(*arg, **kw)
-                                 return self.get(key)
-                             def refresh(*arg, **kw):
-                                 key = key_generator(*arg, **kw)
-                                 value = fn(*arg, **kw)
-                                 self.set(key, value)
-                                 return value
-                             decorate.set = set_
-                             decorate.invalidate = invalidate
-                             decorate.refresh = refresh
-                             decorate.get = get
-                             decorate.original = fn
-                             decorate.key_generator = key_generator
-                             decorate.__wrapped__ = fn
-                             return decorate
-                         return decorator
-                     def get_or_create_for_user_func(key_generator, user_func, *arg, **kw):
+                     def get_or_create_for_user_func(func_key_generator, user_func, *arg, **kw):
                          if not condition:
-                             log.debug('Calling un-cached method:%s', user_func.func_name)
+                             log.debug('Calling un-cached method:%s', user_func.__name__)
                              start = time.time()
                              result = user_func(*arg, **kw)
                              total = time.time() - start
-                             log.debug('un-cached method:%s took %.4fs', user_func.func_name, total)
+                             log.debug('un-cached method:%s took %.4fs', user_func.__name__, total)
                              return result
-                         key = key_generator(*arg, **kw)
+                         key = func_key_generator(*arg, **kw)
                          timeout = expiration_time() if expiration_time_is_callable \
                              else expiration_time
-                         log.debug('Calling cached method:`%s`', user_func.func_name)
+                         log.debug('Calling cached method:`%s`', user_func.__name__)
                          return self.get_or_create(key, user_func, timeout, should_cache_fn, (arg, kw))
                      def cache_decorator(user_func):
-                         if to_str is compat.string_type:
+                         if to_str is str:
                              # backwards compatible
                              key_generator = function_key_generator(namespace, user_func)
                          else:
                          if key.startswith(prefix):
                              name = key.split(prefix)[1].strip()
                              val = settings[key]
-                             if isinstance(val, compat.string_types):
+                             if isinstance(val, str):
                                  val = val.strip()
                              cache_settings[name] = val
                  return cache_settings
                  """
                  Helper to compute key from given params to be used in cache manager
                  """
-                 return sha1("_".join(map(safe_str, args)))
+                 return sha1(safe_bytes("_".join(map(str, args))))
+             def custom_key_generator(backend, namespace, fn):
+                 func_name = fn.__name__
+                 def generate_key(*args):
+                     backend_pref = getattr(backend, 'key_prefix', None) or 'backend_prefix'
+                     namespace_pref = namespace or 'default_namespace'
+                     arg_key = compute_key_from_params(*args)
+                     final_key = f"{backend_pref}:{namespace_pref}:{func_name}_{arg_key}"
+                     return final_key
+                 return generate_key
              def backend_key_generator(backend):
                  Special wrapper that also sends over the backend to the key generator
                  """
                  def wrapper(namespace, fn):
-                     return key_generator(backend, namespace, fn)
+                     return custom_key_generator(backend, namespace, fn)
                  return wrapper
-             def key_generator(backend, namespace, fn):
-                 fname = fn.__name__
+             def get_or_create_region(region_name, region_namespace: str = None, use_async_runner=False):
+                 from .backends import FileNamespaceBackend
+                 from . import async_creation_runner
-                 def generate_key(*args):
-                     backend_prefix = getattr(backend, 'key_prefix', None) or 'backend_prefix'
-                     namespace_pref = namespace or 'default_namespace'
-                     arg_key = compute_key_from_params(*args)
-                     final_key = "{}:{}:{}_{}".format(backend_prefix, namespace_pref, fname, arg_key)
-                     return final_key
-                 return generate_key
-             def get_or_create_region(region_name, region_namespace=None):
-                 from vcsserver.lib.rc_cache.backends import FileNamespaceBackend
                  region_obj = region_meta.dogpile_cache_regions.get(region_name)
                  if not region_obj:
-                     raise EnvironmentError(
-                         'Region `{}` not in configured: {}.'.format(
-                             region_name, region_meta.dogpile_cache_regions.keys()))
+                     reg_keys = list(region_meta.dogpile_cache_regions.keys())
+                     raise OSError(f'Region `{region_name}` not in configured: {reg_keys}.')
+                 region_uid_name = f'{region_name}:{region_namespace}'
-                 region_uid_name = '{}:{}'.format(region_name, region_namespace)
+                 # Special case for ONLY the FileNamespaceBackend backend. We register one-file-per-region
                  if isinstance(region_obj.actual_backend, FileNamespaceBackend):
+                     if not region_namespace:
+                         raise ValueError(f'{FileNamespaceBackend} used requires to specify region_namespace param')
                      region_exist = region_meta.dogpile_cache_regions.get(region_namespace)
                      if region_exist:
                          log.debug('Using already configured region: %s', region_namespace)
                          return region_exist
-                     cache_dir = region_meta.dogpile_config_defaults['cache_dir']
                      expiration_time = region_obj.expiration_time
-                     if not os.path.isdir(cache_dir):
-                         os.makedirs(cache_dir)
+                     cache_dir = region_meta.dogpile_config_defaults['cache_dir']
+                     namespace_cache_dir = cache_dir
+                     # we default the namespace_cache_dir to our default cache dir.
+                     # however, if this backend is configured with filename= param, we prioritize that
+                     # so all caches within that particular region, even those namespaced end up in the same path
+                     if region_obj.actual_backend.filename:
+                         namespace_cache_dir = os.path.dirname(region_obj.actual_backend.filename)
+                     if not os.path.isdir(namespace_cache_dir):
+                         os.makedirs(namespace_cache_dir)
                      new_region = make_region(
                          name=region_uid_name,
                          function_key_generator=backend_key_generator(region_obj.actual_backend)
                      )
                      namespace_filename = os.path.join(
-                         cache_dir, "{}.cache.dbm".format(region_namespace))
+                         namespace_cache_dir, f"{region_name}_{region_namespace}.cache_db")
                      # special type that allows 1db per namespace
                      new_region.configure(
                          backend='dogpile.cache.rc.file_namespace',
                      log.debug('configuring new region: %s', region_uid_name)
                      region_obj = region_meta.dogpile_cache_regions[region_namespace] = new_region
+                 region_obj._default_namespace = region_namespace
+                 if use_async_runner:
+                     region_obj.async_creation_runner = async_creation_runner
                  return region_obj
-             def clear_cache_namespace(cache_region, cache_namespace_uid, invalidate=False):
-                 region = get_or_create_region(cache_region, cache_namespace_uid)
-                 cache_keys = region.backend.list_keys(prefix=cache_namespace_uid)
-                 num_delete_keys = len(cache_keys)
-                 if invalidate:
-                     region.invalidate(hard=False)
-                 else:
-                     if num_delete_keys:
-                         region.delete_multi(cache_keys)
-                 return num_delete_keys
+             def clear_cache_namespace(cache_region: str | RhodeCodeCacheRegion, cache_namespace_uid: str, method: str) -> int:
+                 from . import CLEAR_DELETE, CLEAR_INVALIDATE
+                 if not isinstance(cache_region, RhodeCodeCacheRegion):
+                     cache_region = get_or_create_region(cache_region, cache_namespace_uid)
+                 log.debug('clearing cache region: %s [prefix:%s] with method=%s',
+                           cache_region, cache_namespace_uid, method)
+                 num_affected_keys = 0
+                 if method == CLEAR_INVALIDATE:
+                     # NOTE: The CacheRegion.invalidate() method’s default mode of
+                     # operation is to set a timestamp local to this CacheRegion in this Python process only.
+                     # It does not impact other Python processes or regions as the timestamp is only stored locally in memory.
+                     cache_region.invalidate(hard=True)
+                 if method == CLEAR_DELETE:
+                     num_affected_keys = cache_region.backend.delete_multi_by_prefix(prefix=cache_namespace_uid)
+                 return num_affected_keys

vcsserver/lib/request_counter.py

0 +1 -3

-             # -*- coding: utf-8 -*-
-             # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by

vcsserver/pygrack.py

0 +116 -85

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              import socket
              import logging
-             import simplejson as json
              import dulwich.protocol
+             from dulwich.protocol import CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K
              from webob import Request, Response, exc
+             from vcsserver.lib.rc_json import json
              from vcsserver import hooks, subprocessio
+             from vcsserver.str_utils import ascii_bytes
              log = logging.getLogger(__name__)
-             class FileWrapper(object):
+             class FileWrapper:
                  """File wrapper that ensures how much data is read from it."""
                  def __init__(self, fd, content_length):
                      return data
                  def __repr__(self):
-                     return '<FileWrapper %s len: %s, read: %s>' % (
+                     return '<FileWrapper {} len: {}, read: {}>'.format(
                          self.fd, self.content_length, self.content_length - self.remain
                      )
-             class GitRepository(object):
+             class GitRepository:
                  """WSGI app for handling Git smart protocol endpoints."""
-                 git_folder_signature = frozenset(
-                     ('config', 'head', 'info', 'objects', 'refs'))
+                 git_folder_signature = frozenset(('config', 'head', 'info', 'objects', 'refs'))
                  commands = frozenset(('git-upload-pack', 'git-receive-pack'))
-                 valid_accepts = frozenset(('application/x-%s-result' %
-                                            c for c in commands))
+                 valid_accepts = frozenset(f'application/x-{c}-result' for c in commands)
                  # The last bytes are the SHA1 of the first 12 bytes.
                  EMPTY_PACK = (
-                     'PACK\x00\x00\x00\x02\x00\x00\x00\x00' +
-                     '\x02\x9d\x08\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
+                     b'PACK\x00\x00\x00\x02\x00\x00\x00\x00\x02\x9d\x08' +
+                     b'\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
                  )
-                 SIDE_BAND_CAPS = frozenset(('side-band', 'side-band-64k'))
+                 FLUSH_PACKET = b"0000"
-                 def __init__(self, repo_name, content_path, git_path, update_server_info,
-                              extras):
+                 SIDE_BAND_CAPS = frozenset((CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K))
+                 def __init__(self, repo_name, content_path, git_path, update_server_info, extras):
                      files = frozenset(f.lower() for f in os.listdir(content_path))
                      valid_dir_signature = self.git_folder_signature.issubset(files)
                      if not valid_dir_signature:
-                         raise OSError('%s missing git signature' % content_path)
+                         raise OSError(f'{content_path} missing git signature')
                      self.content_path = content_path
                      self.repo_name = repo_name
                      # blows up if you sprinkle "flush" (0000) as "0001\n".
                      # It reads binary, per number of bytes specified.
                      # if you do add '\n' as part of data, count it.
-                     server_advert = '# service=%s\n' % git_command
-                     packet_len = str(hex(len(server_advert) + 4)[2:].rjust(4, '0')).lower()
+                     server_advert = f'# service={git_command}\n'
+                     packet_len = hex(len(server_advert) + 4)[2:].rjust(4, '0').lower()
                      try:
                          gitenv = dict(os.environ)
                          # forget all configs
                          out = subprocessio.SubprocessIOChunker(
                              command,
                              env=gitenv,
-                             starting_values=[packet_len + server_advert + '0000'],
+                             starting_values=[ascii_bytes(packet_len + server_advert) + self.FLUSH_PACKET],
                              shell=False
                          )
-                     except EnvironmentError:
+                     except OSError:
                          log.exception('Error processing command')
                          raise exc.HTTPExpectationFailed()
                      resp = Response()
-                     resp.content_type = 'application/x-%s-advertisement' % str(git_command)
+                     resp.content_type = f'application/x-{git_command}-advertisement'
                      resp.charset = None
                      resp.app_iter = out
                      We also print in the error output a message explaining why the command
                      was aborted.
-                     If aditionally, the user is accepting messages we send them the output
+                     If additionally, the user is accepting messages we send them the output
                      of the pre-pull hook.
                      Note that for clients not supporting side-band we just send them the
                      emtpy PACK file.
                      """
                      if self.SIDE_BAND_CAPS.intersection(capabilities):
                          response = []
                          proto = dulwich.protocol.Protocol(None, response.append)
-                         proto.write_pkt_line('NAK\n')
-                         self._write_sideband_to_proto(pre_pull_messages, proto,
-                                                       capabilities)
+                         proto.write_pkt_line(dulwich.protocol.NAK_LINE)
+                         self._write_sideband_to_proto(proto, ascii_bytes(pre_pull_messages, allow_bytes=True), capabilities)
                          # N.B.(skreft): Do not change the sideband channel to 3, as that
                          # produces a fatal error in the client:
                          #   fatal: error in sideband demultiplexer
-                         proto.write_sideband(2, 'Pre pull hook failed: aborting\n')
-                         proto.write_sideband(1, self.EMPTY_PACK)
+                         proto.write_sideband(
+                             dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS,
+                             ascii_bytes('Pre pull hook failed: aborting\n', allow_bytes=True))
+                         proto.write_sideband(
+                             dulwich.protocol.SIDE_BAND_CHANNEL_DATA,
+                             ascii_bytes(self.EMPTY_PACK, allow_bytes=True))
-                         # writes 0000
+                         # writes b"0000" as default
                          proto.write_pkt_line(None)
                          return response
                      else:
-                         return [self.EMPTY_PACK]
+                         return [ascii_bytes(self.EMPTY_PACK, allow_bytes=True)]
+                 def _build_post_pull_response(self, response, capabilities, start_message, end_message):
+                     """
+                     Given a list response we inject the post-pull messages.
+                     We only inject the messages if the client supports sideband, and the
+                     response has the format:
+                         0008NAK\n...0000
+                     Note that we do not check the no-progress capability as by default, git
+                     sends it, which effectively would block all messages.
+                     """
+                     if not self.SIDE_BAND_CAPS.intersection(capabilities):
+                         return response
+                     if not start_message and not end_message:
+                         return response
+                     try:
+                         iter(response)
+                         # iterator probably will work, we continue
+                     except TypeError:
+                         raise TypeError(f'response must be an iterator: got {type(response)}')
+                     if isinstance(response, (list, tuple)):
+                         raise TypeError(f'response must be an iterator: got {type(response)}')
+                     def injected_response():
-                 def _write_sideband_to_proto(self, data, proto, capabilities):
+                         do_loop = 1
+                         header_injected = 0
+                         next_item = None
+                         has_item = False
+                         item = b''
+                         while do_loop:
+                             try:
+                                 next_item = next(response)
+                             except StopIteration:
+                                 do_loop = 0
+                             if has_item:
+                                 # last item ! alter it now
+                                 if do_loop == 0 and item.endswith(self.FLUSH_PACKET):
+                                     new_response = [item[:-4]]
+                                     new_response.extend(self._get_messages(end_message, capabilities))
+                                     new_response.append(self.FLUSH_PACKET)
+                                     item = b''.join(new_response)
+                                 yield item
+                             has_item = True
+                             item = next_item
+                             # alter item if it's the initial chunk
+                             if not header_injected and item.startswith(b'0008NAK\n'):
+                                 new_response = [b'0008NAK\n']
+                                 new_response.extend(self._get_messages(start_message, capabilities))
+                                 new_response.append(item[8:])
+                                 item = b''.join(new_response)
+                                 header_injected = 1
+                     return injected_response()
+                 def _write_sideband_to_proto(self, proto, data, capabilities):
                      """
-                     Write the data to the proto's sideband number 2.
+                     Write the data to the proto's sideband number 2 == SIDE_BAND_CHANNEL_PROGRESS
                      We do not use dulwich's write_sideband directly as it only supports
                      side-band-64k.
                      # N.B.(skreft): The values below are explained in the pack protocol
                      # documentation, section Packfile Data.
                      # https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt
-                     if 'side-band-64k' in capabilities:
+                     if CAPABILITY_SIDE_BAND_64K in capabilities:
                          chunk_size = 65515
-                     elif 'side-band' in capabilities:
+                     elif CAPABILITY_SIDE_BAND in capabilities:
                          chunk_size = 995
                      else:
                          return
-                     chunker = (
-                         data[i:i + chunk_size] for i in xrange(0, len(data), chunk_size))
+                     chunker = (data[i:i + chunk_size] for i in range(0, len(data), chunk_size))
                      for chunk in chunker:
-                         proto.write_sideband(2, chunk)
+                         proto.write_sideband(dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS, ascii_bytes(chunk, allow_bytes=True))
                  def _get_messages(self, data, capabilities):
                      """Return a list with packets for sending data in sideband number 2."""
                      response = []
                      proto = dulwich.protocol.Protocol(None, response.append)
-                     self._write_sideband_to_proto(data, proto, capabilities)
+                     self._write_sideband_to_proto(proto, data, capabilities)
                      return response
-                 def _inject_messages_to_response(self, response, capabilities,
-                                                  start_messages, end_messages):
-                     """
-                     Given a list response we inject the pre/post-pull messages.
-                     We only inject the messages if the client supports sideband, and the
-                     response has the format:
-                         0008NAK\n...0000
-                     Note that we do not check the no-progress capability as by default, git
-                     sends it, which effectively would block all messages.
-                     """
-                     if not self.SIDE_BAND_CAPS.intersection(capabilities):
-                         return response
-                     if not start_messages and not end_messages:
-                         return response
-                     # make a list out of response if it's an iterator
-                     # so we can investigate it for message injection.
-                     if hasattr(response, '__iter__'):
-                         response = list(response)
-                     if (not response[0].startswith('0008NAK\n') or
-                             not response[-1].endswith('0000')):
-                         return response
-                     new_response = ['0008NAK\n']
-                     new_response.extend(self._get_messages(start_messages, capabilities))
-                     if len(response) == 1:
-                         new_response.append(response[0][8:-4])
-                     else:
-                         new_response.append(response[0][8:])
-                         new_response.extend(response[1:-1])
-                         new_response.append(response[-1][:-4])
-                     new_response.extend(self._get_messages(end_messages, capabilities))
-                     new_response.append('0000')
-                     return new_response
                  def backend(self, request, environ):
                      """
                      WSGI Response producer for HTTP POST Git Smart HTTP requests.
                          inputstream = request.body_file_seekable
                      resp = Response()
-                     resp.content_type = ('application/x-%s-result' %
-                                          git_command.encode('utf8'))
+                     resp.content_type = f'application/x-{git_command}-result'
                      resp.charset = None
                      pre_pull_messages = ''
+                     # Upload-pack == clone
                      if git_command == 'git-upload-pack':
-                         status, pre_pull_messages = hooks.git_pre_pull(self.extras)
-                         if status != 0:
+                         hook_response = hooks.git_pre_pull(self.extras)
+                         if hook_response.status != 0:
+                             pre_pull_messages = hook_response.output
                              resp.app_iter = self._build_failed_pre_pull_response(
                                  capabilities, pre_pull_messages)
                              return resp
                      out = subprocessio.SubprocessIOChunker(
                          cmd,
-                         inputstream=inputstream,
+                         input_stream=inputstream,
                          env=gitenv,
                          cwd=self.content_path,
                          shell=False,
                          log.debug('handling cmd %s', cmd)
                          output = subprocessio.SubprocessIOChunker(
                              cmd,
-                             inputstream=inputstream,
+                             input_stream=inputstream,
                              env=gitenv,
                              cwd=self.content_path,
                              shell=False,
                          for _ in output:
                              pass
+                     # Upload-pack == clone
                      if git_command == 'git-upload-pack':
-                         unused_status, post_pull_messages = hooks.git_post_pull(self.extras)
-                         resp.app_iter = self._inject_messages_to_response(
-                             out, capabilities, pre_pull_messages, post_pull_messages)
+                         hook_response = hooks.git_post_pull(self.extras)
+                         post_pull_messages = hook_response.output
+                         resp.app_iter = self._build_post_pull_response(out, capabilities, pre_pull_messages, post_pull_messages)
                      else:
                          resp.app_iter = out

vcsserver/remote/git_remote.py ~~vcsserver/git.py~~

0 renamed +512 -275

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              import collections
              import logging
              import os
-             import posixpath as vcspath
              import re
              import stat
              import traceback
-             import urllib
-             import urllib2
+             import urllib.request
+             import urllib.parse
+             import urllib.error
              from functools import wraps
              import more_itertools
              from pygit2 import Repository as LibGit2Repo
              from pygit2 import index as LibGit2Index
              from dulwich import index, objects
-             from dulwich.client import HttpGitClient, LocalGitClient
+             from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
              from dulwich.errors import (
                  NotGitRepository, ChecksumMismatch, WrongObjectException,
                  MissingCommitError, ObjectMissing, HangupException,
                  UnexpectedCommandError)
              from dulwich.repo import Repo as DulwichRepo
-             from dulwich.server import update_server_info
+             import rhodecode
              from vcsserver import exceptions, settings, subprocessio
-             from vcsserver.utils import safe_str, safe_int, safe_unicode
-             from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo
+             from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str
+             from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
              from vcsserver.hgcompat import (
                  hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
              from vcsserver.git_lfs.lib import LFSOidStore
              DIR_STAT = stat.S_IFDIR
              FILE_MODE = stat.S_IFMT
              GIT_LINK = objects.S_IFGITLINK
-             PEELED_REF_MARKER = '^{}'
+             PEELED_REF_MARKER = b'^{}'
+             HEAD_MARKER = b'HEAD'
              log = logging.getLogger(__name__)
-             def str_to_dulwich(value):
-                 """
-                 Dulwich 0.10.1a requires `unicode` objects to be passed in.
-                 """
-                 return value.decode(settings.WIRE_ENCODING)
              def reraise_safe_exceptions(func):
                  """Converts Dulwich exceptions to something neutral."""
                      except (HangupException, UnexpectedCommandError) as e:
                          exc = exceptions.VcsException(org_exc=e)
                          raise exc(safe_str(e))
-                     except Exception as e:
-                         # NOTE(marcink): becuase of how dulwich handles some exceptions
+                     except Exception:
+                         # NOTE(marcink): because of how dulwich handles some exceptions
                          # (KeyError on empty repos), we cannot track this and catch all
                          # exceptions, it's an exceptions from other handlers
                          #if not hasattr(e, '_vcs_kind'):
                  def _create_repo(self, wire, create, use_libgit2=False):
                      if use_libgit2:
-                         return Repository(wire['path'])
+                         repo = Repository(safe_bytes(wire['path']))
                      else:
-                         repo_path = str_to_dulwich(wire['path'])
-                         return Repo(repo_path)
+                         # dulwich mode
+                         repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
+                         repo = Repo(repo_path)
+                     log.debug('repository created: got GIT object: %s', repo)
+                     return repo
                  def repo(self, wire, create=False, use_libgit2=False):
                      """
                      return self.repo(wire, use_libgit2=True)
+             def create_signature_from_string(author_str, **kwargs):
+                 """
+                 Build a pygit2.Signature out of an identity string such as 'Name <email>'.
+                 :param author_str: identity in the format 'Name <email>'
+                 :param kwargs: extra keyword arguments handed to pygit2.Signature unchanged
+                 :return: pygit2.Signature object
+                 :raises ValueError: when author_str is not in the 'Name <email>' format
+                 """
+                 parsed = re.match(r'^(.+) <(.+)>$', author_str)
+                 if parsed is not None:
+                     name, email = parsed.groups()
+                     return pygit2.Signature(name, email, **kwargs)
+                 raise ValueError(f"Invalid format: {author_str}")
+             def get_obfuscated_url(url_obj):
+                 """
+                 Return the string form of `url_obj` with its credentials masked, for use in log messages.
+                 The object is modified in place: a non-empty password is replaced by a
+                 fixed mask and the query string is passed through obfuscate_qs.
+                 """
+                 if url_obj.passwd:
+                     url_obj.passwd = b'*****'
+                 url_obj.query = obfuscate_qs(url_obj.query)
+                 return str(url_obj)
              class GitRemote(RemoteBase):
                  def __init__(self, factory):
                          "parents": self.parents,
                          "_commit": self.revision,
                      }
+                     self._bulk_file_methods = {
+                         "size": self.get_node_size,
+                         "data": self.get_node_data,
+                         "flags": self.get_node_flags,
+                         "is_binary": self.get_node_is_binary,
+                         "md5": self.md5_hash
+                     }
                  def _wire_to_config(self, wire):
                      if 'config' in wire:
-                         return dict([(x[0] + '_' + x[1], x[2]) for x in wire['config']])
+                         return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
                      return {}
                  def _remote_conf(self, config):
                      params = [
                          '-c', 'core.askpass=""',
                      ]
-                     ssl_cert_dir = config.get('vcs_ssl_dir')
-                     if ssl_cert_dir:
-                         params.extend(['-c', 'http.sslCAinfo={}'.format(ssl_cert_dir)])
+                     config_attrs = {
+                         'vcs_ssl_dir': 'http.sslCAinfo={}',
+                         'vcs_git_lfs_store_location': 'lfs.storage={}'
+                     }
+                     for key, param in config_attrs.items():
+                         if value := config.get(key):
+                             params.extend(['-c', param.format(value)])
                      return params
                  @reraise_safe_exceptions
                  def discover_git_version(self):
                      stdout, _ = self.run_git_command(
                          {}, ['--version'], _bare=True, _safe=True)
-                     prefix = 'git version'
+                     prefix = b'git version'
                      if stdout.startswith(prefix):
                          stdout = stdout[len(prefix):]
-                     return stdout.strip()
+                     return safe_str(stdout.strip())
                  @reraise_safe_exceptions
                  def is_empty(self, wire):
                      repo_init = self._factory.repo_libgit2(wire)
                      with repo_init as repo:
                          try:
                              has_head = repo.head.name
                              if has_head:
                  def assert_correct_path(self, wire):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
-                     def _assert_correct_path(_context_uid, _repo_id):
-                         try:
-                             repo_init = self._factory.repo_libgit2(wire)
-                             with repo_init as repo:
-                                 pass
-                         except pygit2.GitError:
-                             path = wire.get('path')
-                             tb = traceback.format_exc()
-                             log.debug("Invalid Git path `%s`, tb: %s", path, tb)
+                     def _assert_correct_path(_context_uid, _repo_id, fast_check):
+                         if fast_check:
+                             path = safe_str(wire['path'])
+                             if pygit2.discover_repository(path):
+                                 return True
                              return False
+                         else:
+                             try:
+                                 repo_init = self._factory.repo_libgit2(wire)
+                                 with repo_init:
+                                     pass
+                             except pygit2.GitError:
+                                 path = wire.get('path')
+                                 tb = traceback.format_exc()
+                                 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
+                                 return False
+                             return True
-                         return True
-                     return _assert_correct_path(context_uid, repo_id)
+                     return _assert_correct_path(context_uid, repo_id, True)
                  @reraise_safe_exceptions
                  def bare(self, wire):
                          return repo.is_bare
                  @reraise_safe_exceptions
+                 def get_node_data(self, wire, commit_id, path):
+                     """
+                     Return the content of the blob found at `path` in commit `commit_id`,
+                     wrapped in a BytesEnvelope.
+                     Raises LookupException when the entry at `path` is not a blob.
+                     """
+                     with self._factory.repo_libgit2(wire) as repo:
+                         node = repo[commit_id].tree[path]
+                         if node.type == pygit2.GIT_OBJ_BLOB:
+                             return BytesEnvelope(node.data)
+                         raise exceptions.LookupException()(
+                             f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
+                 @reraise_safe_exceptions
+                 def get_node_size(self, wire, commit_id, path):
+                     """
+                     Return the `size` attribute of the blob found at `path` in commit `commit_id`.
+                     Raises LookupException when the entry at `path` is not a blob.
+                     """
+                     with self._factory.repo_libgit2(wire) as repo:
+                         node = repo[commit_id].tree[path]
+                         if node.type == pygit2.GIT_OBJ_BLOB:
+                             return node.size
+                         raise exceptions.LookupException()(
+                             f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
+                 @reraise_safe_exceptions
+                 def get_node_flags(self, wire, commit_id, path):
+                     """
+                     Return the `filemode` attribute of the blob found at `path` in commit `commit_id`.
+                     Raises LookupException when the entry at `path` is not a blob.
+                     """
+                     with self._factory.repo_libgit2(wire) as repo:
+                         node = repo[commit_id].tree[path]
+                         if node.type == pygit2.GIT_OBJ_BLOB:
+                             return node.filemode
+                         raise exceptions.LookupException()(
+                             f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
+                 @reraise_safe_exceptions
+                 def get_node_is_binary(self, wire, commit_id, path):
+                     """
+                     Return the `is_binary` attribute of the blob found at `path` in commit `commit_id`.
+                     Raises LookupException when the entry at `path` is not a blob.
+                     """
+                     with self._factory.repo_libgit2(wire) as repo:
+                         node = repo[commit_id].tree[path]
+                         if node.type == pygit2.GIT_OBJ_BLOB:
+                             return node.is_binary
+                         raise exceptions.LookupException()(
+                             f'Tree for commit_id:{commit_id} is not a blob: {node.type_str}')
+                 @reraise_safe_exceptions
                  def blob_as_pretty_string(self, wire, sha):
                      repo_init = self._factory.repo_libgit2(wire)
                      with repo_init as repo:
                          blob_obj = repo[sha]
-                         blob = blob_obj.data
-                         return blob
+                         return BytesEnvelope(blob_obj.data)
                  @reraise_safe_exceptions
                  def blob_raw_length(self, wire, sha):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _blob_raw_length(_repo_id, _sha):
                      return _blob_raw_length(repo_id, sha)
                  def _parse_lfs_pointer(self, raw_content):
+                     spec_string = b'version https://git-lfs.github.com/spec'
+                     if raw_content and raw_content.startswith(spec_string):
-                     spec_string = 'version https://git-lfs.github.com/spec'
-                     if raw_content and raw_content.startswith(spec_string):
-                         pattern = re.compile(r"""
+                         pattern = re.compile(rb"""
                          (?:\n)?
                          ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
                          ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
                  @reraise_safe_exceptions
                  def is_large_file(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
+                     region = self._region(wire)
-                     region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _is_large_file(_repo_id, _sha):
                          repo_init = self._factory.repo_libgit2(wire)
                  @reraise_safe_exceptions
                  def is_binary(self, wire, tree_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
+                     region = self._region(wire)
-                     region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _is_binary(_repo_id, _tree_id):
                          repo_init = self._factory.repo_libgit2(wire)
                      return _is_binary(repo_id, tree_id)
                  @reraise_safe_exceptions
+                 def md5_hash(self, wire, commit_id, path):
+                     """
+                     Checks that `path` in commit `commit_id` is a blob and returns an empty string.
+                     Raises LookupException when the entry at `path` is not a blob.
+                     """
+                     cache_on, context_uid, repo_id = self._cache_on(wire)
+                     region = self._region(wire)
+                     @region.conditional_cache_on_arguments(condition=cache_on)
+                     def _md5_hash(_repo_id, _commit_id, _path):
+                         with self._factory.repo_libgit2(wire) as repo:
+                             node = repo[_commit_id].tree[_path]
+                             if node.type == pygit2.GIT_OBJ_BLOB:
+                                 # NOTE(review): no digest is computed, every blob yields '' - confirm this is intended
+                                 return ''
+                             raise exceptions.LookupException()(
+                                 f'Tree for commit_id:{_commit_id} is not a blob: {node.type_str}')
+                     return _md5_hash(repo_id, commit_id, path)
+                 @reraise_safe_exceptions
                  def in_largefiles_store(self, wire, oid):
                      conf = self._wire_to_config(wire)
                      repo_init = self._factory.repo_libgit2(wire)
                          store = LFSOidStore(
                              oid=oid, repo=repo_name, store_location=store_location)
                          return store.oid_path
-                     raise ValueError('Unable to fetch oid with path {}'.format(oid))
+                     raise ValueError(f'Unable to fetch oid with path {oid}')
                  @reraise_safe_exceptions
                  def bulk_request(self, wire, rev, pre_load):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _bulk_request(_repo_id, _rev, _pre_load):
                          result = {}
                          for attr in pre_load:
                              try:
                                  method = self._bulk_methods[attr]
+                                 wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
                                  args = [wire, rev]
                                  result[attr] = method(*args)
                              except KeyError as e:
-                                 raise exceptions.VcsException(e)(
-                                     "Unknown bulk attribute: %s" % attr)
+                                 raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
                          return result
                      return _bulk_request(repo_id, rev, sorted(pre_load))
-                 def _build_opener(self, url):
+                 @reraise_safe_exceptions
+                 def bulk_file_request(self, wire, commit_id, path, pre_load):
+                     """
+                     Collect several attributes of one file node in a single call.
+                     Every name in `pre_load` is looked up in self._bulk_file_methods and called with
+                     (wire, commit_id, path); the results are returned as a dict keyed by attribute
+                     name and wrapped in a BinaryEnvelope.
+                     A KeyError raised inside the loop is re-raised as VcsException.
+                     """
+                     cache_on, context_uid, repo_id = self._cache_on(wire)
+                     region = self._region(wire)
+                     @region.conditional_cache_on_arguments(condition=cache_on)
+                     def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
+                         collected = {}
+                         # iterates the caller supplied `pre_load`; the sorted `_pre_load` argument
+                         # is not read here (presumably it only normalizes the cache arguments)
+                         for attr in pre_load:
+                             try:
+                                 loader = self._bulk_file_methods[attr]
+                                 wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
+                                 collected[attr] = loader(wire, _commit_id, _path)
+                             except KeyError as exc:
+                                 raise exceptions.VcsException(exc)(f'Unknown bulk attribute: "{attr}"')
+                         return collected
+                     payload = _bulk_file_request(repo_id, commit_id, path, sorted(pre_load))
+                     return BinaryEnvelope(payload)
+                 def _build_opener(self, url: str):
                      handlers = []
-                     url_obj = url_parser(url)
-                     _, authinfo = url_obj.authinfo()
+                     url_obj = url_parser(safe_bytes(url))
+                     authinfo = url_obj.authinfo()[1]
                      if authinfo:
                          # create a password manager
-                         passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
-                         passmgr.add_password(*authinfo)
+                         passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
+                         passmgr.add_password(*convert_to_str(authinfo))
                          handlers.extend((httpbasicauthhandler(passmgr),
                                           httpdigestauthhandler(passmgr)))
-                     return urllib2.build_opener(*handlers)
-                 def _type_id_to_name(self, type_id):
-                     return {
-: b'commit',
-: b'tree',
-: b'blob',
-: b'tag'
-                     }[type_id]
+                     return urllib.request.build_opener(*handlers)
                  @reraise_safe_exceptions
                  def check_url(self, url, config):
+                     """
+                     Check that `url` answers like a git repository served over HTTP.
+                     Requests `<url>/info/refs?service=git-upload-pack` and expects a 200 response
+                     whose body either contains `service=git-upload-pack` or a 40 hex-digit id
+                     followed by `refs`. Credentials are obfuscated in every log and error message.
+                     :param url: remote URL, may carry credentials
+                     :param config: not read by this method
+                     :return: True when the remote looks like a git repository
+                     :raises URLError: when the URL cannot be opened or the response is not git-like
+                     """
-                     url_obj = url_parser(url)
-                     test_uri, _ = url_obj.authinfo()
-                     url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
-                     url_obj.query = obfuscate_qs(url_obj.query)
-                     cleaned_uri = str(url_obj)
-                     log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
+                     url_obj = url_parser(safe_bytes(url))
+                     test_uri = safe_str(url_obj.authinfo()[0])
+                     obfuscated_uri = get_obfuscated_url(url_obj)
+                     log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
                      if not test_uri.endswith('info/refs'):
                          test_uri = test_uri.rstrip('/') + '/info/refs'
-                     o = self._build_opener(url)
+                     o = self._build_opener(url=url)
                      o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git
                      q = {"service": 'git-upload-pack'}
-                     qs = '?%s' % urllib.urlencode(q)
-                     cu = "%s%s" % (test_uri, qs)
-                     req = urllib2.Request(cu, None, {})
+                     qs = f'?{urllib.parse.urlencode(q)}'
+                     cu = f"{test_uri}{qs}"
                      try:
-                         log.debug("Trying to open URL %s", cleaned_uri)
+                         req = urllib.request.Request(cu, None, {})
+                         log.debug("Trying to open URL %s", obfuscated_uri)
                          resp = o.open(req)
                          if resp.code != 200:
                              raise exceptions.URLError()('Return Code is not 200')
                      except Exception as e:
-                         log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
+                         log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
                          # means it cannot be cloned
-                         raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))
+                         raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")
                      # now detect if it's proper git repo
-                     gitdata = resp.read()
-                     if 'service=git-upload-pack' in gitdata:
+                     gitdata: bytes = resp.read()
+                     if b'service=git-upload-pack' in gitdata:
                          pass
-                     elif re.findall(r'[0-9a-fA-F]{40}\s+refs', gitdata):
-                         # old style git can return some other format !
+                     elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
+                         # old style git can return some other format!
                          pass
                      else:
-                         raise exceptions.URLError()(
-                             "url [%s] does not look like an git" % (cleaned_uri,))
+                         # no underlying exception exists on this path, so none is attached or reported
+                         raise exceptions.URLError()(
+                             f"url [{obfuscated_uri}] does not look like a git repo")
                      return True
                  def branch(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _branch(_context_uid, _repo_id, _commit_id):
                          regex = re.compile('^refs/heads')
                          def filter_with(ref):
                              return regex.match(ref[0]) and ref[1] == _commit_id
-                         branches = filter(filter_with, self.get_refs(wire).items())
+                         branches = list(filter(filter_with, list(self.get_refs(wire).items())))
                          return [x[0].split('refs/heads/')[-1] for x in branches]
                      return _branch(context_uid, repo_id, commit_id)
                  def commit_branches(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _commit_branches(_context_uid, _repo_id, _commit_id):
                          repo_init = self._factory.repo_libgit2(wire)
                          repo.object_store.add_object(blob)
                          return blob.id
-                 # TODO: this is quite complex, check if that can be simplified
+                 @reraise_safe_exceptions
+                 def create_commit(self, wire, author, committer, message, branch, new_tree_id,
+                                   date_args: list[int] | None = None,
+                                   parents: list | None = None):
+                     """
+                     Create a commit for an already written tree and update `refs/heads/{branch}` to it.
+                     :param author: string of the format 'Name <email>'
+                     :param committer: string of the format 'Name <email>'
+                     :param message: the commit message
+                     :param branch: short branch name of the reference to update
+                     :param new_tree_id: id of the tree the commit points to
+                     :param date_args: optional `[time, offset]` pair used for both signatures
+                     :param parents: optional list of parent commit ids; when empty the head of
+                         `branch` is used, or HEAD when the repo has other local branches
+                     :return: id of the new commit as a string
+                     """
+                     repo_init = self._factory.repo_libgit2(wire)
+                     with repo_init as repo:
+                         # must exist even without date_args, otherwise the signature calls
+                         # below fail on an unbound name; empty means pygit2 defaults are used
+                         kw = {}
+                         if date_args:
+                             current_time, offset = date_args
+                             kw = {
+                                 'time': current_time,
+                                 'offset': offset
+                             }
+                         author = create_signature_from_string(author, **kw)
+                         committer = create_signature_from_string(committer, **kw)
+                         tree = new_tree_id
+                         if isinstance(tree, (bytes, str)):
+                             # validate this tree is in the repo...
+                             tree = repo[safe_str(tree)].id
+                         if parents:
+                             # run via sha's and validate them in repo
+                             parents = [repo[c].id for c in parents]
+                         else:
+                             parents = []
+                             # ensure we COMMIT on top of given branch head
+                             # check if this repo has ANY branches, otherwise it's a new branch case we need to make
+                             if branch in repo.branches.local:
+                                 parents += [repo.branches[branch].target]
+                             elif list(repo.branches.local):
+                                 parents += [repo.head.target]
+                             # with no local branches at all the parent list stays empty
+                         commit_oid = repo.create_commit(
+                             f'refs/heads/{branch}',  # the name of the reference to update
+                             author,  # the author of the commit
+                             committer,  # the committer of the commit
+                             message,  # the commit message
+                             tree,  # the tree produced by the index
+                             parents  # list of parents for the new commit, usually just one,
+                         )
+                         return safe_str(commit_oid)
                  @reraise_safe_exceptions
                  def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
-                     # Defines the root tree
-                     class _Root(object):
-                         def __repr__(self):
-                             return 'ROOT TREE'
-                     ROOT = _Root()
-                     repo = self._factory.repo(wire)
-                     object_store = repo.object_store
-                     # Create tree and populates it with blobs
-                     if commit_tree and repo[commit_tree]:
-                         git_commit = repo[commit_data['parents'][0]]
-                         commit_tree = repo[git_commit.tree]  # root tree
-                     else:
-                         commit_tree = objects.Tree()
-                     for node in updated:
-                         # Compute subdirs if needed
-                         dirpath, nodename = vcspath.split(node['path'])
-                         dirnames = map(safe_str, dirpath and dirpath.split('/') or [])
-                         parent = commit_tree
-                         ancestors = [('', parent)]
+                     def mode2pygit(mode):
+                         """
+                         git only supports two filemode 644 and 755
-                         # Tries to dig for the deepest existing tree
-                         while dirnames:
-                             curdir = dirnames.pop(0)
-                             try:
-                                 dir_id = parent[curdir][1]
-                             except KeyError:
-                                 # put curdir back into dirnames and stops
-                                 dirnames.insert(0, curdir)
-                                 break
-                             else:
-                                 # If found, updates parent
-                                 parent = repo[dir_id]
-                                 ancestors.append((curdir, parent))
-                         # Now parent is deepest existing tree and we need to create
-                         # subtrees for dirnames (in reverse order)
-                         # [this only applies for nodes from added]
-                         new_trees = []
+o100755 -> 33261
+o100644 -> 33188
+                         """
+                         return {
+o100644: pygit2.GIT_FILEMODE_BLOB,
+o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
+o120000: pygit2.GIT_FILEMODE_LINK
+                         }.get(mode) or pygit2.GIT_FILEMODE_BLOB
-                         blob = objects.Blob.from_string(node['content'])
+                     repo_init = self._factory.repo_libgit2(wire)
+                     with repo_init as repo:
+                         repo_index = repo.index
-                         if dirnames:
-                             # If there are trees which should be created we need to build
-                             # them now (in reverse order)
-                             reversed_dirnames = list(reversed(dirnames))
-                             curtree = objects.Tree()
-                             curtree[node['node_path']] = node['mode'], blob.id
-                             new_trees.append(curtree)
-                             for dirname in reversed_dirnames[:-1]:
-                                 newtree = objects.Tree()
-                                 newtree[dirname] = (DIR_STAT, curtree.id)
-                                 new_trees.append(newtree)
-                                 curtree = newtree
-                             parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
-                         else:
-                             parent.add(name=node['node_path'], mode=node['mode'], hexsha=blob.id)
+                         commit_parents = None
+                         if commit_tree and commit_data['parents']:
+                             commit_parents = commit_data['parents']
+                             parent_commit = repo[commit_parents[0]]
+                             repo_index.read_tree(parent_commit.tree)
-                         new_trees.append(parent)
-                         # Update ancestors
-                         reversed_ancestors = reversed(
-                             [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
-                         for parent, tree, path in reversed_ancestors:
-                             parent[path] = (DIR_STAT, tree.id)
-                             object_store.add_object(tree)
+                         for pathspec in updated:
+                             blob_id = repo.create_blob(pathspec['content'])
+                             ie = pygit2.IndexEntry(pathspec['path'], blob_id, mode2pygit(pathspec['mode']))
+                             repo_index.add(ie)
+                         for pathspec in removed:
+                             repo_index.remove(pathspec)
-                         object_store.add_object(blob)
-                         for tree in new_trees:
-                             object_store.add_object(tree)
+                         # Write changes to the index
+                         repo_index.write()
+                         # Create a tree from the updated index
+                         written_commit_tree = repo_index.write_tree()
+                     new_tree_id = written_commit_tree
-                     for node_path in removed:
-                         paths = node_path.split('/')
-                         tree = commit_tree  # start with top-level
-                         trees = [{'tree': tree, 'path': ROOT}]
-                         # Traverse deep into the forest...
-                         # resolve final tree by iterating the path.
-                         # e.g a/b/c.txt will get
-                         # - root as tree then
-                         # - 'a' as tree,
-                         # - 'b' as tree,
-                         # - stop at c as blob.
-                         for path in paths:
-                             try:
-                                 obj = repo[tree[path][1]]
-                                 if isinstance(obj, objects.Tree):
-                                     trees.append({'tree': obj, 'path': path})
-                                     tree = obj
-                             except KeyError:
-                                 break
-                         #PROBLEM:
-                         """
-                         We're not editing same reference tree object
-                         """
-                         # Cut down the blob and all rotten trees on the way back...
-                         for path, tree_data in reversed(zip(paths, trees)):
-                             tree = tree_data['tree']
-                             tree.__delitem__(path)
-                             # This operation edits the tree, we need to mark new commit back
+                     author = commit_data['author']
+                     committer = commit_data['committer']
+                     message = commit_data['message']
+                     date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]
-                             if len(tree) > 0:
-                                 # This tree still has elements - don't remove it or any
-                                 # of it's parents
-                                 break
-                     object_store.add_object(commit_tree)
+                     new_commit_id = self.create_commit(wire, author, committer, message, branch,
+                                                        new_tree_id, date_args=date_args, parents=commit_parents)
-                     # Create commit
-                     commit = objects.Commit()
-                     commit.tree = commit_tree.id
-                     for k, v in commit_data.items():
-                         setattr(commit, k, v)
-                     object_store.add_object(commit)
+                     # libgit2, ensure the branch is there and exists
+                     self.create_branch(wire, branch, new_commit_id)
-                     self.create_branch(wire, branch, commit.id)
+                     # libgit2, set new ref to this created commit
+                     self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)
-                     # dulwich set-ref
-                     ref = 'refs/heads/%s' % branch
-                     repo.refs[ref] = commit.id
-                     return commit.id
+                     return new_commit_id
                  @reraise_safe_exceptions
                  def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
                      if url != 'default' and '://' not in url:
                          client = LocalGitClient(url)
                      else:
-                         url_obj = url_parser(url)
+                         url_obj = url_parser(safe_bytes(url))
                          o = self._build_opener(url)
-                         url, _ = url_obj.authinfo()
+                         url = url_obj.authinfo()[0]
                          client = HttpGitClient(base_url=url, opener=o)
                      repo = self._factory.repo(wire)
                      determine_wants = repo.object_store.determine_wants_all
                      if refs:
-                         def determine_wants_requested(references):
-                             return [references[r] for r in references if r in refs]
+                         refs: list[bytes] = [ascii_bytes(x) for x in refs]
+                         def determine_wants_requested(_remote_refs):
+                             determined = []
+                             for ref_name, ref_hash in _remote_refs.items():
+                                 bytes_ref_name = safe_bytes(ref_name)
+                                 if bytes_ref_name in refs:
+                                     bytes_ref_hash = safe_bytes(ref_hash)
+                                     determined.append(bytes_ref_hash)
+                             return determined
+                         # swap with our custom requested wants
                          determine_wants = determine_wants_requested
                      try:
                          remote_refs = client.fetch(
                              path=url, target=repo, determine_wants=determine_wants)
                      except NotGitRepository as e:
                          log.warning(
                              'Trying to fetch from "%s" failed, not a Git repository.', url)
                              repo[k] = remote_refs[k]
                          if refs and not update_after:
+                             # update to ref
                              # mikhail: explicitly set the head to the last ref.
-                             repo["HEAD"] = remote_refs[refs[-1]]
+                             update_to_ref = refs[-1]
+                             if isinstance(update_after, str):
+                                 update_to_ref = update_after
+                             repo[HEAD_MARKER] = remote_refs[update_to_ref]
                      if update_after:
-                         # we want to checkout HEAD
-                         repo["HEAD"] = remote_refs["HEAD"]
+                         # we want to check out HEAD
+                         repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
                          index.build_index_from_tree(repo.path, repo.index_path(),
-                                                     repo.object_store, repo["HEAD"].tree)
+                                                     repo.object_store, repo[HEAD_MARKER].tree)
+                     if isinstance(remote_refs, FetchPackResult):
+                         return remote_refs.refs
                      return remote_refs
                  @reraise_safe_exceptions
-                 def sync_fetch(self, wire, url, refs=None, all_refs=False):
-                     repo = self._factory.repo(wire)
+                 def sync_fetch(self, wire, url, refs=None, all_refs=False, **kwargs):
+                     self._factory.repo(wire)
                      if refs and not isinstance(refs, (list, tuple)):
                          refs = [refs]
                      fetch_refs = []
                      for ref_line in output.splitlines():
-                         sha, ref = ref_line.split('\t')
+                         sha, ref = ref_line.split(b'\t')
                          sha = sha.strip()
                          if ref in remote_refs:
                              # duplicate, skip
                              log.debug("Skipping peeled reference %s", ref)
                              continue
                          # don't sync HEAD
-                         if ref in ['HEAD']:
+                         if ref in [HEAD_MARKER]:
                              continue
                          remote_refs[ref] = sha
                          if refs and sha in refs:
                              # we filter fetch using our specified refs
-                             fetch_refs.append('{}:{}'.format(ref, ref))
+                             fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
                          elif not refs:
-                             fetch_refs.append('{}:{}'.format(ref, ref))
+                             fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
                      log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
                      if fetch_refs:
-                         for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
+                         for chunk in more_itertools.chunked(fetch_refs, 128):
                              fetch_refs_chunks = list(chunk)
                              log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
-                             _out, _err = self.run_git_command(
+                             self.run_git_command(
                                  wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
                                  fail_on_stderr=False,
                                  _copts=self._remote_conf(config),
                                  extra_env={'GIT_TERMINAL_PROMPT': '0'})
+                         if kwargs.get('sync_large_objects'):
+                             self.run_git_command(
+                                 wire, ['lfs', 'fetch', url, '--all'],
+                                 fail_on_stderr=False,
+                                 _copts=self._remote_conf(config),
+                             )
                      return remote_refs
                  @reraise_safe_exceptions
-                 def sync_push(self, wire, url, refs=None):
+                 def sync_push(self, wire, url, refs=None, **kwargs):
                      if not self.check_url(url, wire):
                          return
                      config = self._wire_to_config(wire)
                          wire, ['push', url, '--mirror'], fail_on_stderr=False,
                          _copts=self._remote_conf(config),
                          extra_env={'GIT_TERMINAL_PROMPT': '0'})
+                     if kwargs.get('sync_large_objects'):
+                         self.run_git_command(
+                             wire, ['lfs', 'push', url, '--all'],
+                             fail_on_stderr=False,
+                             _copts=self._remote_conf(config),
+                         )
                  @reraise_safe_exceptions
                  def get_remote_refs(self, wire, url):
                      return repo.get_description()
                  @reraise_safe_exceptions
-                 def get_missing_revs(self, wire, rev1, rev2, path2):
+                 def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
+                     origin_repo_path = wire['path']
                      repo = self._factory.repo(wire)
-                     LocalGitClient(thin_packs=False).fetch(path2, repo)
+                     # fetch from other_repo_path to our origin repo
+                     LocalGitClient(thin_packs=False).fetch(other_repo_path, repo)
                      wire_remote = wire.copy()
-                     wire_remote['path'] = path2
+                     wire_remote['path'] = other_repo_path
                      repo_remote = self._factory.repo(wire_remote)
-                     LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)
+                     # fetch from origin_repo_path to our remote repo
+                     LocalGitClient(thin_packs=False).fetch(origin_repo_path, repo_remote)
                      revs = [
                          x.commit.id
-                         for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
+                         for x in repo_remote.get_walker(include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])]
                      return revs
                  @reraise_safe_exceptions
                  def get_object(self, wire, sha, maybe_unreachable=False):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _get_object(_context_uid, _repo_id, _sha):
                          repo_init = self._factory.repo_libgit2(wire)
                                      raise exceptions.LookupException(e)(missing_commit_err)
                              commit_id = commit.hex
-                             type_id = commit.type
+                             type_str = commit.type_str
                              return {
                                  'id': commit_id,
-                                 'type': self._type_id_to_name(type_id),
+                                 'type': type_str,
                                  'commit_id': commit_id,
                                  'idx': 0
                              }
                  def get_refs(self, wire):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _get_refs(_context_uid, _repo_id):
                          with repo_init as repo:
                              regex = re.compile('^refs/(heads|tags)/')
                              return {x.name: x.target.hex for x in
-                                     filter(lambda ref: regex.match(ref.name) ,repo.listall_reference_objects())}
+                                     [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]}
                      return _get_refs(context_uid, repo_id)
                  def get_branch_pointers(self, wire):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _get_branch_pointers(_context_uid, _repo_id):
                          repo_init = self._factory.repo_libgit2(wire)
                          regex = re.compile('^refs/heads')
                          with repo_init as repo:
-                             branches = filter(lambda ref: regex.match(ref.name), repo.listall_reference_objects())
+                             branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]
                              return {x.target.hex: x.shorthand for x in branches}
                      return _get_branch_pointers(context_uid, repo_id)
                  def head(self, wire, show_exc=True):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _head(_context_uid, _repo_id, _show_exc):
                          repo_init = self._factory.repo_libgit2(wire)
                  @reraise_safe_exceptions
                  def init(self, wire):
-                     repo_path = str_to_dulwich(wire['path'])
-                     self.repo = Repo.init(repo_path)
+                     repo_path = safe_str(wire['path'])
+                     os.makedirs(repo_path, mode=0o755)
+                     pygit2.init_repository(repo_path, bare=False)
                  @reraise_safe_exceptions
                  def init_bare(self, wire):
-                     repo_path = str_to_dulwich(wire['path'])
-                     self.repo = Repo.init_bare(repo_path)
+                     repo_path = safe_str(wire['path'])
+                     os.makedirs(repo_path, mode=0o755)
+                     pygit2.init_repository(repo_path, bare=True)
                  @reraise_safe_exceptions
                  def revision(self, wire, rev):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _revision(_context_uid, _repo_id, _rev):
                          repo_init = self._factory.repo_libgit2(wire)
                  def date(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _date(_repo_id, _commit_id):
                          repo_init = self._factory.repo_libgit2(wire)
                  def author(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _author(_repo_id, _commit_id):
                          repo_init = self._factory.repo_libgit2(wire)
                                  author = commit.get_object().author
                              if author.email:
-                                 return u"{} <{}>".format(author.name, author.email)
+                                 return f"{author.name} <{author.email}>"
                              try:
-                                 return u"{}".format(author.name)
+                                 return f"{author.name}"
                              except Exception:
-                                 return u"{}".format(safe_unicode(author.raw_name))
+                                 return f"{safe_str(author.raw_name)}"
                      return _author(repo_id, commit_id)
                  def message(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _message(_repo_id, _commit_id):
                          repo_init = self._factory.repo_libgit2(wire)
                  def parents(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _parents(_repo_id, _commit_id):
                          repo_init = self._factory.repo_libgit2(wire)
                  def children(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
+                     head = self.head(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _children(_repo_id, _commit_id):
                          output, __ = self.run_git_command(
-                             wire, ['rev-list', '--all', '--children'])
+                             wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])
                          child_ids = []
-                         pat = re.compile(r'^%s' % commit_id)
-                         for l in output.splitlines():
-                             if pat.match(l):
-                                 found_ids = l.split(' ')[1:]
+                         pat = re.compile(fr'^{commit_id}')
+                         for line in output.splitlines():
+                             line = safe_str(line)
+                             if pat.match(line):
+                                 found_ids = line.split(' ')[1:]
                                  child_ids.extend(found_ids)
+                                 break
                          return child_ids
                      return _children(repo_id, commit_id)
                          repo.references.create(key, value, force=True)
                  @reraise_safe_exceptions
+                 def update_refs(self, wire, key, value):
+                     repo_init = self._factory.repo_libgit2(wire)
+                     with repo_init as repo:
+                         if key not in repo.references:
+                             raise ValueError(f'Reference {key} not found in the repository')
+                         repo.references.create(key, value, force=True)
+                 @reraise_safe_exceptions
                  def create_branch(self, wire, branch_name, commit_id, force=False):
                      repo_init = self._factory.repo_libgit2(wire)
                      with repo_init as repo:
-                         commit = repo[commit_id]
+                         if commit_id:
+                             commit = repo[commit_id]
+                         else:
+                             # if commit is not given  just use the HEAD
+                             commit = repo.head()
                          if force:
                              repo.branches.local.create(branch_name, commit, force=force)
                  def tag_remove(self, wire, tag_name):
                      repo_init = self._factory.repo_libgit2(wire)
                      with repo_init as repo:
-                         key = 'refs/tags/{}'.format(tag_name)
+                         key = f'refs/tags/{tag_name}'
                          repo.references.delete(key)
                  @reraise_safe_exceptions
                  def tree_changes(self, wire, source_id, target_id):
-                     # TODO(marcink): remove this seems it's only used by tests
                      repo = self._factory.repo(wire)
+                     # source can be empty
+                     source_id = safe_bytes(source_id if source_id else b'')
+                     target_id = safe_bytes(target_id)
                      source = repo[source_id].tree if source_id else None
                      target = repo[target_id].tree
                      result = repo.object_store.tree_changes(source, target)
-                     return list(result)
+                     added = set()
+                     modified = set()
+                     deleted = set()
+                     for (old_path, new_path), (_, _), (_, _) in list(result):
+                         if new_path and old_path:
+                             modified.add(new_path)
+                         elif new_path and not old_path:
+                             added.add(new_path)
+                         elif not new_path and old_path:
+                             deleted.add(old_path)
+                     return list(added), list(modified), list(deleted)
                  @reraise_safe_exceptions
                  def tree_and_type_for_path(self, wire, commit_id, path):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
                          repo_init = self._factory.repo_libgit2(wire)
                              except KeyError:
                                  return None, None, None
-                             return tree.id.hex, tree.type, tree.filemode
+                             return tree.id.hex, tree.type_str, tree.filemode
                      return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
                  @reraise_safe_exceptions
                  def tree_items(self, wire, tree_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _tree_items(_repo_id, _tree_id):
                              try:
                                  tree = repo[tree_id]
                              except KeyError:
-                                 raise ObjectMissing('No tree with id: {}'.format(tree_id))
+                                 raise ObjectMissing(f'No tree with id: {tree_id}')
                              result = []
                              for item in tree:
                                  item_sha = item.hex
                                  item_mode = item.filemode
-                                 item_type = item.type
+                                 item_type = item.type_str
                                  if item_type == 'commit':
                                      # NOTE(marcink): submodules we translate to 'link' for backward compat
                      """
                      flags = [
-                         '-U%s' % context, '--patch',
+                         f'-U{context}', '--patch',
                          '--binary',
                          '--find-renames',
                          '--no-indent-heuristic',
                          lines = diff.splitlines()
                          x = 0
                          for line in lines:
-                             if line.startswith('diff'):
+                             if line.startswith(b'diff'):
                                  break
                              x += 1
                          # Append new line just like 'diff' command do
                  @reraise_safe_exceptions
                  def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
                      repo_init = self._factory.repo_libgit2(wire)
                      with repo_init as repo:
                          swap = True
                          flags = 0
                          if file_filter:
                              for p in diff_obj:
                                  if p.delta.old_file.path == file_filter:
-                                     return p.patch or ''
+                                     return BytesEnvelope(p.data) or BytesEnvelope(b'')
                              # fo matching path == no diff
-                             return ''
-                         return diff_obj.patch or ''
+                             return BytesEnvelope(b'')
+                         return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')
                  @reraise_safe_exceptions
                  def node_history(self, wire, commit_id, path, limit):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
                          # optimize for n==1, rev-list is much faster for that use-case
                              cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path])
                          output, __ = self.run_git_command(wire, cmd)
-                         commit_ids = re.findall(r'[0-9a-fA-F]{40}', output)
+                         commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output)
                          return [x for x in commit_ids]
                      return _node_history(context_uid, repo_id, commit_id, path, limit)
                  @reraise_safe_exceptions
-                 def node_annotate(self, wire, commit_id, path):
+                 def node_annotate_legacy(self, wire, commit_id, path):
+                     # note: replaced by pygit2 implementation
                      cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
                      # -l     ==> outputs long shas (and we need all 40 characters)
                      # --root ==> doesn't put '^' character for boundaries
                      output, __ = self.run_git_command(wire, cmd)
                      result = []
-                     for i, blame_line in enumerate(output.split('\n')[:-1]):
+                     for i, blame_line in enumerate(output.splitlines()[:-1]):
                          line_no = i + 1
-                         commit_id, line = re.split(r' ', blame_line, 1)
-                         result.append((line_no, commit_id, line))
+                         blame_commit_id, line = re.split(rb' ', blame_line, 1)
+                         result.append((line_no, blame_commit_id, line))
                      return result
                  @reraise_safe_exceptions
-                 def update_server_info(self, wire):
-                     repo = self._factory.repo(wire)
-                     update_server_info(repo)
+                 def node_annotate(self, wire, commit_id, path):
+                     result_libgit = []
+                     repo_init = self._factory.repo_libgit2(wire)
+                     with repo_init as repo:
+                         commit = repo[commit_id]
+                         blame_obj = repo.blame(path, newest_commit=commit_id)
+                         for i, line in enumerate(commit.tree[path].data.splitlines()):
+                             line_no = i + 1
+                             hunk = blame_obj.for_line(line_no)
+                             blame_commit_id = hunk.final_commit_id.hex
+                             result_libgit.append((line_no, blame_commit_id, line))
+                     return BinaryEnvelope(result_libgit)
+                 @reraise_safe_exceptions
+                 def update_server_info(self, wire, force=False):
+                     cmd = ['update-server-info']
+                     if force:
+                         cmd += ['--force']
+                     output, __ = self.run_git_command(wire, cmd)
+                     return output.splitlines()
                  @reraise_safe_exceptions
                  def get_all_commit_ids(self, wire):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _get_all_commit_ids(_context_uid, _repo_id):
                          except Exception:
                              # Can be raised for empty repositories
                              return []
+                     @region.conditional_cache_on_arguments(condition=cache_on)
+                     def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
+                         repo_init = self._factory.repo_libgit2(wire)
+                         from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
+                         results = []
+                         with repo_init as repo:
+                             for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
+                                 results.append(commit.id.hex)
                      return _get_all_commit_ids(context_uid, repo_id)
                  @reraise_safe_exceptions
                  def run_git_command(self, wire, cmd, **opts):
                      path = wire.get('path', None)
+                     debug_mode = rhodecode.ConfigGet().get_bool('debug')
                      if path and os.path.isdir(path):
                          opts['cwd'] = path
                          _copts = []
                          del opts['_bare']
                      else:
-                         _copts = ['-c', 'core.quotepath=false', ]
+                         _copts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']
                      safe_call = False
                      if '_safe' in opts:
                          # no exc on failure
                          _opts.update(opts)
                          proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
-                         return ''.join(proc), ''.join(proc.error)
-                     except (EnvironmentError, OSError) as err:
-                         cmd = ' '.join(cmd)  # human friendly CMD
-                         tb_err = ("Couldn't run git command (%s).\n"
-                                   "Original error was:%s\n"
-                                   "Call options:%s\n"
-                                   % (cmd, err, _opts))
+                         return b''.join(proc), b''.join(proc.stderr)
+                     except OSError as err:
+                         cmd = ' '.join(map(safe_str, cmd))  # human friendly CMD
+                         call_opts = {}
+                         if debug_mode:
+                             call_opts = _opts
+                         tb_err = ("Couldn't run git command ({}).\n"
+                                   "Original error was:{}\n"
+                                   "Call options:{}\n"
+                                   .format(cmd, err, call_opts))
                          log.exception(tb_err)
                          if safe_call:
                              return '', err
                      from vcsserver.hook_utils import install_git_hooks
                      bare = self.bare(wire)
                      path = wire['path']
+                     binary_dir = settings.BINARY_DIR
+                     if binary_dir:
+                         os.path.join(binary_dir, 'python3')
                      return install_git_hooks(path, bare, force_create=force)
                  @reraise_safe_exceptions
                  @reraise_safe_exceptions
                  def set_head_ref(self, wire, head_name):
                      log.debug('Setting refs/head to `%s`', head_name)
-                     cmd = ['symbolic-ref', 'HEAD', 'refs/heads/%s' % head_name]
-                     output, __ = self.run_git_command(wire, cmd)
-                     return [head_name] + output.splitlines()
+                     repo_init = self._factory.repo_libgit2(wire)
+                     with repo_init as repo:
+                         repo.set_head(f'refs/heads/{head_name}')
+                     return [head_name] + [f'set HEAD to refs/heads/{head_name}']
                  @reraise_safe_exceptions
-                 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
-                                  archive_dir_name, commit_id):
+                 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
+                                  archive_dir_name, commit_id, cache_config):
                      def file_walker(_commit_id, path):
                          repo_init = self._factory.repo_libgit2(wire)
                                  try:
                                      tree = repo[tree_id]
                                  except KeyError:
-                                     raise ObjectMissing('No tree with id: {}'.format(tree_id))
+                                     raise ObjectMissing(f'No tree with id: {tree_id}')
                              index = LibGit2Index.Index()
                              index.read_tree(tree)
                              file_iter = index
-                             for fn in file_iter:
-                                 file_path = fn.path
-                                 mode = fn.mode
+                             for file_node in file_iter:
+                                 file_path = file_node.path
+                                 mode = file_node.mode
                                  is_link = stat.S_ISLNK(mode)
                                  if mode == pygit2.GIT_FILEMODE_COMMIT:
                                      log.debug('Skipping path %s as a commit node', file_path)
                                      continue
-                                 yield ArchiveNode(file_path, mode, is_link, repo[fn.hex].read_raw)
+                                 yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)
-                     return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
-                                         archive_dir_name, commit_id)
+                     return store_archive_in_cache(
+                         file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)

vcsserver/remote/hg_remote.py ~~vcsserver/hg.py~~

0 renamed +341 -175

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
-             import functools
+             import binascii
              import io
              import logging
-             import os
              import stat
-             import urllib
-             import urllib2
-             import traceback
+             import sys
+             import urllib.request
+             import urllib.parse
+             import hashlib
-             from hgext import largefiles, rebase, purge
-             from hgext.strip import strip as hgext_strip
+             from hgext import largefiles, rebase
              from mercurial import commands
              from mercurial import unionrepo
              from mercurial import verify
              from mercurial import repair
+             from mercurial.error import AmbiguousPrefixLookupError
              import vcsserver
              from vcsserver import exceptions
-             from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original, archive_repo, ArchiveNode
+             from vcsserver.base import (
+                 RepoFactory,
+                 obfuscate_qs,
+                 raise_from_original,
+                 store_archive_in_cache,
+                 ArchiveNode,
+                 BytesEnvelope,
+                 BinaryEnvelope,
+             )
              from vcsserver.hgcompat import (
-                 archival, bin, clone, config as hgconfig, diffopts, hex, get_ctx,
-                 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler,
-                 makepeer, instance, match, memctx, exchange, memfilectx, nullrev, hg_merge,
-                 patch, peer, revrange, ui, hg_tag, Abort, LookupError, RepoError,
-                 RepoLookupError, InterventionRequired, RequirementError,
-                 alwaysmatcher, patternmatcher, hgutil)
+                 archival,
+                 bin,
+                 clone,
+                 config as hgconfig,
+                 diffopts,
+                 hex,
+                 get_ctx,
+                 hg_url as url_parser,
+                 httpbasicauthhandler,
+                 httpdigestauthhandler,
+                 makepeer,
+                 instance,
+                 match,
+                 memctx,
+                 exchange,
+                 memfilectx,
+                 nullrev,
+                 hg_merge,
+                 patch,
+                 peer,
+                 revrange,
+                 ui,
+                 hg_tag,
+                 Abort,
+                 LookupError,
+                 RepoError,
+                 RepoLookupError,
+                 InterventionRequired,
+                 RequirementError,
+                 alwaysmatcher,
+                 patternmatcher,
+                 hgext_strip,
+             )
+             from vcsserver.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes, convert_to_str
              from vcsserver.vcs_base import RemoteBase
+             from vcsserver.config import hooks as hooks_config
+             from vcsserver.lib.exc_tracking import format_exc
              log = logging.getLogger(__name__)
              def make_ui_from_config(repo_config):
                  class LoggingUI(ui.ui):
                      def status(self, *msg, **opts):
-                         log.info(' '.join(msg).rstrip('\n'))
-                         super(LoggingUI, self).status(*msg, **opts)
+                         str_msg = map(safe_str, msg)
+                         log.info(' '.join(str_msg).rstrip('\n'))
+                         #super(LoggingUI, self).status(*msg, **opts)
                      def warn(self, *msg, **opts):
-                         log.warn(' '.join(msg).rstrip('\n'))
-                         super(LoggingUI, self).warn(*msg, **opts)
+                         str_msg = map(safe_str, msg)
+                         log.warning('ui_logger:'+' '.join(str_msg).rstrip('\n'))
+                         #super(LoggingUI, self).warn(*msg, **opts)
                      def error(self, *msg, **opts):
-                         log.error(' '.join(msg).rstrip('\n'))
-                         super(LoggingUI, self).error(*msg, **opts)
+                         str_msg = map(safe_str, msg)
+                         log.error('ui_logger:'+' '.join(str_msg).rstrip('\n'))
+                         #super(LoggingUI, self).error(*msg, **opts)
                      def note(self, *msg, **opts):
-                         log.info(' '.join(msg).rstrip('\n'))
-                         super(LoggingUI, self).note(*msg, **opts)
+                         str_msg = map(safe_str, msg)
+                         log.info('ui_logger:'+' '.join(str_msg).rstrip('\n'))
+                         #super(LoggingUI, self).note(*msg, **opts)
                      def debug(self, *msg, **opts):
-                         log.debug(' '.join(msg).rstrip('\n'))
-                         super(LoggingUI, self).debug(*msg, **opts)
+                         str_msg = map(safe_str, msg)
+                         log.debug('ui_logger:'+' '.join(str_msg).rstrip('\n'))
+                         #super(LoggingUI, self).debug(*msg, **opts)
                  baseui = LoggingUI()
                  baseui._tcfg = hgconfig.config()
                  for section, option, value in repo_config:
-                     baseui.setconfig(section, option, value)
+                     baseui.setconfig(ascii_bytes(section), ascii_bytes(option), ascii_bytes(value))
                  # make our hgweb quiet so it doesn't print output
-                 baseui.setconfig('ui', 'quiet', 'true')
+                 baseui.setconfig(b'ui', b'quiet', b'true')
-                 baseui.setconfig('ui', 'paginate', 'never')
+                 baseui.setconfig(b'ui', b'paginate', b'never')
                  # for better Error reporting of Mercurial
-                 baseui.setconfig('ui', 'message-output', 'stderr')
+                 baseui.setconfig(b'ui', b'message-output', b'stderr')
                  # force mercurial to only use 1 thread, otherwise it may try to set a
                  # signal in a non-main thread, thus generating a ValueError.
-                 baseui.setconfig('worker', 'numcpus', 1)
+                 baseui.setconfig(b'worker', b'numcpus', 1)
                  # If there is no config for the largefiles extension, we explicitly disable
                  # it here. This overrides settings from repositories hgrc file. Recent
                  # mercurial versions enable largefiles in hgrc on clone from largefile
                  # repo.
-                 if not baseui.hasconfig('extensions', 'largefiles'):
+                 if not baseui.hasconfig(b'extensions', b'largefiles'):
                      log.debug('Explicitly disable largefiles extension for repo.')
-                     baseui.setconfig('extensions', 'largefiles', '!')
+                     baseui.setconfig(b'extensions', b'largefiles', b'!')
                  return baseui
                      try:
                          return func(*args, **kwargs)
                      except (Abort, InterventionRequired) as e:
-                         raise_from_original(exceptions.AbortException(e))
+                         raise_from_original(exceptions.AbortException(e), e)
                      except RepoLookupError as e:
-                         raise_from_original(exceptions.LookupException(e))
+                         raise_from_original(exceptions.LookupException(e), e)
                      except RequirementError as e:
-                         raise_from_original(exceptions.RequirementException(e))
+                         raise_from_original(exceptions.RequirementException(e), e)
                      except RepoError as e:
-                         raise_from_original(exceptions.VcsException(e))
+                         raise_from_original(exceptions.VcsException(e), e)
                      except LookupError as e:
-                         raise_from_original(exceptions.LookupException(e))
+                         raise_from_original(exceptions.LookupException(e), e)
                      except Exception as e:
                          if not hasattr(e, '_vcs_kind'):
                              log.exception("Unhandled exception in hg remote call")
-                             raise_from_original(exceptions.UnhandledException(e))
+                             raise_from_original(exceptions.UnhandledException(e), e)
                          raise
                  return wrapper
                  def _create_config(self, config, hooks=True):
                      if not hooks:
-                         hooks_to_clean = frozenset((
-                             'changegroup.repo_size', 'preoutgoing.pre_pull',
-                             'outgoing.pull_logger', 'prechangegroup.pre_push'))
+                         hooks_to_clean = {
+                             hooks_config.HOOK_REPO_SIZE,
+                             hooks_config.HOOK_PRE_PULL,
+                             hooks_config.HOOK_PULL,
+                             hooks_config.HOOK_PRE_PUSH,
+                             # TODO: what about PRETXT, this was disabled in pre 5.0.0
+                             hooks_config.HOOK_PRETX_PUSH,
+                         }
                          new_config = []
                          for section, option, value in config:
                              if section == 'hooks' and option in hooks_to_clean:
                  def _create_repo(self, wire, create):
                      baseui = self._create_config(wire["config"])
-                     return instance(baseui, wire["path"], create)
+                     repo = instance(baseui, safe_bytes(wire["path"]), create)
+                     log.debug('repository created: got HG object: %s', repo)
+                     return repo
                  def repo(self, wire, create=False):
                      """
              def patch_ui_message_output(baseui):
-                 baseui.setconfig('ui', 'quiet', 'false')
+                 baseui.setconfig(b'ui', b'quiet', b'false')
                  output = io.BytesIO()
                  def write(data, **unused_kwargs):
                  return baseui, output
+             def get_obfuscated_url(url_obj):
+                 """
+                 Mask the credentials carried by ``url_obj`` and return it as a string.
+                 NOTE: ``url_obj`` is modified in place; its ``passwd`` and ``query``
+                 attributes are overwritten before ``str()`` is applied.
+                 :param url_obj: parsed URL object exposing ``passwd`` and ``query``
+                 :return: ``str(url_obj)`` after masking
+                 """
+                 secret = url_obj.passwd
+                 # an unset/empty password is written back untouched
+                 url_obj.passwd = b'*****' if secret else secret
+                 url_obj.query = obfuscate_qs(url_obj.query)
+                 return str(url_obj)
+             def normalize_url_for_hg(url: str):
+                 """
+                 Split an optional ``<prefix>+`` marker off the scheme part of ``url``.
+                 ``svn+http://host/repo`` becomes ``('http://host/repo', 'svn')``. Input
+                 with no ``+`` in front of ``://`` is returned unchanged together with ``None``.
+                 :param url: location string, optionally carrying a ``<prefix>+`` scheme marker
+                 :return: ``(url, prefix)`` tuple; ``prefix`` is ``None`` when nothing was stripped
+                 """
+                 scheme, separator, _rest = url.partition('://')
+                 # Without a '://' separator there is no scheme to inspect. Slicing with
+                 # the -1 that str.find() reports on a miss would scan nearly the whole
+                 # string and split scheme-less input (e.g. a local path such as
+                 # '/srv/c++/repo') on an unrelated '+'.
+                 if not separator or '+' not in scheme:
+                     return url, None
+                 # only the first '+' separates the prefix; later ones belong to the url
+                 _proto, _plus, url = url.partition('+')
+                 return url, _proto
              class HgRemote(RemoteBase):
                  def __init__(self, factory):
                          "hidden": self.ctx_hidden,
                          "_file_paths": self.ctx_list,
                      }
+                     self._bulk_file_methods = {
+                         "size": self.fctx_size,
+                         "data": self.fctx_node_data,
+                         "flags": self.fctx_flags,
+                         "is_binary": self.is_binary,
+                         "md5": self.md5_hash,
+                     }
                  def _get_ctx(self, repo, ref):
                      return get_ctx(repo, ref)
                  @reraise_safe_exceptions
                  def discover_hg_version(self):
                      from mercurial import util
-                     return util.version()
+                     return safe_str(util.version())
                  @reraise_safe_exceptions
                  def is_empty(self, wire):
                  def bookmarks(self, wire):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _bookmarks(_context_uid, _repo_id):
                          repo = self._factory.repo(wire)
-                         return dict(repo._bookmarks)
+                         return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo._bookmarks.items()}
                      return _bookmarks(context_uid, repo_id)
                  def branches(self, wire, normal, closed):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _branches(_context_uid, _repo_id, _normal, _closed):
                          repo = self._factory.repo(wire)
                          iter_branches = repo.branchmap().iterbranches()
                          bt = {}
-                         for branch_name, _heads, tip, is_closed in iter_branches:
+                         for branch_name, _heads, tip_node, is_closed in iter_branches:
                              if normal and not is_closed:
-                                 bt[branch_name] = tip
+                                 bt[safe_str(branch_name)] = ascii_str(hex(tip_node))
                              if closed and is_closed:
-                                 bt[branch_name] = tip
+                                 bt[safe_str(branch_name)] = ascii_str(hex(tip_node))
                          return bt
                  def bulk_request(self, wire, commit_id, pre_load):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _bulk_request(_repo_id, _commit_id, _pre_load):
                          result = {}
                          for attr in pre_load:
                              try:
                                  method = self._bulk_methods[attr]
+                                 wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
                                  result[attr] = method(wire, commit_id)
                              except KeyError as e:
                                  raise exceptions.VcsException(e)(
-                                     'Unknown bulk attribute: "%s"' % attr)
+                                     f'Unknown bulk attribute: "{attr}"')
                          return result
                      return _bulk_request(repo_id, commit_id, sorted(pre_load))
                  def ctx_branch(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _ctx_branch(_repo_id, _commit_id):
                          repo = self._factory.repo(wire)
                  def ctx_date(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _ctx_date(_repo_id, _commit_id):
                          repo = self._factory.repo(wire)
                  def ctx_files(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _ctx_files(_repo_id, _commit_id):
                          repo = self._factory.repo(wire)
                  def ctx_parents(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _ctx_parents(_repo_id, _commit_id):
                          repo = self._factory.repo(wire)
                  def ctx_children(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _ctx_children(_repo_id, _commit_id):
                          repo = self._factory.repo(wire)
                  def ctx_phase(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _ctx_phase(_context_uid, _repo_id, _commit_id):
                          repo = self._factory.repo(wire)
                  def ctx_obsolete(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _ctx_obsolete(_context_uid, _repo_id, _commit_id):
                          repo = self._factory.repo(wire)
                  def ctx_hidden(self, wire, commit_id):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _ctx_hidden(_context_uid, _repo_id, _commit_id):
                          repo = self._factory.repo(wire)
                  @reraise_safe_exceptions
                  def check_url(self, url, config):
-                     _proto = None
-                     if '+' in url[:url.find('://')]:
-                         _proto = url[0:url.find('+')]
-                         url = url[url.find('+') + 1:]
+                     url, _proto = normalize_url_for_hg(url)
+                     url_obj = url_parser(safe_bytes(url))
+                     test_uri = safe_str(url_obj.authinfo()[0])
+                     authinfo = url_obj.authinfo()[1]
+                     obfuscated_uri = get_obfuscated_url(url_obj)
+                     log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
                      handlers = []
-                     url_obj = url_parser(url)
-                     test_uri, authinfo = url_obj.authinfo()
-                     url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
-                     url_obj.query = obfuscate_qs(url_obj.query)
-                     cleaned_uri = str(url_obj)
-                     log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
                      if authinfo:
                          # create a password manager
-                         passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
-                         passmgr.add_password(*authinfo)
+                         passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
+                         passmgr.add_password(*convert_to_str(authinfo))
                          handlers.extend((httpbasicauthhandler(passmgr),
                                           httpdigestauthhandler(passmgr)))
-                     o = urllib2.build_opener(*handlers)
+                     o = urllib.request.build_opener(*handlers)
                      o.addheaders = [('Content-Type', 'application/mercurial-0.1'),
                                      ('Accept', 'application/mercurial-0.1')]
                      q = {"cmd": 'between'}
-                     q.update({'pairs': "%s-%s" % ('0' * 40, '0' * 40)})
-                     qs = '?%s' % urllib.urlencode(q)
-                     cu = "%s%s" % (test_uri, qs)
-                     req = urllib2.Request(cu, None, {})
+                     q.update({'pairs': "{}-{}".format('0' * 40, '0' * 40)})
+                     qs = f'?{urllib.parse.urlencode(q)}'
+                     cu = f"{test_uri}{qs}"
                      try:
-                         log.debug("Trying to open URL %s", cleaned_uri)
+                         req = urllib.request.Request(cu, None, {})
+                         log.debug("Trying to open URL %s", obfuscated_uri)
                          resp = o.open(req)
                          if resp.code != 200:
                              raise exceptions.URLError()('Return Code is not 200')
                      except Exception as e:
-                         log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
+                         log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
                          # means it cannot be cloned
-                         raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))
+                         raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")
                      # now check if it's a proper hg repo, but don't do it for svn
                      try:
                          else:
                              # check for pure hg repos
                              log.debug(
-                                 "Verifying if URL is a Mercurial repository: %s",
-                                 cleaned_uri)
+                                 "Verifying if URL is a Mercurial repository: %s", obfuscated_uri)
                              ui = make_ui_from_config(config)
-                             peer_checker = makepeer(ui, url)
-                             peer_checker.lookup('tip')
+                             peer_checker = makepeer(ui, safe_bytes(url))
+                             peer_checker.lookup(b'tip')
                      except Exception as e:
                          log.warning("URL is not a valid Mercurial repository: %s",
-                                     cleaned_uri)
+                                     obfuscated_uri)
                          raise exceptions.URLError(e)(
-                             "url [%s] does not look like an hg repo org_exc: %s"
-                             % (cleaned_uri, e))
+                             f"url [{obfuscated_uri}] does not look like an hg repo org_exc: {e}")
-                     log.info("URL is a valid Mercurial repository: %s", cleaned_uri)
+                     log.info("URL is a valid Mercurial repository: %s", obfuscated_uri)
                      return True
                  @reraise_safe_exceptions
                      repo = self._factory.repo(wire)
                      if file_filter:
-                         match_filter = match(file_filter[0], '', [file_filter[1]])
+                         # unpack the file-filter
+                         repo_path, node_path = file_filter
+                         match_filter = match(safe_bytes(repo_path), b'', [safe_bytes(node_path)])
                      else:
                          match_filter = file_filter
                      opts = diffopts(git=opt_git, ignorews=opt_ignorews, context=context, showfunc=1)
                      try:
-                         return "".join(patch.diff(
-                             repo, node1=commit_id_1, node2=commit_id_2, match=match_filter, opts=opts))
+                         diff_iter = patch.diff(
+                             repo, node1=commit_id_1, node2=commit_id_2, match=match_filter, opts=opts)
+                         return BytesEnvelope(b"".join(diff_iter))
                      except RepoLookupError as e:
                          raise exceptions.LookupException(e)()
                  def node_history(self, wire, revision, path, limit):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _node_history(_context_uid, _repo_id, _revision, _path, _limit):
                          repo = self._factory.repo(wire)
                          ctx = self._get_ctx(repo, revision)
-                         fctx = ctx.filectx(path)
+                         fctx = ctx.filectx(safe_bytes(path))
                          def history_iter():
                              limit_rev = fctx.rev()
-                             for obj in reversed(list(fctx.filelog())):
-                                 obj = fctx.filectx(obj)
-                                 ctx = obj.changectx()
-                                 if ctx.hidden() or ctx.obsolete():
+                             for fctx_candidate in reversed(list(fctx.filelog())):
+                                 f_obj = fctx.filectx(fctx_candidate)
+                                 # NOTE: This can be problematic...we can hide ONLY history node resulting in empty history
+                                 _ctx = f_obj.changectx()
+                                 if _ctx.hidden() or _ctx.obsolete():
                                      continue
-                                 if limit_rev >= obj.rev():
-                                     yield obj
+                                 if limit_rev >= f_obj.rev():
+                                     yield f_obj
                          history = []
                          for cnt, obj in enumerate(history_iter()):
                      return _node_history(context_uid, repo_id, revision, path, limit)
                  @reraise_safe_exceptions
-                 def node_history_untill(self, wire, revision, path, limit):
+                 def node_history_until(self, wire, revision, path, limit):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _node_history_until(_context_uid, _repo_id):
                          repo = self._factory.repo(wire)
                          ctx = self._get_ctx(repo, revision)
-                         fctx = ctx.filectx(path)
+                         fctx = ctx.filectx(safe_bytes(path))
                          file_log = list(fctx.filelog())
                          if limit:
                      return _node_history_until(context_uid, repo_id, revision, path, limit)
                  @reraise_safe_exceptions
+                 def bulk_file_request(self, wire, commit_id, path, pre_load):
+                     """
+                     Gather several attributes of one file in a single call.
+                     Each name in ``pre_load`` is looked up in ``self._bulk_file_methods`` and
+                     the handler found is called with ``(wire, commit_id, path)``.
+                     :param wire: repository wire data; its ``cache`` entry is set to ``False``
+                         before each handler call
+                     :param commit_id: commit passed on to every handler
+                     :param path: file path passed on to every handler
+                     :param pre_load: iterable of attribute names to collect
+                     :return: ``BinaryEnvelope`` wrapping a dict of attribute name -> handler result
+                     :raises: the ``exceptions.VcsException`` product for any ``KeyError``; the
+                         ``try`` spans the handler call as well, so a ``KeyError`` raised inside
+                         a handler is also reported as an unknown attribute
+                     """
+                     cache_on, context_uid, repo_id = self._cache_on(wire)
+                     region = self._region(wire)
+                     # the sorted ``_pre_load`` argument is never read in the body, the loop
+                     # walks the caller's ``pre_load`` -- presumably it only feeds the cache key
+                     @region.conditional_cache_on_arguments(condition=cache_on)
+                     def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
+                         collected = {}
+                         for attr in pre_load:
+                             try:
+                                 handler = self._bulk_file_methods[attr]
+                                 # disable cache for bulk calls so we don't double cache
+                                 wire.update({'cache': False})
+                                 collected[attr] = handler(wire, _commit_id, _path)
+                             except KeyError as err:
+                                 raise exceptions.VcsException(err)(f'Unknown bulk attribute: "{attr}"')
+                         return collected
+                     requested = sorted(pre_load)
+                     payload = _bulk_file_request(repo_id, commit_id, path, requested)
+                     return BinaryEnvelope(payload)
+                 @reraise_safe_exceptions
                  def fctx_annotate(self, wire, revision, path):
                      repo = self._factory.repo(wire)
                      ctx = self._get_ctx(repo, revision)
-                     fctx = ctx.filectx(path)
+                     fctx = ctx.filectx(safe_bytes(path))
                      result = []
                      for i, annotate_obj in enumerate(fctx.annotate(), 1):
                          ln_no = i
                          sha = hex(annotate_obj.fctx.node())
                          content = annotate_obj.text
-                         result.append((ln_no, sha, content))
-                     return result
+                         result.append((ln_no, ascii_str(sha), content))
+                     return BinaryEnvelope(result)
                  @reraise_safe_exceptions
                  def fctx_node_data(self, wire, revision, path):
                      repo = self._factory.repo(wire)
                      ctx = self._get_ctx(repo, revision)
-                     fctx = ctx.filectx(path)
-                     return fctx.data()
+                     fctx = ctx.filectx(safe_bytes(path))
+                     return BytesEnvelope(fctx.data())
                  @reraise_safe_exceptions
                  def fctx_flags(self, wire, commit_id, path):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _fctx_flags(_repo_id, _commit_id, _path):
                          repo = self._factory.repo(wire)
                          ctx = self._get_ctx(repo, commit_id)
-                         fctx = ctx.filectx(path)
+                         fctx = ctx.filectx(safe_bytes(path))
                          return fctx.flags()
                      return _fctx_flags(repo_id, commit_id, path)
                  def fctx_size(self, wire, commit_id, path):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _fctx_size(_repo_id, _revision, _path):
                          repo = self._factory.repo(wire)
                          ctx = self._get_ctx(repo, commit_id)
-                         fctx = ctx.filectx(path)
+                         fctx = ctx.filectx(safe_bytes(path))
                          return fctx.size()
                      return _fctx_size(repo_id, commit_id, path)
                  def get_all_commit_ids(self, wire, name):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _get_all_commit_ids(_context_uid, _repo_id, _name):
                          repo = self._factory.repo(wire)
-                         repo = repo.filtered(name)
-                         revs = map(lambda x: hex(x[7]), repo.changelog.index)
+                         revs = [ascii_str(repo[x].hex()) for x in repo.filtered(b'visible').changelog.revs()]
                          return revs
                      return _get_all_commit_ids(context_uid, repo_id, name)
                  @reraise_safe_exceptions
                  def get_config_value(self, wire, section, name, untrusted=False):
                      repo = self._factory.repo(wire)
-                     return repo.ui.config(section, name, untrusted=untrusted)
+                     return repo.ui.config(ascii_bytes(section), ascii_bytes(name), untrusted=untrusted)
                  @reraise_safe_exceptions
                  def is_large_file(self, wire, commit_id, path):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _is_large_file(_context_uid, _repo_id, _commit_id, _path):
-                         return largefiles.lfutil.isstandin(path)
+                         return largefiles.lfutil.isstandin(safe_bytes(path))
                      return _is_large_file(context_uid, repo_id, commit_id, path)
                  @reraise_safe_exceptions
                  def is_binary(self, wire, revision, path):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
+                     region = self._region(wire)
-                     region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _is_binary(_repo_id, _sha, _path):
                          repo = self._factory.repo(wire)
                          ctx = self._get_ctx(repo, revision)
-                         fctx = ctx.filectx(path)
+                         fctx = ctx.filectx(safe_bytes(path))
                          return fctx.isbinary()
                      return _is_binary(repo_id, revision, path)
                  @reraise_safe_exceptions
+                 def md5_hash(self, wire, revision, path):
+                     cache_on, context_uid, repo_id = self._cache_on(wire)
+                     region = self._region(wire)
+                     @region.conditional_cache_on_arguments(condition=cache_on)
+                     def _md5_hash(_repo_id, _sha, _path):
+                         repo = self._factory.repo(wire)
+                         ctx = self._get_ctx(repo, revision)
+                         fctx = ctx.filectx(safe_bytes(path))
+                         return hashlib.md5(fctx.data()).hexdigest()
+                     return _md5_hash(repo_id, revision, path)
+                 @reraise_safe_exceptions
                  def in_largefiles_store(self, wire, sha):
                      repo = self._factory.repo(wire)
                      return largefiles.lfutil.instore(repo, sha)
                  @reraise_safe_exceptions
                  def lookup(self, wire, revision, both):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
+                     region = self._region(wire)
-                     region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _lookup(_context_uid, _repo_id, _revision, _both):
                          repo = self._factory.repo(wire)
                          rev = _revision
                          if isinstance(rev, int):
                                  rev = rev + -1
                          try:
                              ctx = self._get_ctx(repo, rev)
-                         except (TypeError, RepoLookupError) as e:
-                             e._org_exc_tb = traceback.format_exc()
+                         except AmbiguousPrefixLookupError:
+                             e = RepoLookupError(rev)
+                             e._org_exc_tb = format_exc(sys.exc_info())
+                             raise exceptions.LookupException(e)(rev)
+                         except (TypeError, RepoLookupError, binascii.Error) as e:
+                             e._org_exc_tb = format_exc(sys.exc_info())
                              raise exceptions.LookupException(e)(rev)
                          except LookupError as e:
-                             e._org_exc_tb = traceback.format_exc()
+                             e._org_exc_tb = format_exc(sys.exc_info())
                              raise exceptions.LookupException(e)(e.name)
                          if not both:
                      repo = self._factory.repo(wire)
                      # Disable any prompts for this repo
-                     repo.ui.setconfig('ui', 'interactive', 'off', '-y')
+                     repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
-                     bookmarks = dict(repo._bookmarks).keys()
-                     remote = peer(repo, {}, url)
+                     bookmarks = list(dict(repo._bookmarks).keys())
+                     remote = peer(repo, {}, safe_bytes(url))
                      # Disable any prompts for this remote
-                     remote.ui.setconfig('ui', 'interactive', 'off', '-y')
+                     remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
                      return exchange.push(
                          repo, remote, newbranch=True, bookmarks=bookmarks).cgresult
                  @reraise_safe_exceptions
                  def rev_range(self, wire, commit_filter):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
+                     region = self._region(wire)
-                     region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _rev_range(_context_uid, _repo_id, _filter):
                          repo = self._factory.repo(wire)
-                         revisions = [rev for rev in revrange(repo, commit_filter)]
+                         revisions = [
+                             ascii_str(repo[rev].hex())
+                             for rev in revrange(repo, list(map(ascii_bytes, commit_filter)))
+                         ]
                          return revisions
                      return _rev_range(context_uid, repo_id, sorted(commit_filter))
                              return len(repo) - 1, 0
                      stop, start = get_revs(repo, [node + ':'])
-                     revs = [hex(repo[r].node()) for r in xrange(start, stop + 1)]
+                     revs = [ascii_str(repo[r].hex()) for r in range(start, stop + 1)]
                      return revs
                  @reraise_safe_exceptions
                  def revs_from_revspec(self, wire, rev_spec, *args, **kwargs):
-                     other_path = kwargs.pop('other_path', None)
+                     org_path = safe_bytes(wire["path"])
+                     other_path = safe_bytes(kwargs.pop('other_path', ''))
                      # case when we want to compare two independent repositories
                      if other_path and other_path != wire["path"]:
                          baseui = self._factory._create_config(wire["config"])
-                         repo = unionrepo.makeunionrepository(baseui, other_path, wire["path"])
+                         repo = unionrepo.makeunionrepository(baseui, other_path, org_path)
                      else:
                          repo = self._factory.repo(wire)
                      return list(repo.revs(rev_spec, *args))
                  def tags(self, wire):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _tags(_context_uid, _repo_id):
                          repo = self._factory.repo(wire)
-                         return repo.tags()
+                         return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo.tags().items()}
                      return _tags(context_uid, repo_id)
                  @reraise_safe_exceptions
-                 def update(self, wire, node=None, clean=False):
+                 def update(self, wire, node='', clean=False):
                      repo = self._factory.repo(wire)
                      baseui = self._factory._create_config(wire['config'])
+                     node = safe_bytes(node)
                      commands.update(baseui, repo, node=node, clean=clean)
                  @reraise_safe_exceptions
                      baseui.write = write
                      if branch:
-                         args = [branch]
+                         args = [safe_bytes(branch)]
                      else:
                          args = []
-                     commands.heads(baseui, repo, template='{node} ', *args)
+                     commands.heads(baseui, repo, template=b'{node} ', *args)
                      return output.getvalue()
                      repo = self._factory.repo(wire)
                      changelog = repo.changelog
                      lookup = repo.lookup
-                     a = changelog.ancestor(lookup(revision1), lookup(revision2))
+                     a = changelog.ancestor(lookup(safe_bytes(revision1)), lookup(safe_bytes(revision2)))
                      return hex(a)
                  @reraise_safe_exceptions
                  def clone(self, wire, source, dest, update_after_clone=False, hooks=True):
                      baseui = self._factory._create_config(wire["config"], hooks=hooks)
-                     clone(baseui, source, dest, noupdate=not update_after_clone)
+                     clone(baseui, safe_bytes(source), safe_bytes(dest), noupdate=not update_after_clone)
                  @reraise_safe_exceptions
                  def commitctx(self, wire, message, parents, commit_time, commit_timezone, user, files, extra, removed, updated):
                      repo = self._factory.repo(wire)
                      baseui = self._factory._create_config(wire['config'])
-                     publishing = baseui.configbool('phases', 'publish')
-                     if publishing:
-                         new_commit = 'public'
-                     else:
-                         new_commit = 'draft'
+                     publishing = baseui.configbool(b'phases', b'publish')
-                     def _filectxfn(_repo, ctx, path):
+                     def _filectxfn(_repo, ctx, path: bytes):
                          """
                          Marks given path as added/changed/removed in a given _repo. This is
                          for internal mercurial commit function.
                          """
                          # check if this path is removed
-                         if path in removed:
+                         if safe_str(path) in removed:
                              # returning None is a way to mark node for removal
                              return None
                          # check if this path is added
                          for node in updated:
-                             if node['path'] == path:
+                             if safe_bytes(node['path']) == path:
                                  return memfilectx(
                                      _repo,
                                      changectx=ctx,
-                                     path=node['path'],
-                                     data=node['content'],
+                                     path=safe_bytes(node['path']),
+                                     data=safe_bytes(node['content']),
                                      islink=False,
                                      isexec=bool(node['mode'] & stat.S_IXUSR),
                                      copysource=False)
+                         abort_exc = exceptions.AbortException()
+                         raise abort_exc(f"Given path haven't been marked as added, changed or removed ({path})")
-                         raise exceptions.AbortException()(
-                             "Given path haven't been marked as added, "
-                             "changed or removed (%s)" % path)
-                     with repo.ui.configoverride({('phases', 'new-commit'): new_commit}):
+                     if publishing:
+                         new_commit_phase = b'public'
+                     else:
+                         new_commit_phase = b'draft'
+                     with repo.ui.configoverride({(b'phases', b'new-commit'): new_commit_phase}):
+                         kwargs = {safe_bytes(k): safe_bytes(v) for k, v in extra.items()}
                          commit_ctx = memctx(
                              repo=repo,
                              parents=parents,
-                             text=message,
-                             files=files,
+                             text=safe_bytes(message),
+                             files=[safe_bytes(x) for x in files],
                              filectxfn=_filectxfn,
-                             user=user,
+                             user=safe_bytes(user),
                              date=(commit_time, commit_timezone),
-                             extra=extra)
+                             extra=kwargs)
                          n = repo.commitctx(commit_ctx)
                          new_id = hex(n)
                  def pull(self, wire, url, commit_ids=None):
                      repo = self._factory.repo(wire)
                      # Disable any prompts for this repo
-                     repo.ui.setconfig('ui', 'interactive', 'off', '-y')
+                     repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
-                     remote = peer(repo, {}, url)
+                     remote = peer(repo, {}, safe_bytes(url))
                      # Disable any prompts for this remote
-                     remote.ui.setconfig('ui', 'interactive', 'off', '-y')
+                     remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
                      if commit_ids:
                          commit_ids = [bin(commit_id) for commit_id in commit_ids]
                          repo, remote, heads=commit_ids, force=None).cgresult
                  @reraise_safe_exceptions
-                 def pull_cmd(self, wire, source, bookmark=None, branch=None, revision=None, hooks=True):
+                 def pull_cmd(self, wire, source, bookmark='', branch='', revision='', hooks=True):
                      repo = self._factory.repo(wire)
                      baseui = self._factory._create_config(wire['config'], hooks=hooks)
+                     source = safe_bytes(source)
                      # Mercurial internally has a lot of logic that checks ONLY if
                      # option is defined, we just pass those if they are defined then
                      opts = {}
                      if bookmark:
-                         opts['bookmark'] = bookmark
+                         opts['bookmark'] = [safe_bytes(x) for x in bookmark] \
+                             if isinstance(bookmark, list) else safe_bytes(bookmark)
                      if branch:
-                         opts['branch'] = branch
+                         opts['branch'] = [safe_bytes(x) for x in branch] \
+                             if isinstance(branch, list) else safe_bytes(branch)
                      if revision:
-                         opts['rev'] = revision
+                         opts['rev'] = [safe_bytes(x) for x in revision] \
+                             if isinstance(revision, list) else safe_bytes(revision)
                      commands.pull(baseui, repo, source, **opts)
                  @reraise_safe_exceptions
-                 def push(self, wire, revisions, dest_path, hooks=True, push_branches=False):
+                 def push(self, wire, revisions, dest_path, hooks: bool = True, push_branches: bool = False):
                      repo = self._factory.repo(wire)
                      baseui = self._factory._create_config(wire['config'], hooks=hooks)
-                     commands.push(baseui, repo, dest=dest_path, rev=revisions,
+                     revisions = [safe_bytes(x) for x in revisions] \
+                         if isinstance(revisions, list) else safe_bytes(revisions)
+                     commands.push(baseui, repo, safe_bytes(dest_path),
+                                   rev=revisions,
                                    new_branch=push_branches)
                  @reraise_safe_exceptions
                  def strip(self, wire, revision, update, backup):
                      repo = self._factory.repo(wire)
                      ctx = self._get_ctx(repo, revision)
-                     hgext_strip(
+                     hgext_strip.strip(
                          repo.baseui, repo, ctx.node(), update=update, backup=backup)
                  @reraise_safe_exceptions
                  def merge(self, wire, revision):
                      repo = self._factory.repo(wire)
                      baseui = self._factory._create_config(wire['config'])
-                     repo.ui.setconfig('ui', 'merge', 'internal:dump')
+                     repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
                      # When sub repositories are used, mercurial prompts the user in
                      # case of merge conflicts or different sub repository sources. By
                      # setting the interactive flag to `False` mercurial doesn't prompt the
                      # user but instead uses a default value.
-                     repo.ui.setconfig('ui', 'interactive', False)
-                     commands.merge(baseui, repo, rev=revision)
+                     repo.ui.setconfig(b'ui', b'interactive', False)
+                     commands.merge(baseui, repo, rev=safe_bytes(revision))
                  @reraise_safe_exceptions
                  def merge_state(self, wire):
                      repo = self._factory.repo(wire)
-                     repo.ui.setconfig('ui', 'merge', 'internal:dump')
+                     repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
                      # When sub repositories are used, mercurial prompts the user in
                      # case of merge conflicts or different sub repository sources. By
                      # setting the interactive flag to `False` mercurial doesn't prompt the
                      # user but instead uses a default value.
-                     repo.ui.setconfig('ui', 'interactive', False)
+                     repo.ui.setconfig(b'ui', b'interactive', False)
                      ms = hg_merge.mergestate(repo)
                      return [x for x in ms.unresolved()]
                  def commit(self, wire, message, username, close_branch=False):
                      repo = self._factory.repo(wire)
                      baseui = self._factory._create_config(wire['config'])
-                     repo.ui.setconfig('ui', 'username', username)
-                     commands.commit(baseui, repo, message=message, close_branch=close_branch)
+                     repo.ui.setconfig(b'ui', b'username', safe_bytes(username))
+                     commands.commit(baseui, repo, message=safe_bytes(message), close_branch=close_branch)
                  @reraise_safe_exceptions
-                 def rebase(self, wire, source=None, dest=None, abort=False):
+                 def rebase(self, wire, source='', dest='', abort=False):
                      repo = self._factory.repo(wire)
                      baseui = self._factory._create_config(wire['config'])
-                     repo.ui.setconfig('ui', 'merge', 'internal:dump')
+                     repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
                      # When sub repositories are used, mercurial prompts the user in
                      # case of merge conflicts or different sub repository sources. By
                      # setting the interactive flag to `False` mercurial doesn't prompt the
                      # user but instead uses a default value.
-                     repo.ui.setconfig('ui', 'interactive', False)
-                     rebase.rebase(baseui, repo, base=source, dest=dest, abort=abort, keep=not abort)
+                     repo.ui.setconfig(b'ui', b'interactive', False)
+                     rebase_kws = dict(
+                         keep=not abort,
+                         abort=abort
+                     )
+                     if source:
+                         source = repo[source]
+                         rebase_kws['base'] = [source.hex()]
+                     if dest:
+                         dest = repo[dest]
+                         rebase_kws['dest'] = dest.hex()
+                     rebase.rebase(baseui, repo, **rebase_kws)
                  @reraise_safe_exceptions
                  def tag(self, wire, name, revision, message, local, user, tag_time, tag_timezone):
                      date = (tag_time, tag_timezone)
                      try:
-                         hg_tag.tag(repo, name, node, message, local, user, date)
+                         hg_tag.tag(repo, safe_bytes(name), node, safe_bytes(message), local, safe_bytes(user), date)
                      except Abort as e:
                          log.exception("Tag operation aborted")
                          # Exception can contain unicode which we convert
                          raise exceptions.AbortException(e)(repr(e))
                  @reraise_safe_exceptions
-                 def bookmark(self, wire, bookmark, revision=None):
+                 def bookmark(self, wire, bookmark, revision=''):
                      repo = self._factory.repo(wire)
                      baseui = self._factory._create_config(wire['config'])
-                     commands.bookmark(baseui, repo, bookmark, rev=revision, force=True)
+                     revision = revision or ''
+                     commands.bookmark(baseui, repo, safe_bytes(bookmark), rev=safe_bytes(revision), force=True)
                  @reraise_safe_exceptions
                  def install_hooks(self, wire, force=False):
                  @reraise_safe_exceptions
                  def get_hooks_info(self, wire):
                      return {
-                         'pre_version': vcsserver.__version__,
-                         'post_version': vcsserver.__version__,
+                         'pre_version': vcsserver.get_version(),
+                         'post_version': vcsserver.get_version(),
                      }
                  @reraise_safe_exceptions
                      pass
                  @reraise_safe_exceptions
-                 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
-                                  archive_dir_name, commit_id):
+                 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
+                                  archive_dir_name, commit_id, cache_config):
                      def file_walker(_commit_id, path):
                          repo = self._factory.repo(wire)
                          if is_root:
                              matcher = alwaysmatcher(badfn=None)
                          else:
-                             matcher = patternmatcher('', [(b'glob', path+'/**', b'')], badfn=None)
+                             matcher = patternmatcher('', [(b'glob', safe_bytes(path)+b'/**', b'')], badfn=None)
                          file_iter = ctx.manifest().walk(matcher)
                          for fn in file_iter:
                              yield ArchiveNode(file_path, mode, is_link, ctx[fn].data)
-                     return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
-                                         archive_dir_name, commit_id)
+                     return store_archive_in_cache(
+                         file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)

vcsserver/remote/svn_remote.py ~~vcsserver/svn.py~~

0 renamed +225 -137

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # along with this program; if not, write to the Free Software Foundation,
              # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
-             from __future__ import absolute_import
              import os
              import subprocess
-             import time
-             from urllib2 import URLError
-             import urlparse
+             from urllib.error import URLError
+             import urllib.parse
              import logging
              import posixpath as vcspath
-             import StringIO
-             import urllib
+             import io
+             import urllib.request
+             import urllib.parse
+             import urllib.error
              import traceback
-             import svn.client
-             import svn.core
-             import svn.delta
-             import svn.diff
-             import svn.fs
-             import svn.repos
+             import svn.client  # noqa
+             import svn.core  # noqa
+             import svn.delta  # noqa
+             import svn.diff  # noqa
+             import svn.fs  # noqa
+             import svn.repos  # noqa
+             import rhodecode
              from vcsserver import svn_diff, exceptions, subprocessio, settings
-             from vcsserver.base import RepoFactory, raise_from_original, ArchiveNode, archive_repo
+             from vcsserver.base import (
+                 RepoFactory,
+                 raise_from_original,
+                 ArchiveNode,
+                 store_archive_in_cache,
+                 BytesEnvelope,
+                 BinaryEnvelope,
+             )
              from vcsserver.exceptions import NoContentException
-             from vcsserver.utils import safe_str
+             from vcsserver.str_utils import safe_str, safe_bytes
+             from vcsserver.type_utils import assert_bytes
              from vcsserver.vcs_base import RemoteBase
+             from vcsserver.lib.svnremoterepo import svnremoterepo
              log = logging.getLogger(__name__)
                  'pre-1.9-compatible': '1.8',
              }
-             current_compatible_version = '1.12'
+             current_compatible_version = '1.14'
              def reraise_safe_exceptions(func):
                      except Exception as e:
                          if not hasattr(e, '_vcs_kind'):
                              log.exception("Unhandled exception in svn remote call")
-                             raise_from_original(exceptions.UnhandledException(e))
+                             raise_from_original(exceptions.UnhandledException(e), e)
                          raise
                  return wrapper
                                  or compatible_version
                              fs_config['compatible-version'] = compatible_version_string
-                         log.debug('Create SVN repo with config "%s"', fs_config)
+                         log.debug('Create SVN repo with config `%s`', fs_config)
                          repo = svn.repos.create(path, "", "", None, fs_config)
                      else:
                          repo = svn.repos.open(path)
-                     log.debug('Got SVN object: %s', repo)
+                     log.debug('repository created: got SVN object: %s', repo)
                      return repo
                  def repo(self, wire, create=False, compatible_version=None):
                  def __init__(self, factory, hg_factory=None):
                      self._factory = factory
-                     # TODO: Remove once we do not use internal Mercurial objects anymore
-                     # for subversion
-                     self._hg_factory = hg_factory
+                     self._bulk_methods = {
+                         # NOT supported in SVN ATM...
+                     }
+                     self._bulk_file_methods = {
+                         "size": self.get_file_size,
+                         "data": self.get_file_content,
+                         "flags": self.get_node_type,
+                         "is_binary": self.is_binary,
+                         "md5": self.md5_hash
+                     }
+                 @reraise_safe_exceptions
+                 def bulk_file_request(self, wire, commit_id, path, pre_load):
+                     cache_on, context_uid, repo_id = self._cache_on(wire)
+                     region = self._region(wire)
+                     # since we use unified API, we need to cast from str to int for SVN
+                     commit_id = int(commit_id)
+                     @region.conditional_cache_on_arguments(condition=cache_on)
+                     def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
+                         result = {}
+                         for attr in pre_load:
+                             try:
+                                 method = self._bulk_file_methods[attr]
+                                 wire.update({'cache': False})  # disable cache for bulk calls so we don't double cache
+                                 result[attr] = method(wire, _commit_id, _path)
+                             except KeyError as e:
+                                 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
+                         return result
+                     return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load)))
                  @reraise_safe_exceptions
                  def discover_svn_version(self):
                          svn_ver = svn.core.SVN_VERSION
                      except ImportError:
                          svn_ver = None
-                     return svn_ver
+                     return safe_str(svn_ver)
                  @reraise_safe_exceptions
                  def is_empty(self, wire):
                      try:
                          return self.lookup(wire, -1) == 0
                      except Exception:
                          log.exception("failed to read object_store")
                          return False
-                 def check_url(self, url, config_items):
-                     # this can throw exception if not installed, but we detect this
-                     from hgsubversion import svnrepo
+                 def check_url(self, url, config):
-                     baseui = self._hg_factory._create_config(config_items)
-                     # uuid function get's only valid UUID from proper repo, else
+                     # uuid function gets only valid UUID from proper repo, else
                      # throws exception
+                     username, password, src_url = self.get_url_and_credentials(url)
                      try:
-                         svnrepo.svnremoterepo(baseui, url).svn.uuid
+                         svnremoterepo(safe_bytes(username), safe_bytes(password), safe_bytes(src_url)).svn().uuid
                      except Exception:
                          tb = traceback.format_exc()
                          log.debug("Invalid Subversion url: `%s`, tb: %s", url, tb)
-                         raise URLError(
-                             '"%s" is not a valid Subversion source url.' % (url, ))
+                         raise URLError(f'"{url}" is not a valid Subversion source url.')
                      return True
                  def is_path_valid_repository(self, wire, path):
                      # NOTE(marcink):  short circuit the check for SVN repo
                      # the repos.open might be expensive to check, but we have one cheap
-                     # pre condition that we can use, to check for 'format' file
+                     # pre-condition that we can use, to check for 'format' file
                      if not os.path.isfile(os.path.join(path, 'format')):
                          return False
-                     try:
-                         svn.repos.open(path)
-                     except svn.core.SubversionException:
-                         tb = traceback.format_exc()
-                         log.debug("Invalid Subversion path `%s`, tb: %s", path, tb)
-                         return False
-                     return True
+                     cache_on, context_uid, repo_id = self._cache_on(wire)
+                     region = self._region(wire)
+                     @region.conditional_cache_on_arguments(condition=cache_on)
+                     def _assert_correct_path(_context_uid, _repo_id, fast_check):
+                         try:
+                             svn.repos.open(path)
+                         except svn.core.SubversionException:
+                             tb = traceback.format_exc()
+                             log.debug("Invalid Subversion path `%s`, tb: %s", path, tb)
+                             return False
+                         return True
+                     return _assert_correct_path(context_uid, repo_id, True)
                  @reraise_safe_exceptions
                  def verify(self, wire,):
                      repo_path = wire['path']
                      if not self.is_path_valid_repository(wire, repo_path):
                          raise Exception(
-                             "Path %s is not a valid Subversion repository." % repo_path)
+                             f"Path {repo_path} is not a valid Subversion repository.")
                      cmd = ['svnadmin', 'info', repo_path]
                      stdout, stderr = subprocessio.run_command(cmd)
                      return stdout
+                 @reraise_safe_exceptions
                  def lookup(self, wire, revision):
                      if revision not in [-1, None, 'HEAD']:
                          raise NotImplementedError
                      head = svn.fs.youngest_rev(fs_ptr)
                      return head
+                 @reraise_safe_exceptions
                  def lookup_interval(self, wire, start_ts, end_ts):
                      repo = self._factory.repo(wire)
                      fsobj = svn.repos.fs(repo)
                          end_rev = svn.fs.youngest_rev(fsobj)
                      return start_rev, end_rev
+                 @reraise_safe_exceptions
                  def revision_properties(self, wire, revision):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _revision_properties(_repo_id, _revision):
                          repo = self._factory.repo(wire)
                      removed = []
                      # TODO: CHANGE_ACTION_REPLACE: Figure out where it belongs
-                     for path, change in editor.changes.iteritems():
+                     for path, change in editor.changes.items():
                          # TODO: Decide what to do with directory nodes. Subversion can add
                          # empty directories.
                              removed.append(path)
                          else:
                              raise NotImplementedError(
-                                 "Action %s not supported on path %s" % (
+                                 "Action {} not supported on path {}".format(
                                      change.action, path))
                      changes = {
                  def node_history(self, wire, path, revision, limit):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _assert_correct_path(_context_uid, _repo_id, _path, _revision, _limit):
                          cross_copies = False
                          return history_revisions
                      return _assert_correct_path(context_uid, repo_id, path, revision, limit)
+                 @reraise_safe_exceptions
                  def node_properties(self, wire, path, revision):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _node_properties(_repo_id, _path, _revision):
                          repo = self._factory.repo(wire)
                      return _node_properties(repo_id, path, revision)
                  def file_annotate(self, wire, path, revision):
-                     abs_path = 'file://' + urllib.pathname2url(
+                     abs_path = 'file://' + urllib.request.pathname2url(
                          vcspath.join(wire['path'], path))
                      file_uri = svn.core.svn_path_canonicalize(abs_path)
                      except svn.core.SubversionException as exc:
                          log.exception("Error during blame operation.")
                          raise Exception(
-                             "Blame not supported or file does not exist at path %s. "
-                             "Error %s." % (path, exc))
+                             f"Blame not supported or file does not exist at path {path}. "
+                             f"Error {exc}.")
-                     return annotations
+                     return BinaryEnvelope(annotations)
-                 def get_node_type(self, wire, path, revision=None):
+                 @reraise_safe_exceptions
+                 def get_node_type(self, wire, revision=None, path=''):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
-                     def _get_node_type(_repo_id, _path, _revision):
+                     def _get_node_type(_repo_id, _revision, _path):
                          repo = self._factory.repo(wire)
                          fs_ptr = svn.repos.fs(repo)
                          if _revision is None:
                          root = svn.fs.revision_root(fs_ptr, _revision)
                          node = svn.fs.check_path(root, path)
                          return NODE_TYPE_MAPPING.get(node, None)
-                     return _get_node_type(repo_id, path, revision)
+                     return _get_node_type(repo_id, revision, path)
-                 def get_nodes(self, wire, path, revision=None):
+                 @reraise_safe_exceptions
+                 def get_nodes(self, wire, revision=None, path=''):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _get_nodes(_repo_id, _path, _revision):
                          repo = self._factory.repo(wire)
                          root = svn.fs.revision_root(fsobj, _revision)
                          entries = svn.fs.dir_entries(root, path)
                          result = []
-                         for entry_path, entry_info in entries.iteritems():
+                         for entry_path, entry_info in entries.items():
                              result.append(
                                  (entry_path, NODE_TYPE_MAPPING.get(entry_info.kind, None)))
                          return result
                      return _get_nodes(repo_id, path, revision)
-                 def get_file_content(self, wire, path, rev=None):
+                 @reraise_safe_exceptions
+                 def get_file_content(self, wire, rev=None, path=''):
                      repo = self._factory.repo(wire)
                      fsobj = svn.repos.fs(repo)
                      if rev is None:
-                         rev = svn.fs.youngest_revision(fsobj)
+                         rev = svn.fs.youngest_rev(fsobj)
                      root = svn.fs.revision_root(fsobj, rev)
                      content = svn.core.Stream(svn.fs.file_contents(root, path))
-                     return content.read()
+                     return BytesEnvelope(content.read())
-                 def get_file_size(self, wire, path, revision=None):
+                 @reraise_safe_exceptions
+                 def get_file_size(self, wire, revision=None, path=''):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
                      region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
-                     def _get_file_size(_repo_id, _path, _revision):
+                     def _get_file_size(_repo_id, _revision, _path):
                          repo = self._factory.repo(wire)
                          fsobj = svn.repos.fs(repo)
                          if _revision is None:
                          root = svn.fs.revision_root(fsobj, _revision)
                          size = svn.fs.file_length(root, path)
                          return size
-                     return _get_file_size(repo_id, path, revision)
+                     return _get_file_size(repo_id, revision, path)
                  def create_repository(self, wire, compatible_version=None):
                      log.info('Creating Subversion repository in path "%s"', wire['path'])
                      self._factory.repo(wire, create=True,
                                         compatible_version=compatible_version)
-                 def get_url_and_credentials(self, src_url):
-                     obj = urlparse.urlparse(src_url)
-                     username = obj.username or None
-                     password = obj.password or None
+                 def get_url_and_credentials(self, src_url) -> tuple[str, str, str]:
+                     obj = urllib.parse.urlparse(src_url)
+                     username = obj.username or ''
+                     password = obj.password or ''
                      return username, password, src_url
                  def import_remote_repository(self, wire, src_url):
                      repo_path = wire['path']
                      if not self.is_path_valid_repository(wire, repo_path):
                          raise Exception(
-                             "Path %s is not a valid Subversion repository." % repo_path)
+                             f"Path {repo_path} is not a valid Subversion repository.")
                      username, password, src_url = self.get_url_and_credentials(src_url)
                      rdump_cmd = ['svnrdump', 'dump', '--non-interactive',
                      log.debug('Return process ended with code: %s', rdump.returncode)
                      if rdump.returncode != 0:
                          errors = rdump.stderr.read()
-                         log.error('svnrdump dump failed: statuscode %s: message: %s',
-                                   rdump.returncode, errors)
+                         log.error('svnrdump dump failed: statuscode %s: message: %s', rdump.returncode, errors)
                          reason = 'UNKNOWN'
-                         if 'svnrdump: E230001:' in errors:
+                         if b'svnrdump: E230001:' in errors:
                              reason = 'INVALID_CERTIFICATE'
                          if reason == 'UNKNOWN':
-                             reason = 'UNKNOWN:{}'.format(errors)
+                             reason = f'UNKNOWN:{safe_str(errors)}'
                          raise Exception(
-                             'Failed to dump the remote repository from %s. Reason:%s' % (
+                             'Failed to dump the remote repository from {}. Reason:{}'.format(
                                  src_url, reason))
                      if load.returncode != 0:
                          raise Exception(
-                             'Failed to load the dump of remote repository from %s.' %
-                             (src_url, ))
+                             f'Failed to load the dump of remote repository from {src_url}.')
                  def commit(self, wire, message, author, timestamp, updated, removed):
-                     assert isinstance(message, str)
-                     assert isinstance(author, str)
+                     message = safe_bytes(message)
+                     author = safe_bytes(author)
                      repo = self._factory.repo(wire)
                      fsobj = svn.repos.fs(repo)
                      log.debug('Committed revision "%s" to "%s".', commit_id, wire['path'])
                      return commit_id
+                 @reraise_safe_exceptions
                  def diff(self, wire, rev1, rev2, path1=None, path2=None,
                           ignore_whitespace=False, context=3):
                      diff_creator = SvnDiffer(
                          repo, rev1, path1, rev2, path2, ignore_whitespace, context)
                      try:
-                         return diff_creator.generate_diff()
+                         return BytesEnvelope(diff_creator.generate_diff())
                      except svn.core.SubversionException as e:
                          log.exception(
                              "Error during diff operation operation. "
-                             "Path might not exist %s, %s" % (path1, path2))
-                         return ""
+                             "Path might not exist %s, %s", path1, path2)
+                         return BytesEnvelope(b'')
                  @reraise_safe_exceptions
                  def is_large_file(self, wire, path):
                  @reraise_safe_exceptions
                  def is_binary(self, wire, rev, path):
                      cache_on, context_uid, repo_id = self._cache_on(wire)
+                     region = self._region(wire)
-                     region = self._region(wire)
                      @region.conditional_cache_on_arguments(condition=cache_on)
                      def _is_binary(_repo_id, _rev, _path):
-                         raw_bytes = self.get_file_content(wire, path, rev)
-                         return raw_bytes and '\0' in raw_bytes
+                         raw_bytes = self.get_file_content(wire, rev, path)
+                         if not raw_bytes:
+                             return False
+                         return b'\0' in raw_bytes
                      return _is_binary(repo_id, rev, path)
                  @reraise_safe_exceptions
+                 def md5_hash(self, wire, rev, path):
+                     cache_on, context_uid, repo_id = self._cache_on(wire)
+                     region = self._region(wire)
+                     @region.conditional_cache_on_arguments(condition=cache_on)
+                     def _md5_hash(_repo_id, _rev, _path):
+                         return ''
+                     return _md5_hash(repo_id, rev, path)
+                 @reraise_safe_exceptions
                  def run_svn_command(self, wire, cmd, **opts):
                      path = wire.get('path', None)
+                     debug_mode = rhodecode.ConfigGet().get_bool('debug')
                      if path and os.path.isdir(path):
                          opts['cwd'] = path
                      try:
                          _opts.update(opts)
-                         p = subprocessio.SubprocessIOChunker(cmd, **_opts)
+                         proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
-                         return ''.join(p), ''.join(p.error)
-                     except (EnvironmentError, OSError) as err:
+                         return b''.join(proc), b''.join(proc.stderr)
+                     except OSError as err:
                          if safe_call:
                              return '', safe_str(err).strip()
                          else:
-                             cmd = ' '.join(cmd)  # human friendly CMD
-                             tb_err = ("Couldn't run svn command (%s).\n"
-                                       "Original error was:%s\n"
-                                       "Call options:%s\n"
-                                       % (cmd, err, _opts))
+                             cmd = ' '.join(map(safe_str, cmd))  # human friendly CMD
+                             call_opts = {}
+                             if debug_mode:
+                                 call_opts = _opts
+                             tb_err = ("Couldn't run svn command ({}).\n"
+                                       "Original error was:{}\n"
+                                       "Call options:{}\n"
+                                       .format(cmd, err, call_opts))
                              log.exception(tb_err)
                              raise exceptions.VcsException()(tb_err)
                      binary_dir = settings.BINARY_DIR
                      executable = None
                      if binary_dir:
-                         executable = os.path.join(binary_dir, 'python')
-                     return install_svn_hooks(
-                         repo_path, executable=executable, force_create=force)
+                         executable = os.path.join(binary_dir, 'python3')
+                     return install_svn_hooks(repo_path, force_create=force)
                  @reraise_safe_exceptions
                  def get_hooks_info(self, wire):
                      pass
                  @reraise_safe_exceptions
-                 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
-                                  archive_dir_name, commit_id):
+                 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
+                                  archive_dir_name, commit_id, cache_config):
                      def walk_tree(root, root_dir, _commit_id):
                          """
                          Special recursive svn repo walker
                          """
+                         root_dir = safe_bytes(root_dir)
                          filemode_default = 0o100644
                          filemode_executable = 0o100755
                                  # return only DIR, and then all entries in that dir
                                  yield os.path.join(root_dir, f_name), {'mode': filemode_default}, f_type
                                  new_root = os.path.join(root_dir, f_name)
-                                 for _f_name, _f_data, _f_type in walk_tree(root, new_root, _commit_id):
-                                     yield _f_name, _f_data, _f_type
+                                 yield from walk_tree(root, new_root, _commit_id)
                              else:
-                                 f_path = os.path.join(root_dir, f_name).rstrip('/')
+                                 f_path = os.path.join(root_dir, f_name).rstrip(b'/')
                                  prop_list = svn.fs.node_proplist(root, f_path)
                                  f_mode = filemode_default
                                  data_stream = f_data['content_stream']
                                  yield ArchiveNode(file_path, mode, is_link, data_stream)
-                     return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
-                                         archive_dir_name, commit_id)
+                     return store_archive_in_cache(
+                         file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
-             class SvnDiffer(object):
+             class SvnDiffer:
                  """
                  Utility to create diffs based on difflib and the Subversion api
                  """
                              "Source type: %s, target type: %s" %
                              (self.src_kind, self.tgt_kind))
-                 def generate_diff(self):
-                     buf = StringIO.StringIO()
+                 def generate_diff(self) -> bytes:
+                     buf = io.BytesIO()
                      if self.tgt_kind == svn.core.svn_node_dir:
                          self._generate_dir_diff(buf)
                      else:
                          self._generate_file_diff(buf)
                      return buf.getvalue()
-                 def _generate_dir_diff(self, buf):
+                 def _generate_dir_diff(self, buf: io.BytesIO):
                      editor = DiffChangeEditor()
                      editor_ptr, editor_baton = svn.delta.make_editor(editor)
                      svn.repos.dir_delta2(
                          self._generate_node_diff(
                              buf, change, path, self.tgt_path, path, self.src_path)
-                 def _generate_file_diff(self, buf):
+                 def _generate_file_diff(self, buf: io.BytesIO):
                      change = None
                      if self.src_kind == svn.core.svn_node_none:
                          change = "add"
                          buf, change, tgt_path, tgt_base, src_path, src_base)
                  def _generate_node_diff(
-                         self, buf, change, tgt_path, tgt_base, src_path, src_base):
+                         self, buf: io.BytesIO, change, tgt_path, tgt_base, src_path, src_base):
+                     tgt_path_bytes = safe_bytes(tgt_path)
+                     tgt_path = safe_str(tgt_path)
+                     src_path_bytes = safe_bytes(src_path)
+                     src_path = safe_str(src_path)
                      if self.src_rev == self.tgt_rev and tgt_base == src_base:
                          # makes consistent behaviour with git/hg to return empty diff if
                      self.binary_content = False
                      mime_type = self._get_mime_type(tgt_full_path)
-                     if mime_type and not mime_type.startswith('text'):
+                     if mime_type and not mime_type.startswith(b'text'):
                          self.binary_content = True
-                         buf.write("=" * 67 + '\n')
-                         buf.write("Cannot display: file marked as a binary type.\n")
-                         buf.write("svn:mime-type = %s\n" % mime_type)
-                     buf.write("Index: %s\n" % (tgt_path, ))
-                     buf.write("=" * 67 + '\n')
-                     buf.write("diff --git a/%(tgt_path)s b/%(tgt_path)s\n" % {
-                         'tgt_path': tgt_path})
+                         buf.write(b"=" * 67 + b'\n')
+                         buf.write(b"Cannot display: file marked as a binary type.\n")
+                         buf.write(b"svn:mime-type = %s\n" % mime_type)
+                     buf.write(b"Index: %b\n" % tgt_path_bytes)
+                     buf.write(b"=" * 67 + b'\n')
+                     buf.write(b"diff --git a/%b b/%b\n" % (tgt_path_bytes, tgt_path_bytes))
                      if change == 'add':
                          # TODO: johbo: SVN is missing a zero here compared to git
-                         buf.write("new file mode 10644\n")
+                         buf.write(b"new file mode 10644\n")
+                         # TODO(marcink): intro to binary detection of svn patches
+                         # if self.binary_content:
+                         #     buf.write(b'GIT binary patch\n')
-                         #TODO(marcink): intro to binary detection of svn patches
+                         buf.write(b"--- /dev/null\t(revision 0)\n")
+                         src_lines = []
+                     else:
+                         if change == 'delete':
+                             buf.write(b"deleted file mode 10644\n")
+                         # TODO(marcink): intro to binary detection of svn patches
                          # if self.binary_content:
                          #     buf.write(b'GIT binary patch\n')
-                         buf.write("--- /dev/null\t(revision 0)\n")
-                         src_lines = []
-                     else:
-                         if change == 'delete':
-                             buf.write("deleted file mode 10644\n")
-                         #TODO(marcink): intro to binary detection of svn patches
-                         # if self.binary_content:
-                         #     buf.write('GIT binary patch\n')
-                         buf.write("--- a/%s\t(revision %s)\n" % (
-                             src_path, self.src_rev))
+                         buf.write(b"--- a/%b\t(revision %d)\n" % (src_path_bytes, self.src_rev))
                          src_lines = self._svn_readlines(self.src_root, src_full_path)
                      if change == 'delete':
-                         buf.write("+++ /dev/null\t(revision %s)\n" % (self.tgt_rev, ))
+                         buf.write(b"+++ /dev/null\t(revision %d)\n" % self.tgt_rev)
                          tgt_lines = []
                      else:
-                         buf.write("+++ b/%s\t(revision %s)\n" % (
-                             tgt_path, self.tgt_rev))
+                         buf.write(b"+++ b/%b\t(revision %d)\n" % (tgt_path_bytes, self.tgt_rev))
                          tgt_lines = self._svn_readlines(self.tgt_root, tgt_full_path)
+                     # we made our diff header, time to generate the diff content into our buffer
                      if not self.binary_content:
                          udiff = svn_diff.unified_diff(
                              src_lines, tgt_lines, context=self.context,
                              ignore_blank_lines=self.ignore_whitespace,
                              ignore_case=False,
                              ignore_space_changes=self.ignore_whitespace)
                          buf.writelines(udiff)
-                 def _get_mime_type(self, path):
+                 def _get_mime_type(self, path) -> bytes:
                      try:
                          mime_type = svn.fs.node_prop(
                              self.tgt_root, path, svn.core.SVN_PROP_MIME_TYPE)
                      if node_kind not in (
                              svn.core.svn_node_file, svn.core.svn_node_symlink):
                          return []
-                     content = svn.core.Stream(svn.fs.file_contents(fs_root, node_path)).read()
+                     content = svn.core.Stream(
+                         svn.fs.file_contents(fs_root, node_path)).read()
                      return content.splitlines(True)
                  return True
-             class TxnNodeProcessor(object):
+             class TxnNodeProcessor:
                  """
                  Utility to process the change of one node within a transaction root.
                  """
                  def __init__(self, node, txn_root):
-                     assert isinstance(node['path'], str)
+                     assert_bytes(node['path'])
                      self.node = node
                      self.txn_root = txn_root
                          svn.fs.make_file(self.txn_root, self.node['path'])
                  def _update_file_content(self):
-                     assert isinstance(self.node['content'], str)
+                     assert_bytes(self.node['content'])
                      handler, baton = svn.fs.apply_textdelta(
                          self.txn_root, self.node['path'], None, None)
                      svn.delta.svn_txdelta_send_string(self.node['content'], handler, baton)
                  def _update_file_properties(self):
                      properties = self.node.get('properties', {})
-                     for key, value in properties.iteritems():
+                     for key, value in properties.items():
                          svn.fs.change_node_prop(
-                             self.txn_root, self.node['path'], key, value)
+                             self.txn_root, self.node['path'], safe_bytes(key), safe_bytes(value))
              def apr_time_t(timestamp):
                  """
                  Convert a Python timestamp into APR timestamp type apr_time_t
                  """
-                 return timestamp * 1E6
+                 return int(timestamp * 1E6)
              def svn_opt_revision_value_t(num):

vcsserver/remote_wsgi.py

0 +3 -3

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              from vcsserver import scm_app, wsgi_app_caller
-             class GitRemoteWsgi(object):
+             class GitRemoteWsgi:
                  def handle(self, environ, input_data, *args, **kwargs):
                      app = wsgi_app_caller.WSGIAppCaller(
                          scm_app.create_git_wsgi_app(*args, **kwargs))
                      return app.handle(environ, input_data)
-             class HgRemoteWsgi(object):
+             class HgRemoteWsgi:
                  def handle(self, environ, input_data, *args, **kwargs):
                      app = wsgi_app_caller.WSGIAppCaller(
                          scm_app.create_hg_wsgi_app(*args, **kwargs))

vcsserver/scm_app.py

0 +33 -13

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              import webob.exc
              from vcsserver import pygrack, exceptions, settings, git_lfs
+             from vcsserver.str_utils import ascii_bytes, safe_bytes
              log = logging.getLogger(__name__)
                      first_chunk = None
                      try:
-                         data = gen.next()
+                         data = next(gen)
                          def first_chunk():
                              yield data
                  def _runwsgi(self, req, res, repo):
-                     cmd = req.qsparams.get('cmd', '')
+                     cmd = req.qsparams.get(b'cmd', '')
                      if not mercurial.wireprotoserver.iscmd(cmd):
                          # NOTE(marcink): for unsupported commands, we return bad request
                          # internally from HG
+                         log.warning('cmd: `%s` is not supported by the mercurial wireprotocol v1', cmd)
                          from mercurial.hgweb.common import statusmessage
                          res.status = statusmessage(mercurial.hgweb.common.HTTP_BAD_REQUEST)
-                         res.setbodybytes('')
+                         res.setbodybytes(b'')
                          return res.sendresponse()
-                     return super(HgWeb, self)._runwsgi(req, res, repo)
+                     return super()._runwsgi(req, res, repo)
+             def sanitize_hg_ui(baseui):
+                 # NOTE(marcink): since python3, hgsubversion is deprecated.
+                 # Old installations might still have this extension enabled, so
+                 # we explicitly remove it here to make sure it won't propagate further.
+                 if baseui.config(b'extensions', b'hgsubversion') is not None:
+                     for cfg in (baseui._ocfg, baseui._tcfg, baseui._ucfg):
+                         if b'extensions' in cfg:
+                             if b'hgsubversion' in cfg[b'extensions']:
+                                 del cfg[b'extensions'][b'hgsubversion']
              def make_hg_ui_from_config(repo_config):
                  baseui._tcfg = mercurial.config.config()
                  for section, option, value in repo_config:
-                     baseui.setconfig(section, option, value)
+                     baseui.setconfig(
+                         ascii_bytes(section, allow_bytes=True),
+                         ascii_bytes(option, allow_bytes=True),
+                         ascii_bytes(value, allow_bytes=True))
                  # make our hgweb quiet so it doesn't print output
-                 baseui.setconfig('ui', 'quiet', 'true')
+                 baseui.setconfig(b'ui', b'quiet', b'true')
                  return baseui
                      return
                  log.debug('reading hgrc from %s', path)
                  cfg = mercurial.config.config()
-                 cfg.read(path)
+                 cfg.read(ascii_bytes(path))
                  for section in HG_UI_SECTIONS:
                      for k, v in cfg.items(section):
                          log.debug('settings ui from file: [%s] %s=%s', section, k, v)
-                         baseui.setconfig(section, k, v)
+                         baseui.setconfig(
+                             ascii_bytes(section, allow_bytes=True),
+                             ascii_bytes(k, allow_bytes=True),
+                             ascii_bytes(v, allow_bytes=True))
              def create_hg_wsgi_app(repo_path, repo_name, config):
                  baseui = make_hg_ui_from_config(config)
                  update_hg_ui_from_hgrc(baseui, repo_path)
+                 sanitize_hg_ui(baseui)
                  try:
-                     return HgWeb(repo_path, name=repo_name, baseui=baseui)
+                     return HgWeb(safe_bytes(repo_path), name=safe_bytes(repo_name), baseui=baseui)
                  except mercurial.error.RequirementError as e:
                      raise exceptions.RequirementException(e)(e)
-             class GitHandler(object):
+             class GitHandler:
                  """
                  Handler for Git operations like push/pull etc
                  """
                  return app
-             class GitLFSHandler(object):
+             class GitLFSHandler:
                  """
                  Handler for Git LFS operations
                  """

vcsserver/server.py

0 +2 -2

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              log = logging.getLogger(__name__)
-             class VcsServer(object):
+             class VcsServer:
                  """
                  Exposed remote interface of the vcsserver itself.

vcsserver/settings.py

0 +1 -1

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by

vcsserver/subprocessio.py

0 +149 -105

              If not, see <http://www.gnu.org/licenses/>.
              """
              import os
+             import collections
              import logging
-             import subprocess32 as subprocess
-             from collections import deque
-             from threading import Event, Thread
+             import subprocess
+             import threading
+             from vcsserver.str_utils import safe_str
              log = logging.getLogger(__name__)
-             class StreamFeeder(Thread):
+             class StreamFeeder(threading.Thread):
                  """
                  Normal writing into pipe-like is blocking once the buffer is filled.
                  This thread allows a thread to seep data from a file-like into a pipe
                  """
                  def __init__(self, source):
-                     super(StreamFeeder, self).__init__()
+                     super().__init__()
                      self.daemon = True
                      filelike = False
-                     self.bytes = bytes()
-                     if type(source) in (type(''), bytes, bytearray):  # string-like
+                     self.bytes = b''
+                     if type(source) in (str, bytes, bytearray):  # string-like
                          self.bytes = bytes(source)
                      else:  # can be either file pointer or file-like
-                         if type(source) in (int, long):  # file pointer it is
+                         if isinstance(source, int):  # file pointer it is
                              # converting file descriptor (int) stdin into file-like
-                             try:
-                                 source = os.fdopen(source, 'rb', 16384)
-                             except Exception:
-                                 pass
+                             source = os.fdopen(source, 'rb', 16384)
                          # let's see if source is file-like by now
-                         try:
-                             filelike = source.read
-                         except Exception:
-                             pass
+                         filelike = hasattr(source, 'read')
                      if not filelike and not self.bytes:
                          raise TypeError("StreamFeeder's source object must be a readable "
                                          "file-like, a file descriptor, or a string-like.")
                      self.readiface, self.writeiface = os.pipe()
                  def run(self):
-                     t = self.writeiface
+                     writer = self.writeiface
                      try:
                          if self.bytes:
-                             os.write(t, self.bytes)
+                             os.write(writer, self.bytes)
                          else:
                              s = self.source
-                             b = s.read(4096)
-                             while b:
-                                 os.write(t, b)
-                                 b = s.read(4096)
+                             while 1:
+                                 _bytes = s.read(4096)
+                                 if not _bytes:
+                                     break
+                                 os.write(writer, _bytes)
                      finally:
-                         os.close(t)
+                         os.close(writer)
                  @property
                  def output(self):
                      return self.readiface
-             class InputStreamChunker(Thread):
+             class InputStreamChunker(threading.Thread):
                  def __init__(self, source, target, buffer_size, chunk_size):
-                     super(InputStreamChunker, self).__init__()
+                     super().__init__()
                      self.daemon = True  # die die die.
                      self.chunk_count_max = int(buffer_size / chunk_size) + 1
                      self.chunk_size = chunk_size
-                     self.data_added = Event()
+                     self.data_added = threading.Event()
                      self.data_added.clear()
-                     self.keep_reading = Event()
+                     self.keep_reading = threading.Event()
                      self.keep_reading.set()
-                     self.EOF = Event()
+                     self.EOF = threading.Event()
                      self.EOF.clear()
-                     self.go = Event()
+                     self.go = threading.Event()
                      self.go.set()
                  def stop(self):
                          # go of the input because, if successful, .close() will send EOF
                          # down the pipe.
                          self.source.close()
-                     except:
+                     except Exception:
                          pass
                  def run(self):
                          try:
                              b = s.read(cs)
-                         except ValueError:
+                         except ValueError:  # probably "I/O operation on closed file"
                              b = ''
                      self.EOF.set()
                      da.set()  # for cases when done but there was no input.
-             class BufferedGenerator(object):
+             class BufferedGenerator:
                  """
                  Class behaves as a non-blocking, buffered pipe reader.
                  Reads chunks of data (through a thread)
                  StopIteration after the last chunk of data is yielded.
                  """
-                 def __init__(self, source, buffer_size=65536, chunk_size=4096,
+                 def __init__(self, name, source, buffer_size=65536, chunk_size=4096,
                               starting_values=None, bottomless=False):
                      starting_values = starting_values or []
+                     self.name = name
+                     self.buffer_size = buffer_size
+                     self.chunk_size = chunk_size
                      if bottomless:
                          maxlen = int(buffer_size / chunk_size)
                      else:
                          maxlen = None
-                     self.data = deque(starting_values, maxlen)
-                     self.worker = InputStreamChunker(source, self.data, buffer_size,
-                                                      chunk_size)
+                     self.data_queue = collections.deque(starting_values, maxlen)
+                     self.worker = InputStreamChunker(source, self.data_queue, buffer_size, chunk_size)
                      if starting_values:
                          self.worker.data_added.set()
                      self.worker.start()
                  ####################
                  # Generator's methods
                  ####################
+                 def __str__(self):
+                     return f'BufferedGenerator(name={self.name} chunk: {self.chunk_size} on buffer: {self.buffer_size})'
                  def __iter__(self):
                      return self
-                 def next(self):
-                     while not len(self.data) and not self.worker.EOF.is_set():
+                 def __next__(self):
+                     while not self.length and not self.worker.EOF.is_set():
                          self.worker.data_added.clear()
                          self.worker.data_added.wait(0.2)
-                     if len(self.data):
+                     if self.length:
                          self.worker.keep_reading.set()
-                         return bytes(self.data.popleft())
+                         return bytes(self.data_queue.popleft())
                      elif self.worker.EOF.is_set():
                          raise StopIteration
                  @property
                  def done_reading(self):
                      """
-                     Done_reding does not mean that the iterator's buffer is empty.
+                     Done_reading does not mean that the iterator's buffer is empty.
                      Iterator might have done reading from underlying source, but the read
                      chunks might still be available for serving through the __next__() method.
                      """
                      returns int.
-                     This is the lenght of the que of chunks, not the length of
+                     This is the length of the queue of chunks, not the length of
                      the combined contents in those chunks.
                      __len__() cannot be meaningfully implemented because this
-                     reader is just flying throuh a bottomless pit content and
-                     can only know the lenght of what it already saw.
+                     reader is just flying through a bottomless pit content and
+                     can only know the length of what it already saw.
                      If __len__() on WSGI server per PEP 3333 returns a value,
-                     the responce's length will be set to that. In order not to
+                     the response's length will be set to that. In order not to
                      confuse WSGI PEP3333 servers, we will not implement __len__
                      at all.
                      """
-                     return len(self.data)
+                     return len(self.data_queue)
                  def prepend(self, x):
-                     self.data.appendleft(x)
+                     self.data_queue.appendleft(x)
                  def append(self, x):
-                     self.data.append(x)
+                     self.data_queue.append(x)
                  def extend(self, o):
-                     self.data.extend(o)
+                     self.data_queue.extend(o)
                  def __getitem__(self, i):
-                     return self.data[i]
+                     return self.data_queue[i]
-             class SubprocessIOChunker(object):
+             class SubprocessIOChunker:
                  """
                  Processor class wrapping handling of subprocess IO.
                  - We are multithreaded. Writing in and reading out/err all happen in separate threads.
                  - We support concurrent (in and out) stream processing.
-                 - The output is not a stream. It's a queue of read string (bytes, not unicode)
+                 - The output is not a stream. It's a queue of read string (bytes, not str)
                    chunks. The object behaves as an iterable. You can "for chunk in obj:" us.
                  - We are non-blocking in more respects than communicate()
                    (reading from subprocess out pauses when internal buffer is full, but
                     does not block the parallel inpipe reading occurring in a parallel thread.)
                  The purpose of the object is to allow us to wrap subprocess interactions into
-                 and interable that can be passed to a WSGI server as the application's return
+                 an iterable that can be passed to a WSGI server as the application's return
                  value. Because of stream-processing-ability, WSGI does not have to read ALL
                  of the subprocess's output and buffer it, before handing it to WSGI server for
                  HTTP response. Instead, the class initializer reads just a bit of the stream
-                 to figure out if error ocurred or likely to occur and if not, just hands the
+                 to figure out if error occurred or likely to occur and if not, just hands the
                  further iteration over subprocess output to the server for completion of HTTP
                  response.
                  The real or perceived subprocess error is trapped and raised as one of
-                 EnvironmentError family of exceptions
+                 OSError family of exceptions
                  Example usage:
                  #    try:
                  #            buffer_size = 65536,
                  #            chunk_size = 4096
                  #            )
-                 #    except (EnvironmentError) as e:
+                 #    except (OSError) as e:
                  #        print(str(e))
                  #        raise e
                  #
                  _close_input_fd = None
                  _closed = False
+                 _stdout = None
+                 _stderr = None
-                 def __init__(self, cmd, inputstream=None, buffer_size=65536,
+                 def __init__(self, cmd, input_stream=None, buffer_size=65536,
                               chunk_size=4096, starting_values=None, fail_on_stderr=True,
                               fail_on_return_code=True, **kwargs):
                      """
                      Initializes SubprocessIOChunker
                      :param cmd: A Subprocess.Popen style "cmd". Can be string or array of strings
-                     :param inputstream: (Default: None) A file-like, string, or file pointer.
+                     :param input_stream: (Default: None) A file-like, string, or file pointer.
                      :param buffer_size: (Default: 65536) A size of total buffer per stream in bytes.
                      :param chunk_size: (Default: 4096) A max size of a chunk. Actual chunk may be smaller.
                      :param starting_values: (Default: []) An array of strings to put in front of output queue.
                                                  exception if the return code is not 0.
                      """
+                     kwargs['shell'] = kwargs.get('shell', True)
                      starting_values = starting_values or []
-                     if inputstream:
-                         input_streamer = StreamFeeder(inputstream)
+                     if input_stream:
+                         input_streamer = StreamFeeder(input_stream)
                          input_streamer.start()
-                         inputstream = input_streamer.output
-                         self._close_input_fd = inputstream
+                         input_stream = input_streamer.output
+                         self._close_input_fd = input_stream
                      self._fail_on_stderr = fail_on_stderr
                      self._fail_on_return_code = fail_on_return_code
-                     _shell = kwargs.get('shell', True)
-                     kwargs['shell'] = _shell
+                     self.cmd = cmd
-                     _p = subprocess.Popen(cmd, bufsize=-1,
-                                           stdin=inputstream,
-                                           stdout=subprocess.PIPE,
-                                           stderr=subprocess.PIPE,
+                     _p = subprocess.Popen(cmd, bufsize=-1, stdin=input_stream, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                            **kwargs)
+                     self.process = _p
-                     bg_out = BufferedGenerator(_p.stdout, buffer_size, chunk_size,
-                                                starting_values)
-                     bg_err = BufferedGenerator(_p.stderr, 16000, 1, bottomless=True)
+                     bg_out = BufferedGenerator('stdout', _p.stdout, buffer_size, chunk_size, starting_values)
+                     bg_err = BufferedGenerator('stderr', _p.stderr, 10240, 1, bottomless=True)
                      while not bg_out.done_reading and not bg_out.reading_paused and not bg_err.length:
                          # doing this until we reach either end of file, or end of buffer.
-                         bg_out.data_added_event.wait(1)
+                         bg_out.data_added_event.wait(0.2)
                          bg_out.data_added_event.clear()
                      # at this point it's still ambiguous if we are done reading or just full buffer.
                      # Either way, if error (returned by ended process, or implied based on
                      # presence of stuff in stderr output) we error out.
                      # Else, we are happy.
-                     _returncode = _p.poll()
+                     return_code = _p.poll()
+                     ret_code_ok = return_code in [None, 0]
+                     ret_code_fail = return_code is not None and return_code != 0
+                     if (
+                         (ret_code_fail and fail_on_return_code) or
+                         (ret_code_ok and fail_on_stderr and bg_err.length)
+                     ):
-                     if ((_returncode and fail_on_return_code) or
-                             (fail_on_stderr and _returncode is None and bg_err.length)):
                          try:
                              _p.terminate()
                          except Exception:
                              pass
                          bg_out.stop()
+                         out = b''.join(bg_out)
+                         self._stdout = out
                          bg_err.stop()
-                         if fail_on_stderr:
-                             err = ''.join(bg_err)
-                             raise EnvironmentError(
-                                 "Subprocess exited due to an error:\n" + err)
-                         if _returncode and fail_on_return_code:
-                             err = ''.join(bg_err)
+                         err = b''.join(bg_err)
+                         self._stderr = err
+                         # code from https://github.com/schacon/grack/pull/7
+                         if err.strip() == b'fatal: The remote end hung up unexpectedly' and out.startswith(b'0034shallow '):
+                             bg_out = iter([out])
+                             _p = None
+                         elif err and fail_on_stderr:
+                             text_err = err.decode()
+                             raise OSError(
+                                 f"Subprocess exited due to an error:\n{text_err}")
+                         if ret_code_fail and fail_on_return_code:
+                             text_err = err.decode()
                              if not err:
                                  # maybe get empty stderr, try stdout instead
                                  # in many cases git reports the errors on stdout too
-                                 err = ''.join(bg_out)
-                             raise EnvironmentError(
-                                 "Subprocess exited with non 0 ret code:%s: stderr:%s" % (
-                                     _returncode, err))
+                                 text_err = out.decode()
+                             raise OSError(
+                                 f"Subprocess exited with non 0 ret code:{return_code}: stderr:{text_err}")
-                     self.process = _p
-                     self.output = bg_out
-                     self.error = bg_err
-                     self.inputstream = inputstream
+                     self.stdout = bg_out
+                     self.stderr = bg_err
+                     self.inputstream = input_stream
+                 def __str__(self):
+                     proc = getattr(self, 'process', 'NO_PROCESS')
+                     return f'SubprocessIOChunker: {proc}'
                  def __iter__(self):
                      return self
-                 def next(self):
+                 def __next__(self):
                      # Note: mikhail: We need to be sure that we are checking the return
                      # code after the stdout stream is closed. Some processes, e.g. git
                      # are doing some magic in between closing stdout and terminating the
                      result = None
                      stop_iteration = None
                      try:
-                         result = self.output.next()
+                         result = next(self.stdout)
                      except StopIteration as e:
                          stop_iteration = e
-                     if self.process.poll() and self._fail_on_return_code:
-                         err = '%s' % ''.join(self.error)
-                         raise EnvironmentError(
-                             "Subprocess exited due to an error:\n" + err)
+                     if self.process:
+                         return_code = self.process.poll()
+                         ret_code_fail = return_code is not None and return_code != 0
+                         if ret_code_fail and self._fail_on_return_code:
+                             self.stop_streams()
+                             err = self.get_stderr()
+                             raise OSError(
+                                 f"Subprocess exited (exit_code:{return_code}) due to an error during iteration:\n{err}")
                      if stop_iteration:
                          raise stop_iteration
                      return result
-                 def throw(self, type, value=None, traceback=None):
-                     if self.output.length or not self.output.done_reading:
-                         raise type(value)
+                 def throw(self, exc_type, value=None, traceback=None):
+                     if self.stdout.length or not self.stdout.done_reading:
+                         raise exc_type(value)
                  def close(self):
                      if self._closed:
                          return
-                     self._closed = True
                      try:
                          self.process.terminate()
                      except Exception:
                      if self._close_input_fd:
                          os.close(self._close_input_fd)
                      try:
-                         self.output.close()
+                         self.stdout.close()
                      except Exception:
                          pass
                      try:
-                         self.error.close()
+                         self.stderr.close()
                      except Exception:
                          pass
                      try:
                      except Exception:
                          pass
+                     self._closed = True
+                 def stop_streams(self):
+                     getattr(self.stdout, 'stop', lambda: None)()
+                     getattr(self.stderr, 'stop', lambda: None)()
+                 def get_stdout(self):
+                     if self._stdout:
+                         return self._stdout
+                     else:
+                         return b''.join(self.stdout)
+                 def get_stderr(self):
+                     if self._stderr:
+                         return self._stderr
+                     else:
+                         return b''.join(self.stderr)
              def run_command(arguments, env=None):
                  """
                      if env:
                          _opts.update({'env': env})
                      proc = SubprocessIOChunker(cmd, **_opts)
-                     return ''.join(proc), ''.join(proc.error)
-                 except (EnvironmentError, OSError) as err:
-                     cmd = ' '.join(cmd)  # human friendly CMD
+                     return b''.join(proc), b''.join(proc.stderr)
+                 except OSError as err:
+                     cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
                      tb_err = ("Couldn't run subprocessio command (%s).\n"
                                "Original error was:%s\n" % (cmd, err))
                      log.exception(tb_err)

vcsserver/svn_diff.py

0 +43 -40

-             # -*- coding: utf-8 -*-
-             #
              # Copyright (C) 2004-2009 Edgewall Software
              # Copyright (C) 2004-2006 Christopher Lenz <cmlenz@gmx.de>
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              # All rights reserved.
              #
              # This software is licensed as described in the file COPYING, which
              import difflib
-             def get_filtered_hunks(fromlines, tolines, context=None,
-                                    ignore_blank_lines=False, ignore_case=False,
-                                    ignore_space_changes=False):
+             def get_filtered_hunks(from_lines, to_lines, context=None,
+                                    ignore_blank_lines: bool = False, ignore_case: bool = False,
+                                    ignore_space_changes: bool = False):
                  """Retrieve differences in the form of `difflib.SequenceMatcher`
                  opcodes, grouped according to the ``context`` and ``ignore_*``
                  parameters.
-                 :param fromlines: list of lines corresponding to the old content
-                 :param tolines: list of lines corresponding to the new content
+                 :param from_lines: list of lines corresponding to the old content
+                 :param to_lines: list of lines corresponding to the new content
                  :param ignore_blank_lines: differences about empty lines only are ignored
                  :param ignore_case: upper case / lower case only differences are ignored
                  :param ignore_space_changes: differences in amount of spaces are ignored
                  to filter out the results will come straight from the
                  SequenceMatcher.
                  """
-                 hunks = get_hunks(fromlines, tolines, context)
+                 hunks = get_hunks(from_lines, to_lines, context)
                  if ignore_space_changes or ignore_case or ignore_blank_lines:
-                     hunks = filter_ignorable_lines(hunks, fromlines, tolines, context,
+                     hunks = filter_ignorable_lines(hunks, from_lines, to_lines, context,
                                                     ignore_blank_lines, ignore_case,
                                                     ignore_space_changes)
                  return hunks
-             def get_hunks(fromlines, tolines, context=None):
+             def get_hunks(from_lines, to_lines, context=None):
                  """Generator yielding grouped opcodes describing differences .
                  See `get_filtered_hunks` for the parameter descriptions.
                  """
-                 matcher = difflib.SequenceMatcher(None, fromlines, tolines)
+                 matcher = difflib.SequenceMatcher(None, from_lines, to_lines)
                  if context is None:
                      return (hunk for hunk in [matcher.get_opcodes()])
                  else:
                      return matcher.get_grouped_opcodes(context)
-             def filter_ignorable_lines(hunks, fromlines, tolines, context,
+             def filter_ignorable_lines(hunks, from_lines, to_lines, context,
                                         ignore_blank_lines, ignore_case,
                                         ignore_space_changes):
                  """Detect line changes that should be ignored and emits them as
                  See `get_filtered_hunks` for the parameter descriptions.
                  """
                  def is_ignorable(tag, fromlines, tolines):
                      if tag == 'delete' and ignore_blank_lines:
-                         if ''.join(fromlines) == '':
+                         if b''.join(fromlines) == b'':
                              return True
                      elif tag == 'insert' and ignore_blank_lines:
-                         if ''.join(tolines) == '':
+                         if b''.join(tolines) == b'':
                              return True
                      elif tag == 'replace' and (ignore_case or ignore_space_changes):
                          if len(fromlines) != len(tolines):
                              if ignore_case:
                                  input_str = input_str.lower()
                              if ignore_space_changes:
-                                 input_str = ' '.join(input_str.split())
+                                 input_str = b' '.join(input_str.split())
                              return input_str
                          for i in range(len(fromlines)):
                              else:
                                  prev = (tag, i1, i2, j1, j2)
                          else:
-                             if is_ignorable(tag, fromlines[i1:i2], tolines[j1:j2]):
+                             if is_ignorable(tag, from_lines[i1:i2], to_lines[j1:j2]):
                                  ignored_lines = True
                                  if prev:
                                      prev = 'equal', prev[1], i2, prev[3], j2
                          nn = n + n
                          group = []
                          def all_equal():
                              all(op[0] == 'equal' for op in group)
                          for idx, (tag, i1, i2, j1, j2) in enumerate(opcodes):
-                             if idx == 0 and tag == 'equal': # Fixup leading unchanged block
+                             if idx == 0 and tag == 'equal':  # Fixup leading unchanged block
                                  i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
                              elif tag == 'equal' and i2 - i1 > nn:
                                  group.append((tag, i1, min(i2, i1 + n), j1,
                              group.append((tag, i1, i2, j1, j2))
                          if group and not (len(group) == 1 and group[0][0] == 'equal'):
-                             if group[-1][0] == 'equal': # Fixup trailing unchanged block
+                             if group[-1][0] == 'equal':  # Fixup trailing unchanged block
                                  tag, i1, i2, j1, j2 = group[-1]
                                  group[-1] = tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)
                              if not all_equal():
                          yield hunk
-             NO_NEWLINE_AT_END = '\\ No newline at end of file'
+             NO_NEWLINE_AT_END = b'\\ No newline at end of file'
+             LINE_TERM = b'\n'
-             def unified_diff(fromlines, tolines, context=None, ignore_blank_lines=0,
-                              ignore_case=0, ignore_space_changes=0, lineterm='\n'):
+             def unified_diff(from_lines, to_lines, context=None, ignore_blank_lines: bool = False,
+                              ignore_case: bool = False, ignore_space_changes: bool = False, lineterm=LINE_TERM) -> bytes:
                  """
                  Generator producing lines corresponding to a textual diff.
                  See `get_filtered_hunks` for the parameter descriptions.
                  """
                  # TODO: johbo: Check if this can be nicely integrated into the matching
                  if ignore_space_changes:
-                     fromlines = [l.strip() for l in fromlines]
-                     tolines = [l.strip() for l in tolines]
+                     from_lines = [l.strip() for l in from_lines]
+                     to_lines = [l.strip() for l in to_lines]
-                 for group in get_filtered_hunks(fromlines, tolines, context,
+                 def _hunk_range(start, length) -> bytes:
+                     if length != 1:
+                         return b'%d,%d' % (start, length)
+                     else:
+                         return b'%d' % (start,)
+                 for group in get_filtered_hunks(from_lines, to_lines, context,
                                                  ignore_blank_lines, ignore_case,
                                                  ignore_space_changes):
                      i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
                          i1, i2 = -1, -1  # support for Add changes
                      if j1 == 0 and j2 == 0:
                          j1, j2 = -1, -1  # support for Delete changes
-                     yield '@@ -%s +%s @@%s' % (
+                     yield b'@@ -%b +%b @@%b' % (
                          _hunk_range(i1 + 1, i2 - i1),
                          _hunk_range(j1 + 1, j2 - j1),
                          lineterm)
                      for tag, i1, i2, j1, j2 in group:
                          if tag == 'equal':
-                             for line in fromlines[i1:i2]:
+                             for line in from_lines[i1:i2]:
                                  if not line.endswith(lineterm):
-                                     yield ' ' + line + lineterm
+                                     yield b' ' + line + lineterm
                                      yield NO_NEWLINE_AT_END + lineterm
                                  else:
-                                     yield ' ' + line
+                                     yield b' ' + line
                          else:
                              if tag in ('replace', 'delete'):
-                                 for line in fromlines[i1:i2]:
+                                 for line in from_lines[i1:i2]:
                                      if not line.endswith(lineterm):
-                                         yield '-' + line + lineterm
+                                         yield b'-' + line + lineterm
                                          yield NO_NEWLINE_AT_END + lineterm
                                      else:
-                                         yield '-' + line
+                                         yield b'-' + line
                              if tag in ('replace', 'insert'):
-                                 for line in tolines[j1:j2]:
+                                 for line in to_lines[j1:j2]:
                                      if not line.endswith(lineterm):
-                                         yield '+' + line + lineterm
+                                         yield b'+' + line + lineterm
                                          yield NO_NEWLINE_AT_END + lineterm
                                      else:
-                                         yield '+' + line
-             def _hunk_range(start, length):
-                 if length != 1:
-                     return '%d,%d' % (start, length)
-                 else:
-                     return '%d' % (start, )
+                                         yield b'+' + line

vcsserver/tests/__init__.py

0 +1 -1

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by

vcsserver/tests/fixture.py

0 +12 -13

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              import os
              import shutil
              import tempfile
-             import configobj
+             import configparser
-             class ContextINI(object):
+             class ContextINI:
                  """
                  Allows to create a new test.ini file as a copy of existing one with edited
                  data. If existing file is not present, it creates a new one. Example usage::
                          with open(self.new_path, 'wb'):
                              pass
-                     config = configobj.ConfigObj(
-                         self.new_path, file_error=True, write_empty_values=True)
+                     parser = configparser.ConfigParser()
+                     parser.read(self.ini_file_path)
                      for data in self.ini_params:
-                         section, ini_params = data.items()[0]
-                         key, val = ini_params.items()[0]
-                         if section not in config:
-                             config[section] = {}
-                         config[section][key] = val
-                     config.write()
+                         section, ini_params = list(data.items())[0]
+                         key, val = list(ini_params.items())[0]
+                         if section not in parser:
+                             parser[section] = {}
+                         parser[section][key] = val
+                     with open(self.ini_file_path, 'w') as f:
+                         parser.write(f)
                      return self.new_path
                  def __exit__(self, exc_type, exc_val, exc_tb):

vcsserver/tests/test_git.py

0 +33 -31

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              import dulwich.errors
              from mock import Mock, patch
-             from vcsserver import git
+             from vcsserver.remote import git_remote
              SAMPLE_REFS = {
                  'HEAD': 'fd627b9e0dd80b47be81af07c4a98518244ed2f7',
              @pytest.fixture
-             def git_remote():
+             def git_remote_fix():
                  """
                  A GitRemote instance with a mock factory.
                  """
                  factory = Mock()
-                 remote = git.GitRemote(factory)
+                 remote = git_remote.GitRemote(factory)
                  return remote
-             def test_discover_git_version(git_remote):
-                 version = git_remote.discover_git_version()
+             def test_discover_git_version(git_remote_fix):
+                 version = git_remote_fix.discover_git_version()
                  assert version
-             class TestGitFetch(object):
-                 def setup(self):
+             class TestGitFetch:
+                 def setup_method(self):
                      self.mock_repo = Mock()
                      factory = Mock()
                      factory.repo = Mock(return_value=self.mock_repo)
-                     self.remote_git = git.GitRemote(factory)
+                     self.remote_git = git_remote.GitRemote(factory)
                  def test_fetches_all_when_no_commit_ids_specified(self):
                      def side_effect(determine_wants, *args, **kwargs):
                  def test_fetches_specified_commits(self):
                      selected_refs = {
-                         'refs/tags/v0.1.8': '74ebce002c088b8a5ecf40073db09375515ecd68',
-                         'refs/tags/v0.1.3': '5a3a8fb005554692b16e21dee62bf02667d8dc3e',
+                         'refs/tags/v0.1.8': b'74ebce002c088b8a5ecf40073db09375515ecd68',
+                         'refs/tags/v0.1.3': b'5a3a8fb005554692b16e21dee62bf02667d8dc3e',
                      }
                      def side_effect(determine_wants, *args, **kwargs):
                          mock_fetch.side_effect = side_effect
                          self.remote_git.pull(
                              wire={}, url='/tmp/', apply_refs=False,
-                             refs=selected_refs.keys())
+                             refs=list(selected_refs.keys()))
                          determine_wants = self.mock_repo.object_store.determine_wants_all
                          assert determine_wants.call_count == 0
                  def test_get_remote_refs(self):
                      factory = Mock()
-                     remote_git = git.GitRemote(factory)
-                     url = 'http://example.com/test/test.git'
+                     remote_git = git_remote.GitRemote(factory)
+                     url = 'https://example.com/test/test.git'
                      sample_refs = {
                          'refs/tags/v0.1.8': '74ebce002c088b8a5ecf40073db09375515ecd68',
                          'refs/tags/v0.1.3': '5a3a8fb005554692b16e21dee62bf02667d8dc3e',
                      }
-                     with patch('vcsserver.git.Repo', create=False) as mock_repo:
+                     with patch('vcsserver.remote.git_remote.Repo', create=False) as mock_repo:
                          mock_repo().get_refs.return_value = sample_refs
                          remote_refs = remote_git.get_remote_refs(wire={}, url=url)
                          mock_repo().get_refs.assert_called_once_with()
                          assert remote_refs == sample_refs
-             class TestReraiseSafeExceptions(object):
+             class TestReraiseSafeExceptions:
                  def test_method_decorated_with_reraise_safe_exceptions(self):
                      factory = Mock()
-                     git_remote = git.GitRemote(factory)
+                     git_remote_instance = git_remote.GitRemote(factory)
                      def fake_function():
                          return None
-                     decorator = git.reraise_safe_exceptions(fake_function)
+                     decorator = git_remote.reraise_safe_exceptions(fake_function)
-                     methods = inspect.getmembers(git_remote, predicate=inspect.ismethod)
+                     methods = inspect.getmembers(git_remote_instance, predicate=inspect.ismethod)
                      for method_name, method in methods:
                          if not method_name.startswith('_') and method_name not in ['vcsserver_invalidate_cache']:
-                             assert method.im_func.__code__ == decorator.__code__
+                             assert method.__func__.__code__ == decorator.__code__
                  @pytest.mark.parametrize('side_effect, expected_type', [
                      (dulwich.errors.ChecksumMismatch('0000000', 'deadbeef'), 'lookup'),
                      (dulwich.errors.UnexpectedCommandError('test-cmd'), 'error'),
                  ])
                  def test_safe_exceptions_reraised(self, side_effect, expected_type):
-                     @git.reraise_safe_exceptions
+                     @git_remote.reraise_safe_exceptions
                      def fake_method():
                          raise side_effect
                      assert exc_info.value._vcs_kind == expected_type
-             class TestDulwichRepoWrapper(object):
+             class TestDulwichRepoWrapper:
                  def test_calls_close_on_delete(self):
                      isdir_patcher = patch('dulwich.repo.os.path.isdir', return_value=True)
-                     with isdir_patcher:
-                         repo = git.Repo('/tmp/abcde')
-                     with patch.object(git.DulwichRepo, 'close') as close_mock:
-                         del repo
-                     close_mock.assert_called_once_with()
+                     with patch.object(git_remote.Repo, 'close') as close_mock:
+                         with isdir_patcher:
+                             repo = git_remote.Repo('/tmp/abcde')
+                             assert repo is not None
+                             repo.__del__()
+                             # can't use del repo as in python3 this isn't always calling .__del__()
+                         close_mock.assert_called_once_with()
-             class TestGitFactory(object):
+             class TestGitFactory:
                  def test_create_repo_returns_dulwich_wrapper(self):
                      with patch('vcsserver.lib.rc_cache.region_meta.dogpile_cache_regions') as mock:
                          mock.side_effect = {'repo_objects': ''}
-                         factory = git.GitFactory()
+                         factory = git_remote.GitFactory()
                          wire = {
                              'path': '/tmp/abcde'
                          }
                          isdir_patcher = patch('dulwich.repo.os.path.isdir', return_value=True)
                          with isdir_patcher:
                              result = factory._create_repo(wire, True)
-                         assert isinstance(result, git.Repo)
+                         assert isinstance(result, git_remote.Repo)

vcsserver/tests/test_hg.py

0 +28 -24

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              import pytest
              from mercurial.error import LookupError
-             from mock import Mock, MagicMock, patch
+             from mock import Mock, patch
-             from vcsserver import exceptions, hg, hgcompat
+             from vcsserver import exceptions, hgcompat
+             from vcsserver.remote import hg_remote
-             class TestDiff(object):
+             class TestDiff:
                  def test_raising_safe_exception_when_lookup_failed(self):
                      factory = Mock()
-                     hg_remote = hg.HgRemote(factory)
+                     hg_remote_instance = hg_remote.HgRemote(factory)
                      with patch('mercurial.patch.diff') as diff_mock:
-                         diff_mock.side_effect = LookupError(
-                             'deadbeef', 'index', 'message')
+                         diff_mock.side_effect = LookupError(b'deadbeef', b'index', b'message')
                          with pytest.raises(Exception) as exc_info:
-                             hg_remote.diff(
+                             hg_remote_instance.diff(
                                  wire={}, commit_id_1='deadbeef', commit_id_2='deadbee1',
                                  file_filter=None, opt_git=True, opt_ignorews=True,
                                  context=3)
                          assert exc_info.value._vcs_kind == 'lookup'
-             class TestReraiseSafeExceptions(object):
+             class TestReraiseSafeExceptions:
+                 original_traceback = None
                  def test_method_decorated_with_reraise_safe_exceptions(self):
                      factory = Mock()
-                     hg_remote = hg.HgRemote(factory)
-                     methods = inspect.getmembers(hg_remote, predicate=inspect.ismethod)
-                     decorator = hg.reraise_safe_exceptions(None)
+                     hg_remote_instance = hg_remote.HgRemote(factory)
+                     methods = inspect.getmembers(hg_remote_instance, predicate=inspect.ismethod)
+                     decorator = hg_remote.reraise_safe_exceptions(None)
                      for method_name, method in methods:
                          if not method_name.startswith('_') and method_name not in ['vcsserver_invalidate_cache']:
-                             assert method.im_func.__code__ == decorator.__code__
+                             assert method.__func__.__code__ == decorator.__code__
                  @pytest.mark.parametrize('side_effect, expected_type', [
-                     (hgcompat.Abort(), 'abort'),
-                     (hgcompat.InterventionRequired(), 'abort'),
+                     (hgcompat.Abort(b'failed-abort'), 'abort'),
+                     (hgcompat.InterventionRequired(b'intervention-required'), 'abort'),
                      (hgcompat.RepoLookupError(), 'lookup'),
-                     (hgcompat.LookupError('deadbeef', 'index', 'message'), 'lookup'),
+                     (hgcompat.LookupError(b'deadbeef', b'index', b'message'), 'lookup'),
                      (hgcompat.RepoError(), 'error'),
                      (hgcompat.RequirementError(), 'requirement'),
                  ])
                  def test_safe_exceptions_reraised(self, side_effect, expected_type):
-                     @hg.reraise_safe_exceptions
+                     @hg_remote.reraise_safe_exceptions
                      def fake_method():
                          raise side_effect
                      assert exc_info.value._vcs_kind == expected_type
                  def test_keeps_original_traceback(self):
-                     @hg.reraise_safe_exceptions
+                     @hg_remote.reraise_safe_exceptions
                      def fake_method():
                          try:
-                             raise hgcompat.Abort()
+                             raise hgcompat.Abort(b'test-abort')
                          except:
-                             self.original_traceback = traceback.format_tb(
-                                 sys.exc_info()[2])
+                             self.original_traceback = traceback.format_tb(sys.exc_info()[2])
                              raise
+                     new_traceback = None
                      try:
                          fake_method()
                      except Exception:
                      new_traceback_tail = new_traceback[-len(self.original_traceback):]
                      assert new_traceback_tail == self.original_traceback
-                 def test_maps_unknow_exceptions_to_unhandled(self):
-                     @hg.reraise_safe_exceptions
+                 def test_maps_unknown_exceptions_to_unhandled(self):
+                     @hg_remote.reraise_safe_exceptions
                      def stub_method():
                          raise ValueError('stub')
                      assert exc_info.value._vcs_kind == 'unhandled'
                  def test_does_not_map_known_exceptions(self):
-                     @hg.reraise_safe_exceptions
+                     @hg_remote.reraise_safe_exceptions
                      def stub_method():
                          raise exceptions.LookupException()('stub')

vcsserver/tests/test_hgpatches.py

0 +3 -8

              # RhodeCode VCSServer provides access to different vcs backends via network.
-             # Copyright (C) 2014-2020 RhodeCode GmbH
+             # Copyright (C) 2014-2023 RhodeCode GmbH
              #
              # This program is free software; you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              from vcsserver import hgcompat, hgpatches
-             LARGEFILES_CAPABILITY = 'largefiles=serve'
+             LARGEFILES_CAPABILITY = b'largefiles=serve'
              def test_patch_largefiles_capabilities_applies_patch(
                      patched_capabilities):
                  lfproto = hgcompat.largefiles.proto
                  hgpatches.patch_largefiles_capabilities()
-                 assert lfproto._capabilities.func_name == '_dynamic_capabilities'
+                 assert lfproto._capabilities.__name__ == '_dynamic_capabilities'
              def test_dynamic_capabilities_uses_original_function_if_not_enabled(
                  assert LARGEFILES_CAPABILITY in caps
-             def test_hgsubversion_import():
-                 from hgsubversion import svnrepo
-                 assert svnrepo
              @pytest.fixture
              def patched_capabilities(request):
                  """

vcsserver/tests/test_hooks.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_http_performance.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_install_hooks.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_main_http.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_pygrack.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_scm_app.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_server.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_subprocessio.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_svn.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tests/test_wsgi_app_caller.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tweens/__init__.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/tweens/request_wrapper.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/utils.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/vcs_base.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

vcsserver/wsgi_app_caller.py

0 0 0

	1		NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff

default.nix

0 removed 0 -197

NO CONTENT: file was removed

pip2nix.ini

0 removed 0 -3

NO CONTENT: file was removed

pkgs/README.rst

0 removed 0 -28

NO CONTENT: file was removed

pkgs/nix-common/pip2nix.nix

0 removed 0 -17

NO CONTENT: file was removed

pkgs/overlays.nix

0 removed 0 -83

NO CONTENT: file was removed

pkgs/patches/configparser/pyproject.patch

0 removed 0 -10

NO CONTENT: file was removed

pkgs/patches/dulwich/handle-dir-refs.patch

0 removed 0 -15

NO CONTENT: file was removed

pkgs/patches/git/docbook2texi.patch

0 removed 0 -38

NO CONTENT: file was removed

pkgs/patches/git/git-send-email-honor-PATH.patch

0 removed 0 -28

NO CONTENT: file was removed

pkgs/patches/git/git-sh-i18n.patch

0 removed 0 -23

NO CONTENT: file was removed

pkgs/patches/git/installCheck-path.patch

0 removed 0 -13

NO CONTENT: file was removed

pkgs/patches/git/ssh-path.patch

0 removed 0 -26

NO CONTENT: file was removed

pkgs/patches/importlib_metadata/pyproject.patch

0 removed 0 -7

NO CONTENT: file was removed

pkgs/patches/pytest/setuptools.patch

0 removed 0 -12

NO CONTENT: file was removed

pkgs/patches/zipp/pyproject.patch

0 removed 0 -10

NO CONTENT: file was removed

pkgs/python-packages-overrides.nix

0 removed 0 -126

NO CONTENT: file was removed

pkgs/python-packages.nix

0 removed 0 -1103

	1		NO CONTENT: file was removed
This diff has been collapsed as it changes many lines, (1103 lines changed) Show them Hide them

pkgs/shell-generate.nix

0 removed 0 -42

NO CONTENT: file was removed

release.nix

0 removed 0 -22

NO CONTENT: file was removed

requirements_pinned.txt

0 removed 0 -18

NO CONTENT: file was removed

setup.cfg

0 removed 0 -2

NO CONTENT: file was removed

setup.py

0 removed 0 -136

NO CONTENT: file was removed

shell.nix

0 removed 0 -66

NO CONTENT: file was removed

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages